diff --git a/1_Descriptive_Statistics.ipynb b/1_Descriptive_Statistics.ipynb new file mode 100644 index 0000000..0eefa74 --- /dev/null +++ b/1_Descriptive_Statistics.ipynb @@ -0,0 +1,229 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3f41343f-7205-41d9-89dd-88039e301413", + "metadata": {}, + "source": [ + "# Statistiques descriptives" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "abfaf341-7b35-4407-9133-d21336c04027", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "import s3fs\n", + "import re" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7fb72fa3-7940-496f-ac78-c2837f65eefa", + "metadata": {}, + "outputs": [], + "source": [ + "# MinIO (S3) filesystem handle; credentials are read from the AWS_* environment variables so that no secret is ever committed with the notebook\n", + "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': 'https://' + os.environ.get('AWS_S3_ENDPOINT', 'minio-simple.lab.groupe-genes.fr')}, key=os.environ['AWS_ACCESS_KEY_ID'], secret=os.environ['AWS_SECRET_ACCESS_KEY'], token=os.environ.get('AWS_SESSION_TOKEN'))" + ] + }, + { + "cell_type": "markdown", + "id": "45d5261f-4d46-49cb-8582-dd2121122b05", + "metadata": {}, + "source": [ + "# 1 - Comportement d'achat" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "9376af51-4320-44b6-8f30-1e1234371556", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the temporary data\n", + "BUCKET = \"projet-bdc2324-team1\"\n", + "FILE_KEY_S3 = \"0_Temp/Company 1 - Purchasing behaviour.csv\"\n", + "FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " tickets_kpi = pd.read_csv(file_in, sep=\",\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1855dcca-cfce-4c54-90ae-55d9a1ab5d45", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Unnamed: 0 | \n", + "customer_id | \n", + "event_type_id | \n", + "nb_tickets | \n", + "total_amount | \n", + "nb_suppliers | \n", + "vente_internet_max | \n", + "purchase_date_min | \n", + "purchase_date_max | \n", + "time_between_purchase | \n", + "nb_tickets_internet | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0 | \n", + "1 | \n", + "2 | \n", + "384226 | \n", + "2686540.5 | \n", + "7 | \n", + "1 | \n", + "2014-12-03 14:55:37+00:00 | \n", + "2023-11-04 15:12:16+00:00 | \n", + "3258 days 00:16:39 | \n", + "51.0 | \n", + "
1 | \n", + "1 | \n", + "1 | \n", + "4 | \n", + "453242 | \n", + "3248965.5 | \n", + "6 | \n", + "1 | \n", + "2013-09-23 14:45:01+00:00 | \n", + "2023-11-03 14:11:01+00:00 | \n", + "3692 days 23:26:00 | \n", + "2988.0 | \n", + "
2 | \n", + "2 | \n", + "1 | \n", + "5 | \n", + "201750 | \n", + "1459190.0 | \n", + "6 | \n", + "1 | \n", + "2013-06-10 10:37:58+00:00 | \n", + "2023-11-08 15:59:45+00:00 | \n", + "3803 days 05:21:47 | \n", + "9.0 | \n", + "
3 | \n", + "3 | \n", + "1 | \n", + "6 | \n", + "217356 | \n", + "1435871.5 | \n", + "5 | \n", + "1 | \n", + "2017-01-01 02:20:08+00:00 | \n", + "2019-12-31 02:20:06+00:00 | \n", + "1093 days 23:59:58 | \n", + "5.0 | \n", + "
4 | \n", + "4 | \n", + "2 | \n", + "2 | \n", + "143 | \n", + "0.0 | \n", + "1 | \n", + "0 | \n", + "2018-04-07 12:55:07+00:00 | \n", + "2020-03-08 12:06:43+00:00 | \n", + "700 days 23:11:36 | \n", + "0.0 | \n", + "