Ajout notebook pour statistiques descriptives
This commit is contained in:
parent
89b05d9366
commit
335c798a65
229
1_Descriptive_Statistics.ipynb
Normal file
229
1_Descriptive_Statistics.ipynb
Normal file
|
@ -0,0 +1,229 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "3f41343f-7205-41d9-89dd-88039e301413",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Statistiques descriptives"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "abfaf341-7b35-4407-9133-d21336c04027",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import os\n",
|
||||||
|
"import s3fs\n",
|
||||||
|
"import re"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "7fb72fa3-7940-496f-ac78-c2837f65eefa",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Connect to the MinIO object store.\n",
"# Credentials are read from environment variables and must be exported in the\n",
"# session before running this cell. Never paste keys or tokens into the\n",
"# notebook: anything committed stays in the git history. The credentials that\n",
"# were previously hardcoded here must be treated as compromised and revoked.\n",
"fs = s3fs.S3FileSystem(\n",
"    client_kwargs={\n",
"        # Same endpoint as before by default; override with AWS_S3_ENDPOINT.\n",
"        'endpoint_url': 'https://' + os.environ.get('AWS_S3_ENDPOINT', 'minio-simple.lab.groupe-genes.fr')\n",
"    },\n",
"    key=os.environ['AWS_ACCESS_KEY_ID'],\n",
"    secret=os.environ['AWS_SECRET_ACCESS_KEY'],\n",
"    # The session token is optional (only needed for temporary credentials).\n",
"    token=os.environ.get('AWS_SESSION_TOKEN'),\n",
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "45d5261f-4d46-49cb-8582-dd2121122b05",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# 1 - Comportement d'achat"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"id": "9376af51-4320-44b6-8f30-1e1234371556",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Load the temporary purchasing-behaviour extract from the team bucket\n",
"BUCKET = \"projet-bdc2324-team1\"\n",
"FILE_KEY_S3 = \"0_Temp/Company 1 - Purchasing behaviour.csv\"\n",
"FILE_PATH_S3 = \"/\".join((BUCKET, FILE_KEY_S3))\n",
"\n",
"# Read the CSV through the S3 filesystem handle opened in binary mode\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as csv_stream:\n",
"    tickets_kpi = pd.read_csv(csv_stream, sep=\",\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"id": "1855dcca-cfce-4c54-90ae-55d9a1ab5d45",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Unnamed: 0</th>\n",
|
||||||
|
" <th>customer_id</th>\n",
|
||||||
|
" <th>event_type_id</th>\n",
|
||||||
|
" <th>nb_tickets</th>\n",
|
||||||
|
" <th>total_amount</th>\n",
|
||||||
|
" <th>nb_suppliers</th>\n",
|
||||||
|
" <th>vente_internet_max</th>\n",
|
||||||
|
" <th>purchase_date_min</th>\n",
|
||||||
|
" <th>purchase_date_max</th>\n",
|
||||||
|
" <th>time_between_purchase</th>\n",
|
||||||
|
" <th>nb_tickets_internet</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>384226</td>\n",
|
||||||
|
" <td>2686540.5</td>\n",
|
||||||
|
" <td>7</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>2014-12-03 14:55:37+00:00</td>\n",
|
||||||
|
" <td>2023-11-04 15:12:16+00:00</td>\n",
|
||||||
|
" <td>3258 days 00:16:39</td>\n",
|
||||||
|
" <td>51.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>453242</td>\n",
|
||||||
|
" <td>3248965.5</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>2013-09-23 14:45:01+00:00</td>\n",
|
||||||
|
" <td>2023-11-03 14:11:01+00:00</td>\n",
|
||||||
|
" <td>3692 days 23:26:00</td>\n",
|
||||||
|
" <td>2988.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>201750</td>\n",
|
||||||
|
" <td>1459190.0</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>2013-06-10 10:37:58+00:00</td>\n",
|
||||||
|
" <td>2023-11-08 15:59:45+00:00</td>\n",
|
||||||
|
" <td>3803 days 05:21:47</td>\n",
|
||||||
|
" <td>9.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>217356</td>\n",
|
||||||
|
" <td>1435871.5</td>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>2017-01-01 02:20:08+00:00</td>\n",
|
||||||
|
" <td>2019-12-31 02:20:06+00:00</td>\n",
|
||||||
|
" <td>1093 days 23:59:58</td>\n",
|
||||||
|
" <td>5.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>143</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>2018-04-07 12:55:07+00:00</td>\n",
|
||||||
|
" <td>2020-03-08 12:06:43+00:00</td>\n",
|
||||||
|
" <td>700 days 23:11:36</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" Unnamed: 0 customer_id event_type_id nb_tickets total_amount \\\n",
|
||||||
|
"0 0 1 2 384226 2686540.5 \n",
|
||||||
|
"1 1 1 4 453242 3248965.5 \n",
|
||||||
|
"2 2 1 5 201750 1459190.0 \n",
|
||||||
|
"3 3 1 6 217356 1435871.5 \n",
|
||||||
|
"4 4 2 2 143 0.0 \n",
|
||||||
|
"\n",
|
||||||
|
" nb_suppliers vente_internet_max purchase_date_min \\\n",
|
||||||
|
"0 7 1 2014-12-03 14:55:37+00:00 \n",
|
||||||
|
"1 6 1 2013-09-23 14:45:01+00:00 \n",
|
||||||
|
"2 6 1 2013-06-10 10:37:58+00:00 \n",
|
||||||
|
"3 5 1 2017-01-01 02:20:08+00:00 \n",
|
||||||
|
"4 1 0 2018-04-07 12:55:07+00:00 \n",
|
||||||
|
"\n",
|
||||||
|
" purchase_date_max time_between_purchase nb_tickets_internet \n",
|
||||||
|
"0 2023-11-04 15:12:16+00:00 3258 days 00:16:39 51.0 \n",
|
||||||
|
"1 2023-11-03 14:11:01+00:00 3692 days 23:26:00 2988.0 \n",
|
||||||
|
"2 2023-11-08 15:59:45+00:00 3803 days 05:21:47 9.0 \n",
|
||||||
|
"3 2019-12-31 02:20:06+00:00 1093 days 23:59:58 5.0 \n",
|
||||||
|
"4 2020-03-08 12:06:43+00:00 700 days 23:11:36 0.0 "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 12,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"tickets_kpi.head()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.13"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user