{
"cells": [
{
"cell_type": "markdown",
"id": "56b3d44e-1e3f-4726-9916-0f9af107860e",
"metadata": {},
"source": [
"# Business Data Challenge - Team 1"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "15103481-8d74-404c-aa09-7601fe7730da",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"id": "c3bb0d13-34b2-4e1c-9985-468cd87c5a0e",
"metadata": {},
"source": [
"Configuration de l'accès aux données"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5d83bb1a-d341-446e-91f6-1c428607f6d4",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import s3fs\n",
"# Build the S3 endpoint URL from the AWS_S3_ENDPOINT environment variable,\n",
"# then create the filesystem handle that targets this endpoint.\n",
"S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n",
"fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))"
]
},
{
"cell_type": "markdown",
"id": "f99da24f-0d93-4618-92bc-3ba81dc0445c",
"metadata": {},
"source": [
"## Exemple sur bdc2324-data/11"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "699664b9-eee4-4f8d-a207-e524526560c5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['bdc2324-data/11/11campaign_stats.csv',\n",
" 'bdc2324-data/11/11campaigns.csv',\n",
" 'bdc2324-data/11/11categories.csv',\n",
" 'bdc2324-data/11/11countries.csv',\n",
" 'bdc2324-data/11/11currencies.csv',\n",
" 'bdc2324-data/11/11customer_target_mappings.csv',\n",
" 'bdc2324-data/11/11customersplus.csv',\n",
" 'bdc2324-data/11/11event_types.csv',\n",
" 'bdc2324-data/11/11events.csv',\n",
" 'bdc2324-data/11/11facilities.csv',\n",
" 'bdc2324-data/11/11link_stats.csv',\n",
" 'bdc2324-data/11/11pricing_formulas.csv',\n",
" 'bdc2324-data/11/11product_packs.csv',\n",
" 'bdc2324-data/11/11products.csv',\n",
" 'bdc2324-data/11/11products_groups.csv',\n",
" 'bdc2324-data/11/11purchases.csv',\n",
" 'bdc2324-data/11/11representation_category_capacities.csv',\n",
" 'bdc2324-data/11/11representations.csv',\n",
" 'bdc2324-data/11/11seasons.csv',\n",
" 'bdc2324-data/11/11structure_tag_mappings.csv',\n",
" 'bdc2324-data/11/11suppliers.csv',\n",
" 'bdc2324-data/11/11tags.csv',\n",
" 'bdc2324-data/11/11target_types.csv',\n",
" 'bdc2324-data/11/11targets.csv',\n",
" 'bdc2324-data/11/11tickets.csv']"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Prefix of the dataset explored in this section, followed by the listing of its files\n",
"BUCKET = 'bdc2324-data/11'\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "6d6201cd-a00b-4984-bcd8-72838717ad13",
"metadata": {},
"outputs": [],
"source": [
"# Load every table used below; each CSV ends up in a module-level DataFrame named after the table\n",
"liste_base = [\n",
"    'customer_target_mappings',\n",
"    'customersplus',\n",
"    'target_types',\n",
"    'tags',\n",
"    'events',\n",
"    'tickets',\n",
"    'representations',\n",
"    'purchases',\n",
"    'products',\n",
"]\n",
"\n",
"for nom_base in liste_base:\n",
"    FILE_PATH_S3 = f'bdc2324-data/11/11{nom_base}.csv'\n",
"    with fs.open(FILE_PATH_S3, mode='rb') as file_in:\n",
"        # globals() binds the frame to a variable carrying the table name (e.g. `tickets`)\n",
"        globals()[nom_base] = pd.read_csv(file_in, sep=',')"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "afe548fe-d93c-4634-9f53-881404ec4c6c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id_x | \n",
" purchase_date | \n",
" type_of | \n",
" is_from_subscription | \n",
" amount | \n",
" is_full_price | \n",
" start_date_time | \n",
" event_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 992423 | \n",
" 2023-01-11 17:08:41+01:00 | \n",
" 3 | \n",
" False | \n",
" 13.0 | \n",
" False | \n",
" 2023-02-06 20:00:00+01:00 | \n",
" zaide | \n",
"
\n",
" \n",
" 1 | \n",
" 992423 | \n",
" 2023-01-11 17:08:41+01:00 | \n",
" 3 | \n",
" False | \n",
" 13.0 | \n",
" False | \n",
" 2023-02-06 20:00:00+01:00 | \n",
" zaide | \n",
"
\n",
" \n",
" 2 | \n",
" 1053934 | \n",
" 2023-03-16 16:23:10+01:00 | \n",
" 3 | \n",
" False | \n",
" 62.0 | \n",
" False | \n",
" 2023-03-19 16:00:00+01:00 | \n",
" luisa miller | \n",
"
\n",
" \n",
" 3 | \n",
" 1053934 | \n",
" 2023-03-16 16:23:10+01:00 | \n",
" 3 | \n",
" False | \n",
" 62.0 | \n",
" False | \n",
" 2023-03-19 16:00:00+01:00 | \n",
" luisa miller | \n",
"
\n",
" \n",
" 4 | \n",
" 1189141 | \n",
" 2020-11-26 13:12:53+01:00 | \n",
" 3 | \n",
" False | \n",
" 51.3 | \n",
" False | \n",
" 2020-12-01 20:00:00+01:00 | \n",
" iphigenie en tauride | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 318964 | \n",
" 1090839 | \n",
" 2019-05-19 21:18:36+02:00 | \n",
" 1 | \n",
" False | \n",
" 4.5 | \n",
" False | \n",
" 2019-05-27 20:00:00+02:00 | \n",
" entre femmes | \n",
"
\n",
" \n",
" 318965 | \n",
" 1090839 | \n",
" 2019-05-19 21:18:36+02:00 | \n",
" 1 | \n",
" False | \n",
" 4.5 | \n",
" False | \n",
" 2019-05-27 20:00:00+02:00 | \n",
" entre femmes | \n",
"
\n",
" \n",
" 318966 | \n",
" 1090839 | \n",
" 2019-05-19 21:18:36+02:00 | \n",
" 1 | \n",
" False | \n",
" 4.5 | \n",
" False | \n",
" 2019-05-27 20:00:00+02:00 | \n",
" entre femmes | \n",
"
\n",
" \n",
" 318967 | \n",
" 1244277 | \n",
" 2019-12-31 11:04:07+01:00 | \n",
" 1 | \n",
" False | \n",
" 5.5 | \n",
" False | \n",
" 2020-02-03 20:00:00+01:00 | \n",
" a boire et a manger | \n",
"
\n",
" \n",
" 318968 | \n",
" 1244277 | \n",
" 2019-12-31 11:04:07+01:00 | \n",
" 1 | \n",
" False | \n",
" 5.5 | \n",
" False | \n",
" 2020-02-03 20:00:00+01:00 | \n",
" a boire et a manger | \n",
"
\n",
" \n",
"
\n",
"
318969 rows × 8 columns
\n",
"
"
],
"text/plain": [
" id_x purchase_date type_of is_from_subscription \\\n",
"0 992423 2023-01-11 17:08:41+01:00 3 False \n",
"1 992423 2023-01-11 17:08:41+01:00 3 False \n",
"2 1053934 2023-03-16 16:23:10+01:00 3 False \n",
"3 1053934 2023-03-16 16:23:10+01:00 3 False \n",
"4 1189141 2020-11-26 13:12:53+01:00 3 False \n",
"... ... ... ... ... \n",
"318964 1090839 2019-05-19 21:18:36+02:00 1 False \n",
"318965 1090839 2019-05-19 21:18:36+02:00 1 False \n",
"318966 1090839 2019-05-19 21:18:36+02:00 1 False \n",
"318967 1244277 2019-12-31 11:04:07+01:00 1 False \n",
"318968 1244277 2019-12-31 11:04:07+01:00 1 False \n",
"\n",
" amount is_full_price start_date_time event_name \n",
"0 13.0 False 2023-02-06 20:00:00+01:00 zaide \n",
"1 13.0 False 2023-02-06 20:00:00+01:00 zaide \n",
"2 62.0 False 2023-03-19 16:00:00+01:00 luisa miller \n",
"3 62.0 False 2023-03-19 16:00:00+01:00 luisa miller \n",
"4 51.3 False 2020-12-01 20:00:00+01:00 iphigenie en tauride \n",
"... ... ... ... ... \n",
"318964 4.5 False 2019-05-27 20:00:00+02:00 entre femmes \n",
"318965 4.5 False 2019-05-27 20:00:00+02:00 entre femmes \n",
"318966 4.5 False 2019-05-27 20:00:00+02:00 entre femmes \n",
"318967 5.5 False 2020-02-03 20:00:00+01:00 a boire et a manger \n",
"318968 5.5 False 2020-02-03 20:00:00+01:00 a boire et a manger \n",
"\n",
"[318969 rows x 8 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Join chain: purchases + tickets, then products, representations, events and finally customers.\n",
"# After each merge only the columns listed in var_choosed are kept; a join key that is no\n",
"# longer needed is dropped from the list before the next step. The list is rebuilt rather\n",
"# than mutated in place so every step shows exactly which columns it selects.\n",
"var_choosed = ['id_x', 'customer_id', 'product_id', 'purchase_date', 'type_of', 'is_from_subscription']\n",
"merge_1 = pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')[var_choosed]\n",
"\n",
"var_choosed = var_choosed + ['amount', 'is_full_price', 'representation_id']\n",
"merge_2 = pd.merge(products, merge_1, left_on='id', right_on='product_id', how='inner')[var_choosed]\n",
"\n",
"# representation_id is only the key of this merge, so it is not kept afterwards\n",
"var_choosed = [col for col in var_choosed if col != 'representation_id'] + ['start_date_time', 'event_id']\n",
"merge_3 = pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')[var_choosed]\n",
"\n",
"# event_id is only the key of this merge. customer_id is already in the list: adding it a\n",
"# second time would duplicate the column and make the key of the final merge non-unique.\n",
"var_choosed = [col for col in var_choosed if col != 'event_id'] + ['name']\n",
"merge_4 = pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')[var_choosed]\n",
"\n",
"# Rename the event label so it cannot be confused with other name columns\n",
"merge_4 = merge_4.rename(columns={'name': 'event_name'})\n",
"var_choosed = ['event_name' if col == 'name' else col for col in var_choosed]\n",
"\n",
"# Final table: one row per ticket, enriched with customer attributes\n",
"var_choosed = var_choosed + ['age', 'gender', 'country', 'fidelity', 'profession']\n",
"df_customer_event = pd.merge(customersplus, merge_4, left_on='id', right_on='customer_id', how='inner')[var_choosed]\n",
"\n",
"# Guard against a column being selected twice\n",
"assert df_customer_event.columns.is_unique, 'duplicated column in df_customer_event'\n",
"df_customer_event"
]
},
{
"cell_type": "markdown",
"id": "779da86b-ac61-4c61-88d2-fa1c0c19efce",
"metadata": {},
"source": [
"## Type de client au global"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7c89d25f-ee42-4478-9ff0-ee64b781d5c8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'customer_id', 'target_id', 'created_at', 'updated_at', 'name',\n",
" 'extra_field'],\n",
" dtype='object')\n",
"(124302, 7)\n",
"\n",
"RangeIndex: 124302 entries, 0 to 124301\n",
"Data columns (total 7 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 124302 non-null int64 \n",
" 1 customer_id 124302 non-null int64 \n",
" 2 target_id 124302 non-null int64 \n",
" 3 created_at 124296 non-null object \n",
" 4 updated_at 124296 non-null object \n",
" 5 name 0 non-null float64\n",
" 6 extra_field 0 non-null float64\n",
"dtypes: float64(2), int64(3), object(2)\n",
"memory usage: 6.6+ MB\n"
]
}
],
"source": [
"# Customer / target mapping table: column index and shape, then dtypes and non-null counts\n",
"print(customer_target_mappings.columns, customer_target_mappings.shape, sep='\\n')\n",
"customer_target_mappings.info()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "c4b6bdcc-9f13-449b-9a8b-c5ca794637be",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([nan])"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct values found in the extra_field column\n",
"customer_target_mappings[\"extra_field\"].unique()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "47bc8453-0693-4838-8bd8-4d800a82c496",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([nan])"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct values found in the name column\n",
"customer_target_mappings[\"name\"].unique()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "ab3f937b-ef62-499a-8ee2-d47d1d988ace",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'is_import', 'name', 'created_at', 'updated_at', 'identifier'], dtype='object')\n",
"(4, 6)\n",
"\n",
"RangeIndex: 4 entries, 0 to 3\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 4 non-null int64 \n",
" 1 is_import 4 non-null bool \n",
" 2 name 4 non-null object\n",
" 3 created_at 4 non-null object\n",
" 4 updated_at 4 non-null object\n",
" 5 identifier 4 non-null object\n",
"dtypes: bool(1), int64(1), object(4)\n",
"memory usage: 292.0+ bytes\n"
]
}
],
"source": [
"# Existing segmentation (target types): column index and shape, then dtypes and non-null counts\n",
"print(target_types.columns, target_types.shape, sep='\\n')\n",
"target_types.info()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "b70488b9-38fc-40a8-9e2f-3330b3f9eef5",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" is_import | \n",
" name | \n",
" created_at | \n",
" updated_at | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" False | \n",
" manual_static_filter | \n",
" 2021-04-29 13:42:14.111085+02:00 | \n",
" 2021-04-29 13:42:14.111085+02:00 | \n",
" fb27e81baa4debc6a4e1a8639c20e808 | \n",
"
\n",
" \n",
" 1 | \n",
" 3 | \n",
" True | \n",
" manual_structure | \n",
" 2021-05-07 15:20:00.626650+02:00 | \n",
" 2021-05-07 15:20:00.626650+02:00 | \n",
" 382bca214204a2d3462f5ec2728d5d1e | \n",
"
\n",
" \n",
" 2 | \n",
" 6 | \n",
" False | \n",
" manual_dynamic_filter | \n",
" 2021-09-09 14:27:47.641302+02:00 | \n",
" 2021-09-09 14:27:47.641302+02:00 | \n",
" e0f4b8693184850fefd6d2a38f10584e | \n",
"
\n",
" \n",
" 3 | \n",
" 2 | \n",
" True | \n",
" manual_import | \n",
" 2021-04-29 13:49:30.107110+02:00 | \n",
" 2021-04-29 13:49:30.107110+02:00 | \n",
" 12213df2ce68a624e4c0070521437bac | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id is_import name created_at \\\n",
"0 1 False manual_static_filter 2021-04-29 13:42:14.111085+02:00 \n",
"1 3 True manual_structure 2021-05-07 15:20:00.626650+02:00 \n",
"2 6 False manual_dynamic_filter 2021-09-09 14:27:47.641302+02:00 \n",
"3 2 True manual_import 2021-04-29 13:49:30.107110+02:00 \n",
"\n",
" updated_at identifier \n",
"0 2021-04-29 13:42:14.111085+02:00 fb27e81baa4debc6a4e1a8639c20e808 \n",
"1 2021-05-07 15:20:00.626650+02:00 382bca214204a2d3462f5ec2728d5d1e \n",
"2 2021-09-09 14:27:47.641302+02:00 e0f4b8693184850fefd6d2a38f10584e \n",
"3 2021-04-29 13:49:30.107110+02:00 12213df2ce68a624e4c0070521437bac "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"target_types"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "8dd74e87-97c2-493d-b19f-971b684078d3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'name', 'created_at', 'updated_at', 'identifier'], dtype='object')\n",
"(20, 5)\n",
"\n",
"RangeIndex: 20 entries, 0 to 19\n",
"Data columns (total 5 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 20 non-null int64 \n",
" 1 name 19 non-null object\n",
" 2 created_at 20 non-null object\n",
" 3 updated_at 20 non-null object\n",
" 4 identifier 20 non-null object\n",
"dtypes: int64(1), object(4)\n",
"memory usage: 928.0+ bytes\n"
]
}
],
"source": [
"# Tags table: read the CSV from S3, then show its column index, shape and dtypes\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11tags.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode='rb') as file_in:\n",
"    tags = pd.read_csv(file_in, sep=',')\n",
"\n",
"print(tags.columns, tags.shape, sep='\\n')\n",
"tags.info()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "91d54732-666c-4250-ba91-5c9b83d4712a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" name | \n",
" created_at | \n",
" updated_at | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2 | \n",
" ens-écoles | \n",
" 2021-05-07 15:24:19.808501+02:00 | \n",
" 2021-05-07 15:24:19.808501+02:00 | \n",
" b6a360c5f84595940c5774f13fd39cc3 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" NaN | \n",
" 2021-05-07 15:24:19.805589+02:00 | \n",
" 2021-05-07 15:24:19.805589+02:00 | \n",
" d41d8cd98f00b204e9800998ecf8427e | \n",
"
\n",
" \n",
" 2 | \n",
" 4 | \n",
" ecoles primaires rennes | \n",
" 2021-05-07 15:29:06.388415+02:00 | \n",
" 2021-05-07 15:29:06.388415+02:00 | \n",
" ca8649dd64c240d118f60b07d11a7053 | \n",
"
\n",
" \n",
" 3 | \n",
" 5 | \n",
" Angers Nantes Opéra | \n",
" 2023-01-27 15:59:58.187557+01:00 | \n",
" 2023-01-27 15:59:58.187557+01:00 | \n",
" f8f500f937fe312542399299cdc13f7e | \n",
"
\n",
" \n",
" 4 | \n",
" 6 | \n",
" Opéras | \n",
" 2023-01-27 16:03:59.654938+01:00 | \n",
" 2023-01-27 16:03:59.654938+01:00 | \n",
" 22eb2c616983ec7b54a093f84b230505 | \n",
"
\n",
" \n",
" 5 | \n",
" 7 | \n",
" Ministère de la Culture | \n",
" 2023-01-30 11:22:29.636813+01:00 | \n",
" 2023-01-30 11:22:29.636813+01:00 | \n",
" 1b8c5c08fde000d90905a3d14af7763d | \n",
"
\n",
" \n",
" 6 | \n",
" 8 | \n",
" Orchestres | \n",
" 2023-01-30 11:33:56.392799+01:00 | \n",
" 2023-01-30 11:33:56.392799+01:00 | \n",
" 7c2aee0c80642d7e325a450f2dec45e5 | \n",
"
\n",
" \n",
" 7 | \n",
" 9 | \n",
" Cooperative | \n",
" 2023-01-31 14:44:38.471146+01:00 | \n",
" 2023-01-31 14:44:38.471146+01:00 | \n",
" 6c88c36ffaab88d255865aa3111d7686 | \n",
"
\n",
" \n",
" 8 | \n",
" 10 | \n",
" Théâtres | \n",
" 2023-01-31 14:45:17.804428+01:00 | \n",
" 2023-01-31 14:45:17.804428+01:00 | \n",
" b2c19672df82021702b79482c8cda85a | \n",
"
\n",
" \n",
" 9 | \n",
" 11 | \n",
" La co[opera]tive | \n",
" 2023-02-16 17:11:35.004478+01:00 | \n",
" 2023-02-16 17:11:35.004478+01:00 | \n",
" 5dbaa3a1f278c0fcf981d447ad20957a | \n",
"
\n",
" \n",
" 10 | \n",
" 12 | \n",
" Ville de Rennes | \n",
" 2023-02-16 17:37:13.816196+01:00 | \n",
" 2023-02-16 17:37:13.816196+01:00 | \n",
" bc483d04d9c3a08f167a3ce64366ca72 | \n",
"
\n",
" \n",
" 11 | \n",
" 13 | \n",
" Ensembles en résidence | \n",
" 2023-02-16 17:55:54.877374+01:00 | \n",
" 2023-02-16 17:55:54.877374+01:00 | \n",
" e70635e771de13268dccf02bb2abfaf9 | \n",
"
\n",
" \n",
" 12 | \n",
" 14 | \n",
" Ministère | \n",
" 2023-02-17 11:17:54.429462+01:00 | \n",
" 2023-02-17 11:17:54.429462+01:00 | \n",
" a3f0582853fd19f5b57e3651f8a20e7a | \n",
"
\n",
" \n",
" 13 | \n",
" 15 | \n",
" Rennes métropole | \n",
" 2023-02-17 11:53:24.490786+01:00 | \n",
" 2023-02-17 11:53:24.490786+01:00 | \n",
" e98b8db5941b96c29c353b6f2f502055 | \n",
"
\n",
" \n",
" 14 | \n",
" 16 | \n",
" Ville de Rennes - équipements culturels | \n",
" 2023-02-17 12:00:10.649104+01:00 | \n",
" 2023-02-17 12:00:10.649104+01:00 | \n",
" a44edffc7edb852982efa7f4aa6d0e25 | \n",
"
\n",
" \n",
" 15 | \n",
" 17 | \n",
" Structures culturelles rennaises | \n",
" 2023-02-17 12:05:55.583016+01:00 | \n",
" 2023-02-17 12:05:55.583016+01:00 | \n",
" 241550517e4e3b1c926e9aeab0f621cd | \n",
"
\n",
" \n",
" 16 | \n",
" 18 | \n",
" Université Rennes 2 | \n",
" 2023-02-17 14:23:44.832959+01:00 | \n",
" 2023-02-17 14:23:44.832959+01:00 | \n",
" 4057c5cee51c4e10aa819f0cf48adc3f | \n",
"
\n",
" \n",
" 17 | \n",
" 19 | \n",
" Centres chorégraphiques nationaux | \n",
" 2023-02-17 15:29:41.827321+01:00 | \n",
" 2023-02-17 15:29:41.827321+01:00 | \n",
" 41e75941dfb766365498d917abe0102f | \n",
"
\n",
" \n",
" 18 | \n",
" 20 | \n",
" Télévision | \n",
" 2023-02-17 15:46:13.746092+01:00 | \n",
" 2023-02-17 15:46:13.746092+01:00 | \n",
" 36d6409c539dd79c1f3af8c5948603eb | \n",
"
\n",
" \n",
" 19 | \n",
" 21 | \n",
" structures culturelles nationales | \n",
" 2023-02-17 15:56:00.555722+01:00 | \n",
" 2023-02-17 15:56:00.555722+01:00 | \n",
" 5311cf7e42aac53289e1c4a338d5cfa4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id name \\\n",
"0 2 ens-écoles \n",
"1 1 NaN \n",
"2 4 ecoles primaires rennes \n",
"3 5 Angers Nantes Opéra \n",
"4 6 Opéras \n",
"5 7 Ministère de la Culture \n",
"6 8 Orchestres \n",
"7 9 Cooperative \n",
"8 10 Théâtres \n",
"9 11 La co[opera]tive \n",
"10 12 Ville de Rennes \n",
"11 13 Ensembles en résidence \n",
"12 14 Ministère \n",
"13 15 Rennes métropole \n",
"14 16 Ville de Rennes - équipements culturels \n",
"15 17 Structures culturelles rennaises \n",
"16 18 Université Rennes 2 \n",
"17 19 Centres chorégraphiques nationaux \n",
"18 20 Télévision \n",
"19 21 structures culturelles nationales \n",
"\n",
" created_at updated_at \\\n",
"0 2021-05-07 15:24:19.808501+02:00 2021-05-07 15:24:19.808501+02:00 \n",
"1 2021-05-07 15:24:19.805589+02:00 2021-05-07 15:24:19.805589+02:00 \n",
"2 2021-05-07 15:29:06.388415+02:00 2021-05-07 15:29:06.388415+02:00 \n",
"3 2023-01-27 15:59:58.187557+01:00 2023-01-27 15:59:58.187557+01:00 \n",
"4 2023-01-27 16:03:59.654938+01:00 2023-01-27 16:03:59.654938+01:00 \n",
"5 2023-01-30 11:22:29.636813+01:00 2023-01-30 11:22:29.636813+01:00 \n",
"6 2023-01-30 11:33:56.392799+01:00 2023-01-30 11:33:56.392799+01:00 \n",
"7 2023-01-31 14:44:38.471146+01:00 2023-01-31 14:44:38.471146+01:00 \n",
"8 2023-01-31 14:45:17.804428+01:00 2023-01-31 14:45:17.804428+01:00 \n",
"9 2023-02-16 17:11:35.004478+01:00 2023-02-16 17:11:35.004478+01:00 \n",
"10 2023-02-16 17:37:13.816196+01:00 2023-02-16 17:37:13.816196+01:00 \n",
"11 2023-02-16 17:55:54.877374+01:00 2023-02-16 17:55:54.877374+01:00 \n",
"12 2023-02-17 11:17:54.429462+01:00 2023-02-17 11:17:54.429462+01:00 \n",
"13 2023-02-17 11:53:24.490786+01:00 2023-02-17 11:53:24.490786+01:00 \n",
"14 2023-02-17 12:00:10.649104+01:00 2023-02-17 12:00:10.649104+01:00 \n",
"15 2023-02-17 12:05:55.583016+01:00 2023-02-17 12:05:55.583016+01:00 \n",
"16 2023-02-17 14:23:44.832959+01:00 2023-02-17 14:23:44.832959+01:00 \n",
"17 2023-02-17 15:29:41.827321+01:00 2023-02-17 15:29:41.827321+01:00 \n",
"18 2023-02-17 15:46:13.746092+01:00 2023-02-17 15:46:13.746092+01:00 \n",
"19 2023-02-17 15:56:00.555722+01:00 2023-02-17 15:56:00.555722+01:00 \n",
"\n",
" identifier \n",
"0 b6a360c5f84595940c5774f13fd39cc3 \n",
"1 d41d8cd98f00b204e9800998ecf8427e \n",
"2 ca8649dd64c240d118f60b07d11a7053 \n",
"3 f8f500f937fe312542399299cdc13f7e \n",
"4 22eb2c616983ec7b54a093f84b230505 \n",
"5 1b8c5c08fde000d90905a3d14af7763d \n",
"6 7c2aee0c80642d7e325a450f2dec45e5 \n",
"7 6c88c36ffaab88d255865aa3111d7686 \n",
"8 b2c19672df82021702b79482c8cda85a \n",
"9 5dbaa3a1f278c0fcf981d447ad20957a \n",
"10 bc483d04d9c3a08f167a3ce64366ca72 \n",
"11 e70635e771de13268dccf02bb2abfaf9 \n",
"12 a3f0582853fd19f5b57e3651f8a20e7a \n",
"13 e98b8db5941b96c29c353b6f2f502055 \n",
"14 a44edffc7edb852982efa7f4aa6d0e25 \n",
"15 241550517e4e3b1c926e9aeab0f621cd \n",
"16 4057c5cee51c4e10aa819f0cf48adc3f \n",
"17 41e75941dfb766365498d917abe0102f \n",
"18 36d6409c539dd79c1f3af8c5948603eb \n",
"19 5311cf7e42aac53289e1c4a338d5cfa4 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tags"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "4cc9f444-b7e6-4ee5-8ce8-64c63ab7825a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'structure_id', 'tag_id', 'created_at', 'updated_at'], dtype='object')\n",
"(179, 5)\n",
"\n",
"RangeIndex: 179 entries, 0 to 178\n",
"Data columns (total 5 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 179 non-null int64 \n",
" 1 structure_id 179 non-null int64 \n",
" 2 tag_id 179 non-null int64 \n",
" 3 created_at 179 non-null object\n",
" 4 updated_at 179 non-null object\n",
"dtypes: int64(3), object(2)\n",
"memory usage: 7.1+ KB\n"
]
}
],
"source": [
"# Structure / tag mapping table: read the CSV from S3, then show its column index, shape and dtypes\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11structure_tag_mappings.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode='rb') as file_in:\n",
"    structure_tag_mappings = pd.read_csv(file_in, sep=',')\n",
"\n",
"print(structure_tag_mappings.columns, structure_tag_mappings.shape, sep='\\n')\n",
"structure_tag_mappings.info()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "dcf776df-5c8e-4972-b2c1-b41291ba7e66",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" structure_id | \n",
" tag_id | \n",
" created_at | \n",
" updated_at | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 123 | \n",
" 187 | \n",
" 6 | \n",
" 2023-01-27 16:03:59.680222+01:00 | \n",
" 2023-01-27 16:03:59.680222+01:00 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2021-05-07 15:24:19.872895+02:00 | \n",
" 2021-05-07 15:24:19.872895+02:00 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 3 | \n",
" 2 | \n",
" 2021-05-07 15:24:19.873830+02:00 | \n",
" 2021-05-07 15:24:19.873830+02:00 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 4 | \n",
" 2 | \n",
" 2021-05-07 15:24:19.874628+02:00 | \n",
" 2021-05-07 15:24:19.874628+02:00 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 5 | \n",
" 2 | \n",
" 2021-05-07 15:24:19.875421+02:00 | \n",
" 2021-05-07 15:24:19.875421+02:00 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 174 | \n",
" 184 | \n",
" 236 | \n",
" 10 | \n",
" 2023-02-17 16:35:25.041114+01:00 | \n",
" 2023-02-17 16:35:25.041114+01:00 | \n",
"
\n",
" \n",
" 175 | \n",
" 185 | \n",
" 237 | \n",
" 17 | \n",
" 2023-02-17 16:39:10.799478+01:00 | \n",
" 2023-02-17 16:39:10.799478+01:00 | \n",
"
\n",
" \n",
" 176 | \n",
" 186 | \n",
" 238 | \n",
" 19 | \n",
" 2023-02-17 16:53:21.098690+01:00 | \n",
" 2023-02-17 16:53:21.098690+01:00 | \n",
"
\n",
" \n",
" 177 | \n",
" 187 | \n",
" 239 | \n",
" 10 | \n",
" 2023-02-17 16:57:42.623481+01:00 | \n",
" 2023-02-17 16:57:42.623481+01:00 | \n",
"
\n",
" \n",
" 178 | \n",
" 188 | \n",
" 240 | \n",
" 10 | \n",
" 2023-02-17 16:59:22.067723+01:00 | \n",
" 2023-02-17 16:59:22.067723+01:00 | \n",
"
\n",
" \n",
"
\n",
"
179 rows × 5 columns
\n",
"
"
],
"text/plain": [
" id structure_id tag_id created_at \\\n",
"0 123 187 6 2023-01-27 16:03:59.680222+01:00 \n",
"1 2 2 2 2021-05-07 15:24:19.872895+02:00 \n",
"2 3 3 2 2021-05-07 15:24:19.873830+02:00 \n",
"3 4 4 2 2021-05-07 15:24:19.874628+02:00 \n",
"4 5 5 2 2021-05-07 15:24:19.875421+02:00 \n",
".. ... ... ... ... \n",
"174 184 236 10 2023-02-17 16:35:25.041114+01:00 \n",
"175 185 237 17 2023-02-17 16:39:10.799478+01:00 \n",
"176 186 238 19 2023-02-17 16:53:21.098690+01:00 \n",
"177 187 239 10 2023-02-17 16:57:42.623481+01:00 \n",
"178 188 240 10 2023-02-17 16:59:22.067723+01:00 \n",
"\n",
" updated_at \n",
"0 2023-01-27 16:03:59.680222+01:00 \n",
"1 2021-05-07 15:24:19.872895+02:00 \n",
"2 2021-05-07 15:24:19.873830+02:00 \n",
"3 2021-05-07 15:24:19.874628+02:00 \n",
"4 2021-05-07 15:24:19.875421+02:00 \n",
".. ... \n",
"174 2023-02-17 16:35:25.041114+01:00 \n",
"175 2023-02-17 16:39:10.799478+01:00 \n",
"176 2023-02-17 16:53:21.098690+01:00 \n",
"177 2023-02-17 16:57:42.623481+01:00 \n",
"178 2023-02-17 16:59:22.067723+01:00 \n",
"\n",
"[179 rows x 5 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"structure_tag_mappings"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "41bf1529-5a7c-409e-9791-2024c08c11f0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'lastname', 'firstname', 'birthdate', 'email', 'street_id',\n",
" 'created_at', 'updated_at', 'civility', 'is_partner', 'extra',\n",
" 'deleted_at', 'reference', 'gender', 'is_email_true', 'extra_field',\n",
" 'identifier', 'opt_in', 'structure_id', 'note', 'profession',\n",
" 'language', 'mcp_contact_id', 'need_reload', 'last_buying_date',\n",
" 'max_price', 'ticket_sum', 'average_price', 'fidelity',\n",
" 'average_purchase_delay', 'average_price_basket',\n",
" 'average_ticket_basket', 'total_price', 'preferred_category',\n",
" 'preferred_supplier', 'preferred_formula', 'purchase_count',\n",
" 'first_buying_date', 'last_visiting_date', 'zipcode', 'country', 'age',\n",
" 'tenant_id'],\n",
" dtype='object')\n",
"(71307, 43)\n",
"\n",
"RangeIndex: 71307 entries, 0 to 71306\n",
"Data columns (total 43 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 71307 non-null int64 \n",
" 1 lastname 41045 non-null object \n",
" 2 firstname 39140 non-null object \n",
" 3 birthdate 18174 non-null object \n",
" 4 email 58203 non-null object \n",
" 5 street_id 71307 non-null int64 \n",
" 6 created_at 71307 non-null object \n",
" 7 updated_at 71307 non-null object \n",
" 8 civility 0 non-null float64\n",
" 9 is_partner 71307 non-null bool \n",
" 10 extra 0 non-null float64\n",
" 11 deleted_at 0 non-null float64\n",
" 12 reference 0 non-null float64\n",
" 13 gender 71307 non-null int64 \n",
" 14 is_email_true 71307 non-null bool \n",
" 15 extra_field 0 non-null float64\n",
" 16 identifier 71307 non-null object \n",
" 17 opt_in 71307 non-null bool \n",
" 18 structure_id 616 non-null float64\n",
" 19 note 451 non-null object \n",
" 20 profession 812 non-null object \n",
" 21 language 0 non-null float64\n",
" 22 mcp_contact_id 22417 non-null float64\n",
" 23 need_reload 71307 non-null bool \n",
" 24 last_buying_date 34040 non-null object \n",
" 25 max_price 34040 non-null float64\n",
" 26 ticket_sum 71307 non-null int64 \n",
" 27 average_price 68694 non-null float64\n",
" 28 fidelity 71307 non-null int64 \n",
" 29 average_purchase_delay 34040 non-null float64\n",
" 30 average_price_basket 34040 non-null float64\n",
" 31 average_ticket_basket 34040 non-null float64\n",
" 32 total_price 36653 non-null float64\n",
" 33 preferred_category 0 non-null float64\n",
" 34 preferred_supplier 0 non-null float64\n",
" 35 preferred_formula 0 non-null float64\n",
" 36 purchase_count 71307 non-null int64 \n",
" 37 first_buying_date 34040 non-null object \n",
" 38 last_visiting_date 0 non-null float64\n",
" 39 zipcode 33756 non-null object \n",
" 40 country 39910 non-null object \n",
" 41 age 18174 non-null float64\n",
" 42 tenant_id 71307 non-null int64 \n",
"dtypes: bool(4), float64(19), int64(7), object(13)\n",
"memory usage: 21.5+ MB\n"
]
}
],
"source": [
"# Customers table (customersplus): read the CSV from S3, then show its column index, shape and dtypes\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11customersplus.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode='rb') as file_in:\n",
"    customersplus = pd.read_csv(file_in, sep=',')\n",
"\n",
"print(customersplus.columns, customersplus.shape, sep='\\n')\n",
"customersplus.info()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "948a0b2b-8d1c-4afb-802e-670d67dd8c20",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" lastname | \n",
" firstname | \n",
" birthdate | \n",
" email | \n",
" street_id | \n",
" created_at | \n",
" updated_at | \n",
" civility | \n",
" is_partner | \n",
" ... | \n",
" preferred_category | \n",
" preferred_supplier | \n",
" preferred_formula | \n",
" purchase_count | \n",
" first_buying_date | \n",
" last_visiting_date | \n",
" zipcode | \n",
" country | \n",
" age | \n",
" tenant_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 286834 | \n",
" lastname286834 | \n",
" firstname286834 | \n",
" NaN | \n",
" email286834 | \n",
" 6 | \n",
" 2022-05-19 10:09:09.361137+02:00 | \n",
" 2022-05-19 10:09:09.361137+02:00 | \n",
" NaN | \n",
" False | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" fr | \n",
" NaN | \n",
" 1556 | \n",
"
\n",
" \n",
" 1 | \n",
" 330695 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" email330695 | \n",
" 1 | \n",
" 2022-07-16 04:10:34.135134+02:00 | \n",
" 2022-07-16 04:10:34.156704+02:00 | \n",
" NaN | \n",
" False | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1556 | \n",
"
\n",
" \n",
" 2 | \n",
" 330978 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" email330978 | \n",
" 1 | \n",
" 2022-07-21 22:14:09.811721+02:00 | \n",
" 2022-07-21 22:14:09.836051+02:00 | \n",
" NaN | \n",
" False | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1556 | \n",
"
\n",
" \n",
" 3 | \n",
" 338697 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" email338697 | \n",
" 1 | \n",
" 2022-09-15 19:02:03.950536+02:00 | \n",
" 2022-09-15 19:02:03.985642+02:00 | \n",
" NaN | \n",
" False | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1556 | \n",
"
\n",
" \n",
" 4 | \n",
" 338726 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" email338726 | \n",
" 1 | \n",
" 2022-09-16 01:24:40.719882+02:00 | \n",
" 2022-09-16 01:24:40.742753+02:00 | \n",
" NaN | \n",
" False | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1556 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 71302 | \n",
" 27105 | \n",
" lastname27105 | \n",
" firstname27105 | \n",
" 1957-01-26 | \n",
" email27105 | \n",
" 205024 | \n",
" 2021-04-22 15:12:59.986534+02:00 | \n",
" 2023-09-12 18:59:31.613235+02:00 | \n",
" NaN | \n",
" False | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2 | \n",
" 2018-12-31 18:56:57+01:00 | \n",
" NaN | \n",
" 35700 | \n",
" fr | \n",
" 66.0 | \n",
" 1556 | \n",
"
\n",
" \n",
" 71303 | \n",
" 27108 | \n",
" lastname27108 | \n",
" firstname27108 | \n",
" NaN | \n",
" NaN | \n",
" 205024 | \n",
" 2021-04-22 15:12:59.989197+02:00 | \n",
" 2023-09-12 18:27:34.380843+02:00 | \n",
" NaN | \n",
" False | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 6 | \n",
" 2015-12-29 14:51:46+01:00 | \n",
" NaN | \n",
" 35700 | \n",
" fr | \n",
" NaN | \n",
" 1556 | \n",
"
\n",
" \n",
" 71304 | \n",
" 27110 | \n",
" lastname27110 | \n",
" firstname27110 | \n",
" NaN | \n",
" NaN | \n",
" 6 | \n",
" 2021-04-22 15:12:59.991029+02:00 | \n",
" 2022-04-14 11:41:33.738500+02:00 | \n",
" NaN | \n",
" False | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1 | \n",
" 2018-12-31 19:12:59+01:00 | \n",
" NaN | \n",
" NaN | \n",
" fr | \n",
" NaN | \n",
" 1556 | \n",
"
\n",
" \n",
" 71305 | \n",
" 10607 | \n",
" lastname10607 | \n",
" firstname10607 | \n",
" 1963-01-04 | \n",
" email10607 | \n",
" 313332 | \n",
" 2021-04-22 14:56:45.742226+02:00 | \n",
" 2023-09-12 17:55:17.723195+02:00 | \n",
" NaN | \n",
" False | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 26 | \n",
" 2015-10-10 14:11:21+02:00 | \n",
" NaN | \n",
" 35850 | \n",
" fr | \n",
" 60.0 | \n",
" 1556 | \n",
"
\n",
" \n",
" 71306 | \n",
" 19095 | \n",
" lastname19095 | \n",
" firstname19095 | \n",
" 1979-07-16 | \n",
" email19095 | \n",
" 6 | \n",
" 2021-04-22 15:06:30.120537+02:00 | \n",
" 2023-09-12 18:27:36.904104+02:00 | \n",
" NaN | \n",
" False | \n",
" ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2 | \n",
" 2019-05-19 21:18:36+02:00 | \n",
" NaN | \n",
" NaN | \n",
" fr | \n",
" 44.0 | \n",
" 1556 | \n",
"
\n",
" \n",
"
\n",
"
71307 rows × 43 columns
\n",
"
"
],
"text/plain": [
" id lastname firstname birthdate email \\\n",
"0 286834 lastname286834 firstname286834 NaN email286834 \n",
"1 330695 NaN NaN NaN email330695 \n",
"2 330978 NaN NaN NaN email330978 \n",
"3 338697 NaN NaN NaN email338697 \n",
"4 338726 NaN NaN NaN email338726 \n",
"... ... ... ... ... ... \n",
"71302 27105 lastname27105 firstname27105 1957-01-26 email27105 \n",
"71303 27108 lastname27108 firstname27108 NaN NaN \n",
"71304 27110 lastname27110 firstname27110 NaN NaN \n",
"71305 10607 lastname10607 firstname10607 1963-01-04 email10607 \n",
"71306 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
"\n",
" street_id created_at \\\n",
"0 6 2022-05-19 10:09:09.361137+02:00 \n",
"1 1 2022-07-16 04:10:34.135134+02:00 \n",
"2 1 2022-07-21 22:14:09.811721+02:00 \n",
"3 1 2022-09-15 19:02:03.950536+02:00 \n",
"4 1 2022-09-16 01:24:40.719882+02:00 \n",
"... ... ... \n",
"71302 205024 2021-04-22 15:12:59.986534+02:00 \n",
"71303 205024 2021-04-22 15:12:59.989197+02:00 \n",
"71304 6 2021-04-22 15:12:59.991029+02:00 \n",
"71305 313332 2021-04-22 14:56:45.742226+02:00 \n",
"71306 6 2021-04-22 15:06:30.120537+02:00 \n",
"\n",
" updated_at civility is_partner ... \\\n",
"0 2022-05-19 10:09:09.361137+02:00 NaN False ... \n",
"1 2022-07-16 04:10:34.156704+02:00 NaN False ... \n",
"2 2022-07-21 22:14:09.836051+02:00 NaN False ... \n",
"3 2022-09-15 19:02:03.985642+02:00 NaN False ... \n",
"4 2022-09-16 01:24:40.742753+02:00 NaN False ... \n",
"... ... ... ... ... \n",
"71302 2023-09-12 18:59:31.613235+02:00 NaN False ... \n",
"71303 2023-09-12 18:27:34.380843+02:00 NaN False ... \n",
"71304 2022-04-14 11:41:33.738500+02:00 NaN False ... \n",
"71305 2023-09-12 17:55:17.723195+02:00 NaN False ... \n",
"71306 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
"\n",
" preferred_category preferred_supplier preferred_formula \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"... ... ... ... \n",
"71302 NaN NaN NaN \n",
"71303 NaN NaN NaN \n",
"71304 NaN NaN NaN \n",
"71305 NaN NaN NaN \n",
"71306 NaN NaN NaN \n",
"\n",
" purchase_count first_buying_date last_visiting_date zipcode \\\n",
"0 0 NaN NaN NaN \n",
"1 0 NaN NaN NaN \n",
"2 0 NaN NaN NaN \n",
"3 0 NaN NaN NaN \n",
"4 0 NaN NaN NaN \n",
"... ... ... ... ... \n",
"71302 2 2018-12-31 18:56:57+01:00 NaN 35700 \n",
"71303 6 2015-12-29 14:51:46+01:00 NaN 35700 \n",
"71304 1 2018-12-31 19:12:59+01:00 NaN NaN \n",
"71305 26 2015-10-10 14:11:21+02:00 NaN 35850 \n",
"71306 2 2019-05-19 21:18:36+02:00 NaN NaN \n",
"\n",
" country age tenant_id \n",
"0 fr NaN 1556 \n",
"1 NaN NaN 1556 \n",
"2 NaN NaN 1556 \n",
"3 NaN NaN 1556 \n",
"4 NaN NaN 1556 \n",
"... ... ... ... \n",
"71302 fr 66.0 1556 \n",
"71303 fr NaN 1556 \n",
"71304 fr NaN 1556 \n",
"71305 fr 60.0 1556 \n",
"71306 fr 44.0 1556 \n",
"\n",
"[71307 rows x 43 columns]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customersplus"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c40c44a0-e7c2-4ad1-b700-0d6ea05d62b2",
"metadata": {},
"outputs": [],
"source": [
"# But : lier les caractéristiques socio-demo et les comportements d'achat\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "8259ae6c-353f-43a6-add3-f974fac6e5d4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'number', 'created_at', 'updated_at', 'purchase_id', 'product_id',\n",
" 'is_from_subscription', 'type_of', 'supplier_id', 'barcode',\n",
" 'identifier'],\n",
" dtype='object')\n",
"(318969, 11)\n",
"\n",
"RangeIndex: 318969 entries, 0 to 318968\n",
"Data columns (total 11 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 318969 non-null int64 \n",
" 1 number 318969 non-null object \n",
" 2 created_at 318969 non-null object \n",
" 3 updated_at 318969 non-null object \n",
" 4 purchase_id 318969 non-null int64 \n",
" 5 product_id 318969 non-null int64 \n",
" 6 is_from_subscription 318969 non-null bool \n",
" 7 type_of 318969 non-null int64 \n",
" 8 supplier_id 318969 non-null int64 \n",
" 9 barcode 0 non-null float64\n",
" 10 identifier 318969 non-null object \n",
"dtypes: bool(1), float64(1), int64(5), object(4)\n",
"memory usage: 24.6+ MB\n"
]
}
],
"source": [
"# tickets\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11tickets.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" tickets = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(tickets.columns)\n",
"print(tickets.shape)\n",
"tickets.info()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "f54830cb-1f95-4f71-9b04-358c745fb454",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" number | \n",
" created_at | \n",
" updated_at | \n",
" purchase_id | \n",
" product_id | \n",
" is_from_subscription | \n",
" type_of | \n",
" supplier_id | \n",
" barcode | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2119081 | \n",
" 1433_136_212_68356 | \n",
" 2023-09-12 17:42:45.396336+02:00 | \n",
" 2023-09-12 17:42:45.396336+02:00 | \n",
" 861764 | \n",
" 209879 | \n",
" False | \n",
" 1 | \n",
" 1702 | \n",
" NaN | \n",
" f694c255855ce5643c6fcc7fed5e9237 | \n",
"
\n",
" \n",
" 1 | \n",
" 2119082 | \n",
" 1433_136_194_68356 | \n",
" 2023-09-12 17:42:45.409056+02:00 | \n",
" 2023-09-12 17:42:45.409056+02:00 | \n",
" 861763 | \n",
" 209879 | \n",
" False | \n",
" 1 | \n",
" 1702 | \n",
" NaN | \n",
" 838d6101db2fc8bc80536d8b91b49859 | \n",
"
\n",
" \n",
" 2 | \n",
" 2119083 | \n",
" 33158_158_343_68357 | \n",
" 2023-09-12 17:42:45.409824+02:00 | \n",
" 2023-09-12 17:42:45.409824+02:00 | \n",
" 861769 | \n",
" 209880 | \n",
" False | \n",
" 1 | \n",
" 1702 | \n",
" NaN | \n",
" 8a8d938d66a4dc57bcb44c2773c6fdfa | \n",
"
\n",
" \n",
" 3 | \n",
" 2119084 | \n",
" 33158_158_297_68357 | \n",
" 2023-09-12 17:42:45.410447+02:00 | \n",
" 2023-09-12 17:42:45.410447+02:00 | \n",
" 861767 | \n",
" 209880 | \n",
" False | \n",
" 1 | \n",
" 1702 | \n",
" NaN | \n",
" b7a3dd0794c0957c942d45b8913e5b96 | \n",
"
\n",
" \n",
" 4 | \n",
" 2119085 | \n",
" 33158_158_318_68357 | \n",
" 2023-09-12 17:42:45.411059+02:00 | \n",
" 2023-09-12 17:42:45.411059+02:00 | \n",
" 861768 | \n",
" 209880 | \n",
" False | \n",
" 1 | \n",
" 1702 | \n",
" NaN | \n",
" d7ea7e443581ebe520dd13f6cad31af7 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 318964 | \n",
" 2564021 | \n",
" 44247_204_239_89278 | \n",
" 2023-09-12 18:59:48.750953+02:00 | \n",
" 2023-09-12 18:59:48.750953+02:00 | \n",
" 1244281 | \n",
" 210158 | \n",
" False | \n",
" 1 | \n",
" 1702 | \n",
" NaN | \n",
" 82c9af8b2167f7ac34a5e834242b0239 | \n",
"
\n",
" \n",
" 318965 | \n",
" 2564022 | \n",
" 44247_204_299_89278 | \n",
" 2023-09-12 18:59:48.751441+02:00 | \n",
" 2023-09-12 18:59:48.751441+02:00 | \n",
" 1244284 | \n",
" 210158 | \n",
" False | \n",
" 1 | \n",
" 1702 | \n",
" NaN | \n",
" 235e8e608f066cb72949bbd397d0a76f | \n",
"
\n",
" \n",
" 318966 | \n",
" 2564023 | \n",
" 44247_204_259_89278 | \n",
" 2023-09-12 18:59:48.751924+02:00 | \n",
" 2023-09-12 18:59:48.751924+02:00 | \n",
" 1244282 | \n",
" 210158 | \n",
" False | \n",
" 1 | \n",
" 1702 | \n",
" NaN | \n",
" ec22fa828931f030f7e79a4cc5478c4b | \n",
"
\n",
" \n",
" 318967 | \n",
" 2564024 | \n",
" 44247_204_279_89278 | \n",
" 2023-09-12 18:59:48.752425+02:00 | \n",
" 2023-09-12 18:59:48.752425+02:00 | \n",
" 1244283 | \n",
" 210158 | \n",
" False | \n",
" 1 | \n",
" 1702 | \n",
" NaN | \n",
" 31ec4deaf718e04caf193e1ff8d621ef | \n",
"
\n",
" \n",
" 318968 | \n",
" 2513156 | \n",
" 4854_178_2847_89170 | \n",
" 2023-09-12 18:52:20.331807+02:00 | \n",
" 2023-09-12 18:59:48.752904+02:00 | \n",
" 1244285 | \n",
" 261922 | \n",
" False | \n",
" 3 | \n",
" 1702 | \n",
" NaN | \n",
" 48aef9efab29bfb1537656908863bcc1 | \n",
"
\n",
" \n",
"
\n",
"
318969 rows × 11 columns
\n",
"
"
],
"text/plain": [
" id number created_at \\\n",
"0 2119081 1433_136_212_68356 2023-09-12 17:42:45.396336+02:00 \n",
"1 2119082 1433_136_194_68356 2023-09-12 17:42:45.409056+02:00 \n",
"2 2119083 33158_158_343_68357 2023-09-12 17:42:45.409824+02:00 \n",
"3 2119084 33158_158_297_68357 2023-09-12 17:42:45.410447+02:00 \n",
"4 2119085 33158_158_318_68357 2023-09-12 17:42:45.411059+02:00 \n",
"... ... ... ... \n",
"318964 2564021 44247_204_239_89278 2023-09-12 18:59:48.750953+02:00 \n",
"318965 2564022 44247_204_299_89278 2023-09-12 18:59:48.751441+02:00 \n",
"318966 2564023 44247_204_259_89278 2023-09-12 18:59:48.751924+02:00 \n",
"318967 2564024 44247_204_279_89278 2023-09-12 18:59:48.752425+02:00 \n",
"318968 2513156 4854_178_2847_89170 2023-09-12 18:52:20.331807+02:00 \n",
"\n",
" updated_at purchase_id product_id \\\n",
"0 2023-09-12 17:42:45.396336+02:00 861764 209879 \n",
"1 2023-09-12 17:42:45.409056+02:00 861763 209879 \n",
"2 2023-09-12 17:42:45.409824+02:00 861769 209880 \n",
"3 2023-09-12 17:42:45.410447+02:00 861767 209880 \n",
"4 2023-09-12 17:42:45.411059+02:00 861768 209880 \n",
"... ... ... ... \n",
"318964 2023-09-12 18:59:48.750953+02:00 1244281 210158 \n",
"318965 2023-09-12 18:59:48.751441+02:00 1244284 210158 \n",
"318966 2023-09-12 18:59:48.751924+02:00 1244282 210158 \n",
"318967 2023-09-12 18:59:48.752425+02:00 1244283 210158 \n",
"318968 2023-09-12 18:59:48.752904+02:00 1244285 261922 \n",
"\n",
" is_from_subscription type_of supplier_id barcode \\\n",
"0 False 1 1702 NaN \n",
"1 False 1 1702 NaN \n",
"2 False 1 1702 NaN \n",
"3 False 1 1702 NaN \n",
"4 False 1 1702 NaN \n",
"... ... ... ... ... \n",
"318964 False 1 1702 NaN \n",
"318965 False 1 1702 NaN \n",
"318966 False 1 1702 NaN \n",
"318967 False 1 1702 NaN \n",
"318968 False 3 1702 NaN \n",
"\n",
" identifier \n",
"0 f694c255855ce5643c6fcc7fed5e9237 \n",
"1 838d6101db2fc8bc80536d8b91b49859 \n",
"2 8a8d938d66a4dc57bcb44c2773c6fdfa \n",
"3 b7a3dd0794c0957c942d45b8913e5b96 \n",
"4 d7ea7e443581ebe520dd13f6cad31af7 \n",
"... ... \n",
"318964 82c9af8b2167f7ac34a5e834242b0239 \n",
"318965 235e8e608f066cb72949bbd397d0a76f \n",
"318966 ec22fa828931f030f7e79a4cc5478c4b \n",
"318967 31ec4deaf718e04caf193e1ff8d621ef \n",
"318968 48aef9efab29bfb1537656908863bcc1 \n",
"\n",
"[318969 rows x 11 columns]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tickets"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "ad743347-33d1-41f0-852d-f9e6354f82ed",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1, 3, 0])"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tickets['type_of'].unique()"
]
},
{
"cell_type": "markdown",
"id": "b88808fe-3b4e-49ed-9885-d52910b6f211",
"metadata": {},
"source": [
"## Types d'évenement et client"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ecb03a47-1418-4fb1-8c78-cd222d38b7fd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'created_at', 'updated_at', 'season_id', 'facility_id', 'name',\n",
" 'event_type_id', 'manual_added', 'is_display', 'event_type_key_id',\n",
" 'facility_key_id', 'identifier'],\n",
" dtype='object')\n",
"(403, 12)\n",
"\n",
"RangeIndex: 403 entries, 0 to 402\n",
"Data columns (total 12 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 403 non-null int64 \n",
" 1 created_at 403 non-null object\n",
" 2 updated_at 403 non-null object\n",
" 3 season_id 403 non-null int64 \n",
" 4 facility_id 403 non-null int64 \n",
" 5 name 403 non-null object\n",
" 6 event_type_id 403 non-null int64 \n",
" 7 manual_added 403 non-null bool \n",
" 8 is_display 403 non-null bool \n",
" 9 event_type_key_id 403 non-null int64 \n",
" 10 facility_key_id 403 non-null int64 \n",
" 11 identifier 403 non-null object\n",
"dtypes: bool(2), int64(6), object(4)\n",
"memory usage: 32.4+ KB\n"
]
}
],
"source": [
"# Evenement = events.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11events.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" events = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(events.columns)\n",
"print(events.shape)\n",
"events.info()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "19706610-9e90-4e6f-8bd0-da124b87cff7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" created_at | \n",
" updated_at | \n",
" season_id | \n",
" facility_id | \n",
" name | \n",
" event_type_id | \n",
" manual_added | \n",
" is_display | \n",
" event_type_key_id | \n",
" facility_key_id | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 20367 | \n",
" 2023-09-13 03:42:45.214293+02:00 | \n",
" 2023-09-13 03:54:30.086969+02:00 | \n",
" 1865 | \n",
" 1054 | \n",
" marelle | \n",
" 1055 | \n",
" False | \n",
" True | \n",
" 1055 | \n",
" 1054 | \n",
" 26d1e9a4acad18b9cf79244334c86c93 | \n",
"
\n",
" \n",
" 1 | \n",
" 20371 | \n",
" 2023-09-13 03:42:45.218728+02:00 | \n",
" 2023-09-13 03:54:30.103943+02:00 | \n",
" 1865 | \n",
" 1054 | \n",
" dialogues | \n",
" 1055 | \n",
" False | \n",
" True | \n",
" 1055 | \n",
" 1054 | \n",
" 60356fc5e8ed6c9c1be9c5ec67e77766 | \n",
"
\n",
" \n",
" 2 | \n",
" 20570 | \n",
" 2023-10-05 04:48:29.374504+02:00 | \n",
" 2023-10-05 04:48:36.562528+02:00 | \n",
" 1865 | \n",
" 1054 | \n",
" les grandes epopees | \n",
" 1055 | \n",
" False | \n",
" True | \n",
" 1055 | \n",
" 1054 | \n",
" f8ab088e06252bf34e1b12ad2ce1a403 | \n",
"
\n",
" \n",
" 3 | \n",
" 20757 | \n",
" 2023-11-01 03:55:20.846196+01:00 | \n",
" 2023-11-01 03:55:28.412457+01:00 | \n",
" 1865 | \n",
" 1054 | \n",
" scolaire marelle | \n",
" 1055 | \n",
" False | \n",
" True | \n",
" 1055 | \n",
" 1054 | \n",
" 447fa80f9a793b7587bb85ebbda6442c | \n",
"
\n",
" \n",
" 4 | \n",
" 20364 | \n",
" 2023-09-13 03:42:45.196791+02:00 | \n",
" 2023-09-13 03:54:30.075456+02:00 | \n",
" 1865 | \n",
" 1054 | \n",
" le couronnement de poppee | \n",
" 1055 | \n",
" False | \n",
" True | \n",
" 1055 | \n",
" 1054 | \n",
" 3b37f5d2cd354cbc422868621ac7ebc2 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 398 | \n",
" 15603 | \n",
" 2023-09-12 17:42:25.327618+02:00 | \n",
" 2023-09-12 19:00:00.893400+02:00 | \n",
" 1706 | \n",
" 1054 | \n",
" marelle | \n",
" 1055 | \n",
" False | \n",
" True | \n",
" 1055 | \n",
" 1054 | \n",
" fde88b72fb82b1fe42fbbfbfc3d6b4d3 | \n",
"
\n",
" \n",
" 399 | \n",
" 15621 | \n",
" 2023-09-12 17:42:25.335792+02:00 | \n",
" 2023-09-12 19:00:00.899622+02:00 | \n",
" 1708 | \n",
" 1054 | \n",
" cartes d'adhesion | \n",
" 1055 | \n",
" False | \n",
" True | \n",
" 1055 | \n",
" 1054 | \n",
" 051b96aad2b720bad4450a59ed7dfbf6 | \n",
"
\n",
" \n",
" 400 | \n",
" 15740 | \n",
" 2023-09-12 17:47:05.112101+02:00 | \n",
" 2023-09-12 19:00:00.906123+02:00 | \n",
" 1711 | \n",
" 1054 | \n",
" repetition le medecin malgre lui | \n",
" 1055 | \n",
" False | \n",
" True | \n",
" 1055 | \n",
" 1054 | \n",
" addd6885bea5ddf60ec3539dfc3e79e8 | \n",
"
\n",
" \n",
" 401 | \n",
" 15520 | \n",
" 2023-09-12 17:42:25.290280+02:00 | \n",
" 2023-09-12 19:00:00.835625+02:00 | \n",
" 1708 | \n",
" 1054 | \n",
" opera au village | \n",
" 1055 | \n",
" False | \n",
" True | \n",
" 1055 | \n",
" 1054 | \n",
" 94f250d10d4a56358ceab23b384439ff | \n",
"
\n",
" \n",
" 402 | \n",
" 15439 | \n",
" 2023-09-12 17:42:25.252747+02:00 | \n",
" 2023-09-12 19:00:00.735990+02:00 | \n",
" 1708 | \n",
" 1054 | \n",
" florilege | \n",
" 1055 | \n",
" False | \n",
" True | \n",
" 1055 | \n",
" 1054 | \n",
" 4f015946bcbd856aa573cadb7ac42b9f | \n",
"
\n",
" \n",
"
\n",
"
403 rows × 12 columns
\n",
"
"
],
"text/plain": [
" id created_at \\\n",
"0 20367 2023-09-13 03:42:45.214293+02:00 \n",
"1 20371 2023-09-13 03:42:45.218728+02:00 \n",
"2 20570 2023-10-05 04:48:29.374504+02:00 \n",
"3 20757 2023-11-01 03:55:20.846196+01:00 \n",
"4 20364 2023-09-13 03:42:45.196791+02:00 \n",
".. ... ... \n",
"398 15603 2023-09-12 17:42:25.327618+02:00 \n",
"399 15621 2023-09-12 17:42:25.335792+02:00 \n",
"400 15740 2023-09-12 17:47:05.112101+02:00 \n",
"401 15520 2023-09-12 17:42:25.290280+02:00 \n",
"402 15439 2023-09-12 17:42:25.252747+02:00 \n",
"\n",
" updated_at season_id facility_id \\\n",
"0 2023-09-13 03:54:30.086969+02:00 1865 1054 \n",
"1 2023-09-13 03:54:30.103943+02:00 1865 1054 \n",
"2 2023-10-05 04:48:36.562528+02:00 1865 1054 \n",
"3 2023-11-01 03:55:28.412457+01:00 1865 1054 \n",
"4 2023-09-13 03:54:30.075456+02:00 1865 1054 \n",
".. ... ... ... \n",
"398 2023-09-12 19:00:00.893400+02:00 1706 1054 \n",
"399 2023-09-12 19:00:00.899622+02:00 1708 1054 \n",
"400 2023-09-12 19:00:00.906123+02:00 1711 1054 \n",
"401 2023-09-12 19:00:00.835625+02:00 1708 1054 \n",
"402 2023-09-12 19:00:00.735990+02:00 1708 1054 \n",
"\n",
" name event_type_id manual_added \\\n",
"0 marelle 1055 False \n",
"1 dialogues 1055 False \n",
"2 les grandes epopees 1055 False \n",
"3 scolaire marelle 1055 False \n",
"4 le couronnement de poppee 1055 False \n",
".. ... ... ... \n",
"398 marelle 1055 False \n",
"399 cartes d'adhesion 1055 False \n",
"400 repetition le medecin malgre lui 1055 False \n",
"401 opera au village 1055 False \n",
"402 florilege 1055 False \n",
"\n",
" is_display event_type_key_id facility_key_id \\\n",
"0 True 1055 1054 \n",
"1 True 1055 1054 \n",
"2 True 1055 1054 \n",
"3 True 1055 1054 \n",
"4 True 1055 1054 \n",
".. ... ... ... \n",
"398 True 1055 1054 \n",
"399 True 1055 1054 \n",
"400 True 1055 1054 \n",
"401 True 1055 1054 \n",
"402 True 1055 1054 \n",
"\n",
" identifier \n",
"0 26d1e9a4acad18b9cf79244334c86c93 \n",
"1 60356fc5e8ed6c9c1be9c5ec67e77766 \n",
"2 f8ab088e06252bf34e1b12ad2ce1a403 \n",
"3 447fa80f9a793b7587bb85ebbda6442c \n",
"4 3b37f5d2cd354cbc422868621ac7ebc2 \n",
".. ... \n",
"398 fde88b72fb82b1fe42fbbfbfc3d6b4d3 \n",
"399 051b96aad2b720bad4450a59ed7dfbf6 \n",
"400 addd6885bea5ddf60ec3539dfc3e79e8 \n",
"401 94f250d10d4a56358ceab23b384439ff \n",
"402 4f015946bcbd856aa573cadb7ac42b9f \n",
"\n",
"[403 rows x 12 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"events"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "6cb04679-26e7-4ed8-bfc1-42285da96374",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"357"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"events['name'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "c10297e8-a8f9-45f9-8553-17e3fdb6f8c1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'serial', 'event_id', 'created_at', 'updated_at',\n",
" 'start_date_time', 'open', 'satisfaction', 'end_date_time', 'name',\n",
" 'is_display', 'representation_type_id', 'expected_filling',\n",
" 'max_filling', 'extra_field', 'identifier'],\n",
" dtype='object')\n",
"(996, 16)\n",
"\n",
"RangeIndex: 996 entries, 0 to 995\n",
"Data columns (total 16 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 996 non-null int64 \n",
" 1 serial 0 non-null float64\n",
" 2 event_id 996 non-null int64 \n",
" 3 created_at 996 non-null object \n",
" 4 updated_at 996 non-null object \n",
" 5 start_date_time 996 non-null object \n",
" 6 open 996 non-null bool \n",
" 7 satisfaction 0 non-null float64\n",
" 8 end_date_time 996 non-null object \n",
" 9 name 0 non-null float64\n",
" 10 is_display 996 non-null bool \n",
" 11 representation_type_id 0 non-null float64\n",
" 12 expected_filling 24 non-null float64\n",
" 13 max_filling 24 non-null float64\n",
" 14 extra_field 0 non-null float64\n",
" 15 identifier 996 non-null object \n",
"dtypes: bool(2), float64(7), int64(2), object(5)\n",
"memory usage: 111.0+ KB\n"
]
}
],
"source": [
"# Représentation des évenements = representations.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11representations.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" representations = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(representations.columns)\n",
"print(representations.shape)\n",
"representations.info()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "41ef6a1b-e99e-4c73-a2ae-ba7d438d90c2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" serial | \n",
" event_id | \n",
" created_at | \n",
" updated_at | \n",
" start_date_time | \n",
" open | \n",
" satisfaction | \n",
" end_date_time | \n",
" name | \n",
" is_display | \n",
" representation_type_id | \n",
" expected_filling | \n",
" max_filling | \n",
" extra_field | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 44351 | \n",
" NaN | \n",
" 20371 | \n",
" 2023-09-13 03:42:45.245879+02:00 | \n",
" 2023-09-13 03:42:45.245879+02:00 | \n",
" 2023-12-21 20:00:00+01:00 | \n",
" True | \n",
" NaN | \n",
" 1901-01-01 00:09:21+00:09 | \n",
" NaN | \n",
" True | \n",
" NaN | \n",
" 550.0 | \n",
" 550.0 | \n",
" NaN | \n",
" 33520762e8cc28982e3841cbc2be8ce2 | \n",
"
\n",
" \n",
" 1 | \n",
" 45497 | \n",
" NaN | \n",
" 20757 | \n",
" 2023-11-01 03:55:20.875712+01:00 | \n",
" 2023-11-01 03:55:20.875712+01:00 | \n",
" 2023-11-28 10:00:00+01:00 | \n",
" True | \n",
" NaN | \n",
" 1901-01-01 00:09:21+00:09 | \n",
" NaN | \n",
" True | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 5c34b84e3d11276e0995d984c94cd28d | \n",
"
\n",
" \n",
" 2 | \n",
" 44383 | \n",
" NaN | \n",
" 20383 | \n",
" 2023-09-13 10:41:08.964302+02:00 | \n",
" 2023-09-13 10:41:08.964302+02:00 | \n",
" 2023-06-04 17:00:00+02:00 | \n",
" True | \n",
" NaN | \n",
" 1901-01-01 00:09:21+00:09 | \n",
" NaN | \n",
" True | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" bf3c65a1dfefbd747dcc2360e6887eac | \n",
"
\n",
" \n",
" 3 | \n",
" 44384 | \n",
" NaN | \n",
" 20383 | \n",
" 2023-09-13 10:41:08.972401+02:00 | \n",
" 2023-09-13 10:41:08.972401+02:00 | \n",
" 2023-06-03 17:30:00+02:00 | \n",
" True | \n",
" NaN | \n",
" 1901-01-01 00:09:21+00:09 | \n",
" NaN | \n",
" True | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" b0e69ae8b78ebab3066aac83de22d239 | \n",
"
\n",
" \n",
" 4 | \n",
" 44385 | \n",
" NaN | \n",
" 20384 | \n",
" 2023-09-13 10:41:08.973290+02:00 | \n",
" 2023-09-13 10:41:08.973290+02:00 | \n",
" 2023-06-03 16:15:00+02:00 | \n",
" True | \n",
" NaN | \n",
" 1901-01-01 00:09:21+00:09 | \n",
" NaN | \n",
" True | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 9fb91c8b1cf9e444111c511e212ac5c1 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 991 | \n",
" 33894 | \n",
" NaN | \n",
" 15647 | \n",
" 2023-09-12 17:42:25.564297+02:00 | \n",
" 2023-09-12 17:42:25.564297+02:00 | \n",
" 2022-11-08 20:00:00+01:00 | \n",
" True | \n",
" NaN | \n",
" 1901-01-01 00:09:21+00:09 | \n",
" NaN | \n",
" True | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 44bbcecfd007ceaad05805391beccabb | \n",
"
\n",
" \n",
" 992 | \n",
" 33873 | \n",
" NaN | \n",
" 15640 | \n",
" 2023-09-12 17:42:25.554863+02:00 | \n",
" 2023-09-12 17:42:25.554863+02:00 | \n",
" 2022-11-14 20:00:00+01:00 | \n",
" True | \n",
" NaN | \n",
" 1901-01-01 00:09:21+00:09 | \n",
" NaN | \n",
" True | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 151edbec8e0a3cd80071038e857f3493 | \n",
"
\n",
" \n",
" 993 | \n",
" 33610 | \n",
" NaN | \n",
" 15520 | \n",
" 2023-09-12 17:42:25.442979+02:00 | \n",
" 2023-09-12 17:42:25.442979+02:00 | \n",
" 2023-06-19 18:00:00+02:00 | \n",
" True | \n",
" NaN | \n",
" 1901-01-01 00:09:21+00:09 | \n",
" NaN | \n",
" True | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 9e9e38d527427e1b6f67e0c3f12b82fc | \n",
"
\n",
" \n",
" 994 | \n",
" 33953 | \n",
" NaN | \n",
" 15520 | \n",
" 2023-09-12 17:42:25.590746+02:00 | \n",
" 2023-09-12 17:42:25.590746+02:00 | \n",
" 2023-06-19 20:00:00+02:00 | \n",
" True | \n",
" NaN | \n",
" 1901-01-01 00:09:21+00:09 | \n",
" NaN | \n",
" True | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 7bf0978aabb6cac1bb4cd2784afb2b6b | \n",
"
\n",
" \n",
" 995 | \n",
" 33639 | \n",
" NaN | \n",
" 15533 | \n",
" 2023-09-12 17:42:25.455708+02:00 | \n",
" 2023-09-12 17:42:25.455708+02:00 | \n",
" 2023-04-15 17:30:00+02:00 | \n",
" True | \n",
" NaN | \n",
" 1901-01-01 00:09:21+00:09 | \n",
" NaN | \n",
" True | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" fae68f1e09710ec8747957af6e22f61d | \n",
"
\n",
" \n",
"
\n",
"
996 rows × 16 columns
\n",
"
"
],
"text/plain": [
" id serial event_id created_at \\\n",
"0 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n",
"1 45497 NaN 20757 2023-11-01 03:55:20.875712+01:00 \n",
"2 44383 NaN 20383 2023-09-13 10:41:08.964302+02:00 \n",
"3 44384 NaN 20383 2023-09-13 10:41:08.972401+02:00 \n",
"4 44385 NaN 20384 2023-09-13 10:41:08.973290+02:00 \n",
".. ... ... ... ... \n",
"991 33894 NaN 15647 2023-09-12 17:42:25.564297+02:00 \n",
"992 33873 NaN 15640 2023-09-12 17:42:25.554863+02:00 \n",
"993 33610 NaN 15520 2023-09-12 17:42:25.442979+02:00 \n",
"994 33953 NaN 15520 2023-09-12 17:42:25.590746+02:00 \n",
"995 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n",
"\n",
" updated_at start_date_time open \\\n",
"0 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n",
"1 2023-11-01 03:55:20.875712+01:00 2023-11-28 10:00:00+01:00 True \n",
"2 2023-09-13 10:41:08.964302+02:00 2023-06-04 17:00:00+02:00 True \n",
"3 2023-09-13 10:41:08.972401+02:00 2023-06-03 17:30:00+02:00 True \n",
"4 2023-09-13 10:41:08.973290+02:00 2023-06-03 16:15:00+02:00 True \n",
".. ... ... ... \n",
"991 2023-09-12 17:42:25.564297+02:00 2022-11-08 20:00:00+01:00 True \n",
"992 2023-09-12 17:42:25.554863+02:00 2022-11-14 20:00:00+01:00 True \n",
"993 2023-09-12 17:42:25.442979+02:00 2023-06-19 18:00:00+02:00 True \n",
"994 2023-09-12 17:42:25.590746+02:00 2023-06-19 20:00:00+02:00 True \n",
"995 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n",
"\n",
" satisfaction end_date_time name is_display \\\n",
"0 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
"1 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
"2 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
"3 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
"4 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
".. ... ... ... ... \n",
"991 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
"992 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
"993 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
"994 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
"995 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
"\n",
" representation_type_id expected_filling max_filling extra_field \\\n",
"0 NaN 550.0 550.0 NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
".. ... ... ... ... \n",
"991 NaN NaN NaN NaN \n",
"992 NaN NaN NaN NaN \n",
"993 NaN NaN NaN NaN \n",
"994 NaN NaN NaN NaN \n",
"995 NaN NaN NaN NaN \n",
"\n",
" identifier \n",
"0 33520762e8cc28982e3841cbc2be8ce2 \n",
"1 5c34b84e3d11276e0995d984c94cd28d \n",
"2 bf3c65a1dfefbd747dcc2360e6887eac \n",
"3 b0e69ae8b78ebab3066aac83de22d239 \n",
"4 9fb91c8b1cf9e444111c511e212ac5c1 \n",
".. ... \n",
"991 44bbcecfd007ceaad05805391beccabb \n",
"992 151edbec8e0a3cd80071038e857f3493 \n",
"993 9e9e38d527427e1b6f67e0c3f12b82fc \n",
"994 7bf0978aabb6cac1bb4cd2784afb2b6b \n",
"995 fae68f1e09710ec8747957af6e22f61d \n",
"\n",
"[996 rows x 16 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"representations"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "ae6cdad3-2184-4ae7-928c-2f8bd7769a5b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'amount', 'is_full_price', 'representation_id',\n",
" 'pricing_formula_id', 'created_at', 'updated_at', 'category_id',\n",
" 'apply_price', 'products_group_id', 'product_pack_id', 'extra_field',\n",
" 'amount_consumption', 'identifier'],\n",
" dtype='object')\n",
"(14648, 14)\n",
"\n",
"RangeIndex: 14648 entries, 0 to 14647\n",
"Data columns (total 14 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 14648 non-null int64 \n",
" 1 amount 14648 non-null float64\n",
" 2 is_full_price 14648 non-null bool \n",
" 3 representation_id 14648 non-null int64 \n",
" 4 pricing_formula_id 14648 non-null int64 \n",
" 5 created_at 14648 non-null object \n",
" 6 updated_at 14648 non-null object \n",
" 7 category_id 14648 non-null int64 \n",
" 8 apply_price 14648 non-null float64\n",
" 9 products_group_id 14648 non-null int64 \n",
" 10 product_pack_id 14648 non-null int64 \n",
" 11 extra_field 0 non-null float64\n",
" 12 amount_consumption 0 non-null float64\n",
" 13 identifier 14648 non-null object \n",
"dtypes: bool(1), float64(4), int64(6), object(3)\n",
"memory usage: 1.5+ MB\n"
]
}
],
"source": [
"# Produits vendues = products.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11products.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" products = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(products.columns)\n",
"print(products.shape)\n",
"products.info()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "34f1825d-148a-4a6e-88d6-61449fee3ee4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" amount | \n",
" is_full_price | \n",
" representation_id | \n",
" pricing_formula_id | \n",
" created_at | \n",
" updated_at | \n",
" category_id | \n",
" apply_price | \n",
" products_group_id | \n",
" product_pack_id | \n",
" extra_field | \n",
" amount_consumption | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 268325 | \n",
" 18.0 | \n",
" False | \n",
" 44332 | \n",
" 20477 | \n",
" 2023-09-13 03:42:45.415594+02:00 | \n",
" 2023-09-13 03:42:45.415594+02:00 | \n",
" 4972 | \n",
" 0.0 | \n",
" 268108 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" b823bbea3ba837da2ef8efaf1287272d | \n",
"
\n",
" \n",
" 1 | \n",
" 274118 | \n",
" 36.8 | \n",
" False | \n",
" 44340 | \n",
" 20502 | \n",
" 2023-10-25 03:26:57.430694+02:00 | \n",
" 2023-10-25 03:26:57.430694+02:00 | \n",
" 4969 | \n",
" 0.0 | \n",
" 273901 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" 81e8b7991f6948e3ef7cfe5011d13532 | \n",
"
\n",
" \n",
" 2 | \n",
" 268338 | \n",
" 39.1 | \n",
" False | \n",
" 44340 | \n",
" 20497 | \n",
" 2023-09-13 03:42:45.430942+02:00 | \n",
" 2023-09-13 03:42:45.430942+02:00 | \n",
" 4969 | \n",
" 0.0 | \n",
" 268121 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" be8bc0399db4d04aefa9f44afd4d5efa | \n",
"
\n",
" \n",
" 3 | \n",
" 209883 | \n",
" 0.0 | \n",
" False | \n",
" 33443 | \n",
" 20475 | \n",
" 2023-09-12 17:42:27.595998+02:00 | \n",
" 2023-09-12 17:42:27.595998+02:00 | \n",
" 4970 | \n",
" 0.0 | \n",
" 209706 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" 01a9eea5f8ad53491faa864bfac44183 | \n",
"
\n",
" \n",
" 4 | \n",
" 268326 | \n",
" 63.0 | \n",
" False | \n",
" 44333 | \n",
" 20477 | \n",
" 2023-09-13 03:42:45.417283+02:00 | \n",
" 2023-09-13 03:42:45.417283+02:00 | \n",
" 4969 | \n",
" 0.0 | \n",
" 268109 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" 781a917ecfdabb14169701d7b143bbe4 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 14643 | \n",
" 217878 | \n",
" 33.6 | \n",
" False | \n",
" 33919 | \n",
" 20489 | \n",
" 2023-09-12 17:51:11.572882+02:00 | \n",
" 2023-09-12 17:51:11.572882+02:00 | \n",
" 4971 | \n",
" 0.0 | \n",
" 217695 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" 82bba69321466069411b3023343b44a4 | \n",
"
\n",
" \n",
" 14644 | \n",
" 268315 | \n",
" 10.0 | \n",
" False | \n",
" 33919 | \n",
" 20504 | \n",
" 2023-09-12 18:59:29.995176+02:00 | \n",
" 2023-09-12 18:59:29.995176+02:00 | \n",
" 4969 | \n",
" 0.0 | \n",
" 268098 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" eae56a8eb0a4315c5713b2053103d595 | \n",
"
\n",
" \n",
" 14645 | \n",
" 210148 | \n",
" 5.0 | \n",
" False | \n",
" 33531 | \n",
" 20473 | \n",
" 2023-09-12 17:42:27.733260+02:00 | \n",
" 2023-09-12 17:42:27.733260+02:00 | \n",
" 4975 | \n",
" 0.0 | \n",
" 209971 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" 449f86c1ef2b478d3389f7d0e27d0e6b | \n",
"
\n",
" \n",
" 14646 | \n",
" 212054 | \n",
" 30.0 | \n",
" False | \n",
" 33810 | \n",
" 20473 | \n",
" 2023-09-12 17:42:28.724681+02:00 | \n",
" 2023-09-12 17:42:28.724681+02:00 | \n",
" 4972 | \n",
" 0.0 | \n",
" 211876 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" 2090203e2c0b58ea8f505089faee6d62 | \n",
"
\n",
" \n",
" 14647 | \n",
" 261922 | \n",
" 21.0 | \n",
" False | \n",
" 33766 | \n",
" 20488 | \n",
" 2023-09-12 18:52:00.519838+02:00 | \n",
" 2023-09-12 18:52:00.519838+02:00 | \n",
" 4972 | \n",
" 0.0 | \n",
" 261709 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" 9139ee36a92bed766ae95372cca77336 | \n",
"
\n",
" \n",
"
\n",
"
14648 rows × 14 columns
\n",
"
"
],
"text/plain": [
" id amount is_full_price representation_id pricing_formula_id \\\n",
"0 268325 18.0 False 44332 20477 \n",
"1 274118 36.8 False 44340 20502 \n",
"2 268338 39.1 False 44340 20497 \n",
"3 209883 0.0 False 33443 20475 \n",
"4 268326 63.0 False 44333 20477 \n",
"... ... ... ... ... ... \n",
"14643 217878 33.6 False 33919 20489 \n",
"14644 268315 10.0 False 33919 20504 \n",
"14645 210148 5.0 False 33531 20473 \n",
"14646 212054 30.0 False 33810 20473 \n",
"14647 261922 21.0 False 33766 20488 \n",
"\n",
" created_at updated_at \\\n",
"0 2023-09-13 03:42:45.415594+02:00 2023-09-13 03:42:45.415594+02:00 \n",
"1 2023-10-25 03:26:57.430694+02:00 2023-10-25 03:26:57.430694+02:00 \n",
"2 2023-09-13 03:42:45.430942+02:00 2023-09-13 03:42:45.430942+02:00 \n",
"3 2023-09-12 17:42:27.595998+02:00 2023-09-12 17:42:27.595998+02:00 \n",
"4 2023-09-13 03:42:45.417283+02:00 2023-09-13 03:42:45.417283+02:00 \n",
"... ... ... \n",
"14643 2023-09-12 17:51:11.572882+02:00 2023-09-12 17:51:11.572882+02:00 \n",
"14644 2023-09-12 18:59:29.995176+02:00 2023-09-12 18:59:29.995176+02:00 \n",
"14645 2023-09-12 17:42:27.733260+02:00 2023-09-12 17:42:27.733260+02:00 \n",
"14646 2023-09-12 17:42:28.724681+02:00 2023-09-12 17:42:28.724681+02:00 \n",
"14647 2023-09-12 18:52:00.519838+02:00 2023-09-12 18:52:00.519838+02:00 \n",
"\n",
" category_id apply_price products_group_id product_pack_id \\\n",
"0 4972 0.0 268108 1 \n",
"1 4969 0.0 273901 1 \n",
"2 4969 0.0 268121 1 \n",
"3 4970 0.0 209706 1 \n",
"4 4969 0.0 268109 1 \n",
"... ... ... ... ... \n",
"14643 4971 0.0 217695 1 \n",
"14644 4969 0.0 268098 1 \n",
"14645 4975 0.0 209971 1 \n",
"14646 4972 0.0 211876 1 \n",
"14647 4972 0.0 261709 1 \n",
"\n",
" extra_field amount_consumption identifier \n",
"0 NaN NaN b823bbea3ba837da2ef8efaf1287272d \n",
"1 NaN NaN 81e8b7991f6948e3ef7cfe5011d13532 \n",
"2 NaN NaN be8bc0399db4d04aefa9f44afd4d5efa \n",
"3 NaN NaN 01a9eea5f8ad53491faa864bfac44183 \n",
"4 NaN NaN 781a917ecfdabb14169701d7b143bbe4 \n",
"... ... ... ... \n",
"14643 NaN NaN 82bba69321466069411b3023343b44a4 \n",
"14644 NaN NaN eae56a8eb0a4315c5713b2053103d595 \n",
"14645 NaN NaN 449f86c1ef2b478d3389f7d0e27d0e6b \n",
"14646 NaN NaN 2090203e2c0b58ea8f505089faee6d62 \n",
"14647 NaN NaN 9139ee36a92bed766ae95372cca77336 \n",
"\n",
"[14648 rows x 14 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "6735b338-26b5-479d-825d-677ea533dad5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'name', 'created_at', 'updated_at', 'street_id', 'fixed_capacity',\n",
" 'identifier'],\n",
" dtype='object')\n",
"(1, 7)\n",
"\n",
"RangeIndex: 1 entries, 0 to 0\n",
"Data columns (total 7 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 1 non-null int64 \n",
" 1 name 0 non-null float64\n",
" 2 created_at 1 non-null object \n",
" 3 updated_at 1 non-null object \n",
" 4 street_id 1 non-null int64 \n",
" 5 fixed_capacity 0 non-null float64\n",
" 6 identifier 1 non-null object \n",
"dtypes: float64(2), int64(2), object(3)\n",
"memory usage: 184.0+ bytes\n"
]
}
],
"source": [
"# Lieu = facilities.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11facilities.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" facilities = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(facilities.columns)\n",
"print(facilities.shape)\n",
"facilities.info()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "428b86c2-50f4-42a5-9bbb-a17ffe820bf9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" name | \n",
" created_at | \n",
" updated_at | \n",
" street_id | \n",
" fixed_capacity | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1054 | \n",
" NaN | \n",
" 2023-09-12 17:42:25.223064+02:00 | \n",
" 2023-09-12 17:42:25.223064+02:00 | \n",
" 1 | \n",
" NaN | \n",
" d41d8cd98f00b204e9800998ecf8427e | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id name created_at \\\n",
"0 1054 NaN 2023-09-12 17:42:25.223064+02:00 \n",
"\n",
" updated_at street_id fixed_capacity \\\n",
"0 2023-09-12 17:42:25.223064+02:00 1 NaN \n",
"\n",
" identifier \n",
"0 d41d8cd98f00b204e9800998ecf8427e "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"facilities"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "f6b26ad5-a4cc-4219-a0b0-406d9b025458",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'name', 'created_at', 'updated_at', 'start_date_time',\n",
" 'identifier'],\n",
" dtype='object')\n",
"(9, 6)\n",
"\n",
"RangeIndex: 9 entries, 0 to 8\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 9 non-null int64 \n",
" 1 name 9 non-null object \n",
" 2 created_at 9 non-null object \n",
" 3 updated_at 9 non-null object \n",
" 4 start_date_time 0 non-null float64\n",
" 5 identifier 9 non-null object \n",
"dtypes: float64(1), int64(1), object(4)\n",
"memory usage: 560.0+ bytes\n"
]
}
],
"source": [
"# Saisons = seasons.csv période sur deux années consécutives\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11seasons.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" seasons = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(seasons.columns)\n",
"print(seasons.shape)\n",
"seasons.info()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "75c8c0ef-4ff5-45b1-a791-8ba2e9a4437e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['saison 2023-2024', 'saison 2021-2022', 'saison 2015-2016',\n",
" 'saison 2016-2017', 'saison 2017-2018', 'saison 2018-2019',\n",
" 'saison 2020-2021', 'saison 2019-2020', 'saison 2022-2023'],\n",
" dtype=object)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"seasons['name'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "cd0d10df-10cc-4f75-8b88-35f676c91f5b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'purchase_date', 'customer_id', 'created_at', 'updated_at',\n",
" 'number', 'identifier'],\n",
" dtype='object')\n",
"(410695, 7)\n",
"\n",
"RangeIndex: 410695 entries, 0 to 410694\n",
"Data columns (total 7 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 410695 non-null int64 \n",
" 1 purchase_date 410695 non-null object \n",
" 2 customer_id 410695 non-null int64 \n",
" 3 created_at 410695 non-null object \n",
" 4 updated_at 410695 non-null object \n",
" 5 number 0 non-null float64\n",
" 6 identifier 410695 non-null object \n",
"dtypes: float64(1), int64(2), object(4)\n",
"memory usage: 21.9+ MB\n"
]
}
],
"source": [
"# Achats = purchases.csv \n",
"FILE_PATH_S3 = 'bdc2324-data/11/11purchases.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" purchases = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(purchases.columns)\n",
"print(purchases.shape)\n",
"purchases.info()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "8f986fdb-ca37-4cbb-b526-2a6d0ce7ca2c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" purchase_date | \n",
" customer_id | \n",
" created_at | \n",
" updated_at | \n",
" number | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 861761 | \n",
" 2019-03-01 16:28:49+01:00 | \n",
" 4966 | \n",
" 2023-09-12 17:42:37.564150+02:00 | \n",
" 2023-09-12 17:42:37.564150+02:00 | \n",
" NaN | \n",
" d20eb0c3a7efec0bbe338dee40dc3378 | \n",
"
\n",
" \n",
" 1 | \n",
" 861762 | \n",
" 2019-03-01 16:29:11+01:00 | \n",
" 4966 | \n",
" 2023-09-12 17:42:37.571159+02:00 | \n",
" 2023-09-12 17:42:37.571159+02:00 | \n",
" NaN | \n",
" cff3abfc018517bce5ccfc58f5cacf40 | \n",
"
\n",
" \n",
" 2 | \n",
" 861763 | \n",
" 2019-03-01 16:29:17+01:00 | \n",
" 4966 | \n",
" 2023-09-12 17:42:37.571646+02:00 | \n",
" 2023-09-12 17:42:37.571646+02:00 | \n",
" NaN | \n",
" e1155cf26b34f792bdb23e49244d7264 | \n",
"
\n",
" \n",
" 3 | \n",
" 861764 | \n",
" 2019-03-01 16:29:19+01:00 | \n",
" 4966 | \n",
" 2023-09-12 17:42:37.572063+02:00 | \n",
" 2023-09-12 17:42:37.572063+02:00 | \n",
" NaN | \n",
" e8b95cc6a1a8b103ffa39755ce3bfc4d | \n",
"
\n",
" \n",
" 4 | \n",
" 861765 | \n",
" 2019-03-01 16:32:08+01:00 | \n",
" 405994 | \n",
" 2023-09-12 17:42:37.572470+02:00 | \n",
" 2023-09-12 17:42:37.572470+02:00 | \n",
" NaN | \n",
" 1b763278914f1309e357abe5033a3f0f | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 410690 | \n",
" 1285964 | \n",
" 2023-10-21 21:46:41+02:00 | \n",
" 517309 | \n",
" 2023-10-23 03:43:16.457501+02:00 | \n",
" 2023-10-23 03:43:16.457501+02:00 | \n",
" NaN | \n",
" 72c4e90c2b151dcffc87b19ea8a0c4f1 | \n",
"
\n",
" \n",
" 410691 | \n",
" 1285965 | \n",
" 2023-10-21 21:47:07+02:00 | \n",
" 517309 | \n",
" 2023-10-23 03:43:16.458458+02:00 | \n",
" 2023-10-23 03:43:16.458458+02:00 | \n",
" NaN | \n",
" ee65532087132145daa6154fbae050ea | \n",
"
\n",
" \n",
" 410692 | \n",
" 1285966 | \n",
" 2023-10-21 21:47:20+02:00 | \n",
" 517309 | \n",
" 2023-10-23 03:43:16.458811+02:00 | \n",
" 2023-10-23 03:43:16.458811+02:00 | \n",
" NaN | \n",
" 7e825dd352bc6a11ab81cb8068e325e6 | \n",
"
\n",
" \n",
" 410693 | \n",
" 1285967 | \n",
" 2023-10-21 23:07:06+02:00 | \n",
" 399969 | \n",
" 2023-10-23 03:43:16.459738+02:00 | \n",
" 2023-10-23 03:43:16.459738+02:00 | \n",
" NaN | \n",
" fdb92627a48d6ba8fa817d60a83dbea8 | \n",
"
\n",
" \n",
" 410694 | \n",
" 1285968 | \n",
" 2023-10-21 23:07:39+02:00 | \n",
" 399969 | \n",
" 2023-10-23 03:43:16.462409+02:00 | \n",
" 2023-10-23 03:43:16.462409+02:00 | \n",
" NaN | \n",
" e9dbaff4f7037a5b0efa11263584dfad | \n",
"
\n",
" \n",
"
\n",
"
410695 rows × 7 columns
\n",
"
"
],
"text/plain": [
" id purchase_date customer_id \\\n",
"0 861761 2019-03-01 16:28:49+01:00 4966 \n",
"1 861762 2019-03-01 16:29:11+01:00 4966 \n",
"2 861763 2019-03-01 16:29:17+01:00 4966 \n",
"3 861764 2019-03-01 16:29:19+01:00 4966 \n",
"4 861765 2019-03-01 16:32:08+01:00 405994 \n",
"... ... ... ... \n",
"410690 1285964 2023-10-21 21:46:41+02:00 517309 \n",
"410691 1285965 2023-10-21 21:47:07+02:00 517309 \n",
"410692 1285966 2023-10-21 21:47:20+02:00 517309 \n",
"410693 1285967 2023-10-21 23:07:06+02:00 399969 \n",
"410694 1285968 2023-10-21 23:07:39+02:00 399969 \n",
"\n",
" created_at updated_at \\\n",
"0 2023-09-12 17:42:37.564150+02:00 2023-09-12 17:42:37.564150+02:00 \n",
"1 2023-09-12 17:42:37.571159+02:00 2023-09-12 17:42:37.571159+02:00 \n",
"2 2023-09-12 17:42:37.571646+02:00 2023-09-12 17:42:37.571646+02:00 \n",
"3 2023-09-12 17:42:37.572063+02:00 2023-09-12 17:42:37.572063+02:00 \n",
"4 2023-09-12 17:42:37.572470+02:00 2023-09-12 17:42:37.572470+02:00 \n",
"... ... ... \n",
"410690 2023-10-23 03:43:16.457501+02:00 2023-10-23 03:43:16.457501+02:00 \n",
"410691 2023-10-23 03:43:16.458458+02:00 2023-10-23 03:43:16.458458+02:00 \n",
"410692 2023-10-23 03:43:16.458811+02:00 2023-10-23 03:43:16.458811+02:00 \n",
"410693 2023-10-23 03:43:16.459738+02:00 2023-10-23 03:43:16.459738+02:00 \n",
"410694 2023-10-23 03:43:16.462409+02:00 2023-10-23 03:43:16.462409+02:00 \n",
"\n",
" number identifier \n",
"0 NaN d20eb0c3a7efec0bbe338dee40dc3378 \n",
"1 NaN cff3abfc018517bce5ccfc58f5cacf40 \n",
"2 NaN e1155cf26b34f792bdb23e49244d7264 \n",
"3 NaN e8b95cc6a1a8b103ffa39755ce3bfc4d \n",
"4 NaN 1b763278914f1309e357abe5033a3f0f \n",
"... ... ... \n",
"410690 NaN 72c4e90c2b151dcffc87b19ea8a0c4f1 \n",
"410691 NaN ee65532087132145daa6154fbae050ea \n",
"410692 NaN 7e825dd352bc6a11ab81cb8068e325e6 \n",
"410693 NaN fdb92627a48d6ba8fa817d60a83dbea8 \n",
"410694 NaN e9dbaff4f7037a5b0efa11263584dfad \n",
"\n",
"[410695 rows x 7 columns]"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}