diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb
index b2c2018..99d5ea7 100644
--- a/0_Cleaning_and_merge.ipynb
+++ b/0_Cleaning_and_merge.ipynb
@@ -79,7 +79,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_3658/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ "/tmp/ipykernel_15815/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in)\n"
]
}
@@ -242,17 +242,17 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_3658/1591303091.py:5: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_15815/1591303091.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n",
- "/tmp/ipykernel_3658/1591303091.py:9: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_15815/1591303091.py:9: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n",
- "/tmp/ipykernel_3658/1591303091.py:13: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_15815/1591303091.py:13: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@@ -386,169 +386,6 @@
"df1_ticket_information.head()"
]
},
- {
- "cell_type": "markdown",
- "id": "37499eae-1a7f-4dce-83b0-ff942ccf7a9d",
- "metadata": {},
- "source": [
- "### KPI tickets"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "043303fe-e90f-4689-a2a9-5d690555a045",
- "metadata": {},
- "outputs": [],
- "source": [
- "def tickets_kpi_function(tickets_information = None):\n",
- " tickets_information_copy = tickets_information.copy()\n",
- " tickets_information_copy['purchase_date_max'] = tickets_information_copy['purchase_date']\n",
- " tickets_kpi = (tickets_information_copy[['product_id', 'customer_id', 'ticket_id','supplier_name', 'purchase_date', 'purchase_date_max']]\n",
- " .groupby(['product_id', 'customer_id'])\n",
- " .agg({'ticket_id': 'count', \n",
- " 'supplier_name': 'nunique',\n",
- " 'purchase_date_max' : 'max',\n",
- " 'purchase_date' : 'min'})\n",
- " .reset_index()\n",
- " )\n",
- " \n",
- " tickets_kpi.rename(columns = {'ticket_id' : 'nb_tickets', \n",
- " 'supplier_name' : 'nb_suppliers', \n",
- " 'purchase_date' : 'purchase_date_min'}, inplace = True)\n",
- " \n",
- " tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']\n",
- " \n",
- " return tickets_kpi\n",
- " "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "id": "5882234a-1ed5-4269-87a6-0d75613476e3",
- "metadata": {},
- "outputs": [],
- "source": [
- "df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_ticket_information)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " product_id | \n",
- " customer_id | \n",
- " nb_tickets | \n",
- " nb_suppliers | \n",
- " purchase_date_max | \n",
- " purchase_date_min | \n",
- " time_between_purchase | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 107310 | \n",
- " 2805 | \n",
- " 4 | \n",
- " 2 | \n",
- " 2019-06-05 14:37:13+00:00 | \n",
- " 2019-06-05 14:18:38+00:00 | \n",
- " 0 days 00:18:35 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 110089 | \n",
- " 54355 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2017-02-17 13:32:51+00:00 | \n",
- " 2017-02-17 13:32:51+00:00 | \n",
- " 0 days 00:00:00 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 110089 | \n",
- " 54356 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2017-03-02 14:36:16+00:00 | \n",
- " 2017-03-02 14:36:16+00:00 | \n",
- " 0 days 00:00:00 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 110089 | \n",
- " 54357 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2017-03-06 15:16:41+00:00 | \n",
- " 2017-03-06 15:16:41+00:00 | \n",
- " 0 days 00:00:00 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 110089 | \n",
- " 54358 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2017-03-13 16:07:27+00:00 | \n",
- " 2017-03-13 16:07:27+00:00 | \n",
- " 0 days 00:00:00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " product_id customer_id nb_tickets nb_suppliers \\\n",
- "0 107310 2805 4 2 \n",
- "1 110089 54355 1 1 \n",
- "2 110089 54356 1 1 \n",
- "3 110089 54357 1 1 \n",
- "4 110089 54358 1 1 \n",
- "\n",
- " purchase_date_max purchase_date_min time_between_purchase \n",
- "0 2019-06-05 14:37:13+00:00 2019-06-05 14:18:38+00:00 0 days 00:18:35 \n",
- "1 2017-02-17 13:32:51+00:00 2017-02-17 13:32:51+00:00 0 days 00:00:00 \n",
- "2 2017-03-02 14:36:16+00:00 2017-03-02 14:36:16+00:00 0 days 00:00:00 \n",
- "3 2017-03-06 15:16:41+00:00 2017-03-06 15:16:41+00:00 0 days 00:00:00 \n",
- "4 2017-03-13 16:07:27+00:00 2017-03-13 16:07:27+00:00 0 days 00:00:00 "
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df1_tickets_kpi.head()"
- ]
- },
{
"cell_type": "markdown",
"id": "096e47f4-1d65-4575-989d-83227eedad2b",
@@ -559,7 +396,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 11,
"id": "baed146a-9d3a-4397-a812-3d50c9a2f038",
"metadata": {},
"outputs": [],
@@ -588,7 +425,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 12,
"id": "5fbfd88b-b94c-489c-9201-670e96e453e7",
"metadata": {},
"outputs": [
@@ -596,7 +433,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_3658/3848597476.py:4: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_15815/3848597476.py:4: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@@ -610,7 +447,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 13,
"id": "b4f05142-2a22-42ef-a60d-f23cc4b5cb09",
"metadata": {},
"outputs": [
@@ -677,7 +514,7 @@
"consentement optout b2c 34523"
]
},
- "execution_count": 16,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -688,7 +525,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 14,
"id": "4417ff51-f501-4ab9-a192-4ab75764a8ed",
"metadata": {
"scrolled": true
@@ -757,7 +594,7 @@
"DDCP MD Procès du Siècle 1684"
]
},
- "execution_count": 17,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -777,7 +614,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 15,
"id": "d883cc7b-ac43-4485-b86f-eaf595fbad85",
"metadata": {},
"outputs": [],
@@ -802,7 +639,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 16,
"id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f",
"metadata": {},
"outputs": [
@@ -810,19 +647,19 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_3658/1967867975.py:15: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_15815/1967867975.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
- "/tmp/ipykernel_3658/1967867975.py:15: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_15815/1967867975.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
- "/tmp/ipykernel_3658/1967867975.py:15: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_15815/1967867975.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@@ -837,7 +674,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 17,
"id": "c24457e7-3cad-451a-a65b-7373b656bd6e",
"metadata": {
"scrolled": true
@@ -957,7 +794,7 @@
"4 404 2021-03-27 23:00:00+00:00 "
]
},
- "execution_count": 20,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -968,7 +805,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 18,
"id": "e2c88552-b863-47a2-be23-8d2898fb28bc",
"metadata": {},
"outputs": [],
@@ -1002,7 +839,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 19,
"id": "24537647-bc29-4777-9848-ac4120a4aa60",
"metadata": {},
"outputs": [
@@ -1010,7 +847,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_3658/3700263836.py:11: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_15815/3700263836.py:11: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@@ -1024,7 +861,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 20,
"id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3",
"metadata": {},
"outputs": [
@@ -1104,7 +941,7 @@
"4 6 20 0.0 NaT"
]
},
- "execution_count": 23,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -1131,7 +968,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 21,
"id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d",
"metadata": {},
"outputs": [],
@@ -1142,7 +979,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 22,
"id": "607eb4b4-eed9-4b50-b823-f75c116dd37c",
"metadata": {},
"outputs": [],
@@ -1213,7 +1050,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 23,
"id": "350b09b9-451f-4d47-81fe-f34b892db027",
"metadata": {},
"outputs": [],
@@ -1301,7 +1138,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 24,
"id": "0fccc8ef-e575-4857-a401-94a7274394df",
"metadata": {},
"outputs": [
@@ -1454,7 +1291,7 @@
"4 indiv entrées tp "
]
},
- "execution_count": 27,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -1466,7 +1303,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 25,
"id": "779d8aaf-6668-4f66-8852-847304407ea3",
"metadata": {},
"outputs": [
@@ -1636,7 +1473,7 @@
"4 spectacle vivant mucem "
]
},
- "execution_count": 28,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -1648,7 +1485,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 26,
"id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0",
"metadata": {},
"outputs": [
@@ -1747,7 +1584,7 @@
"4 37 383 269 1"
]
},
- "execution_count": 29,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
@@ -1767,7 +1604,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 27,
"id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba",
"metadata": {},
"outputs": [],
@@ -1789,13 +1626,13 @@
" products_global = order_columns_id(products_global)\n",
"\n",
" # remove useless columns \n",
- " products_global = products_global.drop(columns = ['type_of_id', 'name_events', 'name_seasons', 'name_categories'])\n",
+ " products_global = products_global.drop(columns = ['type_of_id']) # 'name_events', 'name_seasons', 'name_categories'\n",
" return products_global"
]
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 28,
"id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951",
"metadata": {},
"outputs": [
@@ -1849,12 +1686,15 @@
" id_representation_cap | \n",
" season_id | \n",
" facility_id | \n",
- " event_type_id | \n",
+ " ... | \n",
" event_type_key_id | \n",
" facility_key_id | \n",
" street_id | \n",
" amount | \n",
" is_full_price | \n",
+ " name_categories | \n",
+ " name_events | \n",
+ " name_seasons | \n",
" name_event_types | \n",
" name_facilities | \n",
" \n",
@@ -1872,12 +1712,15 @@
" 8789 | \n",
" 4 | \n",
" 1 | \n",
- " 2 | \n",
+ " ... | \n",
" 5 | \n",
" 1 | \n",
" 1 | \n",
" 9.0 | \n",
" False | \n",
+ " indiv activité tr | \n",
+ " visite-jeu \"le classico des minots\" (1h30) | \n",
+ " 2017 | \n",
" offre muséale individuel | \n",
" mucem | \n",
" \n",
@@ -1893,12 +1736,15 @@
" 390 | \n",
" 2 | \n",
" 1 | \n",
- " 2 | \n",
+ " ... | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" 9.5 | \n",
" False | \n",
+ " indiv entrées tp | \n",
+ " billet mucem picasso | \n",
+ " 2016 | \n",
" offre muséale individuel | \n",
" mucem | \n",
" \n",
@@ -1914,12 +1760,15 @@
" 395 | \n",
" 2 | \n",
" 1 | \n",
- " 2 | \n",
+ " ... | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" 11.5 | \n",
" False | \n",
+ " indiv entrées tp | \n",
+ " billet mucem picasso | \n",
+ " 2016 | \n",
" offre muséale individuel | \n",
" mucem | \n",
" \n",
@@ -1935,12 +1784,15 @@
" 120199 | \n",
" 1754 | \n",
" 1 | \n",
- " 2 | \n",
+ " ... | \n",
" 4 | \n",
" 1 | \n",
" 1 | \n",
" 8.0 | \n",
" False | \n",
+ " indiv entrées tr | \n",
+ " NaN | \n",
+ " NaN | \n",
" offre muséale individuel | \n",
" mucem | \n",
" \n",
@@ -1956,17 +1808,21 @@
" 21 | \n",
" 4 | \n",
" 1 | \n",
- " 3 | \n",
+ " ... | \n",
" 6 | \n",
" 1 | \n",
" 1 | \n",
" 8.5 | \n",
" False | \n",
+ " indiv entrées tp | \n",
+ " non défini | \n",
+ " 2017 | \n",
" non défini | \n",
" mucem | \n",
" \n",
" \n",
"\n",
+ "5 rows × 21 columns
\n",
""
],
"text/plain": [
@@ -1984,19 +1840,114 @@
"3 156773 1 12365 120199 \n",
"4 1175 1 8 21 \n",
"\n",
- " season_id facility_id event_type_id event_type_key_id facility_key_id \\\n",
- "0 4 1 2 5 1 \n",
- "1 2 1 2 2 1 \n",
- "2 2 1 2 2 1 \n",
- "3 1754 1 2 4 1 \n",
- "4 4 1 3 6 1 \n",
+ " season_id facility_id ... event_type_key_id facility_key_id street_id \\\n",
+ "0 4 1 ... 5 1 1 \n",
+ "1 2 1 ... 2 1 1 \n",
+ "2 2 1 ... 2 1 1 \n",
+ "3 1754 1 ... 4 1 1 \n",
+ "4 4 1 ... 6 1 1 \n",
"\n",
- " street_id amount is_full_price name_event_types name_facilities \n",
- "0 1 9.0 False offre muséale individuel mucem \n",
- "1 1 9.5 False offre muséale individuel mucem \n",
- "2 1 11.5 False offre muséale individuel mucem \n",
- "3 1 8.0 False offre muséale individuel mucem \n",
- "4 1 8.5 False non défini mucem "
+ " amount is_full_price name_categories \\\n",
+ "0 9.0 False indiv activité tr \n",
+ "1 9.5 False indiv entrées tp \n",
+ "2 11.5 False indiv entrées tp \n",
+ "3 8.0 False indiv entrées tr \n",
+ "4 8.5 False indiv entrées tp \n",
+ "\n",
+ " name_events name_seasons \\\n",
+ "0 visite-jeu \"le classico des minots\" (1h30) 2017 \n",
+ "1 billet mucem picasso 2016 \n",
+ "2 billet mucem picasso 2016 \n",
+ "3 NaN NaN \n",
+ "4 non défini 2017 \n",
+ "\n",
+ " name_event_types name_facilities \n",
+ "0 offre muséale individuel mucem \n",
+ "1 offre muséale individuel mucem \n",
+ "2 offre muséale individuel mucem \n",
+ "3 offre muséale individuel mucem \n",
+ "4 non défini mucem \n",
+ "\n",
+ "[5 rows x 21 columns]"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "products_global = uniform_product_df()\n",
+ "products_global.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "98f78cd5-b694-4cc6-b033-20170aa13e8d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Merge ticket information with product details (inner join: product_id = id_products)\n",
+ "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "52db7bcb-3fb7-48e5-b612-4e22bdab4a94",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d4dcfbe0-c6ce-497e-b75e-dc9e938801b2",
+ "metadata": {},
+ "source": [
+ "### KPI tickets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "665a5925-9c0e-425a-8f11-c33a0a9ec444",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['ticket_id', 'product_id', 'is_from_subscription', 'supplier_name',\n",
+ " 'type_of_ticket_name', 'children', 'purchase_date', 'customer_id',\n",
+ " 'id_products', 'representation_id', 'pricing_formula_id', 'category_id',\n",
+ " 'products_group_id', 'product_pack_id', 'event_id',\n",
+ " 'id_representation_cap', 'season_id', 'facility_id', 'event_type_id',\n",
+ " 'event_type_key_id', 'facility_key_id', 'street_id', 'amount',\n",
+ " 'is_full_price', 'name_categories', 'name_events', 'name_seasons',\n",
+ " 'name_event_types', 'name_facilities'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_products_purchased.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "b913a69e-3146-4919-b5f6-a6108532bffa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['spectacle vivant', 'offre muséale individuel', 'formule adhésion',\n",
+ " 'offre muséale groupe'], dtype=object)"
]
},
"execution_count": 31,
@@ -2005,8 +1956,819 @@
}
],
"source": [
- "products_global = uniform_product_df()\n",
- "products_global.head()"
+ "df1_products_purchased['name_event_types'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "e01e8cf9-1187-4a4b-993d-b7b4321cd8f0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "3d8b0875-b409-44ce-b688-d9d6758782d3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ticket_id | \n",
+ " customer_id | \n",
+ " event_type_id | \n",
+ " supplier_name | \n",
+ " purchase_date | \n",
+ " type_of_ticket_name | \n",
+ " amount | \n",
+ " children | \n",
+ " is_full_price | \n",
+ " name_event_types | \n",
+ " name_facilities | \n",
+ " name_categories | \n",
+ " name_events | \n",
+ " name_seasons | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 13070859 | \n",
+ " 48187 | \n",
+ " 4 | \n",
+ " vente en ligne | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " Atelier | \n",
+ " 8.0 | \n",
+ " pricing_formula | \n",
+ " False | \n",
+ " spectacle vivant | \n",
+ " mucem | \n",
+ " indiv prog enfant | \n",
+ " l'école des magiciens | \n",
+ " 2018 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 13070855 | \n",
+ " 48187 | \n",
+ " 4 | \n",
+ " vente en ligne | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " Atelier | \n",
+ " 8.0 | \n",
+ " pricing_formula | \n",
+ " False | \n",
+ " spectacle vivant | \n",
+ " mucem | \n",
+ " indiv prog enfant | \n",
+ " l'école des magiciens | \n",
+ " 2018 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13070856 | \n",
+ " 48187 | \n",
+ " 4 | \n",
+ " vente en ligne | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " Atelier | \n",
+ " 8.0 | \n",
+ " pricing_formula | \n",
+ " False | \n",
+ " spectacle vivant | \n",
+ " mucem | \n",
+ " indiv prog enfant | \n",
+ " l'école des magiciens | \n",
+ " 2018 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 13070857 | \n",
+ " 48187 | \n",
+ " 4 | \n",
+ " vente en ligne | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " Atelier | \n",
+ " 8.0 | \n",
+ " pricing_formula | \n",
+ " False | \n",
+ " spectacle vivant | \n",
+ " mucem | \n",
+ " indiv prog enfant | \n",
+ " l'école des magiciens | \n",
+ " 2018 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 13070858 | \n",
+ " 48187 | \n",
+ " 4 | \n",
+ " vente en ligne | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " Atelier | \n",
+ " 8.0 | \n",
+ " pricing_formula | \n",
+ " False | \n",
+ " spectacle vivant | \n",
+ " mucem | \n",
+ " indiv prog enfant | \n",
+ " l'école des magiciens | \n",
+ " 2018 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1826667 | \n",
+ " 18643494 | \n",
+ " 81 | \n",
+ " 4 | \n",
+ " vad | \n",
+ " 2022-08-02 12:18:16+00:00 | \n",
+ " Billet en nombre | \n",
+ " 11.0 | \n",
+ " pricing_formula | \n",
+ " False | \n",
+ " spectacle vivant | \n",
+ " mucem | \n",
+ " en nb entrées tr | \n",
+ " NaN | \n",
+ " 2022 | \n",
+ "
\n",
+ " \n",
+ " 1826668 | \n",
+ " 18643495 | \n",
+ " 81 | \n",
+ " 4 | \n",
+ " vad | \n",
+ " 2022-08-02 12:18:16+00:00 | \n",
+ " Billet en nombre | \n",
+ " 11.0 | \n",
+ " pricing_formula | \n",
+ " False | \n",
+ " spectacle vivant | \n",
+ " mucem | \n",
+ " en nb entrées tr | \n",
+ " NaN | \n",
+ " 2022 | \n",
+ "
\n",
+ " \n",
+ " 1826669 | \n",
+ " 18643496 | \n",
+ " 81 | \n",
+ " 4 | \n",
+ " vad | \n",
+ " 2022-08-02 12:18:16+00:00 | \n",
+ " Billet en nombre | \n",
+ " 11.0 | \n",
+ " pricing_formula | \n",
+ " False | \n",
+ " spectacle vivant | \n",
+ " mucem | \n",
+ " en nb entrées tr | \n",
+ " NaN | \n",
+ " 2022 | \n",
+ "
\n",
+ " \n",
+ " 1826670 | \n",
+ " 18643497 | \n",
+ " 81 | \n",
+ " 4 | \n",
+ " vad | \n",
+ " 2022-08-02 12:18:16+00:00 | \n",
+ " Billet en nombre | \n",
+ " 11.0 | \n",
+ " pricing_formula | \n",
+ " False | \n",
+ " spectacle vivant | \n",
+ " mucem | \n",
+ " en nb entrées tr | \n",
+ " NaN | \n",
+ " 2022 | \n",
+ "
\n",
+ " \n",
+ " 1826671 | \n",
+ " 19853111 | \n",
+ " 62763 | \n",
+ " 4 | \n",
+ " vad | \n",
+ " 2022-11-04 14:25:42+00:00 | \n",
+ " Billet en nombre | \n",
+ " 0.0 | \n",
+ " pricing_formula | \n",
+ " False | \n",
+ " spectacle vivant | \n",
+ " mucem | \n",
+ " indiv entrées gr | \n",
+ " NaN | \n",
+ " 2022 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1826672 rows × 14 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ticket_id customer_id event_type_id supplier_name \\\n",
+ "0 13070859 48187 4 vente en ligne \n",
+ "1 13070855 48187 4 vente en ligne \n",
+ "2 13070856 48187 4 vente en ligne \n",
+ "3 13070857 48187 4 vente en ligne \n",
+ "4 13070858 48187 4 vente en ligne \n",
+ "... ... ... ... ... \n",
+ "1826667 18643494 81 4 vad \n",
+ "1826668 18643495 81 4 vad \n",
+ "1826669 18643496 81 4 vad \n",
+ "1826670 18643497 81 4 vad \n",
+ "1826671 19853111 62763 4 vad \n",
+ "\n",
+ " purchase_date type_of_ticket_name amount \\\n",
+ "0 2018-12-28 14:47:50+00:00 Atelier 8.0 \n",
+ "1 2018-12-28 14:47:50+00:00 Atelier 8.0 \n",
+ "2 2018-12-28 14:47:50+00:00 Atelier 8.0 \n",
+ "3 2018-12-28 14:47:50+00:00 Atelier 8.0 \n",
+ "4 2018-12-28 14:47:50+00:00 Atelier 8.0 \n",
+ "... ... ... ... \n",
+ "1826667 2022-08-02 12:18:16+00:00 Billet en nombre 11.0 \n",
+ "1826668 2022-08-02 12:18:16+00:00 Billet en nombre 11.0 \n",
+ "1826669 2022-08-02 12:18:16+00:00 Billet en nombre 11.0 \n",
+ "1826670 2022-08-02 12:18:16+00:00 Billet en nombre 11.0 \n",
+ "1826671 2022-11-04 14:25:42+00:00 Billet en nombre 0.0 \n",
+ "\n",
+ " children is_full_price name_event_types name_facilities \\\n",
+ "0 pricing_formula False spectacle vivant mucem \n",
+ "1 pricing_formula False spectacle vivant mucem \n",
+ "2 pricing_formula False spectacle vivant mucem \n",
+ "3 pricing_formula False spectacle vivant mucem \n",
+ "4 pricing_formula False spectacle vivant mucem \n",
+ "... ... ... ... ... \n",
+ "1826667 pricing_formula False spectacle vivant mucem \n",
+ "1826668 pricing_formula False spectacle vivant mucem \n",
+ "1826669 pricing_formula False spectacle vivant mucem \n",
+ "1826670 pricing_formula False spectacle vivant mucem \n",
+ "1826671 pricing_formula False spectacle vivant mucem \n",
+ "\n",
+ " name_categories name_events name_seasons \n",
+ "0 indiv prog enfant l'école des magiciens 2018 \n",
+ "1 indiv prog enfant l'école des magiciens 2018 \n",
+ "2 indiv prog enfant l'école des magiciens 2018 \n",
+ "3 indiv prog enfant l'école des magiciens 2018 \n",
+ "4 indiv prog enfant l'école des magiciens 2018 \n",
+ "... ... ... ... \n",
+ "1826667 en nb entrées tr NaN 2022 \n",
+ "1826668 en nb entrées tr NaN 2022 \n",
+ "1826669 en nb entrées tr NaN 2022 \n",
+ "1826670 en nb entrées tr NaN 2022 \n",
+ "1826671 indiv entrées gr NaN 2022 \n",
+ "\n",
+ "[1826672 rows x 14 columns]"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Supplier importance -- TODO: analysis not written yet; this cell only displays the reduced purchases table\n",
+ "df1_products_purchased_reduced"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "2bda0b97-b28b-4070-a57d-aeab0e2f7dfe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Number of distinct event types per customer (to identify customers attending more than 2 event types)\n",
+ "nb_event_types = df1_products_purchased_reduced[['customer_id', 'name_event_types']].groupby('customer_id').nunique()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "043303fe-e90f-4689-a2a9-5d690555a045",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def tickets_kpi_function(tickets_information = None):\n",
+ " tickets_information_copy = tickets_information.copy()\n",
+ " tickets_information_copy['purchase_date_max'] = tickets_information_copy['purchase_date']\n",
+ " tickets_kpi = (tickets_information_copy[['event_type_id', 'customer_id', 'ticket_id','supplier_name', 'purchase_date', 'purchase_date_max', 'amount']]\n",
+ " .groupby([ 'customer_id']) # 'event_type_id',\n",
+ " .agg({'ticket_id': 'count', \n",
+ " 'amount' : 'sum',\n",
+ " 'supplier_name': 'nunique',\n",
+ " 'purchase_date_max' : 'max',\n",
+ " 'purchase_date' : 'min'})\n",
+ " .reset_index()\n",
+ " )\n",
+ " \n",
+ " tickets_kpi.rename(columns = {'ticket_id' : 'nb_tickets', \n",
+ " 'amount' : 'total_amount',\n",
+ " 'supplier_name' : 'nb_suppliers', \n",
+ " 'purchase_date' : 'purchase_date_min'}, inplace = True)\n",
+ " \n",
+ " tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']\n",
+ " \n",
+ " return tickets_kpi\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "5882234a-1ed5-4269-87a6-0d75613476e3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_products_purchased_reduced)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " nb_tickets | \n",
+ " total_amount | \n",
+ " nb_suppliers | \n",
+ " purchase_date_max | \n",
+ " purchase_date_min | \n",
+ " time_between_purchase | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1256574 | \n",
+ " 8830567.5 | \n",
+ " 7 | \n",
+ " 2023-11-08 15:59:45+00:00 | \n",
+ " 2013-06-10 10:37:58+00:00 | \n",
+ " 3803 days 05:21:47 | \n",
+ "
\n",
+ " \n",
+ " 3615 | \n",
+ " 6733 | \n",
+ " 35527 | \n",
+ " 1188.0 | \n",
+ " 4 | \n",
+ " 2023-11-03 09:42:40+00:00 | \n",
+ " 2015-09-09 13:48:38+00:00 | \n",
+ " 2976 days 19:54:02 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " 41 | \n",
+ " 16263 | \n",
+ " 37642.0 | \n",
+ " 6 | \n",
+ " 2023-10-25 09:13:16+00:00 | \n",
+ " 2014-01-23 16:56:57+00:00 | \n",
+ " 3561 days 16:16:19 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 12 | \n",
+ " 5871 | \n",
+ " 38767.0 | \n",
+ " 2 | \n",
+ " 2023-11-04 13:46:59+00:00 | \n",
+ " 2018-04-04 07:46:31+00:00 | \n",
+ " 2040 days 06:00:28 | \n",
+ "
\n",
+ " \n",
+ " 32809 | \n",
+ " 63488 | \n",
+ " 5851 | \n",
+ " 64350.0 | \n",
+ " 1 | \n",
+ " 2022-08-25 13:08:38+00:00 | \n",
+ " 2020-08-18 08:32:57+00:00 | \n",
+ " 737 days 04:35:41 | \n",
+ "
\n",
+ " \n",
+ " 3708 | \n",
+ " 6916 | \n",
+ " 5482 | \n",
+ " 51489.5 | \n",
+ " 2 | \n",
+ " 2021-08-26 12:49:17+00:00 | \n",
+ " 2018-03-26 11:13:43+00:00 | \n",
+ " 1249 days 01:35:34 | \n",
+ "
\n",
+ " \n",
+ " 32616 | \n",
+ " 63194 | \n",
+ " 4507 | \n",
+ " 13232.0 | \n",
+ " 3 | \n",
+ " 2022-09-07 12:55:33+00:00 | \n",
+ " 2017-11-28 13:52:15+00:00 | \n",
+ " 1743 days 23:03:18 | \n",
+ "
\n",
+ " \n",
+ " 78 | \n",
+ " 81 | \n",
+ " 3562 | \n",
+ " 38746.0 | \n",
+ " 1 | \n",
+ " 2022-08-30 11:51:34+00:00 | \n",
+ " 2017-01-05 13:04:58+00:00 | \n",
+ " 2062 days 22:46:36 | \n",
+ "
\n",
+ " \n",
+ " 35295 | \n",
+ " 84002 | \n",
+ " 3403 | \n",
+ " 19830.0 | \n",
+ " 4 | \n",
+ " 2023-11-06 15:59:22+00:00 | \n",
+ " 2021-05-28 10:22:33+00:00 | \n",
+ " 892 days 05:36:49 | \n",
+ "
\n",
+ " \n",
+ " 3377 | \n",
+ " 5618 | \n",
+ " 3294 | \n",
+ " 31684.5 | \n",
+ " 1 | \n",
+ " 2022-02-24 07:47:20+00:00 | \n",
+ " 2018-10-25 11:04:24+00:00 | \n",
+ " 1217 days 20:42:56 | \n",
+ "
\n",
+ " \n",
+ " 30011 | \n",
+ " 59259 | \n",
+ " 2591 | \n",
+ " 4350.0 | \n",
+ " 3 | \n",
+ " 2023-06-12 14:05:19+00:00 | \n",
+ " 2019-11-25 08:52:48+00:00 | \n",
+ " 1295 days 05:12:31 | \n",
+ "
\n",
+ " \n",
+ " 34937 | \n",
+ " 74876 | \n",
+ " 2571 | \n",
+ " 2600.0 | \n",
+ " 2 | \n",
+ " 2023-10-02 08:13:05+00:00 | \n",
+ " 2018-02-08 12:54:01+00:00 | \n",
+ " 2061 days 19:19:04 | \n",
+ "
\n",
+ " \n",
+ " 270 | \n",
+ " 295 | \n",
+ " 2570 | \n",
+ " 17678.5 | \n",
+ " 6 | \n",
+ " 2023-10-16 10:19:22+00:00 | \n",
+ " 2014-01-24 15:16:17+00:00 | \n",
+ " 3551 days 19:03:05 | \n",
+ "
\n",
+ " \n",
+ " 866 | \n",
+ " 1221 | \n",
+ " 2320 | \n",
+ " 9652.0 | \n",
+ " 2 | \n",
+ " 2022-09-19 12:55:15+00:00 | \n",
+ " 2017-03-29 08:00:09+00:00 | \n",
+ " 2000 days 04:55:06 | \n",
+ "
\n",
+ " \n",
+ " 1022 | \n",
+ " 1429 | \n",
+ " 2249 | \n",
+ " 3500.0 | \n",
+ " 4 | \n",
+ " 2023-11-06 08:30:37+00:00 | \n",
+ " 2014-12-03 14:56:38+00:00 | \n",
+ " 3259 days 17:33:59 | \n",
+ "
\n",
+ " \n",
+ " 3922 | \n",
+ " 7249 | \n",
+ " 1827 | \n",
+ " 13385.0 | \n",
+ " 1 | \n",
+ " 2021-10-26 12:28:40+00:00 | \n",
+ " 2019-05-07 12:34:56+00:00 | \n",
+ " 902 days 23:53:44 | \n",
+ "
\n",
+ " \n",
+ " 54425 | \n",
+ " 1070539 | \n",
+ " 1800 | \n",
+ " 19800.0 | \n",
+ " 1 | \n",
+ " 2022-07-25 12:49:27+00:00 | \n",
+ " 2022-05-02 16:09:03+00:00 | \n",
+ " 83 days 20:40:24 | \n",
+ "
\n",
+ " \n",
+ " 69520 | \n",
+ " 1216801 | \n",
+ " 1623 | \n",
+ " 12562.0 | \n",
+ " 2 | \n",
+ " 2023-09-29 16:34:38+00:00 | \n",
+ " 2023-06-16 14:16:04+00:00 | \n",
+ " 105 days 02:18:34 | \n",
+ "
\n",
+ " \n",
+ " 30056 | \n",
+ " 59330 | \n",
+ " 1551 | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " 2023-11-06 10:22:14+00:00 | \n",
+ " 2018-02-02 08:53:51+00:00 | \n",
+ " 2103 days 01:28:23 | \n",
+ "
\n",
+ " \n",
+ " 3243 | \n",
+ " 5441 | \n",
+ " 1544 | \n",
+ " 14133.0 | \n",
+ " 2 | \n",
+ " 2022-09-22 08:21:47+00:00 | \n",
+ " 2017-12-14 12:50:23+00:00 | \n",
+ " 1742 days 19:31:24 | \n",
+ "
\n",
+ " \n",
+ " 55195 | \n",
+ " 1084435 | \n",
+ " 1500 | \n",
+ " 16500.0 | \n",
+ " 1 | \n",
+ " 2022-09-27 14:32:13+00:00 | \n",
+ " 2022-05-18 08:04:41+00:00 | \n",
+ " 132 days 06:27:32 | \n",
+ "
\n",
+ " \n",
+ " 28983 | \n",
+ " 57816 | \n",
+ " 1485 | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " 2023-05-22 07:30:55+00:00 | \n",
+ " 2019-01-21 14:19:18+00:00 | \n",
+ " 1581 days 17:11:37 | \n",
+ "
\n",
+ " \n",
+ " 2231 | \n",
+ " 2942 | \n",
+ " 1307 | \n",
+ " 100.0 | \n",
+ " 2 | \n",
+ " 2023-06-29 09:33:58+00:00 | \n",
+ " 2017-10-25 15:06:58+00:00 | \n",
+ " 2072 days 18:27:00 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 24 | \n",
+ " 1266 | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " 2023-10-19 07:20:48+00:00 | \n",
+ " 2015-09-30 16:07:52+00:00 | \n",
+ " 2940 days 15:12:56 | \n",
+ "
\n",
+ " \n",
+ " 4513 | \n",
+ " 9592 | \n",
+ " 1211 | \n",
+ " 62.0 | \n",
+ " 4 | \n",
+ " 2023-10-17 09:39:40+00:00 | \n",
+ " 2018-02-25 07:17:19+00:00 | \n",
+ " 2060 days 02:22:21 | \n",
+ "
\n",
+ " \n",
+ " 2936 | \n",
+ " 5059 | \n",
+ " 1186 | \n",
+ " 6308.0 | \n",
+ " 3 | \n",
+ " 2023-05-22 13:41:22+00:00 | \n",
+ " 2018-02-01 11:16:51+00:00 | \n",
+ " 1936 days 02:24:31 | \n",
+ "
\n",
+ " \n",
+ " 11484 | \n",
+ " 25100 | \n",
+ " 1123 | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " 2021-07-13 07:39:57+00:00 | \n",
+ " 2015-12-21 15:38:05+00:00 | \n",
+ " 2030 days 16:01:52 | \n",
+ "
\n",
+ " \n",
+ " 934 | \n",
+ " 1326 | \n",
+ " 1098 | \n",
+ " 798.0 | \n",
+ " 3 | \n",
+ " 2023-02-01 08:39:45+00:00 | \n",
+ " 2018-02-13 13:13:48+00:00 | \n",
+ " 1813 days 19:25:57 | \n",
+ "
\n",
+ " \n",
+ " 30156 | \n",
+ " 59490 | \n",
+ " 1088 | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " 2023-10-05 08:23:50+00:00 | \n",
+ " 2019-12-06 12:59:20+00:00 | \n",
+ " 1398 days 19:24:30 | \n",
+ "
\n",
+ " \n",
+ " 36478 | \n",
+ " 251268 | \n",
+ " 1086 | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " 2023-06-30 07:22:46+00:00 | \n",
+ " 2018-02-02 09:06:22+00:00 | \n",
+ " 1973 days 22:16:24 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id nb_tickets total_amount nb_suppliers \\\n",
+ "0 1 1256574 8830567.5 7 \n",
+ "3615 6733 35527 1188.0 4 \n",
+ "39 41 16263 37642.0 6 \n",
+ "11 12 5871 38767.0 2 \n",
+ "32809 63488 5851 64350.0 1 \n",
+ "3708 6916 5482 51489.5 2 \n",
+ "32616 63194 4507 13232.0 3 \n",
+ "78 81 3562 38746.0 1 \n",
+ "35295 84002 3403 19830.0 4 \n",
+ "3377 5618 3294 31684.5 1 \n",
+ "30011 59259 2591 4350.0 3 \n",
+ "34937 74876 2571 2600.0 2 \n",
+ "270 295 2570 17678.5 6 \n",
+ "866 1221 2320 9652.0 2 \n",
+ "1022 1429 2249 3500.0 4 \n",
+ "3922 7249 1827 13385.0 1 \n",
+ "54425 1070539 1800 19800.0 1 \n",
+ "69520 1216801 1623 12562.0 2 \n",
+ "30056 59330 1551 0.0 1 \n",
+ "3243 5441 1544 14133.0 2 \n",
+ "55195 1084435 1500 16500.0 1 \n",
+ "28983 57816 1485 0.0 2 \n",
+ "2231 2942 1307 100.0 2 \n",
+ "23 24 1266 0.0 2 \n",
+ "4513 9592 1211 62.0 4 \n",
+ "2936 5059 1186 6308.0 3 \n",
+ "11484 25100 1123 0.0 1 \n",
+ "934 1326 1098 798.0 3 \n",
+ "30156 59490 1088 0.0 1 \n",
+ "36478 251268 1086 0.0 2 \n",
+ "\n",
+ " purchase_date_max purchase_date_min \\\n",
+ "0 2023-11-08 15:59:45+00:00 2013-06-10 10:37:58+00:00 \n",
+ "3615 2023-11-03 09:42:40+00:00 2015-09-09 13:48:38+00:00 \n",
+ "39 2023-10-25 09:13:16+00:00 2014-01-23 16:56:57+00:00 \n",
+ "11 2023-11-04 13:46:59+00:00 2018-04-04 07:46:31+00:00 \n",
+ "32809 2022-08-25 13:08:38+00:00 2020-08-18 08:32:57+00:00 \n",
+ "3708 2021-08-26 12:49:17+00:00 2018-03-26 11:13:43+00:00 \n",
+ "32616 2022-09-07 12:55:33+00:00 2017-11-28 13:52:15+00:00 \n",
+ "78 2022-08-30 11:51:34+00:00 2017-01-05 13:04:58+00:00 \n",
+ "35295 2023-11-06 15:59:22+00:00 2021-05-28 10:22:33+00:00 \n",
+ "3377 2022-02-24 07:47:20+00:00 2018-10-25 11:04:24+00:00 \n",
+ "30011 2023-06-12 14:05:19+00:00 2019-11-25 08:52:48+00:00 \n",
+ "34937 2023-10-02 08:13:05+00:00 2018-02-08 12:54:01+00:00 \n",
+ "270 2023-10-16 10:19:22+00:00 2014-01-24 15:16:17+00:00 \n",
+ "866 2022-09-19 12:55:15+00:00 2017-03-29 08:00:09+00:00 \n",
+ "1022 2023-11-06 08:30:37+00:00 2014-12-03 14:56:38+00:00 \n",
+ "3922 2021-10-26 12:28:40+00:00 2019-05-07 12:34:56+00:00 \n",
+ "54425 2022-07-25 12:49:27+00:00 2022-05-02 16:09:03+00:00 \n",
+ "69520 2023-09-29 16:34:38+00:00 2023-06-16 14:16:04+00:00 \n",
+ "30056 2023-11-06 10:22:14+00:00 2018-02-02 08:53:51+00:00 \n",
+ "3243 2022-09-22 08:21:47+00:00 2017-12-14 12:50:23+00:00 \n",
+ "55195 2022-09-27 14:32:13+00:00 2022-05-18 08:04:41+00:00 \n",
+ "28983 2023-05-22 07:30:55+00:00 2019-01-21 14:19:18+00:00 \n",
+ "2231 2023-06-29 09:33:58+00:00 2017-10-25 15:06:58+00:00 \n",
+ "23 2023-10-19 07:20:48+00:00 2015-09-30 16:07:52+00:00 \n",
+ "4513 2023-10-17 09:39:40+00:00 2018-02-25 07:17:19+00:00 \n",
+ "2936 2023-05-22 13:41:22+00:00 2018-02-01 11:16:51+00:00 \n",
+ "11484 2021-07-13 07:39:57+00:00 2015-12-21 15:38:05+00:00 \n",
+ "934 2023-02-01 08:39:45+00:00 2018-02-13 13:13:48+00:00 \n",
+ "30156 2023-10-05 08:23:50+00:00 2019-12-06 12:59:20+00:00 \n",
+ "36478 2023-06-30 07:22:46+00:00 2018-02-02 09:06:22+00:00 \n",
+ "\n",
+ " time_between_purchase \n",
+ "0 3803 days 05:21:47 \n",
+ "3615 2976 days 19:54:02 \n",
+ "39 3561 days 16:16:19 \n",
+ "11 2040 days 06:00:28 \n",
+ "32809 737 days 04:35:41 \n",
+ "3708 1249 days 01:35:34 \n",
+ "32616 1743 days 23:03:18 \n",
+ "78 2062 days 22:46:36 \n",
+ "35295 892 days 05:36:49 \n",
+ "3377 1217 days 20:42:56 \n",
+ "30011 1295 days 05:12:31 \n",
+ "34937 2061 days 19:19:04 \n",
+ "270 3551 days 19:03:05 \n",
+ "866 2000 days 04:55:06 \n",
+ "1022 3259 days 17:33:59 \n",
+ "3922 902 days 23:53:44 \n",
+ "54425 83 days 20:40:24 \n",
+ "69520 105 days 02:18:34 \n",
+ "30056 2103 days 01:28:23 \n",
+ "3243 1742 days 19:31:24 \n",
+ "55195 132 days 06:27:32 \n",
+ "28983 1581 days 17:11:37 \n",
+ "2231 2072 days 18:27:00 \n",
+ "23 2940 days 15:12:56 \n",
+ "4513 2060 days 02:22:21 \n",
+ "2936 1936 days 02:24:31 \n",
+ "11484 2030 days 16:01:52 \n",
+ "934 1813 days 19:25:57 \n",
+ "30156 1398 days 19:24:30 \n",
+ "36478 1973 days 22:16:24 "
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_tickets_kpi.sort_values(by='nb_tickets', ascending=False).head(30)"
]
},
{
@@ -2019,28 +2781,377 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 39,
"id": "46de1912-4a66-46e5-8b9e-7768b2d2723b",
"metadata": {},
"outputs": [],
"source": [
- "# Fusion liée au product\n",
- "df1_products_purchased = pd.merge(df1_tickets_kpi, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n",
- "\n",
- "# Fusion liée au customer\n",
- "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n",
- "\n",
- "# Fusion product et customer\n",
- "df1_customer_product = pd.merge(df1_customer, df1_products_purchased, on = 'customer_id', how = 'left')"
+ "# Fusion avec KPI liés au customer\n",
+ "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 40,
+ "id": "9740d64a-e5eb-4967-a534-ca6177546465",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " birthdate | \n",
+ " street_id | \n",
+ " is_partner | \n",
+ " gender | \n",
+ " is_email_true | \n",
+ " opt_in | \n",
+ " structure_id | \n",
+ " profession | \n",
+ " language | \n",
+ " ... | \n",
+ " average_ticket_basket | \n",
+ " total_price | \n",
+ " purchase_count | \n",
+ " first_buying_date | \n",
+ " country | \n",
+ " age | \n",
+ " tenant_id | \n",
+ " nb_campaigns | \n",
+ " nb_campaigns_opened | \n",
+ " time_to_open | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 12751 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 1 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 12825 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 11261 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 1 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 13071 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 653061 | \n",
+ " NaN | \n",
+ " 10 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " 80.0 | \n",
+ " 2.0 | \n",
+ " 0 days 19:53:02.500000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 28 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id birthdate street_id is_partner gender is_email_true \\\n",
+ "0 12751 NaN 2 False 1 True \n",
+ "1 12825 NaN 2 False 2 True \n",
+ "2 11261 NaN 2 False 1 True \n",
+ "3 13071 NaN 2 False 2 True \n",
+ "4 653061 NaN 10 False 2 True \n",
+ "\n",
+ " opt_in structure_id profession language ... average_ticket_basket \\\n",
+ "0 True NaN NaN NaN ... NaN \n",
+ "1 True NaN NaN NaN ... NaN \n",
+ "2 True NaN NaN NaN ... NaN \n",
+ "3 True NaN NaN NaN ... NaN \n",
+ "4 False NaN NaN NaN ... NaN \n",
+ "\n",
+ " total_price purchase_count first_buying_date country age tenant_id \\\n",
+ "0 NaN 0 NaT fr NaN 1311 \n",
+ "1 NaN 0 NaT fr NaN 1311 \n",
+ "2 NaN 0 NaT fr NaN 1311 \n",
+ "3 NaN 0 NaT fr NaN 1311 \n",
+ "4 NaN 0 NaT NaN NaN 1311 \n",
+ "\n",
+ " nb_campaigns nb_campaigns_opened time_to_open \n",
+ "0 NaN NaN NaT \n",
+ "1 NaN NaN NaT \n",
+ "2 NaN NaN NaT \n",
+ "3 NaN NaN NaT \n",
+ "4 80.0 2.0 0 days 19:53:02.500000 \n",
+ "\n",
+ "[5 rows x 28 columns]"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_customer.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "b5c4418c-ad2e-4bb9-bd5c-3b769e9c87d4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " birthdate | \n",
+ " street_id | \n",
+ " is_partner | \n",
+ " gender | \n",
+ " is_email_true | \n",
+ " opt_in | \n",
+ " structure_id | \n",
+ " profession | \n",
+ " language | \n",
+ " mcp_contact_id | \n",
+ " last_buying_date | \n",
+ " max_price | \n",
+ " ticket_sum | \n",
+ " average_price | \n",
+ " fidelity | \n",
+ " average_purchase_delay | \n",
+ " average_price_basket | \n",
+ " average_ticket_basket | \n",
+ " total_price | \n",
+ " purchase_count | \n",
+ " first_buying_date | \n",
+ " country | \n",
+ " age | \n",
+ " tenant_id | \n",
+ " nb_campaigns | \n",
+ " nb_campaigns_opened | \n",
+ " time_to_open | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 58201 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2023-11-08 03:20:07 | \n",
+ " 45.0 | \n",
+ " 1254775 | \n",
+ " 7.030122 | \n",
+ " 330831 | \n",
+ " -67.790969 | \n",
+ " 13.75153 | \n",
+ " 1.956087 | \n",
+ " 8821221.5 | \n",
+ " 641472 | \n",
+ " 2013-06-10 10:37:58+00:00 | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id birthdate street_id is_partner gender is_email_true \\\n",
+ "58201 1 NaN 2 False 2 True \n",
+ "\n",
+ " opt_in structure_id profession language mcp_contact_id \\\n",
+ "58201 False NaN NaN NaN NaN \n",
+ "\n",
+ " last_buying_date max_price ticket_sum average_price fidelity \\\n",
+ "58201 2023-11-08 03:20:07 45.0 1254775 7.030122 330831 \n",
+ "\n",
+ " average_purchase_delay average_price_basket average_ticket_basket \\\n",
+ "58201 -67.790969 13.75153 1.956087 \n",
+ "\n",
+ " total_price purchase_count first_buying_date country age \\\n",
+ "58201 8821221.5 641472 2013-06-10 10:37:58+00:00 fr NaN \n",
+ "\n",
+ " tenant_id nb_campaigns nb_campaigns_opened time_to_open \n",
+ "58201 1311 NaN NaN NaT "
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.set_option('display.max_columns', None)\n",
+ "\n",
+ "\n",
+ "df1_customer[df1_customer['customer_id'] == 1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
"id": "1e42a790-b215-4107-a969-85005da06ebd",
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "# Fusion avec KPI liés au comportement d'achat\n",
+ "# df1_customer_product = pd.merge(df1_products_purchased_reduced, df1_products_purchased, on = 'customer_id', how = 'outer')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "d950f24d-a5d1-4f1e-aeaa-ca826470365f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# df1_customer_product"
+ ]
}
],
"metadata": {
diff --git a/Exploration_billet_AJ.ipynb b/Exploration_billet_AJ.ipynb
index 6af213e..344dd7b 100644
--- a/Exploration_billet_AJ.ipynb
+++ b/Exploration_billet_AJ.ipynb
@@ -143,7 +143,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 6,
"id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed",
"metadata": {},
"outputs": [
@@ -151,7 +151,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_683/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ "/tmp/ipykernel_15285/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in)\n"
]
}
@@ -2731,7 +2731,7 @@
},
{
"cell_type": "code",
- "execution_count": 60,
+ "execution_count": 8,
"id": "da5d4708-7147-4cc8-8686-52d4bcba5a7a",
"metadata": {},
"outputs": [
@@ -2739,7 +2739,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_619/2625134041.py:3: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_15285/2625134041.py:3: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@@ -2795,11 +2795,9 @@
},
{
"cell_type": "code",
- "execution_count": 57,
- "id": "8072bbb7-1360-4882-bb2b-2f43b6beea0d",
- "metadata": {
- "scrolled": true
- },
+ "execution_count": 10,
+ "id": "c74746de-0bf4-4b83-9a75-f1d3183abf1c",
+ "metadata": {},
"outputs": [
{
"data": {
@@ -2831,226 +2829,42 @@
" \n",
" \n",
" \n",
- " 8793 | \n",
- " 4584599 | \n",
- " 1 | \n",
- " consentement optin jeune public | \n",
+ " 0 | \n",
+ " 1184824 | \n",
+ " 645400 | \n",
+ " DDCP PROMO Réseau livres | \n",
" False | \n",
" manual_static_filter | \n",
"
\n",
" \n",
- " 13249 | \n",
- " 4567465 | \n",
- " 1 | \n",
- " DDCP rentrée culturelle 2023 | \n",
+ " 1 | \n",
+ " 210571 | \n",
+ " 2412 | \n",
+ " DDCP PROMO Réseau livres | \n",
" False | \n",
" manual_static_filter | \n",
"
\n",
" \n",
- " 21424 | \n",
- " 4544805 | \n",
- " 1 | \n",
- " spectateurs cine dimanche_cine concert_2122 | \n",
+ " 2 | \n",
+ " 210572 | \n",
+ " 4536 | \n",
+ " DDCP PROMO Réseau livres | \n",
" False | \n",
" manual_static_filter | \n",
"
\n",
" \n",
- " 21665 | \n",
- " 4544911 | \n",
- " 1 | \n",
- " DDCP Cine 2023 | \n",
+ " 3 | \n",
+ " 210573 | \n",
+ " 6736 | \n",
+ " DDCP PROMO Réseau livres | \n",
" False | \n",
" manual_static_filter | \n",
"
\n",
" \n",
- " 22811 | \n",
- " 4545766 | \n",
- " 1 | \n",
- " DDCP OLBJ! 2023 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 57305 | \n",
- " 4457909 | \n",
- " 1 | \n",
- " ddcp_promo_visiteurs occasionnels_musee_8mois | \n",
- " False | \n",
- " manual_dynamic_filter | \n",
- "
\n",
- " \n",
- " 58843 | \n",
- " 3688872 | \n",
- " 1 | \n",
- " DDCP promo livemag | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 66813 | \n",
- " 4313646 | \n",
- " 1 | \n",
- " DDCP spectateurs Classique mais pas que 2022 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 68367 | \n",
- " 4547662 | \n",
- " 1 | \n",
- " ddcp_promo_musee_au moins 3 achats_dps8mois | \n",
- " False | \n",
- " manual_dynamic_filter | \n",
- "
\n",
- " \n",
- " 77320 | \n",
- " 4285520 | \n",
- " 1 | \n",
- " DDCP spectateurs Iminente | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 84350 | \n",
- " 4037805 | \n",
- " 1 | \n",
- " DDCP spectateurs Marseille Jazz 18-19-21 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 85383 | \n",
- " 4569504 | \n",
- " 1 | \n",
- " DDCP rendez-vous de septembre offre spéciale | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 92868 | \n",
- " 4433064 | \n",
- " 1 | \n",
- " ddcp_promo_plein air_ateliers_jardins | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 99670 | \n",
- " 3858684 | \n",
- " 1 | \n",
- " Acid Arab | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 105477 | \n",
- " 4321810 | \n",
- " 1 | \n",
- " Arenametrix_bascule tel vers sib | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 169513 | \n",
- " 3697992 | \n",
- " 1 | \n",
- " ddcp_achats billets nb dps 19052021 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 214421 | \n",
- " 2925324 | \n",
- " 1 | \n",
- " consentement optout scolaires | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 234546 | \n",
- " 4575957 | \n",
- " 1 | \n",
- " Portrait de Leila shahid | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 259808 | \n",
- " 3722259 | \n",
- " 1 | \n",
- " consentement optin b2b | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 274380 | \n",
- " 4510423 | \n",
- " 1 | \n",
- " DDCP_marseille_jazz_2023 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 307511 | \n",
- " 5174466 | \n",
- " 1 | \n",
- " ddcp actoral 21-22 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 357509 | \n",
- " 4442526 | \n",
- " 1 | \n",
- " ddcp musique barvalo | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 392920 | \n",
- " 4390642 | \n",
- " 1 | \n",
- " ddcp_md_promo_spectateurs theatre contempo | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 449620 | \n",
- " 4411897 | \n",
- " 1 | \n",
- " FORMATION _ acheteurs optin last year | \n",
- " False | \n",
- " manual_dynamic_filter | \n",
- "
\n",
- " \n",
- " 503809 | \n",
- " 4734591 | \n",
- " 1 | \n",
- " consentement optin mediation specialisee | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 651222 | \n",
- " 3554426 | \n",
- " 1 | \n",
- " consentement optin b2c | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 654246 | \n",
- " 5182212 | \n",
- " 1 | \n",
- " DDCP spectateurs Festival de Marseille 2023 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 654395 | \n",
- " 5182456 | \n",
- " 1 | \n",
- " rencontres_echelle_spectateurs_2021_2023 | \n",
+ " 4 | \n",
+ " 210574 | \n",
+ " 38210 | \n",
+ " DDCP PROMO Réseau livres | \n",
" False | \n",
" manual_static_filter | \n",
"
\n",
@@ -3059,80 +2873,241 @@
""
],
"text/plain": [
- " id customer_id target_name \\\n",
- "8793 4584599 1 consentement optin jeune public \n",
- "13249 4567465 1 DDCP rentrée culturelle 2023 \n",
- "21424 4544805 1 spectateurs cine dimanche_cine concert_2122 \n",
- "21665 4544911 1 DDCP Cine 2023 \n",
- "22811 4545766 1 DDCP OLBJ! 2023 \n",
- "57305 4457909 1 ddcp_promo_visiteurs occasionnels_musee_8mois \n",
- "58843 3688872 1 DDCP promo livemag \n",
- "66813 4313646 1 DDCP spectateurs Classique mais pas que 2022 \n",
- "68367 4547662 1 ddcp_promo_musee_au moins 3 achats_dps8mois \n",
- "77320 4285520 1 DDCP spectateurs Iminente \n",
- "84350 4037805 1 DDCP spectateurs Marseille Jazz 18-19-21 \n",
- "85383 4569504 1 DDCP rendez-vous de septembre offre spéciale \n",
- "92868 4433064 1 ddcp_promo_plein air_ateliers_jardins \n",
- "99670 3858684 1 Acid Arab \n",
- "105477 4321810 1 Arenametrix_bascule tel vers sib \n",
- "169513 3697992 1 ddcp_achats billets nb dps 19052021 \n",
- "214421 2925324 1 consentement optout scolaires \n",
- "234546 4575957 1 Portrait de Leila shahid \n",
- "259808 3722259 1 consentement optin b2b \n",
- "274380 4510423 1 DDCP_marseille_jazz_2023 \n",
- "307511 5174466 1 ddcp actoral 21-22 \n",
- "357509 4442526 1 ddcp musique barvalo \n",
- "392920 4390642 1 ddcp_md_promo_spectateurs theatre contempo \n",
- "449620 4411897 1 FORMATION _ acheteurs optin last year \n",
- "503809 4734591 1 consentement optin mediation specialisee \n",
- "651222 3554426 1 consentement optin b2c \n",
- "654246 5182212 1 DDCP spectateurs Festival de Marseille 2023 \n",
- "654395 5182456 1 rencontres_echelle_spectateurs_2021_2023 \n",
+ " id customer_id target_name target_type_is_import \\\n",
+ "0 1184824 645400 DDCP PROMO Réseau livres False \n",
+ "1 210571 2412 DDCP PROMO Réseau livres False \n",
+ "2 210572 4536 DDCP PROMO Réseau livres False \n",
+ "3 210573 6736 DDCP PROMO Réseau livres False \n",
+ "4 210574 38210 DDCP PROMO Réseau livres False \n",
"\n",
- " target_type_is_import target_type_name \n",
- "8793 False manual_static_filter \n",
- "13249 False manual_static_filter \n",
- "21424 False manual_static_filter \n",
- "21665 False manual_static_filter \n",
- "22811 False manual_static_filter \n",
- "57305 False manual_dynamic_filter \n",
- "58843 False manual_static_filter \n",
- "66813 False manual_static_filter \n",
- "68367 False manual_dynamic_filter \n",
- "77320 False manual_static_filter \n",
- "84350 False manual_static_filter \n",
- "85383 False manual_static_filter \n",
- "92868 False manual_static_filter \n",
- "99670 False manual_static_filter \n",
- "105477 False manual_static_filter \n",
- "169513 False manual_static_filter \n",
- "214421 False manual_static_filter \n",
- "234546 False manual_static_filter \n",
- "259808 False manual_static_filter \n",
- "274380 False manual_static_filter \n",
- "307511 False manual_static_filter \n",
- "357509 False manual_static_filter \n",
- "392920 False manual_static_filter \n",
- "449620 False manual_dynamic_filter \n",
- "503809 False manual_static_filter \n",
- "651222 False manual_static_filter \n",
- "654246 False manual_static_filter \n",
- "654395 False manual_static_filter "
+ " target_type_name \n",
+ "0 manual_static_filter \n",
+ "1 manual_static_filter \n",
+ "2 manual_static_filter \n",
+ "3 manual_static_filter \n",
+ "4 manual_static_filter "
]
},
- "execution_count": 57,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df1_targets_full[df1_targets_full['customer_id'] == 1]"
+ "df1_targets_full.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "47c55fa0-b2f3-46f9-9abf-c4ab66bd9fcb",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package punkt to /home/onyxia/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package stopwords to /home/onyxia/nltk_data...\n",
+ "[nltk_data] Package stopwords is already up-to-date!\n",
+ "[nltk_data] Downloading package wordnet to /home/onyxia/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Catégorisation des target_name\n",
+ "import pandas as pd\n",
+ "import nltk\n",
+ "from nltk.tokenize import word_tokenize\n",
+ "from nltk.corpus import stopwords\n",
+ "from nltk.stem import WordNetLemmatizer\n",
+ "from nltk.probability import FreqDist\n",
+ "\n",
+ "# Téléchargement des ressources nécessaires\n",
+ "nltk.download('punkt')\n",
+ "nltk.download('stopwords')\n",
+ "nltk.download('wordnet')\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "8af1aeb9-ebdd-4286-a14c-3b7d801ea172",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Mots les plus fréquents:\n",
+ "consentement: 550777\n",
+ "optin: 463579\n",
+ "jeune: 155103\n",
+ "public: 155103\n",
+ "mediation: 150001\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Définition des fonctions de tokenisation, suppression des mots vides et lemmatisation\n",
+ "def preprocess_text(texte):\n",
+ "    \"\"\"Return the lower-cased, stopword-free, lemmatized tokens of `texte`.\n",
+ "\n",
+ "    `texte` is either one string or an iterable of strings (joined with spaces first).\n",
+ "    \"\"\"\n",
+ "    # ' '.join() on a plain str would put a space between every character, so only join non-str input\n",
+ "    texte_concat = texte if isinstance(texte, str) else ' '.join(texte)\n",
+ "    tokens = word_tokenize(texte_concat.lower())\n",
+ "\n",
+ "    # Drop French stopwords\n",
+ "    stop_words = set(stopwords.words('french'))\n",
+ "    filtered_tokens = [word for word in tokens if word not in stop_words]\n",
+ "\n",
+ "    # NOTE(review): WordNetLemmatizer relies on the English WordNet, so French words are presumably left mostly unchanged - confirm\n",
+ "    lemmatizer = WordNetLemmatizer()\n",
+ "    return [lemmatizer.lemmatize(word) for word in filtered_tokens]\n",
+ "\n",
+ "\n",
+ "# Tokenise every target name (one token list per row)\n",
+ "df1_targets_full['target_name_tokened'] = df1_targets_full['target_name'].apply(preprocess_text)\n",
+ "\n",
+ "# Flatten the per-row token lists into one corpus-wide list of words\n",
+ "all_words = []\n",
+ "for row_tokens in df1_targets_full['target_name_tokened']:\n",
+ "    all_words.extend(row_tokens)\n",
+ "# Count the occurrences of each word across the corpus\n",
+ "freq_dist = FreqDist(all_words)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "ceb069e5-76c9-46e4-9ea7-8c16eb4ed3cd",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Mots les plus fréquents:\n",
+ "consentement: 550777\n",
+ "optin: 463579\n",
+ "jeune: 155103\n",
+ "public: 155103\n",
+ "mediation: 150001\n",
+ "specialisee: 150001\n",
+ "b2c: 143432\n",
+ "optout: 97683\n",
+ "newsletter: 56022\n",
+ "(: 46084\n",
+ "): 46084\n",
+ "inscrits: 42296\n",
+ "nl: 42294\n",
+ "générale: 41037\n",
+ "generale: 40950\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Affichage des mots les plus fréquents\n",
+ "print(\"Mots les plus fréquents:\")\n",
+ "for mot, freq in freq_dist.most_common(15):\n",
+ " print(f\"{mot}: {freq}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "8bffef87-542e-4775-bc7c-2c0323fda581",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " texte \\\n",
+ "0 Le chat noir mange une souris. \n",
+ "1 Le chien blanc aboie. \n",
+ "\n",
+ " texte_preprocessed \n",
+ "0 [e, h, a, o, i, r, a, g, e, u, e, o, u, r, i, .] \n",
+ "1 [e, h, i, e, b, a, a, b, o, i, e, .] \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package punkt to /home/onyxia/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package stopwords to /home/onyxia/nltk_data...\n",
+ "[nltk_data] Package stopwords is already up-to-date!\n",
+ "[nltk_data] Downloading package wordnet to /home/onyxia/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import nltk\n",
+ "from nltk.tokenize import word_tokenize\n",
+ "from nltk.corpus import stopwords\n",
+ "from nltk.stem import WordNetLemmatizer\n",
+ "\n",
+ "# Téléchargement des ressources nécessaires\n",
+ "nltk.download('punkt')\n",
+ "nltk.download('stopwords')\n",
+ "nltk.download('wordnet')\n",
+ "\n",
+ "# Création de la DataFrame d'exemple\n",
+ "data = {'texte': [\"Le chat noir mange une souris.\", \"Le chien blanc aboie.\"]}\n",
+ "df = pd.DataFrame(data)\n",
+ "\n",
+ "# Fonction pour prétraiter le texte\n",
+ "def preprocess_text(texte):\n",
+ "    \"\"\"Return the lower-cased, stopword-free, lemmatized tokens of `texte`.\n",
+ "\n",
+ "    `texte` is either one string or an iterable of strings (joined with spaces first).\n",
+ "    \"\"\"\n",
+ "    # ' '.join() on a plain str would put a space between every character, so only join non-str input\n",
+ "    texte_concat = texte if isinstance(texte, str) else ' '.join(texte)\n",
+ "    tokens = word_tokenize(texte_concat.lower())\n",
+ "\n",
+ "    # Drop French stopwords\n",
+ "    stop_words = set(stopwords.words('french'))\n",
+ "    filtered_tokens = [word for word in tokens if word not in stop_words]\n",
+ "\n",
+ "    # NOTE(review): WordNetLemmatizer relies on the English WordNet, so French words are presumably left mostly unchanged - confirm\n",
+ "    lemmatizer = WordNetLemmatizer()\n",
+ "    return [lemmatizer.lemmatize(word) for word in filtered_tokens]\n",
+ "\n",
+ "# Appliquer la fonction de prétraitement à la colonne de texte\n",
+ "df['texte_preprocessed'] = df['texte'].apply(preprocess_text)\n",
+ "\n",
+ "# Afficher le résultat\n",
+ "print(df)\n"
]
},
{
"cell_type": "markdown",
"id": "2f665824-a026-4acd-8358-b408a61854b4",
- "metadata": {},
+ "metadata": {
+ "jp-MarkdownHeadingCollapsed": true
+ },
"source": [
"## Campaign area"
]
@@ -3902,9 +3877,7 @@
{
"cell_type": "markdown",
"id": "96ea2523-38dc-47ef-a49e-2c2d9ad0b1c6",
- "metadata": {
- "jp-MarkdownHeadingCollapsed": true
- },
+ "metadata": {},
"source": [
"## Exploration variables"
]