diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb
index b95fec5..ced5bdf 100644
--- a/0_Cleaning_and_merge.ipynb
+++ b/0_Cleaning_and_merge.ipynb
@@ -59,7 +59,7 @@
},
{
"cell_type": "code",
- "execution_count": 93,
+ "execution_count": 3,
"id": "699664b9-eee4-4f8d-a207-e524526560c5",
"metadata": {},
"outputs": [],
@@ -78,7 +78,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_8302/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ "/tmp/ipykernel_42764/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in)\n"
]
}
@@ -242,23 +242,23 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_8302/3092893564.py:5: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/3092893564.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n",
- "/tmp/ipykernel_8302/3092893564.py:9: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/3092893564.py:9: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n",
- "/tmp/ipykernel_8302/3092893564.py:10: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/3092893564.py:10: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" suppliers['supplier_name'] = suppliers['supplier_name'].fillna('')\n",
- "/tmp/ipykernel_8302/3092893564.py:14: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/3092893564.py:14: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@@ -439,7 +439,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_8302/3848597476.py:4: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/3848597476.py:4: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@@ -451,165 +451,6 @@
"df1_target_information = preprocessing_target_area(targets = df1_targets, target_types = df1_target_types, customer_target_mappings = df1_customer_target_mappings)"
]
},
- {
- "cell_type": "code",
- "execution_count": 13,
- "id": "b4f05142-2a22-42ef-a60d-f23cc4b5cb09",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " customer_id | \n",
- "
\n",
- " \n",
- " target_name | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " consentement optin mediation specialisee | \n",
- " 150000 | \n",
- "
\n",
- " \n",
- " consentement optin jeune public | \n",
- " 149979 | \n",
- "
\n",
- " \n",
- " consentement optin b2c | \n",
- " 108909 | \n",
- "
\n",
- " \n",
- " Arenametrix_bascule tel vers sib | \n",
- " 35216 | \n",
- "
\n",
- " \n",
- " consentement optout b2c | \n",
- " 34523 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " customer_id\n",
- "target_name \n",
- "consentement optin mediation specialisee 150000\n",
- "consentement optin jeune public 149979\n",
- "consentement optin b2c 108909\n",
- "Arenametrix_bascule tel vers sib 35216\n",
- "consentement optout b2c 34523"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df1_target_information[['target_name', 'customer_id']].groupby('target_name').count().sort_values(by='customer_id', ascending=False).head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "id": "4417ff51-f501-4ab9-a192-4ab75764a8ed",
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " customer_id | \n",
- "
\n",
- " \n",
- " target_name | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Arenametrix_bascule tel vers sib | \n",
- " 35216 | \n",
- "
\n",
- " \n",
- " Autres_interet_exposition | \n",
- " 1021 | \n",
- "
\n",
- " \n",
- " COM Inscrits NL générale (historique) | \n",
- " 23005 | \n",
- "
\n",
- " \n",
- " Contacts_prenomsdoubles | \n",
- " 11643 | \n",
- "
\n",
- " \n",
- " DDCP MD Procès du Siècle | \n",
- " 1684 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " customer_id\n",
- "target_name \n",
- "Arenametrix_bascule tel vers sib 35216\n",
- "Autres_interet_exposition 1021\n",
- "COM Inscrits NL générale (historique) 23005\n",
- "Contacts_prenomsdoubles 11643\n",
- "DDCP MD Procès du Siècle 1684"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df1_target_information_reduced = df1_target_information[['target_name', 'customer_id']].groupby('target_name').count()\n",
- "df1_target_information_reduced[df1_target_information_reduced['customer_id'] >= 1000].head()"
- ]
- },
{
"cell_type": "markdown",
"id": "cdbb48b4-5e16-4ef4-8791-ed213d68d52f",
@@ -620,7 +461,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 13,
"id": "d883cc7b-ac43-4485-b86f-eaf595fbad85",
"metadata": {},
"outputs": [],
@@ -645,7 +486,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 14,
"id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f",
"metadata": {},
"outputs": [
@@ -653,19 +494,19 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_8302/1967867975.py:15: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
- "/tmp/ipykernel_8302/1967867975.py:15: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
- "/tmp/ipykernel_8302/1967867975.py:15: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@@ -680,7 +521,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 15,
"id": "c24457e7-3cad-451a-a65b-7373b656bd6e",
"metadata": {
"scrolled": true
@@ -800,7 +641,7 @@
"4 404 2021-03-27 23:00:00+00:00 "
]
},
- "execution_count": 17,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -814,7 +655,7 @@
"id": "56520a97-ede8-4920-a211-3b5b136af33d",
"metadata": {},
"source": [
- "## Create Products Table"
+ "## Product area"
]
},
{
@@ -827,7 +668,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 16,
"id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d",
"metadata": {},
"outputs": [],
@@ -838,7 +679,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 17,
"id": "607eb4b4-eed9-4b50-b823-f75c116dd37c",
"metadata": {},
"outputs": [],
@@ -909,7 +750,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 18,
"id": "350b09b9-451f-4d47-81fe-f34b892db027",
"metadata": {},
"outputs": [],
@@ -997,7 +838,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 19,
"id": "0fccc8ef-e575-4857-a401-94a7274394df",
"metadata": {},
"outputs": [
@@ -1150,7 +991,7 @@
"4 indiv entrées tp "
]
},
- "execution_count": 24,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@@ -1162,7 +1003,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 20,
"id": "779d8aaf-6668-4f66-8852-847304407ea3",
"metadata": {},
"outputs": [
@@ -1332,7 +1173,7 @@
"4 spectacle vivant mucem "
]
},
- "execution_count": 25,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -1344,7 +1185,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 21,
"id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0",
"metadata": {},
"outputs": [
@@ -1443,7 +1284,7 @@
"4 37 383 269 1"
]
},
- "execution_count": 26,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -1463,7 +1304,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 22,
"id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba",
"metadata": {},
"outputs": [],
@@ -1491,7 +1332,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 23,
"id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951",
"metadata": {},
"outputs": [
@@ -1730,7 +1571,7 @@
"[5 rows x 21 columns]"
]
},
- "execution_count": 28,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -1742,13 +1583,16 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 24,
"id": "98f78cd5-b694-4cc6-b033-20170aa13e8d",
"metadata": {},
"outputs": [],
"source": [
"# Fusion liée au product\n",
- "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')"
+ "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n",
+ "\n",
+ "# Selection des variables d'intérêts\n",
+ "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]"
]
},
{
@@ -1759,772 +1603,6 @@
"# Construction des variables explicatives"
]
},
- {
- "cell_type": "markdown",
- "id": "b09c2964-bef9-489e-ad71-84959054531b",
- "metadata": {},
- "source": [
- "## Alexis' work"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 142,
- "id": "4ab1c0d2-0097-4669-b984-b6822c976740",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " event_type_id | \n",
- " avg_amount | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 2 | \n",
- " 6.150659 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 4 | \n",
- " 7.762474 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 5 | \n",
- " 4.452618 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 6 | \n",
- " 6.439463 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " event_type_id avg_amount\n",
- "0 2 6.150659\n",
- "1 4 7.762474\n",
- "2 5 4.452618\n",
- "3 6 6.439463"
- ]
- },
- "execution_count": 142,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "avg_amount = (df1_products_purchased_reduced.groupby([\"event_type_id\"])\n",
- " .agg({\"amount\" : \"mean\"}).reset_index()\n",
- " .rename(columns = {'amount' : 'avg_amount'}))\n",
- "\n",
- "avg_amount"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 143,
- "id": "a9c62b39-389e-4dac-89a6-ac8a59fea58a",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " customer_id | \n",
- " event_type_id | \n",
- " nb_tickets | \n",
- " avg_amount | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1 | \n",
- " 2 | \n",
- " 384226 | \n",
- " 6.150659 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 1 | \n",
- " 4 | \n",
- " 453242 | \n",
- " 7.762474 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 1 | \n",
- " 5 | \n",
- " 201750 | \n",
- " 4.452618 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 1 | \n",
- " 6 | \n",
- " 217356 | \n",
- " 6.439463 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 2 | \n",
- " 2 | \n",
- " 143 | \n",
- " 6.150659 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " customer_id event_type_id nb_tickets avg_amount\n",
- "0 1 2 384226 6.150659\n",
- "1 1 4 453242 7.762474\n",
- "2 1 5 201750 4.452618\n",
- "3 1 6 217356 6.439463\n",
- "4 2 2 143 6.150659"
- ]
- },
- "execution_count": 143,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "nb_tickets = (df1_products_purchased_reduced.groupby([\"customer_id\", \"event_type_id\"])\n",
- " .agg({\"ticket_id\" : \"count\"}).reset_index()\n",
- " .rename(columns = {'ticket_id' : 'nb_tickets'})\n",
- " .merge(avg_amount, how='left', on='event_type_id'))\n",
- "nb_tickets.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 144,
- "id": "8710611c-7eb8-45ca-bdcc-009f4081f9e2",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " customer_id | \n",
- " birthdate | \n",
- " street_id | \n",
- " is_partner | \n",
- " gender | \n",
- " is_email_true | \n",
- " opt_in | \n",
- " structure_id | \n",
- " profession | \n",
- " language | \n",
- " mcp_contact_id | \n",
- " last_buying_date | \n",
- " max_price | \n",
- " ticket_sum | \n",
- " average_price | \n",
- " fidelity | \n",
- " average_purchase_delay | \n",
- " average_price_basket | \n",
- " average_ticket_basket | \n",
- " total_price | \n",
- " purchase_count | \n",
- " first_buying_date | \n",
- " country | \n",
- " age | \n",
- " tenant_id | \n",
- " nb_campaigns | \n",
- " nb_campaigns_opened | \n",
- " time_to_open | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 12751 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 1 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaT | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 12825 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 2 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaT | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 11261 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 1 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaT | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 13071 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 2 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaT | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 653061 | \n",
- " NaN | \n",
- " 10 | \n",
- " False | \n",
- " 2 | \n",
- " True | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " NaN | \n",
- " NaN | \n",
- " 1311 | \n",
- " 80.0 | \n",
- " 2.0 | \n",
- " 0 days 19:53:02.500000 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " customer_id birthdate street_id is_partner gender is_email_true \\\n",
- "0 12751 NaN 2 False 1 True \n",
- "1 12825 NaN 2 False 2 True \n",
- "2 11261 NaN 2 False 1 True \n",
- "3 13071 NaN 2 False 2 True \n",
- "4 653061 NaN 10 False 2 True \n",
- "\n",
- " opt_in structure_id profession language mcp_contact_id last_buying_date \\\n",
- "0 True NaN NaN NaN NaN NaN \n",
- "1 True NaN NaN NaN NaN NaN \n",
- "2 True NaN NaN NaN NaN NaN \n",
- "3 True NaN NaN NaN NaN NaN \n",
- "4 False NaN NaN NaN NaN NaN \n",
- "\n",
- " max_price ticket_sum average_price fidelity average_purchase_delay \\\n",
- "0 NaN 0 0.0 0 NaN \n",
- "1 NaN 0 0.0 0 NaN \n",
- "2 NaN 0 0.0 0 NaN \n",
- "3 NaN 0 0.0 0 NaN \n",
- "4 NaN 0 0.0 0 NaN \n",
- "\n",
- " average_price_basket average_ticket_basket total_price purchase_count \\\n",
- "0 NaN NaN NaN 0 \n",
- "1 NaN NaN NaN 0 \n",
- "2 NaN NaN NaN 0 \n",
- "3 NaN NaN NaN 0 \n",
- "4 NaN NaN NaN 0 \n",
- "\n",
- " first_buying_date country age tenant_id nb_campaigns \\\n",
- "0 NaT fr NaN 1311 NaN \n",
- "1 NaT fr NaN 1311 NaN \n",
- "2 NaT fr NaN 1311 NaN \n",
- "3 NaT fr NaN 1311 NaN \n",
- "4 NaT NaN NaN 1311 80.0 \n",
- "\n",
- " nb_campaigns_opened time_to_open \n",
- "0 NaN NaT \n",
- "1 NaN NaT \n",
- "2 NaN NaT \n",
- "3 NaN NaT \n",
- "4 2.0 0 days 19:53:02.500000 "
- ]
- },
- "execution_count": 144,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Fusion avec KPI campaigns liés au customer\n",
- "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n",
- "df1_customer.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 146,
- "id": "a89fad43-ee68-4081-9384-3e9f08ec6a59",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "shape : (156289, 31)\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " customer_id | \n",
- " birthdate | \n",
- " street_id | \n",
- " is_partner | \n",
- " gender | \n",
- " is_email_true | \n",
- " opt_in | \n",
- " structure_id | \n",
- " profession | \n",
- " language | \n",
- " mcp_contact_id | \n",
- " last_buying_date | \n",
- " max_price | \n",
- " ticket_sum | \n",
- " average_price | \n",
- " fidelity | \n",
- " average_purchase_delay | \n",
- " average_price_basket | \n",
- " average_ticket_basket | \n",
- " total_price | \n",
- " purchase_count | \n",
- " first_buying_date | \n",
- " country | \n",
- " age | \n",
- " tenant_id | \n",
- " nb_campaigns | \n",
- " nb_campaigns_opened | \n",
- " time_to_open | \n",
- " event_type_id | \n",
- " nb_tickets | \n",
- " avg_amount | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 12751 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 1 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaT | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 12825 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 2 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaT | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 11261 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 1 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaT | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 13071 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 2 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaT | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 653061 | \n",
- " NaN | \n",
- " 10 | \n",
- " False | \n",
- " 2 | \n",
- " True | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " NaN | \n",
- " NaN | \n",
- " 1311 | \n",
- " 80.0 | \n",
- " 2.0 | \n",
- " 0 days 19:53:02.500000 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " customer_id birthdate street_id is_partner gender is_email_true \\\n",
- "0 12751 NaN 2 False 1 True \n",
- "1 12825 NaN 2 False 2 True \n",
- "2 11261 NaN 2 False 1 True \n",
- "3 13071 NaN 2 False 2 True \n",
- "4 653061 NaN 10 False 2 True \n",
- "\n",
- " opt_in structure_id profession language mcp_contact_id last_buying_date \\\n",
- "0 True NaN NaN NaN NaN NaN \n",
- "1 True NaN NaN NaN NaN NaN \n",
- "2 True NaN NaN NaN NaN NaN \n",
- "3 True NaN NaN NaN NaN NaN \n",
- "4 False NaN NaN NaN NaN NaN \n",
- "\n",
- " max_price ticket_sum average_price fidelity average_purchase_delay \\\n",
- "0 NaN 0 0.0 0 NaN \n",
- "1 NaN 0 0.0 0 NaN \n",
- "2 NaN 0 0.0 0 NaN \n",
- "3 NaN 0 0.0 0 NaN \n",
- "4 NaN 0 0.0 0 NaN \n",
- "\n",
- " average_price_basket average_ticket_basket total_price purchase_count \\\n",
- "0 NaN NaN NaN 0 \n",
- "1 NaN NaN NaN 0 \n",
- "2 NaN NaN NaN 0 \n",
- "3 NaN NaN NaN 0 \n",
- "4 NaN NaN NaN 0 \n",
- "\n",
- " first_buying_date country age tenant_id nb_campaigns \\\n",
- "0 NaT fr NaN 1311 NaN \n",
- "1 NaT fr NaN 1311 NaN \n",
- "2 NaT fr NaN 1311 NaN \n",
- "3 NaT fr NaN 1311 NaN \n",
- "4 NaT NaN NaN 1311 80.0 \n",
- "\n",
- " nb_campaigns_opened time_to_open event_type_id nb_tickets \\\n",
- "0 NaN NaT NaN NaN \n",
- "1 NaN NaT NaN NaN \n",
- "2 NaN NaT NaN NaN \n",
- "3 NaN NaT NaN NaN \n",
- "4 2.0 0 days 19:53:02.500000 NaN NaN \n",
- "\n",
- " avg_amount \n",
- "0 NaN \n",
- "1 NaN \n",
- "2 NaN \n",
- "3 NaN \n",
- "4 NaN "
- ]
- },
- "execution_count": 146,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n",
- "print(\"shape : \", df1_customer_product.shape)\n",
- "df1_customer_product.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 147,
- "id": "a19fec00-4ece-400c-937c-ce5cd8daccfd",
- "metadata": {},
- "outputs": [],
- "source": [
- "df1_customer_product.to_csv(\"customer_product.csv\", index = False)"
- ]
- },
{
"cell_type": "markdown",
"id": "314f1b7f-ae48-4c6f-8469-9ce879043243",
@@ -2535,7 +1613,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 28,
"id": "e2c88552-b863-47a2-be23-8d2898fb28bc",
"metadata": {},
"outputs": [],
@@ -2569,7 +1647,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 29,
"id": "24537647-bc29-4777-9848-ac4120a4aa60",
"metadata": {},
"outputs": [
@@ -2577,7 +1655,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_8302/3700263836.py:11: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/3700263836.py:11: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@@ -2591,7 +1669,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 30,
"id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3",
"metadata": {},
"outputs": [
@@ -2671,7 +1749,7 @@
"4 6 20 0.0 NaT"
]
},
- "execution_count": 20,
+ "execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
@@ -2688,35 +1766,6 @@
"## KPI tickets"
]
},
- {
- "cell_type": "code",
- "execution_count": 30,
- "id": "665a5925-9c0e-425a-8f11-c33a0a9ec444",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['ticket_id', 'product_id', 'is_from_subscription', 'supplier_name',\n",
- " 'type_of_ticket_name', 'children', 'purchase_date', 'customer_id',\n",
- " 'id_products', 'representation_id', 'pricing_formula_id', 'category_id',\n",
- " 'products_group_id', 'product_pack_id', 'event_id',\n",
- " 'id_representation_cap', 'season_id', 'facility_id', 'event_type_id',\n",
- " 'event_type_key_id', 'facility_key_id', 'street_id', 'amount',\n",
- " 'is_full_price', 'name_categories', 'name_events', 'name_seasons',\n",
- " 'name_event_types', 'name_facilities'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 30,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df1_products_purchased.columns"
- ]
- },
{
"cell_type": "code",
"execution_count": 31,
@@ -2736,206 +1785,23 @@
}
],
"source": [
- "df1_products_purchased['name_event_types'].unique()"
+ "df1_products_purchased_reduced['name_event_types'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 32,
- "id": "e01e8cf9-1187-4a4b-993d-b7b4321cd8f0",
- "metadata": {},
- "outputs": [],
- "source": [
- "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 78,
- "id": "3d8b0875-b409-44ce-b688-d9d6758782d3",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " ticket_id | \n",
- " customer_id | \n",
- " event_type_id | \n",
- " supplier_name | \n",
- " purchase_date | \n",
- " type_of_ticket_name | \n",
- " amount | \n",
- " children | \n",
- " is_full_price | \n",
- " name_event_types | \n",
- " name_facilities | \n",
- " name_categories | \n",
- " name_events | \n",
- " name_seasons | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 13070859 | \n",
- " 48187 | \n",
- " 4 | \n",
- " vente en ligne | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " Atelier | \n",
- " 8.0 | \n",
- " pricing_formula | \n",
- " False | \n",
- " spectacle vivant | \n",
- " mucem | \n",
- " indiv prog enfant | \n",
- " l'école des magiciens | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 13070855 | \n",
- " 48187 | \n",
- " 4 | \n",
- " vente en ligne | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " Atelier | \n",
- " 8.0 | \n",
- " pricing_formula | \n",
- " False | \n",
- " spectacle vivant | \n",
- " mucem | \n",
- " indiv prog enfant | \n",
- " l'école des magiciens | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 13070856 | \n",
- " 48187 | \n",
- " 4 | \n",
- " vente en ligne | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " Atelier | \n",
- " 8.0 | \n",
- " pricing_formula | \n",
- " False | \n",
- " spectacle vivant | \n",
- " mucem | \n",
- " indiv prog enfant | \n",
- " l'école des magiciens | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 13070857 | \n",
- " 48187 | \n",
- " 4 | \n",
- " vente en ligne | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " Atelier | \n",
- " 8.0 | \n",
- " pricing_formula | \n",
- " False | \n",
- " spectacle vivant | \n",
- " mucem | \n",
- " indiv prog enfant | \n",
- " l'école des magiciens | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 13070858 | \n",
- " 48187 | \n",
- " 4 | \n",
- " vente en ligne | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " Atelier | \n",
- " 8.0 | \n",
- " pricing_formula | \n",
- " False | \n",
- " spectacle vivant | \n",
- " mucem | \n",
- " indiv prog enfant | \n",
- " l'école des magiciens | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " ticket_id customer_id event_type_id supplier_name \\\n",
- "0 13070859 48187 4 vente en ligne \n",
- "1 13070855 48187 4 vente en ligne \n",
- "2 13070856 48187 4 vente en ligne \n",
- "3 13070857 48187 4 vente en ligne \n",
- "4 13070858 48187 4 vente en ligne \n",
- "\n",
- " purchase_date type_of_ticket_name amount children \\\n",
- "0 2018-12-28 14:47:50+00:00 Atelier 8.0 pricing_formula \n",
- "1 2018-12-28 14:47:50+00:00 Atelier 8.0 pricing_formula \n",
- "2 2018-12-28 14:47:50+00:00 Atelier 8.0 pricing_formula \n",
- "3 2018-12-28 14:47:50+00:00 Atelier 8.0 pricing_formula \n",
- "4 2018-12-28 14:47:50+00:00 Atelier 8.0 pricing_formula \n",
- "\n",
- " is_full_price name_event_types name_facilities name_categories \\\n",
- "0 False spectacle vivant mucem indiv prog enfant \n",
- "1 False spectacle vivant mucem indiv prog enfant \n",
- "2 False spectacle vivant mucem indiv prog enfant \n",
- "3 False spectacle vivant mucem indiv prog enfant \n",
- "4 False spectacle vivant mucem indiv prog enfant \n",
- "\n",
- " name_events name_seasons \n",
- "0 l'école des magiciens 2018 \n",
- "1 l'école des magiciens 2018 \n",
- "2 l'école des magiciens 2018 \n",
- "3 l'école des magiciens 2018 \n",
- "4 l'école des magiciens 2018 "
- ]
- },
- "execution_count": 78,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Importance des suppliers\n",
- "df1_products_purchased_reduced.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 34,
"id": "2bda0b97-b28b-4070-a57d-aeab0e2f7dfe",
"metadata": {},
"outputs": [],
"source": [
"# Nombre de client assistant à plus de 2 type d'événement\n",
- "nb_event_types = df1_products_purchased_reduced[['customer_id', 'name_event_types']].groupby('customer_id').nunique()\n"
+ "nb_event_types = df1_products_purchased_reduced[['customer_id', 'name_event_types']].groupby('customer_id').nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 82,
+ "execution_count": 33,
"id": "043303fe-e90f-4689-a2a9-5d690555a045",
"metadata": {},
"outputs": [],
@@ -2961,6 +1827,7 @@
" 'purchase_date' : ['min', 'max']})\n",
" .reset_index()\n",
" )\n",
+ " \n",
" tickets_kpi.columns = tickets_kpi.columns.map('_'.join)\n",
" \n",
" tickets_kpi.rename(columns = {'ticket_id_count' : 'nb_tickets', \n",
@@ -2980,7 +1847,7 @@
},
{
"cell_type": "code",
- "execution_count": 83,
+ "execution_count": 34,
"id": "5882234a-1ed5-4269-87a6-0d75613476e3",
"metadata": {},
"outputs": [],
@@ -2998,7 +1865,7 @@
},
{
"cell_type": "code",
- "execution_count": 94,
+ "execution_count": 35,
"id": "a4a2311d-8a72-4030-afd5-218004d5d2a5",
"metadata": {},
"outputs": [],
@@ -3014,7 +1881,7 @@
},
{
"cell_type": "code",
- "execution_count": 84,
+ "execution_count": 36,
"id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad",
"metadata": {},
"outputs": [
@@ -3144,7 +2011,7 @@
"5032 1049 days 18:46:12 13497.0 "
]
},
- "execution_count": 84,
+ "execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
@@ -3153,6 +2020,641 @@
"df1_tickets_kpi.sort_values(by='nb_tickets', ascending=False).head(5)"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "f1d7f7ba-361b-467d-b375-b09c149185f7",
+ "metadata": {},
+ "source": [
+ "## Alexis' work: ticket counts and mean amount per event type"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "4ab1c0d2-0097-4669-b984-b6822c976740",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " event_type_id | \n",
+ " avg_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 6.150659 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 7.762474 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 4.452618 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 6 | \n",
+ " 6.439463 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " event_type_id avg_amount\n",
+ "0 2 6.150659\n",
+ "1 4 7.762474\n",
+ "2 5 4.452618\n",
+ "3 6 6.439463"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mean `amount` for each event type, pooled across all customers\n",
+ "avg_amount = (df1_products_purchased_reduced.groupby('event_type_id')['amount']\n",
+ "              .mean().rename('avg_amount').reset_index())\n",
+ "\n",
+ "avg_amount"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "a9c62b39-389e-4dac-89a6-ac8a59fea58a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " event_type_id | \n",
+ " nb_tickets | \n",
+ " avg_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 384226 | \n",
+ " 6.150659 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 453242 | \n",
+ " 7.762474 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " 201750 | \n",
+ " 4.452618 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 217356 | \n",
+ " 6.439463 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 143 | \n",
+ " 6.150659 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id event_type_id nb_tickets avg_amount\n",
+ "0 1 2 384226 6.150659\n",
+ "1 1 4 453242 7.762474\n",
+ "2 1 5 201750 4.452618\n",
+ "3 1 6 217356 6.439463\n",
+ "4 2 2 143 6.150659"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Ticket count per (customer, event type); the joined avg_amount is the event-type-wide mean\n",
+ "nb_tickets = (df1_products_purchased_reduced.groupby(['customer_id', 'event_type_id'])['ticket_id']\n",
+ "              .count().rename('nb_tickets').reset_index()\n",
+ "              .merge(avg_amount, on='event_type_id', how='left'))\n",
+ "nb_tickets.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "8710611c-7eb8-45ca-bdcc-009f4081f9e2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " birthdate | \n",
+ " street_id | \n",
+ " is_partner | \n",
+ " gender | \n",
+ " is_email_true | \n",
+ " opt_in | \n",
+ " structure_id | \n",
+ " profession | \n",
+ " language | \n",
+ " ... | \n",
+ " average_ticket_basket | \n",
+ " total_price | \n",
+ " purchase_count | \n",
+ " first_buying_date | \n",
+ " country | \n",
+ " age | \n",
+ " tenant_id | \n",
+ " nb_campaigns | \n",
+ " nb_campaigns_opened | \n",
+ " time_to_open | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 12751 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 1 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 12825 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 11261 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 1 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 13071 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 653061 | \n",
+ " NaN | \n",
+ " 10 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " 80.0 | \n",
+ " 2.0 | \n",
+ " 0 days 19:53:02.500000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 28 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id birthdate street_id is_partner gender is_email_true \\\n",
+ "0 12751 NaN 2 False 1 True \n",
+ "1 12825 NaN 2 False 2 True \n",
+ "2 11261 NaN 2 False 1 True \n",
+ "3 13071 NaN 2 False 2 True \n",
+ "4 653061 NaN 10 False 2 True \n",
+ "\n",
+ " opt_in structure_id profession language ... average_ticket_basket \\\n",
+ "0 True NaN NaN NaN ... NaN \n",
+ "1 True NaN NaN NaN ... NaN \n",
+ "2 True NaN NaN NaN ... NaN \n",
+ "3 True NaN NaN NaN ... NaN \n",
+ "4 False NaN NaN NaN ... NaN \n",
+ "\n",
+ " total_price purchase_count first_buying_date country age tenant_id \\\n",
+ "0 NaN 0 NaT fr NaN 1311 \n",
+ "1 NaN 0 NaT fr NaN 1311 \n",
+ "2 NaN 0 NaT fr NaN 1311 \n",
+ "3 NaN 0 NaT fr NaN 1311 \n",
+ "4 NaN 0 NaT NaN NaN 1311 \n",
+ "\n",
+ " nb_campaigns nb_campaigns_opened time_to_open \n",
+ "0 NaN NaN NaT \n",
+ "1 NaN NaN NaT \n",
+ "2 NaN NaN NaT \n",
+ "3 NaN NaN NaT \n",
+ "4 80.0 2.0 0 days 19:53:02.500000 \n",
+ "\n",
+ "[5 rows x 28 columns]"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Left-join the campaign KPIs onto the cleaned customer table, keyed on customer_id\n",
+ "df1_customer = df1_customerplus_clean.merge(df1_campaigns_kpi, how='left', on='customer_id')\n",
+ "df1_customer.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "a89fad43-ee68-4081-9384-3e9f08ec6a59",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "shape : (156289, 31)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " birthdate | \n",
+ " street_id | \n",
+ " is_partner | \n",
+ " gender | \n",
+ " is_email_true | \n",
+ " opt_in | \n",
+ " structure_id | \n",
+ " profession | \n",
+ " language | \n",
+ " ... | \n",
+ " first_buying_date | \n",
+ " country | \n",
+ " age | \n",
+ " tenant_id | \n",
+ " nb_campaigns | \n",
+ " nb_campaigns_opened | \n",
+ " time_to_open | \n",
+ " event_type_id | \n",
+ " nb_tickets | \n",
+ " avg_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 12751 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 1 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 12825 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 11261 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 1 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 13071 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 653061 | \n",
+ " NaN | \n",
+ " 10 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " 80.0 | \n",
+ " 2.0 | \n",
+ " 0 days 19:53:02.500000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 31 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id birthdate street_id is_partner gender is_email_true \\\n",
+ "0 12751 NaN 2 False 1 True \n",
+ "1 12825 NaN 2 False 2 True \n",
+ "2 11261 NaN 2 False 1 True \n",
+ "3 13071 NaN 2 False 2 True \n",
+ "4 653061 NaN 10 False 2 True \n",
+ "\n",
+ " opt_in structure_id profession language ... first_buying_date country \\\n",
+ "0 True NaN NaN NaN ... NaT fr \n",
+ "1 True NaN NaN NaN ... NaT fr \n",
+ "2 True NaN NaN NaN ... NaT fr \n",
+ "3 True NaN NaN NaN ... NaT fr \n",
+ "4 False NaN NaN NaN ... NaT NaN \n",
+ "\n",
+ " age tenant_id nb_campaigns nb_campaigns_opened time_to_open \\\n",
+ "0 NaN 1311 NaN NaN NaT \n",
+ "1 NaN 1311 NaN NaN NaT \n",
+ "2 NaN 1311 NaN NaN NaT \n",
+ "3 NaN 1311 NaN NaN NaT \n",
+ "4 NaN 1311 80.0 2.0 0 days 19:53:02.500000 \n",
+ "\n",
+ " event_type_id nb_tickets avg_amount \n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "\n",
+ "[5 rows x 31 columns]"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_customer_product = df1_customer.merge(nb_tickets, how='left', on='customer_id')  # left join: customers absent from nb_tickets keep NaN\n",
+ "print(\"shape : \", df1_customer_product.shape)\n",
+ "df1_customer_product.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "a19fec00-4ece-400c-937c-ce5cd8daccfd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# df1_customer_product.to_csv(\"customer_product.csv\", index = False)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "7c3211a5-a851-43bc-a1f0-b39d51857fb7",
@@ -3163,7 +2665,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 42,
"id": "46de1912-4a66-46e5-8b9e-7768b2d2723b",
"metadata": {},
"outputs": [],
@@ -3174,7 +2676,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 43,
"id": "1e42a790-b215-4107-a969-85005da06ebd",
"metadata": {},
"outputs": [],