diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb index b95fec5..ced5bdf 100644 --- a/0_Cleaning_and_merge.ipynb +++ b/0_Cleaning_and_merge.ipynb @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 3, "id": "699664b9-eee4-4f8d-a207-e524526560c5", "metadata": {}, "outputs": [], @@ -78,7 +78,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_8302/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_42764/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.read_csv(file_in)\n" ] } @@ -242,23 +242,23 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_8302/3092893564.py:5: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/3092893564.py:5: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n", - "/tmp/ipykernel_8302/3092893564.py:9: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/3092893564.py:9: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n", - "/tmp/ipykernel_8302/3092893564.py:10: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/3092893564.py:10: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " suppliers['supplier_name'] = suppliers['supplier_name'].fillna('')\n", - "/tmp/ipykernel_8302/3092893564.py:14: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/3092893564.py:14: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -439,7 +439,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_8302/3848597476.py:4: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/3848597476.py:4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -451,165 +451,6 @@ "df1_target_information = preprocessing_target_area(targets = df1_targets, target_types = df1_target_types, customer_target_mappings = df1_customer_target_mappings)" ] }, - { - "cell_type": "code", - "execution_count": 13, - "id": "b4f05142-2a22-42ef-a60d-f23cc4b5cb09", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_id
target_name
consentement optin mediation specialisee150000
consentement optin jeune public149979
consentement optin b2c108909
Arenametrix_bascule tel vers sib35216
consentement optout b2c34523
\n", - "
" - ], - "text/plain": [ - " customer_id\n", - "target_name \n", - "consentement optin mediation specialisee 150000\n", - "consentement optin jeune public 149979\n", - "consentement optin b2c 108909\n", - "Arenametrix_bascule tel vers sib 35216\n", - "consentement optout b2c 34523" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_target_information[['target_name', 'customer_id']].groupby('target_name').count().sort_values(by='customer_id', ascending=False).head()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "4417ff51-f501-4ab9-a192-4ab75764a8ed", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_id
target_name
Arenametrix_bascule tel vers sib35216
Autres_interet_exposition1021
COM Inscrits NL générale (historique)23005
Contacts_prenomsdoubles11643
DDCP MD Procès du Siècle1684
\n", - "
" - ], - "text/plain": [ - " customer_id\n", - "target_name \n", - "Arenametrix_bascule tel vers sib 35216\n", - "Autres_interet_exposition 1021\n", - "COM Inscrits NL générale (historique) 23005\n", - "Contacts_prenomsdoubles 11643\n", - "DDCP MD Procès du Siècle 1684" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_target_information_reduced = df1_target_information[['target_name', 'customer_id']].groupby('target_name').count()\n", - "df1_target_information_reduced[df1_target_information_reduced['customer_id'] >= 1000].head()" - ] - }, { "cell_type": "markdown", "id": "cdbb48b4-5e16-4ef4-8791-ed213d68d52f", @@ -620,7 +461,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "id": "d883cc7b-ac43-4485-b86f-eaf595fbad85", "metadata": {}, "outputs": [], @@ -645,7 +486,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f", "metadata": {}, "outputs": [ @@ -653,19 +494,19 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_8302/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", - "/tmp/ipykernel_8302/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", - "/tmp/ipykernel_8302/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -680,7 +521,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "id": "c24457e7-3cad-451a-a65b-7373b656bd6e", "metadata": { "scrolled": true @@ -800,7 +641,7 @@ "4 404 2021-03-27 23:00:00+00:00 " ] }, - "execution_count": 17, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -814,7 +655,7 @@ "id": "56520a97-ede8-4920-a211-3b5b136af33d", "metadata": {}, "source": [ - "## Create Products Table" + "## Product area" ] }, { @@ -827,7 +668,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 16, "id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d", "metadata": {}, "outputs": [], @@ -838,7 +679,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 17, "id": "607eb4b4-eed9-4b50-b823-f75c116dd37c", "metadata": {}, "outputs": [], @@ -909,7 +750,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 18, "id": "350b09b9-451f-4d47-81fe-f34b892db027", "metadata": {}, "outputs": [], @@ -997,7 +838,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 19, "id": "0fccc8ef-e575-4857-a401-94a7274394df", "metadata": {}, "outputs": [ @@ -1150,7 +991,7 @@ "4 indiv entrées tp " ] }, - "execution_count": 24, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1162,7 +1003,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 20, "id": "779d8aaf-6668-4f66-8852-847304407ea3", "metadata": {}, "outputs": [ @@ -1332,7 +1173,7 @@ "4 spectacle vivant mucem " ] }, - "execution_count": 25, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1344,7 +1185,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 21, "id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0", "metadata": {}, "outputs": [ @@ -1443,7 +1284,7 @@ "4 37 383 269 1" ] }, - "execution_count": 26, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1463,7 +1304,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 22, "id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba", "metadata": {}, "outputs": [], @@ -1491,7 +1332,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 23, "id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951", "metadata": {}, "outputs": [ @@ -1730,7 +1571,7 @@ "[5 rows x 21 columns]" ] }, - "execution_count": 28, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1742,13 +1583,16 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 24, "id": "98f78cd5-b694-4cc6-b033-20170aa13e8d", "metadata": {}, "outputs": [], "source": [ "# Fusion liée au product\n", - "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')" + "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n", + "\n", + "# Selection des variables d'intérêts\n", + "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]" ] }, { @@ -1759,772 +1603,6 @@ "# Construction des variables explicatives" ] }, - { - "cell_type": "markdown", - "id": "b09c2964-bef9-489e-ad71-84959054531b", - "metadata": {}, - "source": [ - "## Alexis' work" - ] - }, - { - "cell_type": "code", - "execution_count": 142, - "id": "4ab1c0d2-0097-4669-b984-b6822c976740", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
event_type_idavg_amount
026.150659
147.762474
254.452618
366.439463
\n", - "
" - ], - "text/plain": [ - " event_type_id avg_amount\n", - "0 2 6.150659\n", - "1 4 7.762474\n", - "2 5 4.452618\n", - "3 6 6.439463" - ] - }, - "execution_count": 142, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "avg_amount = (df1_products_purchased_reduced.groupby([\"event_type_id\"])\n", - " .agg({\"amount\" : \"mean\"}).reset_index()\n", - " .rename(columns = {'amount' : 'avg_amount'}))\n", - "\n", - "avg_amount" - ] - }, - { - "cell_type": "code", - "execution_count": 143, - "id": "a9c62b39-389e-4dac-89a6-ac8a59fea58a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_idevent_type_idnb_ticketsavg_amount
0123842266.150659
1144532427.762474
2152017504.452618
3162173566.439463
4221436.150659
\n", - "
" - ], - "text/plain": [ - " customer_id event_type_id nb_tickets avg_amount\n", - "0 1 2 384226 6.150659\n", - "1 1 4 453242 7.762474\n", - "2 1 5 201750 4.452618\n", - "3 1 6 217356 6.439463\n", - "4 2 2 143 6.150659" - ] - }, - "execution_count": 143, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "nb_tickets = (df1_products_purchased_reduced.groupby([\"customer_id\", \"event_type_id\"])\n", - " .agg({\"ticket_id\" : \"count\"}).reset_index()\n", - " .rename(columns = {'ticket_id' : 'nb_tickets'})\n", - " .merge(avg_amount, how='left', on='event_type_id'))\n", - "nb_tickets.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 144, - "id": "8710611c-7eb8-45ca-bdcc-009f4081f9e2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_idbirthdatestreet_idis_partnergenderis_email_trueopt_instructure_idprofessionlanguagemcp_contact_idlast_buying_datemax_priceticket_sumaverage_pricefidelityaverage_purchase_delayaverage_price_basketaverage_ticket_baskettotal_pricepurchase_countfirst_buying_datecountryagetenant_idnb_campaignsnb_campaigns_openedtime_to_open
012751NaN2False1TrueTrueNaNNaNNaNNaNNaNNaN00.00NaNNaNNaNNaN0NaTfrNaN1311NaNNaNNaT
112825NaN2False2TrueTrueNaNNaNNaNNaNNaNNaN00.00NaNNaNNaNNaN0NaTfrNaN1311NaNNaNNaT
211261NaN2False1TrueTrueNaNNaNNaNNaNNaNNaN00.00NaNNaNNaNNaN0NaTfrNaN1311NaNNaNNaT
313071NaN2False2TrueTrueNaNNaNNaNNaNNaNNaN00.00NaNNaNNaNNaN0NaTfrNaN1311NaNNaNNaT
4653061NaN10False2TrueFalseNaNNaNNaNNaNNaNNaN00.00NaNNaNNaNNaN0NaTNaNNaN131180.02.00 days 19:53:02.500000
\n", - "
" - ], - "text/plain": [ - " customer_id birthdate street_id is_partner gender is_email_true \\\n", - "0 12751 NaN 2 False 1 True \n", - "1 12825 NaN 2 False 2 True \n", - "2 11261 NaN 2 False 1 True \n", - "3 13071 NaN 2 False 2 True \n", - "4 653061 NaN 10 False 2 True \n", - "\n", - " opt_in structure_id profession language mcp_contact_id last_buying_date \\\n", - "0 True NaN NaN NaN NaN NaN \n", - "1 True NaN NaN NaN NaN NaN \n", - "2 True NaN NaN NaN NaN NaN \n", - "3 True NaN NaN NaN NaN NaN \n", - "4 False NaN NaN NaN NaN NaN \n", - "\n", - " max_price ticket_sum average_price fidelity average_purchase_delay \\\n", - "0 NaN 0 0.0 0 NaN \n", - "1 NaN 0 0.0 0 NaN \n", - "2 NaN 0 0.0 0 NaN \n", - "3 NaN 0 0.0 0 NaN \n", - "4 NaN 0 0.0 0 NaN \n", - "\n", - " average_price_basket average_ticket_basket total_price purchase_count \\\n", - "0 NaN NaN NaN 0 \n", - "1 NaN NaN NaN 0 \n", - "2 NaN NaN NaN 0 \n", - "3 NaN NaN NaN 0 \n", - "4 NaN NaN NaN 0 \n", - "\n", - " first_buying_date country age tenant_id nb_campaigns \\\n", - "0 NaT fr NaN 1311 NaN \n", - "1 NaT fr NaN 1311 NaN \n", - "2 NaT fr NaN 1311 NaN \n", - "3 NaT fr NaN 1311 NaN \n", - "4 NaT NaN NaN 1311 80.0 \n", - "\n", - " nb_campaigns_opened time_to_open \n", - "0 NaN NaT \n", - "1 NaN NaT \n", - "2 NaN NaT \n", - "3 NaN NaT \n", - "4 2.0 0 days 19:53:02.500000 " - ] - }, - "execution_count": 144, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Fusion avec KPI campaigns liés au customer\n", - "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n", - "df1_customer.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 146, - "id": "a89fad43-ee68-4081-9384-3e9f08ec6a59", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shape : (156289, 31)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_idbirthdatestreet_idis_partnergenderis_email_trueopt_instructure_idprofessionlanguagemcp_contact_idlast_buying_datemax_priceticket_sumaverage_pricefidelityaverage_purchase_delayaverage_price_basketaverage_ticket_baskettotal_pricepurchase_countfirst_buying_datecountryagetenant_idnb_campaignsnb_campaigns_openedtime_to_openevent_type_idnb_ticketsavg_amount
012751NaN2False1TrueTrueNaNNaNNaNNaNNaNNaN00.00NaNNaNNaNNaN0NaTfrNaN1311NaNNaNNaTNaNNaNNaN
112825NaN2False2TrueTrueNaNNaNNaNNaNNaNNaN00.00NaNNaNNaNNaN0NaTfrNaN1311NaNNaNNaTNaNNaNNaN
211261NaN2False1TrueTrueNaNNaNNaNNaNNaNNaN00.00NaNNaNNaNNaN0NaTfrNaN1311NaNNaNNaTNaNNaNNaN
313071NaN2False2TrueTrueNaNNaNNaNNaNNaNNaN00.00NaNNaNNaNNaN0NaTfrNaN1311NaNNaNNaTNaNNaNNaN
4653061NaN10False2TrueFalseNaNNaNNaNNaNNaNNaN00.00NaNNaNNaNNaN0NaTNaNNaN131180.02.00 days 19:53:02.500000NaNNaNNaN
\n", - "
" - ], - "text/plain": [ - " customer_id birthdate street_id is_partner gender is_email_true \\\n", - "0 12751 NaN 2 False 1 True \n", - "1 12825 NaN 2 False 2 True \n", - "2 11261 NaN 2 False 1 True \n", - "3 13071 NaN 2 False 2 True \n", - "4 653061 NaN 10 False 2 True \n", - "\n", - " opt_in structure_id profession language mcp_contact_id last_buying_date \\\n", - "0 True NaN NaN NaN NaN NaN \n", - "1 True NaN NaN NaN NaN NaN \n", - "2 True NaN NaN NaN NaN NaN \n", - "3 True NaN NaN NaN NaN NaN \n", - "4 False NaN NaN NaN NaN NaN \n", - "\n", - " max_price ticket_sum average_price fidelity average_purchase_delay \\\n", - "0 NaN 0 0.0 0 NaN \n", - "1 NaN 0 0.0 0 NaN \n", - "2 NaN 0 0.0 0 NaN \n", - "3 NaN 0 0.0 0 NaN \n", - "4 NaN 0 0.0 0 NaN \n", - "\n", - " average_price_basket average_ticket_basket total_price purchase_count \\\n", - "0 NaN NaN NaN 0 \n", - "1 NaN NaN NaN 0 \n", - "2 NaN NaN NaN 0 \n", - "3 NaN NaN NaN 0 \n", - "4 NaN NaN NaN 0 \n", - "\n", - " first_buying_date country age tenant_id nb_campaigns \\\n", - "0 NaT fr NaN 1311 NaN \n", - "1 NaT fr NaN 1311 NaN \n", - "2 NaT fr NaN 1311 NaN \n", - "3 NaT fr NaN 1311 NaN \n", - "4 NaT NaN NaN 1311 80.0 \n", - "\n", - " nb_campaigns_opened time_to_open event_type_id nb_tickets \\\n", - "0 NaN NaT NaN NaN \n", - "1 NaN NaT NaN NaN \n", - "2 NaN NaT NaN NaN \n", - "3 NaN NaT NaN NaN \n", - "4 2.0 0 days 19:53:02.500000 NaN NaN \n", - "\n", - " avg_amount \n", - "0 NaN \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN " - ] - }, - "execution_count": 146, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n", - "print(\"shape : \", df1_customer_product.shape)\n", - "df1_customer_product.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 147, - "id": "a19fec00-4ece-400c-937c-ce5cd8daccfd", - "metadata": {}, - "outputs": [], - "source": [ - "df1_customer_product.to_csv(\"customer_product.csv\", index = False)" - ] - }, { "cell_type": "markdown", "id": "314f1b7f-ae48-4c6f-8469-9ce879043243", @@ -2535,7 +1613,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 28, "id": "e2c88552-b863-47a2-be23-8d2898fb28bc", "metadata": {}, "outputs": [], @@ -2569,7 +1647,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 29, "id": "24537647-bc29-4777-9848-ac4120a4aa60", "metadata": {}, "outputs": [ @@ -2577,7 +1655,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_8302/3700263836.py:11: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/3700263836.py:11: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -2591,7 +1669,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 30, "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3", "metadata": {}, "outputs": [ @@ -2671,7 +1749,7 @@ "4 6 20 0.0 NaT" ] }, - "execution_count": 20, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -2688,35 +1766,6 @@ "## KPI tickets" ] }, - { - "cell_type": "code", - "execution_count": 30, - "id": "665a5925-9c0e-425a-8f11-c33a0a9ec444", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['ticket_id', 'product_id', 'is_from_subscription', 'supplier_name',\n", - " 'type_of_ticket_name', 'children', 'purchase_date', 'customer_id',\n", - " 'id_products', 'representation_id', 'pricing_formula_id', 'category_id',\n", - " 'products_group_id', 'product_pack_id', 'event_id',\n", - " 'id_representation_cap', 'season_id', 'facility_id', 'event_type_id',\n", - " 'event_type_key_id', 'facility_key_id', 'street_id', 'amount',\n", - " 'is_full_price', 'name_categories', 'name_events', 'name_seasons',\n", - " 'name_event_types', 'name_facilities'],\n", - " dtype='object')" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_products_purchased.columns" - ] - }, { "cell_type": "code", "execution_count": 31, @@ -2736,206 +1785,23 @@ } ], "source": [ - "df1_products_purchased['name_event_types'].unique()" + "df1_products_purchased_reduced['name_event_types'].unique()" ] }, { "cell_type": "code", "execution_count": 32, - "id": "e01e8cf9-1187-4a4b-993d-b7b4321cd8f0", - "metadata": {}, - "outputs": [], - "source": [ - "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "id": "3d8b0875-b409-44ce-b688-d9d6758782d3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ticket_idcustomer_idevent_type_idsupplier_namepurchase_datetype_of_ticket_nameamountchildrenis_full_pricename_event_typesname_facilitiesname_categoriesname_eventsname_seasons
013070859481874vente en ligne2018-12-28 14:47:50+00:00Atelier8.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
113070855481874vente en ligne2018-12-28 14:47:50+00:00Atelier8.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
213070856481874vente en ligne2018-12-28 14:47:50+00:00Atelier8.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
313070857481874vente en ligne2018-12-28 14:47:50+00:00Atelier8.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
413070858481874vente en ligne2018-12-28 14:47:50+00:00Atelier8.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
\n", - "
" - ], - "text/plain": [ - " ticket_id customer_id event_type_id supplier_name \\\n", - "0 13070859 48187 4 vente en ligne \n", - "1 13070855 48187 4 vente en ligne \n", - "2 13070856 48187 4 vente en ligne \n", - "3 13070857 48187 4 vente en ligne \n", - "4 13070858 48187 4 vente en ligne \n", - "\n", - " purchase_date type_of_ticket_name amount children \\\n", - "0 2018-12-28 14:47:50+00:00 Atelier 8.0 pricing_formula \n", - "1 2018-12-28 14:47:50+00:00 Atelier 8.0 pricing_formula \n", - "2 2018-12-28 14:47:50+00:00 Atelier 8.0 pricing_formula \n", - "3 2018-12-28 14:47:50+00:00 Atelier 8.0 pricing_formula \n", - "4 2018-12-28 14:47:50+00:00 Atelier 8.0 pricing_formula \n", - "\n", - " is_full_price name_event_types name_facilities name_categories \\\n", - "0 False spectacle vivant mucem indiv prog enfant \n", - "1 False spectacle vivant mucem indiv prog enfant \n", - "2 False spectacle vivant mucem indiv prog enfant \n", - "3 False spectacle vivant mucem indiv prog enfant \n", - "4 False spectacle vivant mucem indiv prog enfant \n", - "\n", - " name_events name_seasons \n", - "0 l'école des magiciens 2018 \n", - "1 l'école des magiciens 2018 \n", - "2 l'école des magiciens 2018 \n", - "3 l'école des magiciens 2018 \n", - "4 l'école des magiciens 2018 " - ] - }, - "execution_count": 78, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Importance des suppliers\n", - "df1_products_purchased_reduced.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 34, "id": "2bda0b97-b28b-4070-a57d-aeab0e2f7dfe", "metadata": {}, "outputs": [], "source": [ "# Nombre de client assistant à plus de 2 type d'événement\n", - "nb_event_types = df1_products_purchased_reduced[['customer_id', 'name_event_types']].groupby('customer_id').nunique()\n" + "nb_event_types = df1_products_purchased_reduced[['customer_id', 'name_event_types']].groupby('customer_id').nunique()" ] }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 33, "id": "043303fe-e90f-4689-a2a9-5d690555a045", "metadata": {}, "outputs": [], @@ -2961,6 +1827,7 @@ " 'purchase_date' : ['min', 'max']})\n", " .reset_index()\n", " )\n", + " \n", " tickets_kpi.columns = tickets_kpi.columns.map('_'.join)\n", " \n", " tickets_kpi.rename(columns = {'ticket_id_count' : 'nb_tickets', \n", @@ -2980,7 +1847,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 34, "id": "5882234a-1ed5-4269-87a6-0d75613476e3", "metadata": {}, "outputs": [], @@ -2998,7 +1865,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 35, "id": "a4a2311d-8a72-4030-afd5-218004d5d2a5", "metadata": {}, "outputs": [], @@ -3014,7 +1881,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 36, "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad", "metadata": {}, "outputs": [ @@ -3144,7 +2011,7 @@ "5032 1049 days 18:46:12 13497.0 " ] }, - "execution_count": 84, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -3153,6 +2020,641 @@ "df1_tickets_kpi.sort_values(by='nb_tickets', ascending=False).head(5)" ] }, + { + "cell_type": "markdown", + "id": "f1d7f7ba-361b-467d-b375-b09c149185f7", + "metadata": {}, + "source": [ + "## Alexis' work" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "4ab1c0d2-0097-4669-b984-b6822c976740", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
event_type_idavg_amount
026.150659
147.762474
254.452618
366.439463
\n", + "
" + ], + "text/plain": [ + " event_type_id avg_amount\n", + "0 2 6.150659\n", + "1 4 7.762474\n", + "2 5 4.452618\n", + "3 6 6.439463" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg_amount = (df1_products_purchased_reduced.groupby([\"event_type_id\"])\n", + " .agg({\"amount\" : \"mean\"}).reset_index()\n", + " .rename(columns = {'amount' : 'avg_amount'}))\n", + "\n", + "avg_amount" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "a9c62b39-389e-4dac-89a6-ac8a59fea58a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idevent_type_idnb_ticketsavg_amount
0123842266.150659
1144532427.762474
2152017504.452618
3162173566.439463
4221436.150659
\n", + "
" + ], + "text/plain": [ + " customer_id event_type_id nb_tickets avg_amount\n", + "0 1 2 384226 6.150659\n", + "1 1 4 453242 7.762474\n", + "2 1 5 201750 4.452618\n", + "3 1 6 217356 6.439463\n", + "4 2 2 143 6.150659" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nb_tickets = (df1_products_purchased_reduced.groupby([\"customer_id\", \"event_type_id\"])\n", + " .agg({\"ticket_id\" : \"count\"}).reset_index()\n", + " .rename(columns = {'ticket_id' : 'nb_tickets'})\n", + " .merge(avg_amount, how='left', on='event_type_id'))\n", + "nb_tickets.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "8710611c-7eb8-45ca-bdcc-009f4081f9e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idbirthdatestreet_idis_partnergenderis_email_trueopt_instructure_idprofessionlanguage...average_ticket_baskettotal_pricepurchase_countfirst_buying_datecountryagetenant_idnb_campaignsnb_campaigns_openedtime_to_open
012751NaN2False1TrueTrueNaNNaNNaN...NaNNaN0NaTfrNaN1311NaNNaNNaT
112825NaN2False2TrueTrueNaNNaNNaN...NaNNaN0NaTfrNaN1311NaNNaNNaT
211261NaN2False1TrueTrueNaNNaNNaN...NaNNaN0NaTfrNaN1311NaNNaNNaT
313071NaN2False2TrueTrueNaNNaNNaN...NaNNaN0NaTfrNaN1311NaNNaNNaT
4653061NaN10False2TrueFalseNaNNaNNaN...NaNNaN0NaTNaNNaN131180.02.00 days 19:53:02.500000
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " customer_id birthdate street_id is_partner gender is_email_true \\\n", + "0 12751 NaN 2 False 1 True \n", + "1 12825 NaN 2 False 2 True \n", + "2 11261 NaN 2 False 1 True \n", + "3 13071 NaN 2 False 2 True \n", + "4 653061 NaN 10 False 2 True \n", + "\n", + " opt_in structure_id profession language ... average_ticket_basket \\\n", + "0 True NaN NaN NaN ... NaN \n", + "1 True NaN NaN NaN ... NaN \n", + "2 True NaN NaN NaN ... NaN \n", + "3 True NaN NaN NaN ... NaN \n", + "4 False NaN NaN NaN ... NaN \n", + "\n", + " total_price purchase_count first_buying_date country age tenant_id \\\n", + "0 NaN 0 NaT fr NaN 1311 \n", + "1 NaN 0 NaT fr NaN 1311 \n", + "2 NaN 0 NaT fr NaN 1311 \n", + "3 NaN 0 NaT fr NaN 1311 \n", + "4 NaN 0 NaT NaN NaN 1311 \n", + "\n", + " nb_campaigns nb_campaigns_opened time_to_open \n", + "0 NaN NaN NaT \n", + "1 NaN NaN NaT \n", + "2 NaN NaN NaT \n", + "3 NaN NaN NaT \n", + "4 80.0 2.0 0 days 19:53:02.500000 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Fusion avec KPI campaigns liés au customer\n", + "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n", + "df1_customer.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "a89fad43-ee68-4081-9384-3e9f08ec6a59", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape : (156289, 31)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idbirthdatestreet_idis_partnergenderis_email_trueopt_instructure_idprofessionlanguage...first_buying_datecountryagetenant_idnb_campaignsnb_campaigns_openedtime_to_openevent_type_idnb_ticketsavg_amount
012751NaN2False1TrueTrueNaNNaNNaN...NaTfrNaN1311NaNNaNNaTNaNNaNNaN
112825NaN2False2TrueTrueNaNNaNNaN...NaTfrNaN1311NaNNaNNaTNaNNaNNaN
211261NaN2False1TrueTrueNaNNaNNaN...NaTfrNaN1311NaNNaNNaTNaNNaNNaN
313071NaN2False2TrueTrueNaNNaNNaN...NaTfrNaN1311NaNNaNNaTNaNNaNNaN
4653061NaN10False2TrueFalseNaNNaNNaN...NaTNaNNaN131180.02.00 days 19:53:02.500000NaNNaNNaN
\n", + "

5 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " customer_id birthdate street_id is_partner gender is_email_true \\\n", + "0 12751 NaN 2 False 1 True \n", + "1 12825 NaN 2 False 2 True \n", + "2 11261 NaN 2 False 1 True \n", + "3 13071 NaN 2 False 2 True \n", + "4 653061 NaN 10 False 2 True \n", + "\n", + " opt_in structure_id profession language ... first_buying_date country \\\n", + "0 True NaN NaN NaN ... NaT fr \n", + "1 True NaN NaN NaN ... NaT fr \n", + "2 True NaN NaN NaN ... NaT fr \n", + "3 True NaN NaN NaN ... NaT fr \n", + "4 False NaN NaN NaN ... NaT NaN \n", + "\n", + " age tenant_id nb_campaigns nb_campaigns_opened time_to_open \\\n", + "0 NaN 1311 NaN NaN NaT \n", + "1 NaN 1311 NaN NaN NaT \n", + "2 NaN 1311 NaN NaN NaT \n", + "3 NaN 1311 NaN NaN NaT \n", + "4 NaN 1311 80.0 2.0 0 days 19:53:02.500000 \n", + "\n", + " event_type_id nb_tickets avg_amount \n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n", + "print(\"shape : \", df1_customer_product.shape)\n", + "df1_customer_product.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "a19fec00-4ece-400c-937c-ce5cd8daccfd", + "metadata": {}, + "outputs": [], + "source": [ + "# df1_customer_product.to_csv(\"customer_product.csv\", index = False)" + ] + }, { "cell_type": "markdown", "id": "7c3211a5-a851-43bc-a1f0-b39d51857fb7", @@ -3163,7 +2665,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "id": "46de1912-4a66-46e5-8b9e-7768b2d2723b", "metadata": {}, "outputs": [], @@ -3174,7 +2676,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "id": "1e42a790-b215-4107-a969-85005da06ebd", "metadata": {}, "outputs": [],