diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb index aaaa80a..5077370 100644 --- a/0_Cleaning_and_merge.ipynb +++ b/0_Cleaning_and_merge.ipynb @@ -1702,7 +1702,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 69, "id": "043303fe-e90f-4689-a2a9-5d690555a045", "metadata": {}, "outputs": [], @@ -1718,6 +1718,12 @@ " # Proportion de vente en ligne\n", " prop_vente_internet = tickets_information_copy[tickets_information_copy['vente_internet'] == 1].groupby(['customer_id', 'event_type_id'])['ticket_id'].count().reset_index()\n", " prop_vente_internet.rename(columns = {'ticket_id' : 'nb_tickets_internet'}, inplace = True)\n", + "\n", + " # Average amount\n", + " avg_amount = (tickets_information_copy.groupby([\"event_type_id\", 'name_event_types'])\n", + " .agg({\"amount\" : \"mean\"}).reset_index()\n", + " .rename(columns = {'amount' : 'avg_amount'}))\n", + "\n", " \n", " tickets_kpi = (tickets_information_copy[['event_type_id', 'customer_id', 'purchase_id' ,'ticket_id','supplier_name', 'purchase_date', 'amount', 'vente_internet']]\n", " .groupby(['customer_id', 'event_type_id']) \n", @@ -1751,15 +1757,15 @@ " tickets_kpi = tickets_kpi.merge(prop_vente_internet, on = ['customer_id', 'event_type_id'], how = 'left')\n", " tickets_kpi['nb_tickets_internet'] = tickets_kpi['nb_tickets_internet'].fillna(0)\n", "\n", - " \n", - " \n", + " tickets_kpi = tickets_kpi.merge(avg_amount, how='left', on= 'event_type_id')\n", + "\n", " return tickets_kpi\n", " " ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 70, "id": "5882234a-1ed5-4269-87a6-0d75613476e3", "metadata": {}, "outputs": [], @@ -1938,800 +1944,6 @@ "## Alexis' work" ] }, - { - "cell_type": "code", - "execution_count": 39, - "id": "273857e0-7112-4294-8ba6-3c39c5cbc13a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_idevent_type_idnb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internet
0123842261947902686540.5713262.1908684.1793063258.01156251.0
1144532422289453248965.5613698.1982295.2218403692.9763892988.0
2152017501071101459190.0613803.3697920.1463313803.2234619.0
3162173561117861435871.5512502.7155091408.7155321093.9999775.0
4221431430.0102041.2745491340.308160700.9663890.0
\n", - "
" - ], - "text/plain": [ - " customer_id event_type_id nb_tickets nb_purchases total_amount \\\n", - "0 1 2 384226 194790 2686540.5 \n", - "1 1 4 453242 228945 3248965.5 \n", - "2 1 5 201750 107110 1459190.0 \n", - "3 1 6 217356 111786 1435871.5 \n", - "4 2 2 143 143 0.0 \n", - "\n", - " nb_suppliers vente_internet_max purchase_date_min purchase_date_max \\\n", - "0 7 1 3262.190868 4.179306 \n", - "1 6 1 3698.198229 5.221840 \n", - "2 6 1 3803.369792 0.146331 \n", - "3 5 1 2502.715509 1408.715532 \n", - "4 1 0 2041.274549 1340.308160 \n", - "\n", - " time_between_purchase nb_tickets_internet \n", - "0 3258.011562 51.0 \n", - "1 3692.976389 2988.0 \n", - "2 3803.223461 9.0 \n", - "3 1093.999977 5.0 \n", - "4 700.966389 0.0 " - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_tickets_kpi.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "449731f3-340f-4648-8210-4622c7dbc174", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
event_type_idname_event_typesavg_amount
02offre muséale individuel6.150659
14spectacle vivant7.762474
25offre muséale groupe4.452618
36formule adhésion6.439463
\n", - "
" - ], - "text/plain": [ - " event_type_id name_event_types avg_amount\n", - "0 2 offre muséale individuel 6.150659\n", - "1 4 spectacle vivant 7.762474\n", - "2 5 offre muséale groupe 4.452618\n", - "3 6 formule adhésion 6.439463" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "avg_amount = (df1_products_purchased_reduced.groupby([\"event_type_id\", 'name_event_types'])\n", - " .agg({\"amount\" : \"mean\"}).reset_index()\n", - " .rename(columns = {'amount' : 'avg_amount'}))\n", - "\n", - "avg_amount" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "b54bd9e8-3cad-453b-8e58-bf6d047912eb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_idevent_type_idnb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetname_event_typesavg_amount
0123842261947902686540.5713262.1908684.1793063258.01156251.0offre muséale individuel6.150659
1144532422289453248965.5613698.1982295.2218403692.9763892988.0spectacle vivant7.762474
2152017501071101459190.0613803.3697920.1463313803.2234619.0offre muséale groupe4.452618
3162173561117861435871.5512502.7155091408.7155321093.9999775.0formule adhésion6.439463
4221431430.0102041.2745491340.308160700.9663890.0offre muséale individuel6.150659
\n", - "
" - ], - "text/plain": [ - " customer_id event_type_id nb_tickets nb_purchases total_amount \\\n", - "0 1 2 384226 194790 2686540.5 \n", - "1 1 4 453242 228945 3248965.5 \n", - "2 1 5 201750 107110 1459190.0 \n", - "3 1 6 217356 111786 1435871.5 \n", - "4 2 2 143 143 0.0 \n", - "\n", - " nb_suppliers vente_internet_max purchase_date_min purchase_date_max \\\n", - "0 7 1 3262.190868 4.179306 \n", - "1 6 1 3698.198229 5.221840 \n", - "2 6 1 3803.369792 0.146331 \n", - "3 5 1 2502.715509 1408.715532 \n", - "4 1 0 2041.274549 1340.308160 \n", - "\n", - " time_between_purchase nb_tickets_internet name_event_types \\\n", - "0 3258.011562 51.0 offre muséale individuel \n", - "1 3692.976389 2988.0 spectacle vivant \n", - "2 3803.223461 9.0 offre muséale groupe \n", - "3 1093.999977 5.0 formule adhésion \n", - "4 700.966389 0.0 offre muséale individuel \n", - "\n", - " avg_amount \n", - "0 6.150659 \n", - "1 7.762474 \n", - "2 4.452618 \n", - "3 6.439463 \n", - "4 6.150659 " - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_tickets_kpi = df1_tickets_kpi.merge(avg_amount, how='left', on= 'event_type_id')\n", - "df1_tickets_kpi.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "2d6afe74-2517-478b-a99c-da9c7bd2edd4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_idbirthdatestreet_idis_partnergenderis_email_trueopt_instructure_idprofessionlanguage...fidelityaverage_purchase_delayaverage_price_basketaverage_ticket_baskettotal_pricepurchase_countfirst_buying_datecountryagetenant_id
012751NaN2False1TrueTrueNaNNaNNaN...0NaNNaNNaNNaN0NaTfrNaN1311
112825NaN2False2TrueTrueNaNNaNNaN...0NaNNaNNaNNaN0NaTfrNaN1311
211261NaN2False1TrueTrueNaNNaNNaN...0NaNNaNNaNNaN0NaTfrNaN1311
313071NaN2False2TrueTrueNaNNaNNaN...0NaNNaNNaNNaN0NaTfrNaN1311
4653061NaN10False2TrueFalseNaNNaNNaN...0NaNNaNNaNNaN0NaTNaNNaN1311
..................................................................
151861295252NaN10False2TrueFalseNaNNaNNaN...0NaNNaNNaNNaN0NaTNaNNaN1311
151862295271NaN10False2TrueFalseNaNNaNNaN...0NaNNaNNaNNaN0NaTNaNNaN1311
151863295275NaN10False2TrueFalseNaNNaNNaN...0NaNNaNNaNNaN0NaTNaNNaN1311
151864295366NaN2False2TrueFalseNaNNaNNaN...13.033.03.033.012021-05-26 17:20:37+00:00frNaN1311
151865295368NaN2False2TrueFalseNaNNaNNaN...16.022.02.022.012021-05-26 17:35:38+00:00frNaN1311
\n", - "

151866 rows × 25 columns

\n", - "
" - ], - "text/plain": [ - " customer_id birthdate street_id is_partner gender is_email_true \\\n", - "0 12751 NaN 2 False 1 True \n", - "1 12825 NaN 2 False 2 True \n", - "2 11261 NaN 2 False 1 True \n", - "3 13071 NaN 2 False 2 True \n", - "4 653061 NaN 10 False 2 True \n", - "... ... ... ... ... ... ... \n", - "151861 295252 NaN 10 False 2 True \n", - "151862 295271 NaN 10 False 2 True \n", - "151863 295275 NaN 10 False 2 True \n", - "151864 295366 NaN 2 False 2 True \n", - "151865 295368 NaN 2 False 2 True \n", - "\n", - " opt_in structure_id profession language ... fidelity \\\n", - "0 True NaN NaN NaN ... 0 \n", - "1 True NaN NaN NaN ... 0 \n", - "2 True NaN NaN NaN ... 0 \n", - "3 True NaN NaN NaN ... 0 \n", - "4 False NaN NaN NaN ... 0 \n", - "... ... ... ... ... ... ... \n", - "151861 False NaN NaN NaN ... 0 \n", - "151862 False NaN NaN NaN ... 0 \n", - "151863 False NaN NaN NaN ... 0 \n", - "151864 False NaN NaN NaN ... 1 \n", - "151865 False NaN NaN NaN ... 1 \n", - "\n", - " average_purchase_delay average_price_basket average_ticket_basket \\\n", - "0 NaN NaN NaN \n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "... ... ... ... \n", - "151861 NaN NaN NaN \n", - "151862 NaN NaN NaN \n", - "151863 NaN NaN NaN \n", - "151864 3.0 33.0 3.0 \n", - "151865 6.0 22.0 2.0 \n", - "\n", - " total_price purchase_count first_buying_date country age \\\n", - "0 NaN 0 NaT fr NaN \n", - "1 NaN 0 NaT fr NaN \n", - "2 NaN 0 NaT fr NaN \n", - "3 NaN 0 NaT fr NaN \n", - "4 NaN 0 NaT NaN NaN \n", - "... ... ... ... ... ... \n", - "151861 NaN 0 NaT NaN NaN \n", - "151862 NaN 0 NaT NaN NaN \n", - "151863 NaN 0 NaT NaN NaN \n", - "151864 33.0 1 2021-05-26 17:20:37+00:00 fr NaN \n", - "151865 22.0 1 2021-05-26 17:35:38+00:00 fr NaN \n", - "\n", - " tenant_id \n", - "0 1311 \n", - "1 1311 \n", - "2 1311 \n", - "3 1311 \n", - "4 1311 \n", - "... ... \n", - "151861 1311 \n", - "151862 1311 \n", - "151863 1311 \n", - "151864 1311 \n", - "151865 1311 \n", - "\n", - "[151866 rows x 25 columns]" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_customerplus_clean" - ] - }, { "cell_type": "code", "execution_count": 43,