diff --git a/Spectacle/Exploration_spectacle.ipynb b/Spectacle/Exploration_spectacle.ipynb index 841d297..6324287 100644 --- a/Spectacle/Exploration_spectacle.ipynb +++ b/Spectacle/Exploration_spectacle.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 42, "id": "cca62d72-f809-41a9-bb06-1be7d6b09307", "metadata": {}, "outputs": [ @@ -42,7 +42,7 @@ " 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']" ] }, - "execution_count": 12, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -58,9 +58,423 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 49, + "id": "68fb54f3-8eb3-4cd0-966b-000876912fb5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idstreet_idstructure_idmcp_contact_idfidelitytenant_idis_partnerdeleted_atgenderis_email_true...max_priceticket_sumaverage_priceaverage_purchase_delayaverage_price_basketaverage_ticket_baskettotal_pricepurchase_countfirst_buying_datecountry
0821538139NaNNaN0875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
18091261063NaNNaN0875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNfr
2110051063NaNNaN0875FalseNaN2False...NaN00.0NaNNaNNaNNaN14NaNfr
31766312731NaNNaN0875FalseNaN0False...NaN00.0NaNNaNNaNNaN1NaNfr
43810012395NaNNaN0875FalseNaN0True...NaN00.0NaNNaNNaNNaN1NaNfr
..................................................................
98789766266139NaN181304.00875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
98790766336139NaN178189.00875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
98791766348139NaN178141.00875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
98792766363139NaN176807.00875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
98793766366139NaN176788.00875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
\n", + "

98794 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + " customer_id street_id structure_id mcp_contact_id fidelity \\\n", + "0 821538 139 NaN NaN 0 \n", + "1 809126 1063 NaN NaN 0 \n", + "2 11005 1063 NaN NaN 0 \n", + "3 17663 12731 NaN NaN 0 \n", + "4 38100 12395 NaN NaN 0 \n", + "... ... ... ... ... ... \n", + "98789 766266 139 NaN 181304.0 0 \n", + "98790 766336 139 NaN 178189.0 0 \n", + "98791 766348 139 NaN 178141.0 0 \n", + "98792 766363 139 NaN 176807.0 0 \n", + "98793 766366 139 NaN 176788.0 0 \n", + "\n", + " tenant_id is_partner deleted_at gender is_email_true ... \\\n", + "0 875 False NaN 2 True ... \n", + "1 875 False NaN 2 True ... \n", + "2 875 False NaN 2 False ... \n", + "3 875 False NaN 0 False ... \n", + "4 875 False NaN 0 True ... \n", + "... ... ... ... ... ... ... \n", + "98789 875 False NaN 2 True ... \n", + "98790 875 False NaN 2 True ... \n", + "98791 875 False NaN 2 True ... \n", + "98792 875 False NaN 2 True ... \n", + "98793 875 False NaN 2 True ... \n", + "\n", + " max_price ticket_sum average_price average_purchase_delay \\\n", + "0 NaN 0 NaN NaN \n", + "1 NaN 0 NaN NaN \n", + "2 NaN 0 0.0 NaN \n", + "3 NaN 0 0.0 NaN \n", + "4 NaN 0 0.0 NaN \n", + "... ... ... ... ... \n", + "98789 NaN 0 NaN NaN \n", + "98790 NaN 0 NaN NaN \n", + "98791 NaN 0 NaN NaN \n", + "98792 NaN 0 NaN NaN \n", + "98793 NaN 0 NaN NaN \n", + "\n", + " average_price_basket average_ticket_basket total_price \\\n", + "0 NaN NaN 0.0 \n", + "1 NaN NaN 0.0 \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "... ... ... ... \n", + "98789 NaN NaN 0.0 \n", + "98790 NaN NaN 0.0 \n", + "98791 NaN NaN 0.0 \n", + "98792 NaN NaN 0.0 \n", + "98793 NaN NaN 0.0 \n", + "\n", + " purchase_count first_buying_date country \n", + "0 0 NaN NaN \n", + "1 0 NaN fr \n", + "2 14 NaN fr \n", + "3 1 NaN fr \n", + "4 1 NaN fr \n", + "... ... ... ... \n", + "98789 0 NaN NaN \n", + "98790 0 NaN NaN \n", + "98791 0 NaN NaN \n", + "98792 0 NaN NaN \n", + "98793 0 NaN NaN \n", + "\n", + "[98794 rows x 22 columns]" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "customerplus_cleaned" + ] + }, + { + "cell_type": "code", + "execution_count": 56, "id": "0e1ce56c-2e50-456c-ba97-ed4a699cc8d4", "metadata": {}, + "outputs": [], + "source": [ + "BUCKET = \"projet-bdc2324-team1\"\n", + "FILE_KEY_S3 = \"0_Input/Company_10/customerplus_cleaned.csv\"\n", + "FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " df_customerplus_cleaned = pd.read_csv(file_in, sep=\",\")\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "bcdba447-90f7-450c-b4a3-6da656e38493", + "metadata": {}, "outputs": [ { "name": "stderr", @@ -83,7 +497,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 44, "id": "637aa400-f49a-4d8d-802a-868b241f8a9d", "metadata": {}, "outputs": [], @@ -98,7 +512,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 45, "id": "e60529b5-986f-4685-91e1-782c2b022e09", "metadata": {}, "outputs": [ @@ -254,7 +668,7 @@ "[69258 rows x 5 columns]" ] }, - "execution_count": 31, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -265,9 +679,172 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 46, "id": "6ece1bb3-5a2d-41f8-be96-eb70697881dc", "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":27: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idnb_campaignsnb_campaigns_openedtime_to_open
0294NaNNaT
1373NaNNaT
23941.00 days 05:16:38
34141.00 days 01:12:29
4444NaNNaT
...............
571388279401NaNNaT
571398279411NaNNaT
571408279421NaNNaT
571418279431NaNNaT
571428279441NaNNaT
\n", + "

57143 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " customer_id nb_campaigns nb_campaigns_opened time_to_open\n", + "0 29 4 NaN NaT\n", + "1 37 3 NaN NaT\n", + "2 39 4 1.0 0 days 05:16:38\n", + "3 41 4 1.0 0 days 01:12:29\n", + "4 44 4 NaN NaT\n", + "... ... ... ... ...\n", + "57138 827940 1 NaN NaT\n", + "57139 827941 1 NaN NaT\n", + "57140 827942 1 NaN NaT\n", + "57141 827943 1 NaN NaT\n", + "57142 827944 1 NaN NaT\n", + "\n", + "[57143 rows x 4 columns]" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "campaigns_kpi_function(campaigns)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "8c42f4a3-bdbc-44fe-a873-3192b983410d", + "metadata": {}, + "outputs": [], + "source": [ + "# KPI sur le comportement d'achat\n", + "df_tickets_kpi = tickets_kpi_function(purchases)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "df124880-1e4f-4eaf-b0ef-72bb4f840d45", + "metadata": {}, "outputs": [ { "data": { @@ -290,119 +867,389 @@ " \n", " \n", " \n", - " ticket_id\n", " customer_id\n", - " purchase_id\n", - " event_type_id\n", - " supplier_name\n", - " purchase_date\n", - " amount\n", - " is_full_price\n", - " name_event_types\n", - " name_facilities\n", - " name_categories\n", - " name_events\n", - " name_seasons\n", - " start_date_time\n", - " end_date_time\n", - " open\n", + " nb_tickets\n", + " nb_purchases\n", + " total_amount\n", + " nb_suppliers\n", + " vente_internet_max\n", + " purchase_date_min\n", + " purchase_date_max\n", + " time_between_purchase\n", + " nb_tickets_internet\n", " \n", " \n", " \n", " \n", " 0\n", - " 1799177\n", - " 36984\n", - " 409613\n", + " 19482\n", + " 88\n", + " 29\n", + " 872.0\n", " 2\n", - " guichet\n", - " 2016-04-28 15:58:26+00:00\n", - " 9.0\n", - " False\n", - " danse\n", - " le grand t\n", - " abo t gourmand jeune\n", - " aringa rossa\n", - " test 2016/2017\n", - " 2016-09-27 00:00:00+02:00\n", - " 1901-01-01 00:09:21+00:09\n", - " True\n", + " 1\n", + " 2643.092500\n", + " 718.149398\n", + " 1924.943102\n", + " 8.0\n", " \n", " \n", " 1\n", - " 1799178\n", - " 36984\n", - " 409613\n", + " 19484\n", " 3\n", - " guichet\n", - " 2016-04-28 15:58:26+00:00\n", - " 9.0\n", - " False\n", - " cirque\n", - " le grand t\n", - " abo t gourmand jeune\n", - " 5èmes hurlants\n", - " test 2016/2017\n", - " 2016-11-18 00:00:00+01:00\n", - " 1901-01-01 00:09:21+00:09\n", - " True\n", + " 2\n", + " 62.0\n", + " 1\n", + " 0\n", + " 1745.021736\n", + " 1743.045035\n", + " 1.976701\n", + " 0.0\n", " \n", " \n", " 2\n", - " 1799179\n", - " 36984\n", - " 409613\n", + " 19485\n", + " 131\n", + " 21\n", + " 1878.0\n", + " 2\n", " 1\n", - " guichet\n", - " 2016-04-28 15:58:26+00:00\n", - " 9.0\n", - " False\n", - " théâtre\n", - " le grand t\n", - " abo t gourmand jeune\n", - " dom juan\n", - " test 2016/2017\n", - " 2016-12-07 00:00:00+01:00\n", - " 1901-01-01 00:09:21+00:09\n", - " True\n", + " 2649.044745\n", + " 85.240845\n", + " 2563.803900\n", + " 84.0\n", " \n", " \n", " 3\n", - " 1799180\n", - " 36984\n", - " 409613\n", + " 19486\n", + " 10\n", + " 4\n", + " 96.0\n", " 1\n", - " guichet\n", - " 2016-04-28 15:58:26+00:00\n", - " 9.0\n", - " False\n", - " théâtre\n", - " le grand t\n", - " abo t gourmand jeune\n", - " vanishing point\n", - " test 2016/2017\n", - " 2017-01-04 00:00:00+01:00\n", - " 1901-01-01 00:09:21+00:09\n", - " True\n", + " 0\n", + " 1944.077604\n", + " 1742.794225\n", + " 201.283380\n", + " 0.0\n", " \n", " \n", " 4\n", - " 1799181\n", - " 36984\n", - " 409613\n", - " 3\n", - " guichet\n", - " 2016-04-28 15:58:26+00:00\n", + " 19487\n", + " 2\n", + " 1\n", + " 33.0\n", + " 1\n", + " 0\n", + " 1742.877766\n", + " 1742.877766\n", + " 0.000000\n", + " 0.0\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 26100\n", + " 824877\n", + " 1\n", + " 1\n", + " -12.0\n", + " 1\n", + " 0\n", + " 5.956111\n", + " 5.956111\n", + " 0.000000\n", + " 0.0\n", + " \n", + " \n", + " 26101\n", + " 824878\n", + " 1\n", + " 1\n", " 12.0\n", + " 1\n", + " 0\n", + " 5.956921\n", + " 5.956921\n", + " 0.000000\n", + " 0.0\n", + " \n", + " \n", + " 26102\n", + " 824879\n", + " 2\n", + " 1\n", + " -38.0\n", + " 1\n", + " 0\n", + " 5.226238\n", + " 5.226238\n", + " 0.000000\n", + " 0.0\n", + " \n", + " \n", + " 26103\n", + " 824991\n", + " 14\n", + " 3\n", + " -100.0\n", + " 1\n", + " 0\n", + " 3.021539\n", + " 3.017222\n", + " 0.004317\n", + " 0.0\n", + " \n", + " \n", + " 26104\n", + " 824998\n", + " 1\n", + " 1\n", + " 25.0\n", + " 1\n", + " 0\n", + " 0.072720\n", + " 0.072720\n", + " 0.000000\n", + " 0.0\n", + " \n", + " \n", + "\n", + "

26105 rows × 10 columns

\n", + "" + ], + "text/plain": [ + " customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n", + "0 19482 88 29 872.0 2 \n", + "1 19484 3 2 62.0 1 \n", + "2 19485 131 21 1878.0 2 \n", + "3 19486 10 4 96.0 1 \n", + "4 19487 2 1 33.0 1 \n", + "... ... ... ... ... ... \n", + "26100 824877 1 1 -12.0 1 \n", + "26101 824878 1 1 12.0 1 \n", + "26102 824879 2 1 -38.0 1 \n", + "26103 824991 14 3 -100.0 1 \n", + "26104 824998 1 1 25.0 1 \n", + "\n", + " vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 1 2643.092500 718.149398 \n", + "1 0 1745.021736 1743.045035 \n", + "2 1 2649.044745 85.240845 \n", + "3 0 1944.077604 1742.794225 \n", + "4 0 1742.877766 1742.877766 \n", + "... ... ... ... \n", + "26100 0 5.956111 5.956111 \n", + "26101 0 5.956921 5.956921 \n", + "26102 0 5.226238 5.226238 \n", + "26103 0 3.021539 3.017222 \n", + "26104 0 0.072720 0.072720 \n", + "\n", + " time_between_purchase nb_tickets_internet \n", + "0 1924.943102 8.0 \n", + "1 1.976701 0.0 \n", + "2 2563.803900 84.0 \n", + "3 201.283380 0.0 \n", + "4 0.000000 0.0 \n", + "... ... ... \n", + "26100 0.000000 0.0 \n", + "26101 0.000000 0.0 \n", + "26102 0.000000 0.0 \n", + "26103 0.004317 0.0 \n", + "26104 0.000000 0.0 \n", + "\n", + "[26105 rows x 10 columns]" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_tickets_kpi" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "4e8c0d75-117f-4400-8d55-b3ae3f43501b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -422,183 +1269,203 @@ " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
customer_idstreet_idstructure_idmcp_contact_idfidelitytenant_idis_partnerdeleted_atgenderis_email_true...total_pricepurchase_countfirst_buying_datecountrygender_labelgender_femalegender_malegender_othercountry_frhas_tags
0821538139NaNNaN0875Falsecirquela cite des congresabo t gourmand jeunea o lang photest 2016/20172017-01-03 00:00:00+01:001901-01-01 00:09:21+00:09NaN2True...0.00NaNNaNother001NaN0
18091261063NaNNaN0875FalseNaN2True...0.00NaNfrother0011.00
2110051063NaNNaN0875FalseNaN2False...NaN14NaNfrother0011.00
31766312731NaNNaN0875FalseNaN0False...NaN1NaNfrfemale1001.00
43810012395NaNNaN0875FalseNaN0True...NaN1NaNfrfemale1001.00
...........................
49230932522326217167100621guichet2023-03-09 11:08:45+00:007.098789766266139NaN181304.00875Falsethéâtrecap norttarif sco co 1 seance scolairesur moi, le temps2022/20232023-03-13 14:00:00+01:001901-01-01 00:09:21+00:09NaN2True...0.00NaNNaNother001NaN0
49231032522336217167100621guichet2023-03-09 11:08:45+00:007.098790766336139NaN178189.00875Falsethéâtrecap norttarif sco co 1 seance scolairesur moi, le temps2022/20232023-03-13 14:00:00+01:001901-01-01 00:09:21+00:09NaN2True...0.00NaNNaNother001NaN0
49231132522346217167100621guichet2023-03-09 11:08:45+00:007.098791766348139NaN178141.00875Falsethéâtrecap norttarif sco co 1 seance scolairesur moi, le temps2022/20232023-03-13 14:00:00+01:001901-01-01 00:09:21+00:09NaN2True...0.00NaNNaNother001NaN0
49231232522356217167100621guichet2023-03-09 11:08:45+00:007.098792766363139NaN176807.00875Falsethéâtrecap norttarif sco co 1 seance scolairesur moi, le temps2022/20232023-03-13 14:00:00+01:001901-01-01 00:09:21+00:09NaN2True...0.00NaNNaNother001NaN0
49231332522366217167100621guichet2023-03-09 11:08:45+00:007.098793766366139NaN176788.00875Falsethéâtrecap norttarif sco co 1 seance scolairesur moi, le temps2022/20232023-03-13 14:00:00+01:001901-01-01 00:09:21+00:09NaN2True...0.00NaNNaNother001NaN0
\n", - "

492314 rows × 16 columns

\n", + "

98794 rows × 28 columns

\n", "
" ], "text/plain": [ - " ticket_id customer_id purchase_id event_type_id supplier_name \\\n", - "0 1799177 36984 409613 2 guichet \n", - "1 1799178 36984 409613 3 guichet \n", - "2 1799179 36984 409613 1 guichet \n", - "3 1799180 36984 409613 1 guichet \n", - "4 1799181 36984 409613 3 guichet \n", - "... ... ... ... ... ... \n", - "492309 3252232 621716 710062 1 guichet \n", - "492310 3252233 621716 710062 1 guichet \n", - "492311 3252234 621716 710062 1 guichet \n", - "492312 3252235 621716 710062 1 guichet \n", - "492313 3252236 621716 710062 1 guichet \n", + " customer_id street_id structure_id mcp_contact_id fidelity \\\n", + "0 821538 139 NaN NaN 0 \n", + "1 809126 1063 NaN NaN 0 \n", + "2 11005 1063 NaN NaN 0 \n", + "3 17663 12731 NaN NaN 0 \n", + "4 38100 12395 NaN NaN 0 \n", + "... ... ... ... ... ... \n", + "98789 766266 139 NaN 181304.0 0 \n", + "98790 766336 139 NaN 178189.0 0 \n", + "98791 766348 139 NaN 178141.0 0 \n", + "98792 766363 139 NaN 176807.0 0 \n", + "98793 766366 139 NaN 176788.0 0 \n", "\n", - " purchase_date amount is_full_price name_event_types \\\n", - "0 2016-04-28 15:58:26+00:00 9.0 False danse \n", - "1 2016-04-28 15:58:26+00:00 9.0 False cirque \n", - "2 2016-04-28 15:58:26+00:00 9.0 False théâtre \n", - "3 2016-04-28 15:58:26+00:00 9.0 False théâtre \n", - "4 2016-04-28 15:58:26+00:00 12.0 False cirque \n", - "... ... ... ... ... \n", - "492309 2023-03-09 11:08:45+00:00 7.0 False théâtre \n", - "492310 2023-03-09 11:08:45+00:00 7.0 False théâtre \n", - "492311 2023-03-09 11:08:45+00:00 7.0 False théâtre \n", - "492312 2023-03-09 11:08:45+00:00 7.0 False théâtre \n", - "492313 2023-03-09 11:08:45+00:00 7.0 False théâtre \n", + " tenant_id is_partner deleted_at gender is_email_true ... \\\n", + "0 875 False NaN 2 True ... \n", + "1 875 False NaN 2 True ... \n", + "2 875 False NaN 2 False ... \n", + "3 875 False NaN 0 False ... \n", + "4 875 False NaN 0 True ... \n", + "... ... ... ... ... ... ... \n", + "98789 875 False NaN 2 True ... \n", + "98790 875 False NaN 2 True ... \n", + "98791 875 False NaN 2 True ... \n", + "98792 875 False NaN 2 True ... \n", + "98793 875 False NaN 2 True ... \n", "\n", - " name_facilities name_categories \\\n", - "0 le grand t abo t gourmand jeune \n", - "1 le grand t abo t gourmand jeune \n", - "2 le grand t abo t gourmand jeune \n", - "3 le grand t abo t gourmand jeune \n", - "4 la cite des congres abo t gourmand jeune \n", - "... ... ... \n", - "492309 cap nort tarif sco co 1 seance scolaire \n", - "492310 cap nort tarif sco co 1 seance scolaire \n", - "492311 cap nort tarif sco co 1 seance scolaire \n", - "492312 cap nort tarif sco co 1 seance scolaire \n", - "492313 cap nort tarif sco co 1 seance scolaire \n", + " total_price purchase_count first_buying_date country gender_label \\\n", + "0 0.0 0 NaN NaN other \n", + "1 0.0 0 NaN fr other \n", + "2 NaN 14 NaN fr other \n", + "3 NaN 1 NaN fr female \n", + "4 NaN 1 NaN fr female \n", + "... ... ... ... ... ... \n", + "98789 0.0 0 NaN NaN other \n", + "98790 0.0 0 NaN NaN other \n", + "98791 0.0 0 NaN NaN other \n", + "98792 0.0 0 NaN NaN other \n", + "98793 0.0 0 NaN NaN other \n", "\n", - " name_events name_seasons start_date_time \\\n", - "0 aringa rossa test 2016/2017 2016-09-27 00:00:00+02:00 \n", - "1 5èmes hurlants test 2016/2017 2016-11-18 00:00:00+01:00 \n", - "2 dom juan test 2016/2017 2016-12-07 00:00:00+01:00 \n", - "3 vanishing point test 2016/2017 2017-01-04 00:00:00+01:00 \n", - "4 a o lang pho test 2016/2017 2017-01-03 00:00:00+01:00 \n", - "... ... ... ... \n", - "492309 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n", - "492310 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n", - "492311 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n", - "492312 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n", - "492313 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n", + " gender_female gender_male gender_other country_fr has_tags \n", + "0 0 0 1 NaN 0 \n", + "1 0 0 1 1.0 0 \n", + "2 0 0 1 1.0 0 \n", + "3 1 0 0 1.0 0 \n", + "4 1 0 0 1.0 0 \n", + "... ... ... ... ... ... \n", + "98789 0 0 1 NaN 0 \n", + "98790 0 0 1 NaN 0 \n", + "98791 0 0 1 NaN 0 \n", + "98792 0 0 1 NaN 0 \n", + "98793 0 0 1 NaN 0 \n", "\n", - " end_date_time open \n", - "0 1901-01-01 00:09:21+00:09 True \n", - "1 1901-01-01 00:09:21+00:09 True \n", - "2 1901-01-01 00:09:21+00:09 True \n", - "3 1901-01-01 00:09:21+00:09 True \n", - "4 1901-01-01 00:09:21+00:09 True \n", - "... ... ... \n", - "492309 1901-01-01 00:09:21+00:09 True \n", - "492310 1901-01-01 00:09:21+00:09 True \n", - "492311 1901-01-01 00:09:21+00:09 True \n", - "492312 1901-01-01 00:09:21+00:09 True \n", - "492313 1901-01-01 00:09:21+00:09 True \n", - "\n", - "[492314 rows x 16 columns]" + "[98794 rows x 28 columns]" ] }, - "execution_count": 25, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "purchases" + " # KPI sur les données socio-démographiques\n", + "df_customerplus_clean = customerplus_kpi_function(df_customerplus_cleaned)\n", + " \n", + "df_customerplus_clean" ] }, { @@ -811,9 +1678,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "id": "c90d94ab-cf0e-4d18-9d5e-cb1d22f4d58b", "metadata": {}, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "f-string: expecting '}' (1665996669.py, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m Cell \u001b[0;32mIn[40], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{'musee'}'\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m f-string: expecting '}'\n" + ] + } + ], + "source": [ + "BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{'musee'}'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6767ba6-94ef-43f9-8f67-15ecdb41a70b", + "metadata": {}, "outputs": [], "source": [] }