diff --git a/Spectacle/Exploration_spectacle.ipynb b/Spectacle/Exploration_spectacle.ipynb
index 841d297..6324287 100644
--- a/Spectacle/Exploration_spectacle.ipynb
+++ b/Spectacle/Exploration_spectacle.ipynb
@@ -29,7 +29,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 42,
"id": "cca62d72-f809-41a9-bb06-1be7d6b09307",
"metadata": {},
"outputs": [
@@ -42,7 +42,7 @@
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
]
},
- "execution_count": 12,
+ "execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
@@ -58,9 +58,423 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 49,
+ "id": "68fb54f3-8eb3-4cd0-966b-000876912fb5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " street_id | \n",
+ " structure_id | \n",
+ " mcp_contact_id | \n",
+ " fidelity | \n",
+ " tenant_id | \n",
+ " is_partner | \n",
+ " deleted_at | \n",
+ " gender | \n",
+ " is_email_true | \n",
+ " ... | \n",
+ " max_price | \n",
+ " ticket_sum | \n",
+ " average_price | \n",
+ " average_purchase_delay | \n",
+ " average_price_basket | \n",
+ " average_ticket_basket | \n",
+ " total_price | \n",
+ " purchase_count | \n",
+ " first_buying_date | \n",
+ " country | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 821538 | \n",
+ " 139 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " True | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 809126 | \n",
+ " 1063 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " True | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " fr | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 11005 | \n",
+ " 1063 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 14 | \n",
+ " NaN | \n",
+ " fr | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 17663 | \n",
+ " 12731 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " False | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " fr | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 38100 | \n",
+ " 12395 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " True | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " fr | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 98789 | \n",
+ " 766266 | \n",
+ " 139 | \n",
+ " NaN | \n",
+ " 181304.0 | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " True | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 98790 | \n",
+ " 766336 | \n",
+ " 139 | \n",
+ " NaN | \n",
+ " 178189.0 | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " True | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 98791 | \n",
+ " 766348 | \n",
+ " 139 | \n",
+ " NaN | \n",
+ " 178141.0 | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " True | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 98792 | \n",
+ " 766363 | \n",
+ " 139 | \n",
+ " NaN | \n",
+ " 176807.0 | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " True | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 98793 | \n",
+ " 766366 | \n",
+ " 139 | \n",
+ " NaN | \n",
+ " 176788.0 | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " True | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
98794 rows × 22 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id street_id structure_id mcp_contact_id fidelity \\\n",
+ "0 821538 139 NaN NaN 0 \n",
+ "1 809126 1063 NaN NaN 0 \n",
+ "2 11005 1063 NaN NaN 0 \n",
+ "3 17663 12731 NaN NaN 0 \n",
+ "4 38100 12395 NaN NaN 0 \n",
+ "... ... ... ... ... ... \n",
+ "98789 766266 139 NaN 181304.0 0 \n",
+ "98790 766336 139 NaN 178189.0 0 \n",
+ "98791 766348 139 NaN 178141.0 0 \n",
+ "98792 766363 139 NaN 176807.0 0 \n",
+ "98793 766366 139 NaN 176788.0 0 \n",
+ "\n",
+ " tenant_id is_partner deleted_at gender is_email_true ... \\\n",
+ "0 875 False NaN 2 True ... \n",
+ "1 875 False NaN 2 True ... \n",
+ "2 875 False NaN 2 False ... \n",
+ "3 875 False NaN 0 False ... \n",
+ "4 875 False NaN 0 True ... \n",
+ "... ... ... ... ... ... ... \n",
+ "98789 875 False NaN 2 True ... \n",
+ "98790 875 False NaN 2 True ... \n",
+ "98791 875 False NaN 2 True ... \n",
+ "98792 875 False NaN 2 True ... \n",
+ "98793 875 False NaN 2 True ... \n",
+ "\n",
+ " max_price ticket_sum average_price average_purchase_delay \\\n",
+ "0 NaN 0 NaN NaN \n",
+ "1 NaN 0 NaN NaN \n",
+ "2 NaN 0 0.0 NaN \n",
+ "3 NaN 0 0.0 NaN \n",
+ "4 NaN 0 0.0 NaN \n",
+ "... ... ... ... ... \n",
+ "98789 NaN 0 NaN NaN \n",
+ "98790 NaN 0 NaN NaN \n",
+ "98791 NaN 0 NaN NaN \n",
+ "98792 NaN 0 NaN NaN \n",
+ "98793 NaN 0 NaN NaN \n",
+ "\n",
+ " average_price_basket average_ticket_basket total_price \\\n",
+ "0 NaN NaN 0.0 \n",
+ "1 NaN NaN 0.0 \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "98789 NaN NaN 0.0 \n",
+ "98790 NaN NaN 0.0 \n",
+ "98791 NaN NaN 0.0 \n",
+ "98792 NaN NaN 0.0 \n",
+ "98793 NaN NaN 0.0 \n",
+ "\n",
+ " purchase_count first_buying_date country \n",
+ "0 0 NaN NaN \n",
+ "1 0 NaN fr \n",
+ "2 14 NaN fr \n",
+ "3 1 NaN fr \n",
+ "4 1 NaN fr \n",
+ "... ... ... ... \n",
+ "98789 0 NaN NaN \n",
+ "98790 0 NaN NaN \n",
+ "98791 0 NaN NaN \n",
+ "98792 0 NaN NaN \n",
+ "98793 0 NaN NaN \n",
+ "\n",
+ "[98794 rows x 22 columns]"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "customerplus_cleaned"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
"id": "0e1ce56c-2e50-456c-ba97-ed4a699cc8d4",
"metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load customerplus_cleaned.csv (Company_10 input folder) through the notebook's `fs` handle.\n",
+ "BUCKET = \"projet-bdc2324-team1\"\n",
+ "FILE_KEY_S3 = \"0_Input/Company_10/customerplus_cleaned.csv\"\n",
+ "FILE_PATH_S3 = f\"{BUCKET}/{FILE_KEY_S3}\"\n",
+ "\n",
+ "with fs.open(FILE_PATH_S3, mode=\"rb\") as csv_stream:\n",
+ "    df_customerplus_cleaned = pd.read_csv(csv_stream, sep=\",\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "bcdba447-90f7-450c-b4a3-6da656e38493",
+ "metadata": {},
"outputs": [
{
"name": "stderr",
@@ -83,7 +497,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 44,
"id": "637aa400-f49a-4d8d-802a-868b241f8a9d",
"metadata": {},
"outputs": [],
@@ -98,7 +512,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 45,
"id": "e60529b5-986f-4685-91e1-782c2b022e09",
"metadata": {},
"outputs": [
@@ -254,7 +668,7 @@
"[69258 rows x 5 columns]"
]
},
- "execution_count": 31,
+ "execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
@@ -265,9 +679,172 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 46,
"id": "6ece1bb3-5a2d-41f8-be96-eb70697881dc",
"metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ ":27: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " nb_campaigns | \n",
+ " nb_campaigns_opened | \n",
+ " time_to_open | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 29 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 37 | \n",
+ " 3 | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 39 | \n",
+ " 4 | \n",
+ " 1.0 | \n",
+ " 0 days 05:16:38 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 41 | \n",
+ " 4 | \n",
+ " 1.0 | \n",
+ " 0 days 01:12:29 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 44 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 57138 | \n",
+ " 827940 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 57139 | \n",
+ " 827941 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 57140 | \n",
+ " 827942 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 57141 | \n",
+ " 827943 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 57142 | \n",
+ " 827944 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
57143 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id nb_campaigns nb_campaigns_opened time_to_open\n",
+ "0 29 4 NaN NaT\n",
+ "1 37 3 NaN NaT\n",
+ "2 39 4 1.0 0 days 05:16:38\n",
+ "3 41 4 1.0 0 days 01:12:29\n",
+ "4 44 4 NaN NaT\n",
+ "... ... ... ... ...\n",
+ "57138 827940 1 NaN NaT\n",
+ "57139 827941 1 NaN NaT\n",
+ "57140 827942 1 NaN NaT\n",
+ "57141 827943 1 NaN NaT\n",
+ "57142 827944 1 NaN NaT\n",
+ "\n",
+ "[57143 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "campaigns_kpi_function(campaigns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "8c42f4a3-bdbc-44fe-a873-3192b983410d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# KPIs on purchasing behaviour\n",
+ "df_tickets_kpi = tickets_kpi_function(purchases)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "df124880-1e4f-4eaf-b0ef-72bb4f840d45",
+ "metadata": {},
"outputs": [
{
"data": {
@@ -290,119 +867,389 @@
" \n",
" \n",
" | \n",
- " ticket_id | \n",
" customer_id | \n",
- " purchase_id | \n",
- " event_type_id | \n",
- " supplier_name | \n",
- " purchase_date | \n",
- " amount | \n",
- " is_full_price | \n",
- " name_event_types | \n",
- " name_facilities | \n",
- " name_categories | \n",
- " name_events | \n",
- " name_seasons | \n",
- " start_date_time | \n",
- " end_date_time | \n",
- " open | \n",
+ " nb_tickets | \n",
+ " nb_purchases | \n",
+ " total_amount | \n",
+ " nb_suppliers | \n",
+ " vente_internet_max | \n",
+ " purchase_date_min | \n",
+ " purchase_date_max | \n",
+ " time_between_purchase | \n",
+ " nb_tickets_internet | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
- " 1799177 | \n",
- " 36984 | \n",
- " 409613 | \n",
+ " 19482 | \n",
+ " 88 | \n",
+ " 29 | \n",
+ " 872.0 | \n",
" 2 | \n",
- " guichet | \n",
- " 2016-04-28 15:58:26+00:00 | \n",
- " 9.0 | \n",
- " False | \n",
- " danse | \n",
- " le grand t | \n",
- " abo t gourmand jeune | \n",
- " aringa rossa | \n",
- " test 2016/2017 | \n",
- " 2016-09-27 00:00:00+02:00 | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " True | \n",
+ " 1 | \n",
+ " 2643.092500 | \n",
+ " 718.149398 | \n",
+ " 1924.943102 | \n",
+ " 8.0 | \n",
"
\n",
" \n",
" 1 | \n",
- " 1799178 | \n",
- " 36984 | \n",
- " 409613 | \n",
+ " 19484 | \n",
" 3 | \n",
- " guichet | \n",
- " 2016-04-28 15:58:26+00:00 | \n",
- " 9.0 | \n",
- " False | \n",
- " cirque | \n",
- " le grand t | \n",
- " abo t gourmand jeune | \n",
- " 5èmes hurlants | \n",
- " test 2016/2017 | \n",
- " 2016-11-18 00:00:00+01:00 | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " True | \n",
+ " 2 | \n",
+ " 62.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1745.021736 | \n",
+ " 1743.045035 | \n",
+ " 1.976701 | \n",
+ " 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
- " 1799179 | \n",
- " 36984 | \n",
- " 409613 | \n",
+ " 19485 | \n",
+ " 131 | \n",
+ " 21 | \n",
+ " 1878.0 | \n",
+ " 2 | \n",
" 1 | \n",
- " guichet | \n",
- " 2016-04-28 15:58:26+00:00 | \n",
- " 9.0 | \n",
- " False | \n",
- " théâtre | \n",
- " le grand t | \n",
- " abo t gourmand jeune | \n",
- " dom juan | \n",
- " test 2016/2017 | \n",
- " 2016-12-07 00:00:00+01:00 | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " True | \n",
+ " 2649.044745 | \n",
+ " 85.240845 | \n",
+ " 2563.803900 | \n",
+ " 84.0 | \n",
"
\n",
" \n",
" 3 | \n",
- " 1799180 | \n",
- " 36984 | \n",
- " 409613 | \n",
+ " 19486 | \n",
+ " 10 | \n",
+ " 4 | \n",
+ " 96.0 | \n",
" 1 | \n",
- " guichet | \n",
- " 2016-04-28 15:58:26+00:00 | \n",
- " 9.0 | \n",
- " False | \n",
- " théâtre | \n",
- " le grand t | \n",
- " abo t gourmand jeune | \n",
- " vanishing point | \n",
- " test 2016/2017 | \n",
- " 2017-01-04 00:00:00+01:00 | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " True | \n",
+ " 0 | \n",
+ " 1944.077604 | \n",
+ " 1742.794225 | \n",
+ " 201.283380 | \n",
+ " 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
- " 1799181 | \n",
- " 36984 | \n",
- " 409613 | \n",
- " 3 | \n",
- " guichet | \n",
- " 2016-04-28 15:58:26+00:00 | \n",
+ " 19487 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 33.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1742.877766 | \n",
+ " 1742.877766 | \n",
+ " 0.000000 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 26100 | \n",
+ " 824877 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " -12.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 5.956111 | \n",
+ " 5.956111 | \n",
+ " 0.000000 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 26101 | \n",
+ " 824878 | \n",
+ " 1 | \n",
+ " 1 | \n",
" 12.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 5.956921 | \n",
+ " 5.956921 | \n",
+ " 0.000000 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 26102 | \n",
+ " 824879 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " -38.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 5.226238 | \n",
+ " 5.226238 | \n",
+ " 0.000000 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 26103 | \n",
+ " 824991 | \n",
+ " 14 | \n",
+ " 3 | \n",
+ " -100.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 3.021539 | \n",
+ " 3.017222 | \n",
+ " 0.004317 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 26104 | \n",
+ " 824998 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 25.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0.072720 | \n",
+ " 0.072720 | \n",
+ " 0.000000 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "\n",
+ "26105 rows × 10 columns
\n",
+ ""
+ ],
+ "text/plain": [
+ " customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
+ "0 19482 88 29 872.0 2 \n",
+ "1 19484 3 2 62.0 1 \n",
+ "2 19485 131 21 1878.0 2 \n",
+ "3 19486 10 4 96.0 1 \n",
+ "4 19487 2 1 33.0 1 \n",
+ "... ... ... ... ... ... \n",
+ "26100 824877 1 1 -12.0 1 \n",
+ "26101 824878 1 1 12.0 1 \n",
+ "26102 824879 2 1 -38.0 1 \n",
+ "26103 824991 14 3 -100.0 1 \n",
+ "26104 824998 1 1 25.0 1 \n",
+ "\n",
+ " vente_internet_max purchase_date_min purchase_date_max \\\n",
+ "0 1 2643.092500 718.149398 \n",
+ "1 0 1745.021736 1743.045035 \n",
+ "2 1 2649.044745 85.240845 \n",
+ "3 0 1944.077604 1742.794225 \n",
+ "4 0 1742.877766 1742.877766 \n",
+ "... ... ... ... \n",
+ "26100 0 5.956111 5.956111 \n",
+ "26101 0 5.956921 5.956921 \n",
+ "26102 0 5.226238 5.226238 \n",
+ "26103 0 3.021539 3.017222 \n",
+ "26104 0 0.072720 0.072720 \n",
+ "\n",
+ " time_between_purchase nb_tickets_internet \n",
+ "0 1924.943102 8.0 \n",
+ "1 1.976701 0.0 \n",
+ "2 2563.803900 84.0 \n",
+ "3 201.283380 0.0 \n",
+ "4 0.000000 0.0 \n",
+ "... ... ... \n",
+ "26100 0.000000 0.0 \n",
+ "26101 0.000000 0.0 \n",
+ "26102 0.000000 0.0 \n",
+ "26103 0.004317 0.0 \n",
+ "26104 0.000000 0.0 \n",
+ "\n",
+ "[26105 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_tickets_kpi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "4e8c0d75-117f-4400-8d55-b3ae3f43501b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " street_id | \n",
+ " structure_id | \n",
+ " mcp_contact_id | \n",
+ " fidelity | \n",
+ " tenant_id | \n",
+ " is_partner | \n",
+ " deleted_at | \n",
+ " gender | \n",
+ " is_email_true | \n",
+ " ... | \n",
+ " total_price | \n",
+ " purchase_count | \n",
+ " first_buying_date | \n",
+ " country | \n",
+ " gender_label | \n",
+ " gender_female | \n",
+ " gender_male | \n",
+ " gender_other | \n",
+ " country_fr | \n",
+ " has_tags | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 821538 | \n",
+ " 139 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 875 | \n",
" False | \n",
- " cirque | \n",
- " la cite des congres | \n",
- " abo t gourmand jeune | \n",
- " a o lang pho | \n",
- " test 2016/2017 | \n",
- " 2017-01-03 00:00:00+01:00 | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
+ " NaN | \n",
+ " 2 | \n",
" True | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " other | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 809126 | \n",
+ " 1063 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " True | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " fr | \n",
+ " other | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 11005 | \n",
+ " 1063 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 14 | \n",
+ " NaN | \n",
+ " fr | \n",
+ " other | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 17663 | \n",
+ " 12731 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " False | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " fr | \n",
+ " female | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 38100 | \n",
+ " 12395 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 875 | \n",
+ " False | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " True | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " fr | \n",
+ " female | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
"
\n",
" \n",
" ... | \n",
@@ -422,183 +1269,203 @@
" ... | \n",
" ... | \n",
" ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
"
\n",
" \n",
- " 492309 | \n",
- " 3252232 | \n",
- " 621716 | \n",
- " 710062 | \n",
- " 1 | \n",
- " guichet | \n",
- " 2023-03-09 11:08:45+00:00 | \n",
- " 7.0 | \n",
+ " 98789 | \n",
+ " 766266 | \n",
+ " 139 | \n",
+ " NaN | \n",
+ " 181304.0 | \n",
+ " 0 | \n",
+ " 875 | \n",
" False | \n",
- " théâtre | \n",
- " cap nort | \n",
- " tarif sco co 1 seance scolaire | \n",
- " sur moi, le temps | \n",
- " 2022/2023 | \n",
- " 2023-03-13 14:00:00+01:00 | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
+ " NaN | \n",
+ " 2 | \n",
" True | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " other | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 492310 | \n",
- " 3252233 | \n",
- " 621716 | \n",
- " 710062 | \n",
- " 1 | \n",
- " guichet | \n",
- " 2023-03-09 11:08:45+00:00 | \n",
- " 7.0 | \n",
+ " 98790 | \n",
+ " 766336 | \n",
+ " 139 | \n",
+ " NaN | \n",
+ " 178189.0 | \n",
+ " 0 | \n",
+ " 875 | \n",
" False | \n",
- " théâtre | \n",
- " cap nort | \n",
- " tarif sco co 1 seance scolaire | \n",
- " sur moi, le temps | \n",
- " 2022/2023 | \n",
- " 2023-03-13 14:00:00+01:00 | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
+ " NaN | \n",
+ " 2 | \n",
" True | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " other | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 492311 | \n",
- " 3252234 | \n",
- " 621716 | \n",
- " 710062 | \n",
- " 1 | \n",
- " guichet | \n",
- " 2023-03-09 11:08:45+00:00 | \n",
- " 7.0 | \n",
+ " 98791 | \n",
+ " 766348 | \n",
+ " 139 | \n",
+ " NaN | \n",
+ " 178141.0 | \n",
+ " 0 | \n",
+ " 875 | \n",
" False | \n",
- " théâtre | \n",
- " cap nort | \n",
- " tarif sco co 1 seance scolaire | \n",
- " sur moi, le temps | \n",
- " 2022/2023 | \n",
- " 2023-03-13 14:00:00+01:00 | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
+ " NaN | \n",
+ " 2 | \n",
" True | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " other | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 492312 | \n",
- " 3252235 | \n",
- " 621716 | \n",
- " 710062 | \n",
- " 1 | \n",
- " guichet | \n",
- " 2023-03-09 11:08:45+00:00 | \n",
- " 7.0 | \n",
+ " 98792 | \n",
+ " 766363 | \n",
+ " 139 | \n",
+ " NaN | \n",
+ " 176807.0 | \n",
+ " 0 | \n",
+ " 875 | \n",
" False | \n",
- " théâtre | \n",
- " cap nort | \n",
- " tarif sco co 1 seance scolaire | \n",
- " sur moi, le temps | \n",
- " 2022/2023 | \n",
- " 2023-03-13 14:00:00+01:00 | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
+ " NaN | \n",
+ " 2 | \n",
" True | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " other | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 492313 | \n",
- " 3252236 | \n",
- " 621716 | \n",
- " 710062 | \n",
- " 1 | \n",
- " guichet | \n",
- " 2023-03-09 11:08:45+00:00 | \n",
- " 7.0 | \n",
+ " 98793 | \n",
+ " 766366 | \n",
+ " 139 | \n",
+ " NaN | \n",
+ " 176788.0 | \n",
+ " 0 | \n",
+ " 875 | \n",
" False | \n",
- " théâtre | \n",
- " cap nort | \n",
- " tarif sco co 1 seance scolaire | \n",
- " sur moi, le temps | \n",
- " 2022/2023 | \n",
- " 2023-03-13 14:00:00+01:00 | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
+ " NaN | \n",
+ " 2 | \n",
" True | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " other | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 0 | \n",
"
\n",
" \n",
"
\n",
- "
492314 rows × 16 columns
\n",
+ "
98794 rows × 28 columns
\n",
"
"
],
"text/plain": [
- " ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
- "0 1799177 36984 409613 2 guichet \n",
- "1 1799178 36984 409613 3 guichet \n",
- "2 1799179 36984 409613 1 guichet \n",
- "3 1799180 36984 409613 1 guichet \n",
- "4 1799181 36984 409613 3 guichet \n",
- "... ... ... ... ... ... \n",
- "492309 3252232 621716 710062 1 guichet \n",
- "492310 3252233 621716 710062 1 guichet \n",
- "492311 3252234 621716 710062 1 guichet \n",
- "492312 3252235 621716 710062 1 guichet \n",
- "492313 3252236 621716 710062 1 guichet \n",
+ " customer_id street_id structure_id mcp_contact_id fidelity \\\n",
+ "0 821538 139 NaN NaN 0 \n",
+ "1 809126 1063 NaN NaN 0 \n",
+ "2 11005 1063 NaN NaN 0 \n",
+ "3 17663 12731 NaN NaN 0 \n",
+ "4 38100 12395 NaN NaN 0 \n",
+ "... ... ... ... ... ... \n",
+ "98789 766266 139 NaN 181304.0 0 \n",
+ "98790 766336 139 NaN 178189.0 0 \n",
+ "98791 766348 139 NaN 178141.0 0 \n",
+ "98792 766363 139 NaN 176807.0 0 \n",
+ "98793 766366 139 NaN 176788.0 0 \n",
"\n",
- " purchase_date amount is_full_price name_event_types \\\n",
- "0 2016-04-28 15:58:26+00:00 9.0 False danse \n",
- "1 2016-04-28 15:58:26+00:00 9.0 False cirque \n",
- "2 2016-04-28 15:58:26+00:00 9.0 False théâtre \n",
- "3 2016-04-28 15:58:26+00:00 9.0 False théâtre \n",
- "4 2016-04-28 15:58:26+00:00 12.0 False cirque \n",
- "... ... ... ... ... \n",
- "492309 2023-03-09 11:08:45+00:00 7.0 False théâtre \n",
- "492310 2023-03-09 11:08:45+00:00 7.0 False théâtre \n",
- "492311 2023-03-09 11:08:45+00:00 7.0 False théâtre \n",
- "492312 2023-03-09 11:08:45+00:00 7.0 False théâtre \n",
- "492313 2023-03-09 11:08:45+00:00 7.0 False théâtre \n",
+ " tenant_id is_partner deleted_at gender is_email_true ... \\\n",
+ "0 875 False NaN 2 True ... \n",
+ "1 875 False NaN 2 True ... \n",
+ "2 875 False NaN 2 False ... \n",
+ "3 875 False NaN 0 False ... \n",
+ "4 875 False NaN 0 True ... \n",
+ "... ... ... ... ... ... ... \n",
+ "98789 875 False NaN 2 True ... \n",
+ "98790 875 False NaN 2 True ... \n",
+ "98791 875 False NaN 2 True ... \n",
+ "98792 875 False NaN 2 True ... \n",
+ "98793 875 False NaN 2 True ... \n",
"\n",
- " name_facilities name_categories \\\n",
- "0 le grand t abo t gourmand jeune \n",
- "1 le grand t abo t gourmand jeune \n",
- "2 le grand t abo t gourmand jeune \n",
- "3 le grand t abo t gourmand jeune \n",
- "4 la cite des congres abo t gourmand jeune \n",
- "... ... ... \n",
- "492309 cap nort tarif sco co 1 seance scolaire \n",
- "492310 cap nort tarif sco co 1 seance scolaire \n",
- "492311 cap nort tarif sco co 1 seance scolaire \n",
- "492312 cap nort tarif sco co 1 seance scolaire \n",
- "492313 cap nort tarif sco co 1 seance scolaire \n",
+ " total_price purchase_count first_buying_date country gender_label \\\n",
+ "0 0.0 0 NaN NaN other \n",
+ "1 0.0 0 NaN fr other \n",
+ "2 NaN 14 NaN fr other \n",
+ "3 NaN 1 NaN fr female \n",
+ "4 NaN 1 NaN fr female \n",
+ "... ... ... ... ... ... \n",
+ "98789 0.0 0 NaN NaN other \n",
+ "98790 0.0 0 NaN NaN other \n",
+ "98791 0.0 0 NaN NaN other \n",
+ "98792 0.0 0 NaN NaN other \n",
+ "98793 0.0 0 NaN NaN other \n",
"\n",
- " name_events name_seasons start_date_time \\\n",
- "0 aringa rossa test 2016/2017 2016-09-27 00:00:00+02:00 \n",
- "1 5èmes hurlants test 2016/2017 2016-11-18 00:00:00+01:00 \n",
- "2 dom juan test 2016/2017 2016-12-07 00:00:00+01:00 \n",
- "3 vanishing point test 2016/2017 2017-01-04 00:00:00+01:00 \n",
- "4 a o lang pho test 2016/2017 2017-01-03 00:00:00+01:00 \n",
- "... ... ... ... \n",
- "492309 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
- "492310 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
- "492311 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
- "492312 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
- "492313 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
+ " gender_female gender_male gender_other country_fr has_tags \n",
+ "0 0 0 1 NaN 0 \n",
+ "1 0 0 1 1.0 0 \n",
+ "2 0 0 1 1.0 0 \n",
+ "3 1 0 0 1.0 0 \n",
+ "4 1 0 0 1.0 0 \n",
+ "... ... ... ... ... ... \n",
+ "98789 0 0 1 NaN 0 \n",
+ "98790 0 0 1 NaN 0 \n",
+ "98791 0 0 1 NaN 0 \n",
+ "98792 0 0 1 NaN 0 \n",
+ "98793 0 0 1 NaN 0 \n",
"\n",
- " end_date_time open \n",
- "0 1901-01-01 00:09:21+00:09 True \n",
- "1 1901-01-01 00:09:21+00:09 True \n",
- "2 1901-01-01 00:09:21+00:09 True \n",
- "3 1901-01-01 00:09:21+00:09 True \n",
- "4 1901-01-01 00:09:21+00:09 True \n",
- "... ... ... \n",
- "492309 1901-01-01 00:09:21+00:09 True \n",
- "492310 1901-01-01 00:09:21+00:09 True \n",
- "492311 1901-01-01 00:09:21+00:09 True \n",
- "492312 1901-01-01 00:09:21+00:09 True \n",
- "492313 1901-01-01 00:09:21+00:09 True \n",
- "\n",
- "[492314 rows x 16 columns]"
+ "[98794 rows x 28 columns]"
]
},
- "execution_count": 25,
+ "execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "purchases"
+ "# KPIs on socio-demographic data\n",
+ "df_customerplus_clean = customerplus_kpi_function(df_customerplus_cleaned)\n",
+ " \n",
+ "df_customerplus_clean"
]
},
{
@@ -811,9 +1678,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 40,
"id": "c90d94ab-cf0e-4d18-9d5e-cb1d22f4d58b",
"metadata": {},
+ "outputs": [
+ {
+ "ename": "SyntaxError",
+ "evalue": "f-string: expecting '}' (1665996669.py, line 1)",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;36m Cell \u001b[0;32mIn[40], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{'musee'}'\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m f-string: expecting '}'\n"
+ ]
+ }
+ ],
+ "source": [
+ "BUCKET_OUT = f\"projet-bdc2324-team1/Generalization/{'musee'}\"  # outer double quotes: reusing ' inside {...} is a SyntaxError before Python 3.12"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d6767ba6-94ef-43f9-8f67-15ecdb41a70b",
+ "metadata": {},
"outputs": [],
"source": []
}