This commit is contained in:
Fanta RODRIGUE 2024-03-10 16:41:43 +00:00
parent 4aa781daf0
commit 75664a33d7
2 changed files with 294 additions and 504 deletions

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 106,
"execution_count": 1,
"id": "0eefb67b-5399-44fa-9c1c-7724ec1c7cd2",
"metadata": {},
"outputs": [],
@ -17,19 +17,19 @@
},
{
"cell_type": "code",
"execution_count": 107,
"execution_count": 7,
"id": "37977b4e-42e7-4d8e-8b9a-6843292fd128",
"metadata": {},
"outputs": [],
"source": [
"# Import KPI construction functions\n",
"exec(open('0_KPI_functions.py').read())\n",
"# exec(open('../0_KPI_functions.py').read())\n"
"#exec(open('0_KPI_functions.py').read())\n",
"exec(open('../0_KPI_functions.py').read())\n"
]
},
{
"cell_type": "code",
"execution_count": 108,
"execution_count": 8,
"id": "cca62d72-f809-41a9-bb06-1be7d6b09307",
"metadata": {},
"outputs": [
@ -42,7 +42,7 @@
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
]
},
"execution_count": 108,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@ -58,345 +58,7 @@
},
{
"cell_type": "code",
"execution_count": 109,
"id": "68fb54f3-8eb3-4cd0-966b-000876912fb5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_id</th>\n",
" <th>event_type_id</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>amount</th>\n",
" <th>is_full_price</th>\n",
" <th>name_event_types</th>\n",
" <th>name_facilities</th>\n",
" <th>name_categories</th>\n",
" <th>name_events</th>\n",
" <th>name_seasons</th>\n",
" <th>start_date_time</th>\n",
" <th>end_date_time</th>\n",
" <th>open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1799177</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>2</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>danse</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>aringa rossa</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-09-27 00:00:00+02:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1799178</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>3</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>cirque</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>5èmes hurlants</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-11-18 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1799179</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>dom juan</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-12-07 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1799180</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>vanishing point</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2017-01-04 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1799181</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>3</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>12.0</td>\n",
" <td>False</td>\n",
" <td>cirque</td>\n",
" <td>la cite des congres</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>a o lang pho</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2017-01-03 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492309</th>\n",
" <td>3252232</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492310</th>\n",
" <td>3252233</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492311</th>\n",
" <td>3252234</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492312</th>\n",
" <td>3252235</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492313</th>\n",
" <td>3252236</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>492314 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
"0 1799177 36984 409613 2 guichet \n",
"1 1799178 36984 409613 3 guichet \n",
"2 1799179 36984 409613 1 guichet \n",
"3 1799180 36984 409613 1 guichet \n",
"4 1799181 36984 409613 3 guichet \n",
"... ... ... ... ... ... \n",
"492309 3252232 621716 710062 1 guichet \n",
"492310 3252233 621716 710062 1 guichet \n",
"492311 3252234 621716 710062 1 guichet \n",
"492312 3252235 621716 710062 1 guichet \n",
"492313 3252236 621716 710062 1 guichet \n",
"\n",
" purchase_date amount is_full_price name_event_types \\\n",
"0 2016-04-28 17:58:26+02:00 9.0 False danse \n",
"1 2016-04-28 17:58:26+02:00 9.0 False cirque \n",
"2 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
"3 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
"4 2016-04-28 17:58:26+02:00 12.0 False cirque \n",
"... ... ... ... ... \n",
"492309 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492310 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492311 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492312 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492313 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"\n",
" name_facilities name_categories \\\n",
"0 le grand t abo t gourmand jeune \n",
"1 le grand t abo t gourmand jeune \n",
"2 le grand t abo t gourmand jeune \n",
"3 le grand t abo t gourmand jeune \n",
"4 la cite des congres abo t gourmand jeune \n",
"... ... ... \n",
"492309 cap nort tarif sco co 1 seance scolaire \n",
"492310 cap nort tarif sco co 1 seance scolaire \n",
"492311 cap nort tarif sco co 1 seance scolaire \n",
"492312 cap nort tarif sco co 1 seance scolaire \n",
"492313 cap nort tarif sco co 1 seance scolaire \n",
"\n",
" name_events name_seasons start_date_time \\\n",
"0 aringa rossa test 2016/2017 2016-09-27 00:00:00+02:00 \n",
"1 5èmes hurlants test 2016/2017 2016-11-18 00:00:00+01:00 \n",
"2 dom juan test 2016/2017 2016-12-07 00:00:00+01:00 \n",
"3 vanishing point test 2016/2017 2017-01-04 00:00:00+01:00 \n",
"4 a o lang pho test 2016/2017 2017-01-03 00:00:00+01:00 \n",
"... ... ... ... \n",
"492309 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492310 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492311 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492312 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492313 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"\n",
" end_date_time open \n",
"0 1901-01-01 00:09:21+00:09 True \n",
"1 1901-01-01 00:09:21+00:09 True \n",
"2 1901-01-01 00:09:21+00:09 True \n",
"3 1901-01-01 00:09:21+00:09 True \n",
"4 1901-01-01 00:09:21+00:09 True \n",
"... ... ... \n",
"492309 1901-01-01 00:09:21+00:09 True \n",
"492310 1901-01-01 00:09:21+00:09 True \n",
"492311 1901-01-01 00:09:21+00:09 True \n",
"492312 1901-01-01 00:09:21+00:09 True \n",
"492313 1901-01-01 00:09:21+00:09 True \n",
"\n",
"[492314 rows x 16 columns]"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced"
]
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 10,
"id": "0e1ce56c-2e50-456c-ba97-ed4a699cc8d4",
"metadata": {},
"outputs": [],
@ -412,7 +74,7 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 11,
"id": "bcdba447-90f7-450c-b4a3-6da656e38493",
"metadata": {},
"outputs": [
@ -420,7 +82,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_438/3710670046.py:6: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"/tmp/ipykernel_491/3710670046.py:6: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n"
]
}
@ -437,7 +99,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 12,
"id": "637aa400-f49a-4d8d-802a-868b241f8a9d",
"metadata": {},
"outputs": [],
@ -452,7 +114,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 13,
"id": "e60529b5-986f-4685-91e1-782c2b022e09",
"metadata": {},
"outputs": [
@ -608,7 +270,7 @@
"[69258 rows x 5 columns]"
]
},
"execution_count": 45,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}

File diff suppressed because one or more lines are too long