Quelques observations

This commit is contained in:
Antoine JOUBREL 2024-02-28 20:57:28 +00:00
parent 80a8642484
commit 2fabf98413

View File

@ -93,589 +93,6 @@
" return df"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "35da2e15-1e23-4653-a214-c6ff8f186e85",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_4/customerplus_cleaned.csv\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>max_price</th>\n",
" <th>ticket_sum</th>\n",
" <th>average_price</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>479734</td>\n",
" <td>3587</td>\n",
" <td>NaN</td>\n",
" <td>184801.0</td>\n",
" <td>0</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1537</td>\n",
" <td>1352</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>504615</td>\n",
" <td>3587</td>\n",
" <td>NaN</td>\n",
" <td>152176.0</td>\n",
" <td>0</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3832780</td>\n",
" <td>3587</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3096540</td>\n",
" <td>3587</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>320804</th>\n",
" <td>2637745</td>\n",
" <td>406842</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>0.000000</td>\n",
" <td>2.0</td>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>2021-12-08 20:30:11+00:00</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>320805</th>\n",
" <td>23334</td>\n",
" <td>22677</td>\n",
" <td>NaN</td>\n",
" <td>185203.0</td>\n",
" <td>4</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>13.0</td>\n",
" <td>13</td>\n",
" <td>11.692308</td>\n",
" <td>0.0</td>\n",
" <td>25.333333</td>\n",
" <td>2.166667</td>\n",
" <td>152.0</td>\n",
" <td>6</td>\n",
" <td>2018-05-02 07:47:40+00:00</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>320806</th>\n",
" <td>2641373</td>\n",
" <td>408068</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>12.0</td>\n",
" <td>4</td>\n",
" <td>12.000000</td>\n",
" <td>0.0</td>\n",
" <td>48.000000</td>\n",
" <td>4.000000</td>\n",
" <td>48.0</td>\n",
" <td>1</td>\n",
" <td>2021-12-09 11:46:23+00:00</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>320807</th>\n",
" <td>2641469</td>\n",
" <td>408160</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>12.0</td>\n",
" <td>1</td>\n",
" <td>12.000000</td>\n",
" <td>0.0</td>\n",
" <td>12.000000</td>\n",
" <td>1.000000</td>\n",
" <td>12.0</td>\n",
" <td>1</td>\n",
" <td>2021-12-09 18:50:55+00:00</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>320808</th>\n",
" <td>2641474</td>\n",
" <td>408165</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>12.0</td>\n",
" <td>1</td>\n",
" <td>12.000000</td>\n",
" <td>0.0</td>\n",
" <td>12.000000</td>\n",
" <td>1.000000</td>\n",
" <td>12.0</td>\n",
" <td>1</td>\n",
" <td>2021-12-09 19:02:42+00:00</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>320809 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"0 479734 3587 NaN 184801.0 0 \n",
"1 1537 1352 NaN NaN 0 \n",
"2 504615 3587 NaN 152176.0 0 \n",
"3 3832780 3587 NaN NaN 0 \n",
"4 3096540 3587 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"320804 2637745 406842 NaN NaN 1 \n",
"320805 23334 22677 NaN 185203.0 4 \n",
"320806 2641373 408068 NaN NaN 1 \n",
"320807 2641469 408160 NaN NaN 1 \n",
"320808 2641474 408165 NaN NaN 1 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"0 1342 False NaN 0 True ... \n",
"1 1342 False NaN 0 True ... \n",
"2 1342 False NaN 0 True ... \n",
"3 1342 False NaN 2 True ... \n",
"4 1342 False NaN 2 True ... \n",
"... ... ... ... ... ... ... \n",
"320804 1342 False NaN 0 True ... \n",
"320805 1342 False NaN 0 True ... \n",
"320806 1342 False NaN 0 True ... \n",
"320807 1342 False NaN 0 True ... \n",
"320808 1342 False NaN 0 True ... \n",
"\n",
" max_price ticket_sum average_price average_purchase_delay \\\n",
"0 NaN 0 NaN NaN \n",
"1 NaN 0 NaN NaN \n",
"2 NaN 0 NaN NaN \n",
"3 NaN 0 NaN NaN \n",
"4 NaN 0 NaN NaN \n",
"... ... ... ... ... \n",
"320804 0.0 2 0.000000 2.0 \n",
"320805 13.0 13 11.692308 0.0 \n",
"320806 12.0 4 12.000000 0.0 \n",
"320807 12.0 1 12.000000 0.0 \n",
"320808 12.0 1 12.000000 0.0 \n",
"\n",
" average_price_basket average_ticket_basket total_price \\\n",
"0 NaN NaN 0.0 \n",
"1 NaN NaN 0.0 \n",
"2 NaN NaN 0.0 \n",
"3 NaN NaN 0.0 \n",
"4 NaN NaN 0.0 \n",
"... ... ... ... \n",
"320804 0.000000 2.000000 0.0 \n",
"320805 25.333333 2.166667 152.0 \n",
"320806 48.000000 4.000000 48.0 \n",
"320807 12.000000 1.000000 12.0 \n",
"320808 12.000000 1.000000 12.0 \n",
"\n",
" purchase_count first_buying_date country \n",
"0 0 NaN fr \n",
"1 0 NaN fr \n",
"2 0 NaN fr \n",
"3 0 NaN fr \n",
"4 0 NaN fr \n",
"... ... ... ... \n",
"320804 1 2021-12-08 20:30:11+00:00 fr \n",
"320805 6 2018-05-02 07:47:40+00:00 fr \n",
"320806 1 2021-12-09 11:46:23+00:00 fr \n",
"320807 1 2021-12-09 18:50:55+00:00 fr \n",
"320808 1 2021-12-09 19:02:42+00:00 fr \n",
"\n",
"[320809 rows x 22 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"display_databases(\"4\", \"customerplus_cleaned\")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "6c8ad8c3-25df-4fe4-9ad0-ee5f9498bc14",
"metadata": {},
"outputs": [],
"source": [
"pd.reset_option('display.max_rows')"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "c897916c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>code</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>101</td>\n",
" <td>hongrie</td>\n",
" <td>hu</td>\n",
" <td>2023-06-13 11:17:40.600622+02:00</td>\n",
" <td>2023-06-13 11:17:40.600622+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>albanie</td>\n",
" <td>al</td>\n",
" <td>2023-06-13 11:17:40.540652+02:00</td>\n",
" <td>2023-06-13 11:17:40.540652+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>antarctique</td>\n",
" <td>aq</td>\n",
" <td>2023-06-13 11:17:40.541315+02:00</td>\n",
" <td>2023-06-13 11:17:40.541315+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>12</td>\n",
" <td>autriche</td>\n",
" <td>at</td>\n",
" <td>2023-06-13 11:17:40.546711+02:00</td>\n",
" <td>2023-06-13 11:17:40.546711+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>samoa américaines</td>\n",
" <td>as</td>\n",
" <td>2023-06-13 11:17:40.542569+02:00</td>\n",
" <td>2023-06-13 11:17:40.542569+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238</th>\n",
" <td>228</td>\n",
" <td>royaume-uni</td>\n",
" <td>gb</td>\n",
" <td>2023-06-13 11:17:40.678023+02:00</td>\n",
" <td>2023-06-13 11:17:40.678023+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>239</th>\n",
" <td>25</td>\n",
" <td>brésil</td>\n",
" <td>br</td>\n",
" <td>2023-06-13 11:17:40.554209+02:00</td>\n",
" <td>2023-06-13 11:17:40.554209+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240</th>\n",
" <td>10</td>\n",
" <td>argentine</td>\n",
" <td>ar</td>\n",
" <td>2023-06-13 11:17:40.545489+02:00</td>\n",
" <td>2023-06-13 11:17:40.545489+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>241</th>\n",
" <td>203</td>\n",
" <td>espagne</td>\n",
" <td>es</td>\n",
" <td>2023-06-13 11:17:40.662472+02:00</td>\n",
" <td>2023-06-13 11:17:40.662472+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>242</th>\n",
" <td>192</td>\n",
" <td>arabie saoudite</td>\n",
" <td>sa</td>\n",
" <td>2023-06-13 11:17:40.656154+02:00</td>\n",
" <td>2023-06-13 11:17:40.656154+02:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>243 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" id name code created_at \\\n",
"0 101 hongrie hu 2023-06-13 11:17:40.600622+02:00 \n",
"1 2 albanie al 2023-06-13 11:17:40.540652+02:00 \n",
"2 3 antarctique aq 2023-06-13 11:17:40.541315+02:00 \n",
"3 12 autriche at 2023-06-13 11:17:40.546711+02:00 \n",
"4 5 samoa américaines as 2023-06-13 11:17:40.542569+02:00 \n",
".. ... ... ... ... \n",
"238 228 royaume-uni gb 2023-06-13 11:17:40.678023+02:00 \n",
"239 25 brésil br 2023-06-13 11:17:40.554209+02:00 \n",
"240 10 argentine ar 2023-06-13 11:17:40.545489+02:00 \n",
"241 203 espagne es 2023-06-13 11:17:40.662472+02:00 \n",
"242 192 arabie saoudite sa 2023-06-13 11:17:40.656154+02:00 \n",
"\n",
" updated_at \n",
"0 2023-06-13 11:17:40.600622+02:00 \n",
"1 2023-06-13 11:17:40.540652+02:00 \n",
"2 2023-06-13 11:17:40.541315+02:00 \n",
"3 2023-06-13 11:17:40.546711+02:00 \n",
"4 2023-06-13 11:17:40.542569+02:00 \n",
".. ... \n",
"238 2023-06-13 11:17:40.678023+02:00 \n",
"239 2023-06-13 11:17:40.554209+02:00 \n",
"240 2023-06-13 11:17:40.545489+02:00 \n",
"241 2023-06-13 11:17:40.662472+02:00 \n",
"242 2023-06-13 11:17:40.656154+02:00 \n",
"\n",
"[243 rows x 5 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"load_dataset_2(\"7\", \"countries\")"
]
},
{
"cell_type": "markdown",
"id": "ca2c8b6a-4965-422e-ba7c-66423a464fc1",
@ -1058,7 +475,9 @@
{
"cell_type": "markdown",
"id": "605cced5-052f-4a99-ac26-020c5d2ab633",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## KPI sur tags"
]
@ -2098,6 +1517,374 @@
"tags_information(\"101\", 20)"
]
},
{
"cell_type": "markdown",
"id": "87d131cd-ead0-4ef4-a8ee-b09022d08ffa",
"metadata": {},
"source": [
"## KPI product"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "26582be9-cfd1-48ea-a0a7-31101fdeb9d1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_1/products_purchased_reduced.csv\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_id</th>\n",
" <th>event_type_id</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>amount</th>\n",
" <th>is_full_price</th>\n",
" <th>name_event_types</th>\n",
" <th>name_facilities</th>\n",
" <th>name_categories</th>\n",
" <th>name_events</th>\n",
" <th>name_seasons</th>\n",
" <th>start_date_time</th>\n",
" <th>end_date_time</th>\n",
" <th>open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13070859</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>8.0</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" <td>2018-12-31 14:15:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13070860</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>4.0</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" <td>2018-12-31 14:15:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13070861</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>4.0</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" <td>2018-12-31 14:15:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13070862</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>4.0</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" <td>2018-12-31 14:15:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13070863</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>4.0</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" <td>2018-12-31 14:15:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
"0 13070859 48187 5107462 4 vente en ligne \n",
"1 13070860 48187 5107462 4 vente en ligne \n",
"2 13070861 48187 5107462 4 vente en ligne \n",
"3 13070862 48187 5107462 4 vente en ligne \n",
"4 13070863 48187 5107462 4 vente en ligne \n",
"\n",
" purchase_date amount is_full_price name_event_types \\\n",
"0 2018-12-28 14:47:50+00:00 8.0 False spectacle vivant \n",
"1 2018-12-28 14:47:50+00:00 4.0 False spectacle vivant \n",
"2 2018-12-28 14:47:50+00:00 4.0 False spectacle vivant \n",
"3 2018-12-28 14:47:50+00:00 4.0 False spectacle vivant \n",
"4 2018-12-28 14:47:50+00:00 4.0 False spectacle vivant \n",
"\n",
" name_facilities name_categories name_events name_seasons \\\n",
"0 mucem indiv prog enfant l'école des magiciens 2018 \n",
"1 mucem indiv prog enfant l'école des magiciens 2018 \n",
"2 mucem indiv prog enfant l'école des magiciens 2018 \n",
"3 mucem indiv prog enfant l'école des magiciens 2018 \n",
"4 mucem indiv prog enfant l'école des magiciens 2018 \n",
"\n",
" start_date_time end_date_time open \n",
"0 2018-12-31 14:15:00+01:00 1901-01-01 00:09:21+00:09 True \n",
"1 2018-12-31 14:15:00+01:00 1901-01-01 00:09:21+00:09 True \n",
"2 2018-12-31 14:15:00+01:00 1901-01-01 00:09:21+00:09 True \n",
"3 2018-12-31 14:15:00+01:00 1901-01-01 00:09:21+00:09 True \n",
"4 2018-12-31 14:15:00+01:00 1901-01-01 00:09:21+00:09 True "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tenant_id = \"1\"\n",
"\n",
"df_product = display_databases(tenant_id, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
"\n",
"df_product.head()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "533bf499-dd56-4d29-b261-ca1e4928c9c7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name_event_types</th>\n",
" <th>name_events</th>\n",
" <th>ticket_id</th>\n",
" <th>prop_tickets</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>offre muséale groupe</td>\n",
" <td>visite générale du mucem (1h30)</td>\n",
" <td>43814</td>\n",
" <td>0.024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>212</th>\n",
" <td>offre muséale individuel</td>\n",
" <td>visite autonome scolaires (2h00)</td>\n",
" <td>34423</td>\n",
" <td>0.019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>offre muséale groupe</td>\n",
" <td>visite autonome exposition (1h30)</td>\n",
" <td>26489</td>\n",
" <td>0.015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>210</th>\n",
" <td>offre muséale individuel</td>\n",
" <td>visite autonome adultes (2h00)</td>\n",
" <td>22065</td>\n",
" <td>0.012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>offre muséale groupe</td>\n",
" <td>visites des exterieurs scolaires</td>\n",
" <td>15595</td>\n",
" <td>0.009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>364</th>\n",
" <td>spectacle vivant</td>\n",
" <td>kay ! lettres à un poète disparu</td>\n",
" <td>1</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>443</th>\n",
" <td>spectacle vivant</td>\n",
" <td>mauvais genre</td>\n",
" <td>1</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>375</th>\n",
" <td>spectacle vivant</td>\n",
" <td>la madre que parió a la música</td>\n",
" <td>1</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>260</th>\n",
" <td>spectacle vivant</td>\n",
" <td>ali a les yeux bleus (dès 12 ans)</td>\n",
" <td>1</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>484</th>\n",
" <td>spectacle vivant</td>\n",
" <td>rengaine (dès 12 ans)</td>\n",
" <td>1</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>544 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" name_event_types name_events ticket_id \\\n",
"118 offre muséale groupe visite générale du mucem (1h30) 43814 \n",
"212 offre muséale individuel visite autonome scolaires (2h00) 34423 \n",
"68 offre muséale groupe visite autonome exposition (1h30) 26489 \n",
"210 offre muséale individuel visite autonome adultes (2h00) 22065 \n",
"160 offre muséale groupe visites des exterieurs scolaires 15595 \n",
".. ... ... ... \n",
"364 spectacle vivant kay ! lettres à un poète disparu 1 \n",
"443 spectacle vivant mauvais genre 1 \n",
"375 spectacle vivant la madre que parió a la música 1 \n",
"260 spectacle vivant ali a les yeux bleus (dès 12 ans) 1 \n",
"484 spectacle vivant rengaine (dès 12 ans) 1 \n",
"\n",
" prop_tickets \n",
"118 0.024 \n",
"212 0.019 \n",
"68 0.015 \n",
"210 0.012 \n",
"160 0.009 \n",
".. ... \n",
"364 0.000 \n",
"443 0.000 \n",
"375 0.000 \n",
"260 0.000 \n",
"484 0.000 \n",
"\n",
"[544 rows x 4 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nb_tickets_per_events = df_product.groupby(['name_event_types', 'name_events'])['ticket_id'].count().reset_index().sort_values('ticket_id', ascending = False)\n",
"nb_tickets_per_events['prop_tickets'] = round(nb_tickets_per_events['ticket_id']/len(df_product), 3)\n",
"nb_tickets_per_events"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6a38ff60-b608-43a0-9e18-4cd2bcbcea70",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "1ede9eaa-7f0a-4856-9349-b2747d6a4901",