diff --git a/Notebook_AR.ipynb b/Notebook_AR.ipynb
index f9fab02..9107796 100644
--- a/Notebook_AR.ipynb
+++ b/Notebook_AR.ipynb
@@ -624,7 +624,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_2473/2210053343.py:5: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ "/tmp/ipykernel_513/2210053343.py:5: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" customersplus = pd.read_csv(file_in, sep=\",\")\n"
]
},
@@ -5485,7 +5485,7 @@
},
{
"cell_type": "code",
- "execution_count": 62,
+ "execution_count": 82,
"id": "43576244-c8cf-4ca0-b056-7aea1fbf0bc7",
"metadata": {},
"outputs": [],
@@ -5500,7 +5500,7 @@
},
{
"cell_type": "code",
- "execution_count": 63,
+ "execution_count": 83,
"id": "0fad097e-474c-4af7-b1e1-7d8dda3f09ea",
"metadata": {},
"outputs": [],
@@ -5526,7 +5526,7 @@
},
{
"cell_type": "code",
- "execution_count": 64,
+ "execution_count": 84,
"id": "6213b1eb-c5f8-49dd-ab69-366542380e80",
"metadata": {},
"outputs": [],
@@ -5536,9 +5536,15 @@
" print(\"first merge products and categories\")\n",
" products = load_dataset(\"1products.csv\")\n",
" categories = load_dataset(\"1categories.csv\")\n",
+ " # Drop useless columns\n",
+ " products = products.drop(columns = ['apply_price', 'extra_field', 'amount_consumption'])\n",
+ " categories = categories.drop(columns = ['extra_field', 'quota'])\n",
+ "\n",
+ " # Merge products with their categories\n",
" products_theme = products.merge(categories, how = 'left', left_on = 'category_id',\n",
" right_on = 'id', suffixes=('_products', '_categories'))\n",
" products_theme = products_theme.rename(columns = {\"name\" : \"name_categories\"})\n",
+ " \n",
" # Second merge products_theme and type of categories\n",
" print(\"Second merge products_theme and type of categories\")\n",
" type_of_categories = load_dataset(\"1type_of_categories.csv\")\n",
@@ -5550,12 +5556,14 @@
" products_theme = products_theme.drop(columns = ['id_categories'])\n",
" products_theme = order_columns_id(products_theme)\n",
"\n",
+ " \n",
+ "\n",
" return products_theme"
]
},
{
"cell_type": "code",
- "execution_count": 65,
+ "execution_count": 85,
"id": "b853e020-f73d-44e8-b086-e5548ce21011",
"metadata": {},
"outputs": [
@@ -5612,12 +5620,7 @@
"
type_of_id | \n",
" amount | \n",
" is_full_price | \n",
- " apply_price | \n",
- " extra_field_products | \n",
- " amount_consumption | \n",
" name_categories | \n",
- " extra_field_categories | \n",
- " quota | \n",
" \n",
" \n",
" \n",
@@ -5632,12 +5635,7 @@
" NaN | \n",
" 9.0 | \n",
" False | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
" indiv activité tr | \n",
- " NaN | \n",
- " NaN | \n",
" \n",
" \n",
" 1 | \n",
@@ -5650,12 +5648,7 @@
" 12.0 | \n",
" 9.5 | \n",
" False | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
" indiv entrées tp | \n",
- " NaN | \n",
- " NaN | \n",
"
\n",
" \n",
" 2 | \n",
@@ -5668,12 +5661,7 @@
" 12.0 | \n",
" 11.5 | \n",
" False | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
" indiv entrées tp | \n",
- " NaN | \n",
- " NaN | \n",
"
\n",
" \n",
" 3 | \n",
@@ -5686,12 +5674,7 @@
" NaN | \n",
" 8.0 | \n",
" False | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
" indiv entrées tr | \n",
- " NaN | \n",
- " NaN | \n",
"
\n",
" \n",
" 4 | \n",
@@ -5704,12 +5687,7 @@
" 12.0 | \n",
" 8.5 | \n",
" False | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
" indiv entrées tp | \n",
- " NaN | \n",
- " NaN | \n",
"
\n",
" \n",
"\n",
@@ -5730,22 +5708,15 @@
"3 156773 1 NaN 8.0 False \n",
"4 1175 1 12.0 8.5 False \n",
"\n",
- " apply_price extra_field_products amount_consumption name_categories \\\n",
- "0 0.0 NaN NaN indiv activité tr \n",
- "1 0.0 NaN NaN indiv entrées tp \n",
- "2 0.0 NaN NaN indiv entrées tp \n",
- "3 0.0 NaN NaN indiv entrées tr \n",
- "4 0.0 NaN NaN indiv entrées tp \n",
- "\n",
- " extra_field_categories quota \n",
- "0 NaN NaN \n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN "
+ " name_categories \n",
+ "0 indiv activité tr \n",
+ "1 indiv entrées tp \n",
+ "2 indiv entrées tp \n",
+ "3 indiv entrées tr \n",
+ "4 indiv entrées tp "
]
},
- "execution_count": 65,
+ "execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
@@ -5765,7 +5736,7 @@
},
{
"cell_type": "code",
- "execution_count": 103,
+ "execution_count": 86,
"id": "6ed0ad20-8315-4112-9a85-10e5f04ef852",
"metadata": {},
"outputs": [],
@@ -5775,12 +5746,18 @@
" print(\"first merge events and seasons : \")\n",
" events = load_dataset(\"1events.csv\")\n",
" seasons = load_dataset(\"1seasons.csv\")\n",
+ "\n",
+ " # Drop useless columns\n",
+ " events = events.drop(columns = ['manual_added', 'is_display'])\n",
+ " seasons = seasons.drop(columns = ['start_date_time'])\n",
+ " \n",
" events_theme = events.merge(seasons, how = 'left', left_on = 'season_id', right_on = 'id', suffixes=('_events', '_seasons'))\n",
"\n",
" # Secondly merge events_theme and event_types\n",
" print(\"Secondly merge events_theme and event_types : \")\n",
" event_types = load_dataset(\"1event_types.csv\")\n",
- "\n",
+ " event_types = event_types.drop(columns = ['fidelity_delay'])\n",
+ " \n",
" events_theme = events_theme.merge(event_types, how = 'left', left_on = 'event_type_id', right_on = 'id', suffixes=('_events', '_event_type'))\n",
" events_theme = events_theme.rename(columns = {\"name\" : \"name_event_types\"})\n",
" events_theme = events_theme.drop(columns = 'id')\n",
@@ -5788,8 +5765,10 @@
" # thirdly merge events_theme and facilities\n",
" print(\"thirdly merge events_theme and facilities : \")\n",
" facilities = load_dataset(\"1facilities.csv\")\n",
+ " facilities = facilities.drop(columns = ['fixed_capacity'])\n",
+ " \n",
" events_theme = events_theme.merge(facilities, how = 'left', left_on = 'facility_id', right_on = 'id', suffixes=('_events', '_facility'))\n",
- " events_theme = events_theme.rename(columns = {\"name\" : \"name_facilties\", \"id_events\" : \"event_id\"})\n",
+ " events_theme = events_theme.rename(columns = {\"name\" : \"name_facilities\", \"id_events\" : \"event_id\"})\n",
" events_theme = events_theme.drop(columns = 'id')\n",
"\n",
" # Index cleaning\n",
@@ -5800,7 +5779,7 @@
},
{
"cell_type": "code",
- "execution_count": 104,
+ "execution_count": 87,
"id": "98ef0636-8c45-4a23-a62a-1fbe1544f8ce",
"metadata": {},
"outputs": [
@@ -5858,16 +5837,11 @@
" event_type_id | \n",
" event_type_key_id | \n",
" facility_key_id | \n",
- " fidelity_delay | \n",
" street_id | \n",
" name_events | \n",
- " manual_added | \n",
- " is_display | \n",
" name_seasons | \n",
- " start_date_time | \n",
" name_event_types | \n",
- " name_facilties | \n",
- " fixed_capacity | \n",
+ " name_facilities | \n",
" \n",
" \n",
" \n",
@@ -5879,16 +5853,11 @@
" 4 | \n",
" 4 | \n",
" 1 | \n",
- " 36 | \n",
" 1 | \n",
" frontières | \n",
- " False | \n",
- " True | \n",
" 2018 | \n",
- " NaN | \n",
" spectacle vivant | \n",
" mucem | \n",
- " NaN | \n",
" \n",
" \n",
" 1 | \n",
@@ -5898,16 +5867,11 @@
" 5 | \n",
" 5 | \n",
" 1 | \n",
- " 36 | \n",
" 1 | \n",
" visite guidée une autre histoire du monde (1h00) | \n",
- " False | \n",
- " True | \n",
" 2023 | \n",
- " NaN | \n",
" offre muséale groupe | \n",
" mucem | \n",
- " NaN | \n",
"
\n",
" \n",
" 2 | \n",
@@ -5917,16 +5881,11 @@
" 2 | \n",
" 2 | \n",
" 1 | \n",
- " 36 | \n",
" 1 | \n",
" visite contée les chercheurs d'or indiv | \n",
- " False | \n",
- " True | \n",
" 2018 | \n",
- " NaN | \n",
" offre muséale individuel | \n",
" mucem | \n",
- " NaN | \n",
"
\n",
" \n",
" 3 | \n",
@@ -5936,16 +5895,11 @@
" 4 | \n",
" 4 | \n",
" 1 | \n",
- " 36 | \n",
" 1 | \n",
" we dreamt of utopia and we woke up screaming. | \n",
- " False | \n",
- " True | \n",
" 2021 | \n",
- " NaN | \n",
" spectacle vivant | \n",
" mucem | \n",
- " NaN | \n",
"
\n",
" \n",
" 4 | \n",
@@ -5955,16 +5909,11 @@
" 4 | \n",
" 4 | \n",
" 1 | \n",
- " 36 | \n",
" 1 | \n",
" jeff koons épisodes 4 | \n",
- " False | \n",
- " True | \n",
" 2021 | \n",
- " NaN | \n",
" spectacle vivant | \n",
" mucem | \n",
- " NaN | \n",
"
\n",
" \n",
"\n",
@@ -5978,36 +5927,29 @@
"3 5957 582 1 4 4 \n",
"4 8337 582 1 4 4 \n",
"\n",
- " facility_key_id fidelity_delay street_id \\\n",
- "0 1 36 1 \n",
- "1 1 36 1 \n",
- "2 1 36 1 \n",
- "3 1 36 1 \n",
- "4 1 36 1 \n",
+ " facility_key_id street_id \\\n",
+ "0 1 1 \n",
+ "1 1 1 \n",
+ "2 1 1 \n",
+ "3 1 1 \n",
+ "4 1 1 \n",
"\n",
- " name_events manual_added is_display \\\n",
- "0 frontières False True \n",
- "1 visite guidée une autre histoire du monde (1h00) False True \n",
- "2 visite contée les chercheurs d'or indiv False True \n",
- "3 we dreamt of utopia and we woke up screaming. False True \n",
- "4 jeff koons épisodes 4 False True \n",
+ " name_events name_seasons \\\n",
+ "0 frontières 2018 \n",
+ "1 visite guidée une autre histoire du monde (1h00) 2023 \n",
+ "2 visite contée les chercheurs d'or indiv 2018 \n",
+ "3 we dreamt of utopia and we woke up screaming. 2021 \n",
+ "4 jeff koons épisodes 4 2021 \n",
"\n",
- " name_seasons start_date_time name_event_types name_facilties \\\n",
- "0 2018 NaN spectacle vivant mucem \n",
- "1 2023 NaN offre muséale groupe mucem \n",
- "2 2018 NaN offre muséale individuel mucem \n",
- "3 2021 NaN spectacle vivant mucem \n",
- "4 2021 NaN spectacle vivant mucem \n",
- "\n",
- " fixed_capacity \n",
- "0 NaN \n",
- "1 NaN \n",
- "2 NaN \n",
- "3 NaN \n",
- "4 NaN "
+ " name_event_types name_facilities \n",
+ "0 spectacle vivant mucem \n",
+ "1 offre muséale groupe mucem \n",
+ "2 offre muséale individuel mucem \n",
+ "3 spectacle vivant mucem \n",
+ "4 spectacle vivant mucem "
]
},
- "execution_count": 104,
+ "execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
@@ -6027,14 +5969,19 @@
},
{
"cell_type": "code",
- "execution_count": 105,
+ "execution_count": 96,
"id": "481dddd6-80a8-4b9e-a05e-ed06fa3ed7a6",
"metadata": {},
"outputs": [],
"source": [
"def create_representations_table():\n",
" representations = load_dataset(\"1representations.csv\")\n",
+ " representations = representations.drop(columns = ['serial', 'open', 'satisfaction', 'is_display', 'expected_filling',\n",
+ " 'max_filling', 'extra_field', 'start_date_time', 'end_date_time', 'name',\n",
+ " 'representation_type_id'])\n",
+ " \n",
" representations_capacity = load_dataset(\"1representation_category_capacities.csv\")\n",
+ " representations_capacity = representations_capacity.drop(columns = ['expected_filling', 'max_filling'])\n",
"\n",
" representations_theme = representations.merge(representations_capacity, how='left',\n",
" left_on='id', right_on='representation_id',\n",
@@ -6047,7 +5994,7 @@
},
{
"cell_type": "code",
- "execution_count": 106,
+ "execution_count": 97,
"id": "677f4ed8-ef58-45f2-9056-ede0898c6a64",
"metadata": {},
"outputs": [
@@ -6092,167 +6039,61 @@
" \n",
" | \n",
" event_id | \n",
- " representation_type_id | \n",
" id_representation_cap | \n",
" representation_id | \n",
" category_id | \n",
- " serial | \n",
- " start_date_time | \n",
- " open | \n",
- " satisfaction | \n",
- " end_date_time | \n",
- " name | \n",
- " is_display | \n",
- " expected_filling_representation | \n",
- " max_filling_representation | \n",
- " extra_field | \n",
- " expected_filling_representation_cap | \n",
- " max_filling_representation_cap | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 12384 | \n",
- " NaN | \n",
" 123058 | \n",
" 84820 | \n",
" 2 | \n",
- " NaN | \n",
- " 2018-09-26 15:15:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
"
\n",
" \n",
" 1 | \n",
" 37 | \n",
- " NaN | \n",
" 2514 | \n",
" 269 | \n",
" 2 | \n",
- " NaN | \n",
- " 2016-04-27 17:00:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 2016-04-27 18:00:00+02:00 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
"
\n",
" \n",
" 2 | \n",
" 37 | \n",
- " NaN | \n",
" 384 | \n",
" 269 | \n",
" 5 | \n",
- " NaN | \n",
- " 2016-04-27 17:00:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 2016-04-27 18:00:00+02:00 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
"
\n",
" \n",
" 3 | \n",
" 37 | \n",
- " NaN | \n",
" 2515 | \n",
" 269 | \n",
" 10 | \n",
- " NaN | \n",
- " 2016-04-27 17:00:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 2016-04-27 18:00:00+02:00 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
"
\n",
" \n",
" 4 | \n",
" 37 | \n",
- " NaN | \n",
" 383 | \n",
" 269 | \n",
" 1 | \n",
- " NaN | \n",
- " 2016-04-27 17:00:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 2016-04-27 18:00:00+02:00 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " event_id representation_type_id id_representation_cap representation_id \\\n",
- "0 12384 NaN 123058 84820 \n",
- "1 37 NaN 2514 269 \n",
- "2 37 NaN 384 269 \n",
- "3 37 NaN 2515 269 \n",
- "4 37 NaN 383 269 \n",
- "\n",
- " category_id serial start_date_time open satisfaction \\\n",
- "0 2 NaN 2018-09-26 15:15:00+02:00 True NaN \n",
- "1 2 NaN 2016-04-27 17:00:00+02:00 True NaN \n",
- "2 5 NaN 2016-04-27 17:00:00+02:00 True NaN \n",
- "3 10 NaN 2016-04-27 17:00:00+02:00 True NaN \n",
- "4 1 NaN 2016-04-27 17:00:00+02:00 True NaN \n",
- "\n",
- " end_date_time name is_display \\\n",
- "0 1901-01-01 00:09:21+00:09 NaN True \n",
- "1 2016-04-27 18:00:00+02:00 NaN True \n",
- "2 2016-04-27 18:00:00+02:00 NaN True \n",
- "3 2016-04-27 18:00:00+02:00 NaN True \n",
- "4 2016-04-27 18:00:00+02:00 NaN True \n",
- "\n",
- " expected_filling_representation max_filling_representation extra_field \\\n",
- "0 NaN NaN NaN \n",
- "1 NaN NaN NaN \n",
- "2 NaN NaN NaN \n",
- "3 NaN NaN NaN \n",
- "4 NaN NaN NaN \n",
- "\n",
- " expected_filling_representation_cap max_filling_representation_cap \n",
- "0 NaN NaN \n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN "
+ " event_id id_representation_cap representation_id category_id\n",
+ "0 12384 123058 84820 2\n",
+ "1 37 2514 269 2\n",
+ "2 37 384 269 5\n",
+ "3 37 2515 269 10\n",
+ "4 37 383 269 1"
]
},
- "execution_count": 106,
+ "execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
@@ -6272,7 +6113,7 @@
},
{
"cell_type": "code",
- "execution_count": 107,
+ "execution_count": 98,
"id": "b26f4e7e-134d-4e32-a615-4b0e6bb80b25",
"metadata": {},
"outputs": [
@@ -6282,24 +6123,16 @@
"text": [
"Products theme columns : Index(['id_products', 'representation_id', 'pricing_formula_id', 'category_id',\n",
" 'products_group_id', 'product_pack_id', 'type_of_id', 'amount',\n",
- " 'is_full_price', 'apply_price', 'extra_field_products',\n",
- " 'amount_consumption', 'name_categories', 'extra_field_categories',\n",
- " 'quota'],\n",
+ " 'is_full_price', 'name_categories'],\n",
" dtype='object')\n",
"\n",
- " Representation theme columns : Index(['event_id', 'representation_type_id', 'id_representation_cap',\n",
- " 'representation_id', 'category_id', 'serial', 'start_date_time', 'open',\n",
- " 'satisfaction', 'end_date_time', 'name', 'is_display',\n",
- " 'expected_filling_representation', 'max_filling_representation',\n",
- " 'extra_field', 'expected_filling_representation_cap',\n",
- " 'max_filling_representation_cap'],\n",
+ " Representation theme columns : Index(['event_id', 'id_representation_cap', 'representation_id',\n",
+ " 'category_id'],\n",
" dtype='object')\n",
"\n",
" Events theme columns : Index(['event_id', 'season_id', 'facility_id', 'event_type_id',\n",
- " 'event_type_key_id', 'facility_key_id', 'fidelity_delay', 'street_id',\n",
- " 'name_events', 'manual_added', 'is_display', 'name_seasons',\n",
- " 'start_date_time', 'name_event_types', 'name_facilties',\n",
- " 'fixed_capacity'],\n",
+ " 'event_type_key_id', 'facility_key_id', 'street_id', 'name_events',\n",
+ " 'name_seasons', 'name_event_types', 'name_facilities'],\n",
" dtype='object')\n"
]
}
@@ -6312,7 +6145,7 @@
},
{
"cell_type": "code",
- "execution_count": 115,
+ "execution_count": 99,
"id": "d40b1e3b-b1f3-4915-8ebc-6bb7856da42a",
"metadata": {},
"outputs": [
@@ -6346,18 +6179,9 @@
" type_of_id | \n",
" amount | \n",
" is_full_price | \n",
- " apply_price | \n",
- " ... | \n",
- " open | \n",
- " satisfaction | \n",
- " end_date_time | \n",
- " name | \n",
- " is_display | \n",
- " expected_filling_representation | \n",
- " max_filling_representation | \n",
- " extra_field | \n",
- " expected_filling_representation_cap | \n",
- " max_filling_representation_cap | \n",
+ " name_categories | \n",
+ " event_id | \n",
+ " id_representation_cap | \n",
" \n",
" \n",
" \n",
@@ -6372,18 +6196,9 @@
" NaN | \n",
" 9.0 | \n",
" False | \n",
- " 0.0 | \n",
- " ... | \n",
- " True | \n",
- " NaN | \n",
- " 2017-11-19 16:30:00+01:00 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
+ " indiv activité tr | \n",
+ " 132 | \n",
+ " 8789 | \n",
" \n",
" \n",
" 1 | \n",
@@ -6396,18 +6211,9 @@
" 12.0 | \n",
" 9.5 | \n",
" False | \n",
- " 0.0 | \n",
- " ... | \n",
- " True | \n",
- " NaN | \n",
- " 2016-04-28 16:00:00+02:00 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
+ " indiv entrées tp | \n",
+ " 37 | \n",
+ " 390 | \n",
"
\n",
" \n",
" 2 | \n",
@@ -6420,18 +6226,9 @@
" 12.0 | \n",
" 11.5 | \n",
" False | \n",
- " 0.0 | \n",
- " ... | \n",
- " True | \n",
- " NaN | \n",
- " 2016-04-28 14:00:00+02:00 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
+ " indiv entrées tp | \n",
+ " 37 | \n",
+ " 395 | \n",
"
\n",
" \n",
" 3 | \n",
@@ -6444,18 +6241,9 @@
" NaN | \n",
" 8.0 | \n",
" False | \n",
- " 0.0 | \n",
- " ... | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
+ " indiv entrées tr | \n",
+ " 12365 | \n",
+ " 120199 | \n",
"
\n",
" \n",
" 4 | \n",
@@ -6468,22 +6256,12 @@
" 12.0 | \n",
" 8.5 | \n",
" False | \n",
- " 0.0 | \n",
- " ... | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
+ " indiv entrées tp | \n",
+ " 8 | \n",
+ " 21 | \n",
"
\n",
" \n",
"\n",
- "5 rows × 30 columns
\n",
""
],
"text/plain": [
@@ -6501,38 +6279,15 @@
"3 156773 1 NaN 8.0 False \n",
"4 1175 1 12.0 8.5 False \n",
"\n",
- " apply_price ... open satisfaction end_date_time name \\\n",
- "0 0.0 ... True NaN 2017-11-19 16:30:00+01:00 NaN \n",
- "1 0.0 ... True NaN 2016-04-28 16:00:00+02:00 NaN \n",
- "2 0.0 ... True NaN 2016-04-28 14:00:00+02:00 NaN \n",
- "3 0.0 ... True NaN 1901-01-01 00:09:21+00:09 NaN \n",
- "4 0.0 ... True NaN 1901-01-01 00:09:21+00:09 NaN \n",
- "\n",
- " is_display expected_filling_representation max_filling_representation \\\n",
- "0 True NaN NaN \n",
- "1 True NaN NaN \n",
- "2 True NaN NaN \n",
- "3 True NaN NaN \n",
- "4 True NaN NaN \n",
- "\n",
- " extra_field expected_filling_representation_cap \\\n",
- "0 NaN NaN \n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " max_filling_representation_cap \n",
- "0 NaN \n",
- "1 NaN \n",
- "2 NaN \n",
- "3 NaN \n",
- "4 NaN \n",
- "\n",
- "[5 rows x 30 columns]"
+ " name_categories event_id id_representation_cap \n",
+ "0 indiv activité tr 132 8789 \n",
+ "1 indiv entrées tp 37 390 \n",
+ "2 indiv entrées tp 37 395 \n",
+ "3 indiv entrées tr 12365 120199 \n",
+ "4 indiv entrées tp 8 21 "
]
},
- "execution_count": 115,
+ "execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
@@ -6547,7 +6302,7 @@
},
{
"cell_type": "code",
- "execution_count": 116,
+ "execution_count": 100,
"id": "78d75a08-e959-429c-847a-7d70a2804806",
"metadata": {},
"outputs": [
@@ -6580,19 +6335,19 @@
" product_pack_id | \n",
" type_of_id | \n",
" event_id | \n",
- " representation_type_id | \n",
" id_representation_cap | \n",
+ " season_id | \n",
" ... | \n",
- " expected_filling_representation_cap | \n",
- " max_filling_representation_cap | \n",
+ " event_type_key_id | \n",
+ " facility_key_id | \n",
+ " street_id | \n",
+ " amount | \n",
+ " is_full_price | \n",
+ " name_categories | \n",
" name_events | \n",
- " manual_added | \n",
- " is_display_event | \n",
" name_seasons | \n",
- " start_date_time_event | \n",
" name_event_types | \n",
- " name_facilties | \n",
- " fixed_capacity | \n",
+ " name_facilities | \n",
" \n",
" \n",
" \n",
@@ -6606,19 +6361,19 @@
" 1 | \n",
" NaN | \n",
" 132 | \n",
- " NaN | \n",
" 8789 | \n",
+ " 4 | \n",
" ... | \n",
- " NaN | \n",
- " NaN | \n",
- " visite-jeu \"le classico des minots\" (1h30) | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 9.0 | \n",
" False | \n",
- " True | \n",
+ " indiv activité tr | \n",
+ " visite-jeu \"le classico des minots\" (1h30) | \n",
" 2017 | \n",
- " NaN | \n",
" offre muséale individuel | \n",
" mucem | \n",
- " NaN | \n",
" \n",
" \n",
" 1 | \n",
@@ -6630,19 +6385,19 @@
" 1 | \n",
" 12.0 | \n",
" 37 | \n",
- " NaN | \n",
" 390 | \n",
+ " 2 | \n",
" ... | \n",
- " NaN | \n",
- " NaN | \n",
- " billet mucem picasso | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 9.5 | \n",
" False | \n",
- " True | \n",
+ " indiv entrées tp | \n",
+ " billet mucem picasso | \n",
" 2016 | \n",
- " NaN | \n",
" offre muséale individuel | \n",
" mucem | \n",
- " NaN | \n",
"
\n",
" \n",
" 2 | \n",
@@ -6654,19 +6409,19 @@
" 1 | \n",
" 12.0 | \n",
" 37 | \n",
- " NaN | \n",
" 395 | \n",
+ " 2 | \n",
" ... | \n",
- " NaN | \n",
- " NaN | \n",
- " billet mucem picasso | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 11.5 | \n",
" False | \n",
- " True | \n",
+ " indiv entrées tp | \n",
+ " billet mucem picasso | \n",
" 2016 | \n",
- " NaN | \n",
" offre muséale individuel | \n",
" mucem | \n",
- " NaN | \n",
"
\n",
" \n",
" 3 | \n",
@@ -6678,19 +6433,19 @@
" 1 | \n",
" NaN | \n",
" 12365 | \n",
- " NaN | \n",
" 120199 | \n",
+ " 1754 | \n",
" ... | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " False | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 8.0 | \n",
" False | \n",
+ " indiv entrées tr | \n",
" NaN | \n",
" NaN | \n",
" offre muséale individuel | \n",
" mucem | \n",
- " NaN | \n",
"
\n",
" \n",
" 4 | \n",
@@ -6702,23 +6457,23 @@
" 1 | \n",
" 12.0 | \n",
" 8 | \n",
- " NaN | \n",
" 21 | \n",
+ " 4 | \n",
" ... | \n",
- " NaN | \n",
- " NaN | \n",
- " non défini | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 8.5 | \n",
" False | \n",
- " True | \n",
+ " indiv entrées tp | \n",
+ " non défini | \n",
" 2017 | \n",
- " NaN | \n",
" non défini | \n",
" mucem | \n",
- " NaN | \n",
"
\n",
" \n",
"\n",
- "5 rows × 45 columns
\n",
+ "5 rows × 22 columns
\n",
""
],
"text/plain": [
@@ -6736,45 +6491,38 @@
"3 156773 1 NaN 12365 \n",
"4 1175 1 12.0 8 \n",
"\n",
- " representation_type_id id_representation_cap ... \\\n",
- "0 NaN 8789 ... \n",
- "1 NaN 390 ... \n",
- "2 NaN 395 ... \n",
- "3 NaN 120199 ... \n",
- "4 NaN 21 ... \n",
+ " id_representation_cap season_id ... event_type_key_id facility_key_id \\\n",
+ "0 8789 4 ... 5 1 \n",
+ "1 390 2 ... 2 1 \n",
+ "2 395 2 ... 2 1 \n",
+ "3 120199 1754 ... 4 1 \n",
+ "4 21 4 ... 6 1 \n",
"\n",
- " expected_filling_representation_cap max_filling_representation_cap \\\n",
- "0 NaN NaN \n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
+ " street_id amount is_full_price name_categories \\\n",
+ "0 1 9.0 False indiv activité tr \n",
+ "1 1 9.5 False indiv entrées tp \n",
+ "2 1 11.5 False indiv entrées tp \n",
+ "3 1 8.0 False indiv entrées tr \n",
+ "4 1 8.5 False indiv entrées tp \n",
"\n",
- " name_events manual_added is_display_event \\\n",
- "0 visite-jeu \"le classico des minots\" (1h30) False True \n",
- "1 billet mucem picasso False True \n",
- "2 billet mucem picasso False True \n",
- "3 NaN False False \n",
- "4 non défini False True \n",
+ " name_events name_seasons \\\n",
+ "0 visite-jeu \"le classico des minots\" (1h30) 2017 \n",
+ "1 billet mucem picasso 2016 \n",
+ "2 billet mucem picasso 2016 \n",
+ "3 NaN NaN \n",
+ "4 non défini 2017 \n",
"\n",
- " name_seasons start_date_time_event name_event_types \\\n",
- "0 2017 NaN offre muséale individuel \n",
- "1 2016 NaN offre muséale individuel \n",
- "2 2016 NaN offre muséale individuel \n",
- "3 NaN NaN offre muséale individuel \n",
- "4 2017 NaN non défini \n",
+ " name_event_types name_facilities \n",
+ "0 offre muséale individuel mucem \n",
+ "1 offre muséale individuel mucem \n",
+ "2 offre muséale individuel mucem \n",
+ "3 offre muséale individuel mucem \n",
+ "4 non défini mucem \n",
"\n",
- " name_facilties fixed_capacity \n",
- "0 mucem NaN \n",
- "1 mucem NaN \n",
- "2 mucem NaN \n",
- "3 mucem NaN \n",
- "4 mucem NaN \n",
- "\n",
- "[5 rows x 45 columns]"
+ "[5 rows x 22 columns]"
]
},
- "execution_count": 116,
+ "execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
@@ -6788,11 +6536,228 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 101,
+ "id": "4a6950e8-4818-4df2-afa9-562e0921698c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['id_products', 'representation_id', 'pricing_formula_id', 'category_id',\n",
+ " 'products_group_id', 'product_pack_id', 'type_of_id', 'event_id',\n",
+ " 'id_representation_cap', 'season_id', 'facility_id', 'event_type_id',\n",
+ " 'event_type_key_id', 'facility_key_id', 'street_id', 'amount',\n",
+ " 'is_full_price', 'name_categories', 'name_events', 'name_seasons',\n",
+ " 'name_event_types', 'name_facilities'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 101,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "products_global.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 102,
"id": "b18f6428-90e0-4b1b-9b8d-bad995fb6c98",
"metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(94803, 22)"
+ ]
+ },
+ "execution_count": 102,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "products_global.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c3caf2fd-178e-48e9-b95f-5798bd576f5d",
+ "metadata": {},
+ "source": [
+ "## Analysis of Products_global"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "id": "33ee07a2-d871-4436-9860-9be389bc4902",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id_products 0\n",
+ "representation_id 0\n",
+ "pricing_formula_id 0\n",
+ "category_id 0\n",
+ "products_group_id 0\n",
+ "product_pack_id 0\n",
+ "type_of_id 67589\n",
+ "event_id 0\n",
+ "id_representation_cap 0\n",
+ "season_id 0\n",
+ "facility_id 0\n",
+ "event_type_id 0\n",
+ "event_type_key_id 0\n",
+ "facility_key_id 0\n",
+ "street_id 0\n",
+ "amount 0\n",
+ "is_full_price 0\n",
+ "name_categories 3991\n",
+ "name_events 46657\n",
+ "name_seasons 30663\n",
+ "name_event_types 0\n",
+ "name_facilities 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 103,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "products_global.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "id": "557fc475-4417-4d9f-8d4e-8c49bc42367f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['offre muséale individuel', 'non défini', 'spectacle vivant',\n",
+ " 'offre muséale groupe', 'formule adhésion'], dtype=object)"
+ ]
+ },
+ "execution_count": 105,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Which event types are there?\n",
+ "\n",
+ "products_global['name_event_types'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 107,
+ "id": "a9b9a23c-b0de-4685-97e5-d52dd78349f5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "644"
+ ]
+ },
+ "execution_count": 107,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# How many distinct event names? (unique() keeps NaN as a value, so missing names count as one)\n",
+ "\n",
+ "len(products_global['name_events'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "id": "fb374c72-58ca-404d-a86b-e834a2fc4a34",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['indiv activité tr', 'indiv entrées tp', 'indiv entrées tr',\n",
+ " 'indiv prog enfant', 'indiv activité gr', 'indiv prog gr',\n",
+ " 'indiv activité tp', 'indiv activité enfant', 'indiv entrées gr',\n",
+ " 'groupe forfait entrées tr', 'groupe autonome adulte',\n",
+ " 'indiv prog tp', 'indiv prog tr', 'indiv entrées fa',\n",
+ " 'groupe forfait scolaire', 'en nb entrées tr', 'non défini', nan,\n",
+ " 'en nb entrées gr', 'groupe autonome entrées gr',\n",
+ " 'groupe forfait entrées gr', 'groupe autonome entrées tr',\n",
+ " 'en nb entrées tp', 'groupe autonome gr',\n",
+ " 'groupe autonome entrées tp', 'groupe forfait adulte',\n",
+ " 'groupe forfait etudiant'], dtype=object)"
+ ]
+ },
+ "execution_count": 108,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Which category names are there?\n",
+ "products_global['name_categories'].unique()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 109,
+ "id": "11f89771-8d50-4ef4-b34e-53e4f6b419bb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "27"
+ ]
+ },
+ "execution_count": 109,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(products_global['category_id'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8add1ff2-b7e8-4381-90d8-d18d8660ed39",
+ "metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "def uniform_product_df():\n",
+ " print(\"Products theme columns : \", products_theme.columns)\n",
+ " print(\"\\n Representation theme columns : \", representation_theme.columns)\n",
+ " print(\"\\n Events theme columns : \", events_theme.columns)\n",
+ "\n",
+ " products_global = products_theme.merge(representation_theme, how='left',\n",
+ " on= [\"representation_id\", \"category_id\"])\n",
+ " \n",
+ " products_global = products_global.merge(events_theme, how='left', on='event_id',\n",
+ " suffixes = (\"_representation\", \"_event\"))\n",
+ " \n",
+ " products_global = order_columns_id(products_global)\n",
+ "\n",
+ " # remove useless columns \n",
+ " products_global = products_global.drop(columns = ['type_of_id', 'name_events', 'name_seasons', 'name_categories'])\n",
+ " return products_global\n",
+ " "
+ ]
}
],
"metadata": {