From 19d91728f0ac1b7b29b58af55813d71d8991f236 Mon Sep 17 00:00:00 2001 From: ajoubrel-ensae Date: Tue, 2 Jan 2024 22:28:56 +0000 Subject: [PATCH] Update --- Clean-Notebook.ipynb | 2375 +++++------------------------------------- Notebook_AJ.ipynb | 487 ++++++++- 2 files changed, 703 insertions(+), 2159 deletions(-) diff --git a/Clean-Notebook.ipynb b/Clean-Notebook.ipynb index be9a507..23550e0 100644 --- a/Clean-Notebook.ipynb +++ b/Clean-Notebook.ipynb @@ -51,52 +51,291 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 10, "id": "699664b9-eee4-4f8d-a207-e524526560c5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['bdc2324-data/2/2campaign_stats.csv',\n", - " 'bdc2324-data/2/2campaigns.csv',\n", - " 'bdc2324-data/2/2categories.csv',\n", - " 'bdc2324-data/2/2contribution_sites.csv',\n", - " 'bdc2324-data/2/2contributions.csv',\n", - " 'bdc2324-data/2/2countries.csv',\n", - " 'bdc2324-data/2/2currencies.csv',\n", - " 'bdc2324-data/2/2customer_target_mappings.csv',\n", - " 'bdc2324-data/2/2customersplus.csv',\n", - " 'bdc2324-data/2/2event_types.csv',\n", - " 'bdc2324-data/2/2events.csv',\n", - " 'bdc2324-data/2/2facilities.csv',\n", - " 'bdc2324-data/2/2link_stats.csv',\n", - " 'bdc2324-data/2/2pricing_formulas.csv',\n", - " 'bdc2324-data/2/2product_packs.csv',\n", - " 'bdc2324-data/2/2products.csv',\n", - " 'bdc2324-data/2/2products_groups.csv',\n", - " 'bdc2324-data/2/2purchases.csv',\n", - " 'bdc2324-data/2/2representation_category_capacities.csv',\n", - " 'bdc2324-data/2/2representations.csv',\n", - " 'bdc2324-data/2/2seasons.csv',\n", - " 'bdc2324-data/2/2structure_tag_mappings.csv',\n", - " 'bdc2324-data/2/2suppliers.csv',\n", - " 'bdc2324-data/2/2tags.csv',\n", - " 'bdc2324-data/2/2target_types.csv',\n", - " 'bdc2324-data/2/2targets.csv',\n", - " 'bdc2324-data/2/2tickets.csv']" + "['bdc2324-data/11/11campaign_stats.csv',\n", + " 'bdc2324-data/11/11campaigns.csv',\n", + " 'bdc2324-data/11/11categories.csv',\n", + " 'bdc2324-data/11/11countries.csv',\n", + " 'bdc2324-data/11/11currencies.csv',\n", + " 'bdc2324-data/11/11customer_target_mappings.csv',\n", + " 'bdc2324-data/11/11customersplus.csv',\n", + " 'bdc2324-data/11/11event_types.csv',\n", + " 'bdc2324-data/11/11events.csv',\n", + " 'bdc2324-data/11/11facilities.csv',\n", + " 'bdc2324-data/11/11link_stats.csv',\n", + " 'bdc2324-data/11/11pricing_formulas.csv',\n", + " 'bdc2324-data/11/11product_packs.csv',\n", + " 'bdc2324-data/11/11products.csv',\n", + " 'bdc2324-data/11/11products_groups.csv',\n", + " 'bdc2324-data/11/11purchases.csv',\n", + " 'bdc2324-data/11/11representation_category_capacities.csv',\n", + " 'bdc2324-data/11/11representations.csv',\n", + " 'bdc2324-data/11/11seasons.csv',\n", + " 'bdc2324-data/11/11structure_tag_mappings.csv',\n", + " 'bdc2324-data/11/11suppliers.csv',\n", + " 'bdc2324-data/11/11tags.csv',\n", + " 'bdc2324-data/11/11target_types.csv',\n", + " 'bdc2324-data/11/11targets.csv',\n", + " 'bdc2324-data/11/11tickets.csv']" ] }, - "execution_count": 3, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "BUCKET = \"bdc2324-data/2\"\n", + "BUCKET = \"bdc2324-data/11\"\n", "fs.ls(BUCKET)" ] }, + { + "cell_type": "code", + "execution_count": 23, + "id": "6d6201cd-a00b-4984-bcd8-72838717ad13", + "metadata": {}, + "outputs": [], + "source": [ + "# Chargement de toutes les données\n", + "liste_base = ['customer_target_mappings', 'customersplus', 'target_types', 'tags', 'events', 'tickets', 'representations', 'purchases', 'products']\n", + "\n", + "for nom_base in liste_base:\n", + " FILE_PATH_S3 = 'bdc2324-data/11/11' + nom_base + '.csv'\n", + " with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " globals()[nom_base] = pd.read_csv(file_in, sep=\",\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "afe548fe-d93c-4634-9f53-881404ec4c6c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
id_xpurchase_datetype_ofis_from_subscriptionamountis_full_pricestart_date_timeevent_name
09924232023-01-11 17:08:41+01:003False13.0False2023-02-06 20:00:00+01:00zaide
19924232023-01-11 17:08:41+01:003False13.0False2023-02-06 20:00:00+01:00zaide
210539342023-03-16 16:23:10+01:003False62.0False2023-03-19 16:00:00+01:00luisa miller
310539342023-03-16 16:23:10+01:003False62.0False2023-03-19 16:00:00+01:00luisa miller
411891412020-11-26 13:12:53+01:003False51.3False2020-12-01 20:00:00+01:00iphigenie en tauride
...........................
31896410908392019-05-19 21:18:36+02:001False4.5False2019-05-27 20:00:00+02:00entre femmes
31896510908392019-05-19 21:18:36+02:001False4.5False2019-05-27 20:00:00+02:00entre femmes
31896610908392019-05-19 21:18:36+02:001False4.5False2019-05-27 20:00:00+02:00entre femmes
31896712442772019-12-31 11:04:07+01:001False5.5False2020-02-03 20:00:00+01:00a boire et a manger
31896812442772019-12-31 11:04:07+01:001False5.5False2020-02-03 20:00:00+01:00a boire et a manger
\n", + "

318969 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " id_x purchase_date type_of is_from_subscription \\\n", + "0 992423 2023-01-11 17:08:41+01:00 3 False \n", + "1 992423 2023-01-11 17:08:41+01:00 3 False \n", + "2 1053934 2023-03-16 16:23:10+01:00 3 False \n", + "3 1053934 2023-03-16 16:23:10+01:00 3 False \n", + "4 1189141 2020-11-26 13:12:53+01:00 3 False \n", + "... ... ... ... ... \n", + "318964 1090839 2019-05-19 21:18:36+02:00 1 False \n", + "318965 1090839 2019-05-19 21:18:36+02:00 1 False \n", + "318966 1090839 2019-05-19 21:18:36+02:00 1 False \n", + "318967 1244277 2019-12-31 11:04:07+01:00 1 False \n", + "318968 1244277 2019-12-31 11:04:07+01:00 1 False \n", + "\n", + " amount is_full_price start_date_time event_name \n", + "0 13.0 False 2023-02-06 20:00:00+01:00 zaide \n", + "1 13.0 False 2023-02-06 20:00:00+01:00 zaide \n", + "2 62.0 False 2023-03-19 16:00:00+01:00 luisa miller \n", + "3 62.0 False 2023-03-19 16:00:00+01:00 luisa miller \n", + "4 51.3 False 2020-12-01 20:00:00+01:00 iphigenie en tauride \n", + "... ... ... ... ... \n", + "318964 4.5 False 2019-05-27 20:00:00+02:00 entre femmes \n", + "318965 4.5 False 2019-05-27 20:00:00+02:00 entre femmes \n", + "318966 4.5 False 2019-05-27 20:00:00+02:00 entre femmes \n", + "318967 5.5 False 2020-02-03 20:00:00+01:00 a boire et a manger \n", + "318968 5.5 False 2020-02-03 20:00:00+01:00 a boire et a manger \n", + "\n", + "[318969 rows x 8 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Jointure\n", + "var_choosed = ['id_x', 'customer_id','product_id', 'purchase_date', 'type_of', 'is_from_subscription']\n", + "merge_1 = pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')[var_choosed]\n", + "\n", + "var_choosed.extend(['amount', 'is_full_price', 'representation_id'])\n", + "merge_2 = pd.merge(products, merge_1, left_on='id', right_on='product_id', how='inner')[var_choosed]\n", + "\n", + "var_choosed.remove('representation_id')\n", + "var_choosed.extend(['start_date_time', 'event_id'])\n", + "merge_3 = pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')[var_choosed]\n", + "\n", + "var_choosed.remove('event_id')\n", + "var_choosed.extend(['name', 'customer_id'])\n", + "merge_4 = pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')[var_choosed]\n", + "\n", + "# Changement de nom\n", + "merge_4 = merge_4.rename(columns={'name': 'event_name'})\n", + "var_choosed[var_choosed.index('name')] = \"event_name\"\n", + "\n", + "# Base finale\n", + "var_choosed.extend(['age', 'gender', 'country', 'fidelity', 'profession'])\n", + "df_customer_event = pd.merge(customersplus, merge_4, left_on = 'id', right_on = 'customer_id', how = 'inner')[var_choosed]\n", + "df_customer_event" + ] + }, { "cell_type": "markdown", "id": "779da86b-ac61-4c61-88d2-fa1c0c19efce", @@ -138,250 +377,11 @@ ], "source": [ "# Client\n", - "FILE_PATH_S3 = 'bdc2324-data/11/11customer_target_mappings.csv'\n", - "\n", - "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", - " customer_target_mappings = pd.read_csv(file_in, sep=\",\")\n", - "\n", "print(customer_target_mappings.columns)\n", "print(customer_target_mappings.shape)\n", "customer_target_mappings.info()" ] }, - { - "cell_type": "code", - "execution_count": 4, - "id": "d22aa131-5069-43d4-a42e-24f38cc7240d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Index(['id', 'customer_id', 'target_id', 'created_at', 'updated_at', 'name',\n", - " 'extra_field'],\n", - " dtype='object')\n", - "(124302, 7)\n", - "\n", - "RangeIndex: 124302 entries, 0 to 124301\n", - "Data columns (total 7 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 124302 non-null int64 \n", - " 1 customer_id 124302 non-null int64 \n", - " 2 target_id 124302 non-null int64 \n", - " 3 created_at 124296 non-null object \n", - " 4 updated_at 124296 non-null object \n", - " 5 name 0 non-null float64\n", - " 6 extra_field 0 non-null float64\n", - "dtypes: float64(2), int64(3), object(2)\n", - "memory usage: 6.6+ MB\n" - ] - } - ], - "source": [ - "# Segmentation existante\n", - "FILE_PATH_S3 = 'bdc2324-data/11/11customer_target_mappings.csv'\n", - "\n", - "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", - " customer_target_mappings = pd.read_csv(file_in, sep=\",\")\n", - "\n", - "print(customer_target_mappings.columns)\n", - "print(customer_target_mappings.shape)\n", - "customer_target_mappings.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "967b20e2-5a30-4724-989f-b9e39c7c67e7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idtarget_idcreated_atupdated_atnameextra_field
07938893441511012022-09-29 17:55:41.083666+02:002022-09-29 17:55:41.083666+02:00NaNNaN
17938903441521012022-09-29 19:16:07.252114+02:002022-09-29 19:16:07.252114+02:00NaNNaN
27938913441531012022-09-29 19:55:10.443450+02:002022-09-29 19:55:10.443450+02:00NaNNaN
37938923441541012022-09-29 20:16:08.269407+02:002022-09-29 20:16:08.269407+02:00NaNNaN
47938933441551012022-09-29 21:03:40.541998+02:002022-09-29 21:03:40.541998+02:00NaNNaN
........................
1242977420013298551012022-07-11 18:17:09.607162+02:002022-07-11 18:17:09.607162+02:00NaNNaN
1242987420023298561012022-07-11 18:44:45.636248+02:002022-07-11 18:44:45.636248+02:00NaNNaN
1242997420003298541012022-07-11 17:46:48.914507+02:002022-07-11 17:46:48.914507+02:00NaNNaN
1243007420033298571342022-07-11 18:44:55.915889+02:002022-07-11 18:44:55.915889+02:00NaNNaN
1243017419963298501012022-07-11 16:52:37.227487+02:002022-07-11 16:52:37.227487+02:00NaNNaN
\n", - "

124302 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " id customer_id target_id created_at \\\n", - "0 793889 344151 101 2022-09-29 17:55:41.083666+02:00 \n", - "1 793890 344152 101 2022-09-29 19:16:07.252114+02:00 \n", - "2 793891 344153 101 2022-09-29 19:55:10.443450+02:00 \n", - "3 793892 344154 101 2022-09-29 20:16:08.269407+02:00 \n", - "4 793893 344155 101 2022-09-29 21:03:40.541998+02:00 \n", - "... ... ... ... ... \n", - "124297 742001 329855 101 2022-07-11 18:17:09.607162+02:00 \n", - "124298 742002 329856 101 2022-07-11 18:44:45.636248+02:00 \n", - "124299 742000 329854 101 2022-07-11 17:46:48.914507+02:00 \n", - "124300 742003 329857 134 2022-07-11 18:44:55.915889+02:00 \n", - "124301 741996 329850 101 2022-07-11 16:52:37.227487+02:00 \n", - "\n", - " updated_at name extra_field \n", - "0 2022-09-29 17:55:41.083666+02:00 NaN NaN \n", - "1 2022-09-29 19:16:07.252114+02:00 NaN NaN \n", - "2 2022-09-29 19:55:10.443450+02:00 NaN NaN \n", - "3 2022-09-29 20:16:08.269407+02:00 NaN NaN \n", - "4 2022-09-29 21:03:40.541998+02:00 NaN NaN \n", - "... ... ... ... \n", - "124297 2022-07-11 18:17:09.607162+02:00 NaN NaN \n", - "124298 2022-07-11 18:44:45.636248+02:00 NaN NaN \n", - "124299 2022-07-11 17:46:48.914507+02:00 NaN NaN \n", - "124300 2022-07-11 18:44:55.915889+02:00 NaN NaN \n", - "124301 2022-07-11 16:52:37.227487+02:00 NaN NaN \n", - "\n", - "[124302 rows x 7 columns]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "customer_target_mappings" - ] - }, { "cell_type": "code", "execution_count": 26, @@ -454,11 +454,6 @@ ], "source": [ "# Segmentation existante\n", - "FILE_PATH_S3 = 'bdc2324-data/11/11target_types.csv'\n", - "\n", - "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", - " target_types = pd.read_csv(file_in, sep=\",\")\n", - "\n", "print(target_types.columns)\n", "print(target_types.shape)\n", "target_types.info()" @@ -3468,1838 +3463,6 @@ "source": [ "purchases" ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "281c48da-e1a0-4298-b2e6-81f9fc6461aa", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
id_xpurchase_datecustomer_idcreated_at_xupdated_at_xnumber_xidentifier_xid_ynumber_ycreated_at_yupdated_at_ypurchase_idproduct_idis_from_subscriptiontype_ofsupplier_idbarcodeidentifier_y
08617632019-03-01 16:29:17+01:0049662023-09-12 17:42:37.571646+02:002023-09-12 17:42:37.571646+02:00NaNe1155cf26b34f792bdb23e49244d726421190821433_136_194_683562023-09-12 17:42:45.409056+02:002023-09-12 17:42:45.409056+02:00861763209879False11702NaN838d6101db2fc8bc80536d8b91b49859
18617642019-03-01 16:29:19+01:0049662023-09-12 17:42:37.572063+02:002023-09-12 17:42:37.572063+02:00NaNe8b95cc6a1a8b103ffa39755ce3bfc4d21190811433_136_212_683562023-09-12 17:42:45.396336+02:002023-09-12 17:42:45.396336+02:00861764209879False11702NaNf694c255855ce5643c6fcc7fed5e9237
28617672019-03-01 16:33:01+01:004059942023-09-12 17:42:37.573280+02:002023-09-12 17:42:37.573280+02:00NaN6edb259b88fc6f6ae82ede82defaef92211908433158_158_297_683572023-09-12 17:42:45.410447+02:002023-09-12 17:42:45.410447+02:00861767209880False11702NaNb7a3dd0794c0957c942d45b8913e5b96
38617682019-03-01 16:33:03+01:004059942023-09-12 17:42:37.573646+02:002023-09-12 17:42:37.573646+02:00NaN5d3fcb50784bada3731a967ddc9fbba8211908533158_158_318_683572023-09-12 17:42:45.411059+02:002023-09-12 17:42:45.411059+02:00861768209880False11702NaNd7ea7e443581ebe520dd13f6cad31af7
48617692019-03-01 16:33:06+01:004059942023-09-12 17:42:37.574034+02:002023-09-12 17:42:37.574034+02:00NaN5516d19b2331db9ad0b11f7e70299575211908333158_158_343_683572023-09-12 17:42:45.409824+02:002023-09-12 17:42:45.409824+02:00861769209880False11702NaN8a8d938d66a4dc57bcb44c2773c6fdfa
.........................................................
31896412852062023-10-19 22:14:55+02:003542332023-10-21 04:50:44.397308+02:002023-10-21 04:50:44.397308+02:00NaN819dd5c8b312ee583335f32f481d782a259756470649_398_403_1686522023-10-21 04:50:44.991960+02:002023-10-21 04:50:44.991960+02:001285206270350False11702NaN56c452c39089f658ed74a06c96b78725
31896512852092023-10-19 22:59:26+02:005170012023-10-21 04:50:44.399870+02:002023-10-21 04:50:44.399870+02:00NaNef79fbeb3b80de3529df9c65cb4d4ca2259756589203_398_1187_1686562023-10-21 04:50:44.993354+02:002023-10-21 04:50:44.993354+02:001285209268450False11702NaN5ef9912e7b533b8a1b2685db538df7d3
31896612852092023-10-19 22:59:26+02:005170012023-10-21 04:50:44.399870+02:002023-10-21 04:50:44.399870+02:00NaNef79fbeb3b80de3529df9c65cb4d4ca2259756689203_398_1232_1686552023-10-21 04:50:44.994301+02:002023-10-21 04:50:44.994301+02:001285209272403False11702NaN9742a56e9ffbdfb0a31a541dc5ccb889
31896712852092023-10-19 22:59:26+02:005170012023-10-21 04:50:44.399870+02:002023-10-21 04:50:44.399870+02:00NaNef79fbeb3b80de3529df9c65cb4d4ca2259756789203_398_1211_1686552023-10-21 04:50:44.995318+02:002023-10-21 04:50:44.995318+02:001285209272403False11702NaN56a9e032281d7a9c004da644818839cc
31896812859662023-10-21 21:47:20+02:005173092023-10-23 03:43:16.458811+02:002023-10-23 03:43:16.458811+02:00NaN7e825dd352bc6a11ab81cb8068e325e6259826089257_401_2652_1687932023-10-23 03:43:16.856244+02:002023-10-23 03:43:16.856244+02:001285966268428False11702NaN86d6c0c2720435206078ac4bbf4f74f1
\n", - "

318969 rows × 18 columns

\n", - "
" - ], - "text/plain": [ - " id_x purchase_date customer_id \\\n", - "0 861763 2019-03-01 16:29:17+01:00 4966 \n", - "1 861764 2019-03-01 16:29:19+01:00 4966 \n", - "2 861767 2019-03-01 16:33:01+01:00 405994 \n", - "3 861768 2019-03-01 16:33:03+01:00 405994 \n", - "4 861769 2019-03-01 16:33:06+01:00 405994 \n", - "... ... ... ... \n", - "318964 1285206 2023-10-19 22:14:55+02:00 354233 \n", - "318965 1285209 2023-10-19 22:59:26+02:00 517001 \n", - "318966 1285209 2023-10-19 22:59:26+02:00 517001 \n", - "318967 1285209 2023-10-19 22:59:26+02:00 517001 \n", - "318968 1285966 2023-10-21 21:47:20+02:00 517309 \n", - "\n", - " created_at_x updated_at_x \\\n", - "0 2023-09-12 17:42:37.571646+02:00 2023-09-12 17:42:37.571646+02:00 \n", - "1 2023-09-12 17:42:37.572063+02:00 2023-09-12 17:42:37.572063+02:00 \n", - "2 2023-09-12 17:42:37.573280+02:00 2023-09-12 17:42:37.573280+02:00 \n", - "3 2023-09-12 17:42:37.573646+02:00 2023-09-12 17:42:37.573646+02:00 \n", - "4 2023-09-12 17:42:37.574034+02:00 2023-09-12 17:42:37.574034+02:00 \n", - "... ... ... \n", - "318964 2023-10-21 04:50:44.397308+02:00 2023-10-21 04:50:44.397308+02:00 \n", - "318965 2023-10-21 04:50:44.399870+02:00 2023-10-21 04:50:44.399870+02:00 \n", - "318966 2023-10-21 04:50:44.399870+02:00 2023-10-21 04:50:44.399870+02:00 \n", - "318967 2023-10-21 04:50:44.399870+02:00 2023-10-21 04:50:44.399870+02:00 \n", - "318968 2023-10-23 03:43:16.458811+02:00 2023-10-23 03:43:16.458811+02:00 \n", - "\n", - " number_x identifier_x id_y \\\n", - "0 NaN e1155cf26b34f792bdb23e49244d7264 2119082 \n", - "1 NaN e8b95cc6a1a8b103ffa39755ce3bfc4d 2119081 \n", - "2 NaN 6edb259b88fc6f6ae82ede82defaef92 2119084 \n", - "3 NaN 5d3fcb50784bada3731a967ddc9fbba8 2119085 \n", - "4 NaN 5516d19b2331db9ad0b11f7e70299575 2119083 \n", - "... ... ... ... \n", - "318964 NaN 819dd5c8b312ee583335f32f481d782a 2597564 \n", - "318965 NaN ef79fbeb3b80de3529df9c65cb4d4ca2 2597565 \n", - "318966 NaN ef79fbeb3b80de3529df9c65cb4d4ca2 2597566 \n", - "318967 NaN ef79fbeb3b80de3529df9c65cb4d4ca2 2597567 \n", - "318968 NaN 7e825dd352bc6a11ab81cb8068e325e6 2598260 \n", - "\n", - " number_y created_at_y \\\n", - "0 1433_136_194_68356 2023-09-12 17:42:45.409056+02:00 \n", - "1 1433_136_212_68356 2023-09-12 17:42:45.396336+02:00 \n", - "2 33158_158_297_68357 2023-09-12 17:42:45.410447+02:00 \n", - "3 33158_158_318_68357 2023-09-12 17:42:45.411059+02:00 \n", - "4 33158_158_343_68357 2023-09-12 17:42:45.409824+02:00 \n", - "... ... ... \n", - "318964 70649_398_403_168652 2023-10-21 04:50:44.991960+02:00 \n", - "318965 89203_398_1187_168656 2023-10-21 04:50:44.993354+02:00 \n", - "318966 89203_398_1232_168655 2023-10-21 04:50:44.994301+02:00 \n", - "318967 89203_398_1211_168655 2023-10-21 04:50:44.995318+02:00 \n", - "318968 89257_401_2652_168793 2023-10-23 03:43:16.856244+02:00 \n", - "\n", - " updated_at_y purchase_id product_id \\\n", - "0 2023-09-12 17:42:45.409056+02:00 861763 209879 \n", - "1 2023-09-12 17:42:45.396336+02:00 861764 209879 \n", - "2 2023-09-12 17:42:45.410447+02:00 861767 209880 \n", - "3 2023-09-12 17:42:45.411059+02:00 861768 209880 \n", - "4 2023-09-12 17:42:45.409824+02:00 861769 209880 \n", - "... ... ... ... \n", - "318964 2023-10-21 04:50:44.991960+02:00 1285206 270350 \n", - "318965 2023-10-21 04:50:44.993354+02:00 1285209 268450 \n", - "318966 2023-10-21 04:50:44.994301+02:00 1285209 272403 \n", - "318967 2023-10-21 04:50:44.995318+02:00 1285209 272403 \n", - "318968 2023-10-23 03:43:16.856244+02:00 1285966 268428 \n", - "\n", - " is_from_subscription type_of supplier_id barcode \\\n", - "0 False 1 1702 NaN \n", - "1 False 1 1702 NaN \n", - "2 False 1 1702 NaN \n", - "3 False 1 1702 NaN \n", - "4 False 1 1702 NaN \n", - "... ... ... ... ... \n", - "318964 False 1 1702 NaN \n", - "318965 False 1 1702 NaN \n", - "318966 False 1 1702 NaN \n", - "318967 False 1 1702 NaN \n", - "318968 False 1 1702 NaN \n", - "\n", - " identifier_y \n", - "0 838d6101db2fc8bc80536d8b91b49859 \n", - "1 f694c255855ce5643c6fcc7fed5e9237 \n", - "2 b7a3dd0794c0957c942d45b8913e5b96 \n", - "3 d7ea7e443581ebe520dd13f6cad31af7 \n", - "4 8a8d938d66a4dc57bcb44c2773c6fdfa \n", - "... ... \n", - "318964 56c452c39089f658ed74a06c96b78725 \n", - "318965 5ef9912e7b533b8a1b2685db538df7d3 \n", - "318966 9742a56e9ffbdfb0a31a541dc5ccb889 \n", - "318967 56a9e032281d7a9c004da644818839cc \n", - "318968 86d6c0c2720435206078ac4bbf4f74f1 \n", - "\n", - "[318969 rows x 18 columns]" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "e8f340b3-7519-47e7-a8bb-c8d1b68ca683", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
id_xcustomer_idproduct_idpurchase_datetype_ofis_from_subscription
086176349662098792019-03-01 16:29:17+01:001False
186176449662098792019-03-01 16:29:19+01:001False
28617674059942098802019-03-01 16:33:01+01:001False
38617684059942098802019-03-01 16:33:03+01:001False
48617694059942098802019-03-01 16:33:06+01:001False
.....................
31896412852063542332703502023-10-19 22:14:55+02:001False
31896512852095170012684502023-10-19 22:59:26+02:001False
31896612852095170012724032023-10-19 22:59:26+02:001False
31896712852095170012724032023-10-19 22:59:26+02:001False
31896812859665173092684282023-10-21 21:47:20+02:001False
\n", - "

318969 rows × 6 columns

\n", - "
" - ], - "text/plain": [ - " id_x customer_id product_id purchase_date type_of \\\n", - "0 861763 4966 209879 2019-03-01 16:29:17+01:00 1 \n", - "1 861764 4966 209879 2019-03-01 16:29:19+01:00 1 \n", - "2 861767 405994 209880 2019-03-01 16:33:01+01:00 1 \n", - "3 861768 405994 209880 2019-03-01 16:33:03+01:00 1 \n", - "4 861769 405994 209880 2019-03-01 16:33:06+01:00 1 \n", - "... ... ... ... ... ... \n", - "318964 1285206 354233 270350 2023-10-19 22:14:55+02:00 1 \n", - "318965 1285209 517001 268450 2023-10-19 22:59:26+02:00 1 \n", - "318966 1285209 517001 272403 2023-10-19 22:59:26+02:00 1 \n", - "318967 1285209 517001 272403 2023-10-19 22:59:26+02:00 1 \n", - "318968 1285966 517309 268428 2023-10-21 21:47:20+02:00 1 \n", - "\n", - " is_from_subscription \n", - "0 False \n", - "1 False \n", - "2 False \n", - "3 False \n", - "4 False \n", - "... ... \n", - "318964 False \n", - "318965 False \n", - "318966 False \n", - "318967 False \n", - "318968 False \n", - "\n", - "[318969 rows x 6 columns]" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Jonction client et évenement\n", - "merge_1 = pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')[['id_x', 'customer_id','product_id', 'purchase_date', 'type_of', 'is_from_subscription']]\n", - "merge_1" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "a598b86c-4128-4e5c-ae38-52689f755fd5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
id_xcustomer_idrepresentation_idpurchase_datetype_ofis_from_subscriptionamountis_full_price
012498477634443322023-09-03 18:43:56+02:001False18.0False
112498477634443322023-09-03 18:43:56+02:001False18.0False
21252665426962443322023-07-06 12:13:08+02:001False18.0False
31252665426962443322023-07-06 12:13:08+02:001False18.0False
41252671426731443322023-07-06 13:10:07+02:003False18.0False
...........................
318964121279711092338102018-11-28 13:44:22+01:001False30.0False
318965121347625851338102018-12-28 16:53:36+01:001False30.0False
318966122603926314338102018-12-29 16:30:47+01:001False30.0False
31896712442763104338102018-12-31 19:54:09+01:001False30.0False
318968124428586337662019-12-31 13:02:47+01:003False21.0False
\n", - "

318969 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " id_x customer_id representation_id purchase_date \\\n", - "0 1249847 7634 44332 2023-09-03 18:43:56+02:00 \n", - "1 1249847 7634 44332 2023-09-03 18:43:56+02:00 \n", - "2 1252665 426962 44332 2023-07-06 12:13:08+02:00 \n", - "3 1252665 426962 44332 2023-07-06 12:13:08+02:00 \n", - "4 1252671 426731 44332 2023-07-06 13:10:07+02:00 \n", - "... ... ... ... ... \n", - "318964 1212797 11092 33810 2018-11-28 13:44:22+01:00 \n", - "318965 1213476 25851 33810 2018-12-28 16:53:36+01:00 \n", - "318966 1226039 26314 33810 2018-12-29 16:30:47+01:00 \n", - "318967 1244276 3104 33810 2018-12-31 19:54:09+01:00 \n", - "318968 1244285 86 33766 2019-12-31 13:02:47+01:00 \n", - "\n", - " type_of is_from_subscription amount is_full_price \n", - "0 1 False 18.0 False \n", - "1 1 False 18.0 False \n", - "2 1 False 18.0 False \n", - "3 1 False 18.0 False \n", - "4 3 False 18.0 False \n", - "... ... ... ... ... \n", - "318964 1 False 30.0 False \n", - "318965 1 False 30.0 False \n", - "318966 1 False 30.0 False \n", - "318967 1 False 30.0 False \n", - "318968 3 False 21.0 False \n", - "\n", - "[318969 rows x 8 columns]" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "merge_2 = pd.merge(products, merge_1, left_on='id', right_on='product_id', how='inner')[['id_x', 'customer_id', 'representation_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price']]\n", - "merge_2" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "9d394f79-2615-448e-8ebd-074e225f1584", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idserialevent_idcreated_atupdated_atstart_date_timeopensatisfactionend_date_timename...extra_fieldidentifierid_xcustomer_idrepresentation_idpurchase_datetype_ofis_from_subscriptionamountis_full_price
044351NaN203712023-09-13 03:42:45.245879+02:002023-09-13 03:42:45.245879+02:002023-12-21 20:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaN33520762e8cc28982e3841cbc2be8ce21293590627443512023-11-08 12:25:21+01:000False22.4False
144351NaN203712023-09-13 03:42:45.245879+02:002023-09-13 03:42:45.245879+02:002023-12-21 20:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaN33520762e8cc28982e3841cbc2be8ce21293590627443512023-11-08 12:25:21+01:000False22.4False
244351NaN203712023-09-13 03:42:45.245879+02:002023-09-13 03:42:45.245879+02:002023-12-21 20:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaN33520762e8cc28982e3841cbc2be8ce21293590627443512023-11-08 12:25:21+01:000False22.4False
344351NaN203712023-09-13 03:42:45.245879+02:002023-09-13 03:42:45.245879+02:002023-12-21 20:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaN33520762e8cc28982e3841cbc2be8ce21293590627443512023-11-08 12:25:21+01:000False22.4False
444351NaN203712023-09-13 03:42:45.245879+02:002023-09-13 03:42:45.245879+02:002023-12-21 20:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaN33520762e8cc28982e3841cbc2be8ce21293590627443512023-11-08 12:25:21+01:000False22.4False
..................................................................
31896433639NaN155332023-09-12 17:42:25.455708+02:002023-09-12 17:42:25.455708+02:002023-04-15 17:30:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaNfae68f1e09710ec8747957af6e22f61d118302615258336392023-03-26 16:09:31+02:001False0.0False
31896533639NaN155332023-09-12 17:42:25.455708+02:002023-09-12 17:42:25.455708+02:002023-04-15 17:30:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaNfae68f1e09710ec8747957af6e22f61d118302615258336392023-03-26 16:09:31+02:001False0.0False
31896633639NaN155332023-09-12 17:42:25.455708+02:002023-09-12 17:42:25.455708+02:002023-04-15 17:30:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaNfae68f1e09710ec8747957af6e22f61d118302615258336392023-03-26 16:09:31+02:001False0.0False
31896733639NaN155332023-09-12 17:42:25.455708+02:002023-09-12 17:42:25.455708+02:002023-04-15 17:30:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaNfae68f1e09710ec8747957af6e22f61d1194433412831336392023-03-27 17:38:59+02:001False0.0False
31896833639NaN155332023-09-12 17:42:25.455708+02:002023-09-12 17:42:25.455708+02:002023-04-15 17:30:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaNfae68f1e09710ec8747957af6e22f61d1194433412831336392023-03-27 17:38:59+02:001False0.0False
\n", - "

318969 rows × 24 columns

\n", - "
" - ], - "text/plain": [ - " id serial event_id created_at \\\n", - "0 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n", - "1 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n", - "2 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n", - "3 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n", - "4 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n", - "... ... ... ... ... \n", - "318964 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n", - "318965 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n", - "318966 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n", - "318967 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n", - "318968 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n", - "\n", - " updated_at start_date_time open \\\n", - "0 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n", - "1 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n", - "2 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n", - "3 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n", - "4 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n", - "... ... ... ... \n", - "318964 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n", - "318965 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n", - "318966 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n", - "318967 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n", - "318968 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n", - "\n", - " satisfaction end_date_time name ... extra_field \\\n", - "0 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", - "1 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", - "2 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", - "3 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", - "4 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", - "... ... ... ... ... ... \n", - "318964 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", - "318965 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", - "318966 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", - "318967 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", - "318968 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", - "\n", - " identifier id_x customer_id \\\n", - "0 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n", - "1 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n", - "2 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n", - "3 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n", - "4 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n", - "... ... ... ... \n", - "318964 fae68f1e09710ec8747957af6e22f61d 1183026 15258 \n", - "318965 fae68f1e09710ec8747957af6e22f61d 1183026 15258 \n", - "318966 fae68f1e09710ec8747957af6e22f61d 1183026 15258 \n", - "318967 fae68f1e09710ec8747957af6e22f61d 1194433 412831 \n", - "318968 fae68f1e09710ec8747957af6e22f61d 1194433 412831 \n", - "\n", - " representation_id purchase_date type_of \\\n", - "0 44351 2023-11-08 12:25:21+01:00 0 \n", - "1 44351 2023-11-08 12:25:21+01:00 0 \n", - "2 44351 2023-11-08 12:25:21+01:00 0 \n", - "3 44351 2023-11-08 12:25:21+01:00 0 \n", - "4 44351 2023-11-08 12:25:21+01:00 0 \n", - "... ... ... ... \n", - "318964 33639 2023-03-26 16:09:31+02:00 1 \n", - "318965 33639 2023-03-26 16:09:31+02:00 1 \n", - "318966 33639 2023-03-26 16:09:31+02:00 1 \n", - "318967 33639 2023-03-27 17:38:59+02:00 1 \n", - "318968 33639 2023-03-27 17:38:59+02:00 1 \n", - "\n", - " is_from_subscription amount is_full_price \n", - "0 False 22.4 False \n", - "1 False 22.4 False \n", - "2 False 22.4 False \n", - "3 False 22.4 False \n", - "4 False 22.4 False \n", - "... ... ... ... \n", - "318964 False 0.0 False \n", - "318965 False 0.0 False \n", - "318966 False 0.0 False \n", - "318967 False 0.0 False \n", - "318968 False 0.0 False \n", - "\n", - "[318969 rows x 24 columns]" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "63bcbfad-fa20-425a-881f-ca9aa212c419", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
id_xcustomer_idevent_idpurchase_datetype_ofis_from_subscriptionamountis_full_pricestart_date_time
01293590627203712023-11-08 12:25:21+01:000False22.4False2023-12-21 20:00:00+01:00
11293590627203712023-11-08 12:25:21+01:000False22.4False2023-12-21 20:00:00+01:00
21293590627203712023-11-08 12:25:21+01:000False22.4False2023-12-21 20:00:00+01:00
31293590627203712023-11-08 12:25:21+01:000False22.4False2023-12-21 20:00:00+01:00
41293590627203712023-11-08 12:25:21+01:000False22.4False2023-12-21 20:00:00+01:00
..............................
318964118302615258155332023-03-26 16:09:31+02:001False0.0False2023-04-15 17:30:00+02:00
318965118302615258155332023-03-26 16:09:31+02:001False0.0False2023-04-15 17:30:00+02:00
318966118302615258155332023-03-26 16:09:31+02:001False0.0False2023-04-15 17:30:00+02:00
3189671194433412831155332023-03-27 17:38:59+02:001False0.0False2023-04-15 17:30:00+02:00
3189681194433412831155332023-03-27 17:38:59+02:001False0.0False2023-04-15 17:30:00+02:00
\n", - "

318969 rows × 9 columns

\n", - "
" - ], - "text/plain": [ - " id_x customer_id event_id purchase_date type_of \\\n", - "0 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n", - "1 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n", - "2 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n", - "3 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n", - "4 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n", - "... ... ... ... ... ... \n", - "318964 1183026 15258 15533 2023-03-26 16:09:31+02:00 1 \n", - "318965 1183026 15258 15533 2023-03-26 16:09:31+02:00 1 \n", - "318966 1183026 15258 15533 2023-03-26 16:09:31+02:00 1 \n", - "318967 1194433 412831 15533 2023-03-27 17:38:59+02:00 1 \n", - "318968 1194433 412831 15533 2023-03-27 17:38:59+02:00 1 \n", - "\n", - " is_from_subscription amount is_full_price start_date_time \n", - "0 False 22.4 False 2023-12-21 20:00:00+01:00 \n", - "1 False 22.4 False 2023-12-21 20:00:00+01:00 \n", - "2 False 22.4 False 2023-12-21 20:00:00+01:00 \n", - "3 False 22.4 False 2023-12-21 20:00:00+01:00 \n", - "4 False 22.4 False 2023-12-21 20:00:00+01:00 \n", - "... ... ... ... ... \n", - "318964 False 0.0 False 2023-04-15 17:30:00+02:00 \n", - "318965 False 0.0 False 2023-04-15 17:30:00+02:00 \n", - "318966 False 0.0 False 2023-04-15 17:30:00+02:00 \n", - "318967 False 0.0 False 2023-04-15 17:30:00+02:00 \n", - "318968 False 0.0 False 2023-04-15 17:30:00+02:00 \n", - "\n", - "[318969 rows x 9 columns]" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "merge_3 = pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')[['id_x', 'customer_id', 'event_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time']]\n", - "merge_3" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "db52559b-6562-439b-b16e-f5d8dc9bc891", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcreated_atupdated_atseason_idfacility_idnameevent_type_idmanual_addedis_displayevent_type_key_id...identifierid_xcustomer_idevent_idpurchase_datetype_ofis_from_subscriptionamountis_full_pricestart_date_time
0203672023-09-13 03:42:45.214293+02:002023-09-13 03:54:30.086969+02:0018651054marelle1055FalseTrue1055...26d1e9a4acad18b9cf79244334c86c931253614432123203672023-09-07 18:02:58+02:003False2.0False2023-11-29 14:30:00+01:00
1203672023-09-13 03:42:45.214293+02:002023-09-13 03:54:30.086969+02:0018651054marelle1055FalseTrue1055...26d1e9a4acad18b9cf79244334c86c931253614432123203672023-09-07 18:02:58+02:003False2.0False2023-11-29 14:30:00+01:00
2203672023-09-13 03:42:45.214293+02:002023-09-13 03:54:30.086969+02:0018651054marelle1055FalseTrue1055...26d1e9a4acad18b9cf79244334c86c931252930431824203672023-09-06 16:06:40+02:001False5.0False2023-11-29 14:30:00+01:00
3203672023-09-13 03:42:45.214293+02:002023-09-13 03:54:30.086969+02:0018651054marelle1055FalseTrue1055...26d1e9a4acad18b9cf79244334c86c931252931431824203672023-09-06 16:06:42+02:001False5.0False2023-11-29 14:30:00+01:00
4203672023-09-13 03:42:45.214293+02:002023-09-13 03:54:30.086969+02:0018651054marelle1055FalseTrue1055...26d1e9a4acad18b9cf79244334c86c931252932431824203672023-09-06 16:06:44+02:001False5.0False2023-11-29 14:30:00+01:00
..................................................................
318964154392023-09-12 17:42:25.252747+02:002023-09-12 19:00:00.735990+02:0017081054florilege1055FalseTrue1055...4f015946bcbd856aa573cadb7ac42b9f1206691358863154392023-03-28 17:53:40+02:003False4.0False2023-03-29 20:00:00+02:00
318965154392023-09-12 17:42:25.252747+02:002023-09-12 19:00:00.735990+02:0017081054florilege1055FalseTrue1055...4f015946bcbd856aa573cadb7ac42b9f1218071413015154392023-03-29 17:01:01+02:001False4.0False2023-03-29 20:00:00+02:00
318966154392023-09-12 17:42:25.252747+02:002023-09-12 19:00:00.735990+02:0017081054florilege1055FalseTrue1055...4f015946bcbd856aa573cadb7ac42b9f1218125344045154392023-03-29 18:20:05+02:001False4.0False2023-03-29 20:00:00+02:00
318967154392023-09-12 17:42:25.252747+02:002023-09-12 19:00:00.735990+02:0017081054florilege1055FalseTrue1055...4f015946bcbd856aa573cadb7ac42b9f1218185381006154392023-03-29 19:50:18+02:001False4.0False2023-03-29 20:00:00+02:00
318968154392023-09-12 17:42:25.252747+02:002023-09-12 19:00:00.735990+02:0017081054florilege1055FalseTrue1055...4f015946bcbd856aa573cadb7ac42b9f12390744512154392023-01-31 16:14:27+01:001False4.0False2023-03-29 20:00:00+02:00
\n", - "

318969 rows × 21 columns

\n", - "
" - ], - "text/plain": [ - " id created_at \\\n", - "0 20367 2023-09-13 03:42:45.214293+02:00 \n", - "1 20367 2023-09-13 03:42:45.214293+02:00 \n", - "2 20367 2023-09-13 03:42:45.214293+02:00 \n", - "3 20367 2023-09-13 03:42:45.214293+02:00 \n", - "4 20367 2023-09-13 03:42:45.214293+02:00 \n", - "... ... ... \n", - "318964 15439 2023-09-12 17:42:25.252747+02:00 \n", - "318965 15439 2023-09-12 17:42:25.252747+02:00 \n", - "318966 15439 2023-09-12 17:42:25.252747+02:00 \n", - "318967 15439 2023-09-12 17:42:25.252747+02:00 \n", - "318968 15439 2023-09-12 17:42:25.252747+02:00 \n", - "\n", - " updated_at season_id facility_id name \\\n", - "0 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n", - "1 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n", - "2 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n", - "3 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n", - "4 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n", - "... ... ... ... ... \n", - "318964 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n", - "318965 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n", - "318966 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n", - "318967 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n", - "318968 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n", - "\n", - " event_type_id manual_added is_display event_type_key_id ... \\\n", - "0 1055 False True 1055 ... \n", - "1 1055 False True 1055 ... \n", - "2 1055 False True 1055 ... \n", - "3 1055 False True 1055 ... \n", - "4 1055 False True 1055 ... \n", - "... ... ... ... ... ... \n", - "318964 1055 False True 1055 ... \n", - "318965 1055 False True 1055 ... \n", - "318966 1055 False True 1055 ... \n", - "318967 1055 False True 1055 ... \n", - "318968 1055 False True 1055 ... \n", - "\n", - " identifier id_x customer_id event_id \\\n", - "0 26d1e9a4acad18b9cf79244334c86c93 1253614 432123 20367 \n", - "1 26d1e9a4acad18b9cf79244334c86c93 1253614 432123 20367 \n", - "2 26d1e9a4acad18b9cf79244334c86c93 1252930 431824 20367 \n", - "3 26d1e9a4acad18b9cf79244334c86c93 1252931 431824 20367 \n", - "4 26d1e9a4acad18b9cf79244334c86c93 1252932 431824 20367 \n", - "... ... ... ... ... \n", - "318964 4f015946bcbd856aa573cadb7ac42b9f 1206691 358863 15439 \n", - "318965 4f015946bcbd856aa573cadb7ac42b9f 1218071 413015 15439 \n", - "318966 4f015946bcbd856aa573cadb7ac42b9f 1218125 344045 15439 \n", - "318967 4f015946bcbd856aa573cadb7ac42b9f 1218185 381006 15439 \n", - "318968 4f015946bcbd856aa573cadb7ac42b9f 1239074 4512 15439 \n", - "\n", - " purchase_date type_of is_from_subscription amount \\\n", - "0 2023-09-07 18:02:58+02:00 3 False 2.0 \n", - "1 2023-09-07 18:02:58+02:00 3 False 2.0 \n", - "2 2023-09-06 16:06:40+02:00 1 False 5.0 \n", - "3 2023-09-06 16:06:42+02:00 1 False 5.0 \n", - "4 2023-09-06 16:06:44+02:00 1 False 5.0 \n", - "... ... ... ... ... \n", - "318964 2023-03-28 17:53:40+02:00 3 False 4.0 \n", - "318965 2023-03-29 17:01:01+02:00 1 False 4.0 \n", - "318966 2023-03-29 18:20:05+02:00 1 False 4.0 \n", - "318967 2023-03-29 19:50:18+02:00 1 False 4.0 \n", - "318968 2023-01-31 16:14:27+01:00 1 False 4.0 \n", - "\n", - " is_full_price start_date_time \n", - "0 False 2023-11-29 14:30:00+01:00 \n", - "1 False 2023-11-29 14:30:00+01:00 \n", - "2 False 2023-11-29 14:30:00+01:00 \n", - "3 False 2023-11-29 14:30:00+01:00 \n", - "4 False 2023-11-29 14:30:00+01:00 \n", - "... ... ... \n", - "318964 False 2023-03-29 20:00:00+02:00 \n", - "318965 False 2023-03-29 20:00:00+02:00 \n", - "318966 False 2023-03-29 20:00:00+02:00 \n", - "318967 False 2023-03-29 20:00:00+02:00 \n", - "318968 False 2023-03-29 20:00:00+02:00 \n", - "\n", - "[318969 rows x 21 columns]" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "d8ab2477-c199-4815-88d9-c5683e466772", - "metadata": {}, - "outputs": [], - "source": [ - "merge_4 = pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')[['id_x', 'customer_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time', 'name']]\n", - "merge_4 = merge_4.rename(columns={'name': 'event_name'})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c0917b77-6a73-4ae3-a58a-0bb7964f1406", - "metadata": {}, - "outputs": [], - "source": [ - "merge_5 = pd.merge(customersplus, merge_4, left_on = 'id', right_on = 'customer_id', how = " - ] } ], "metadata": { diff --git a/Notebook_AJ.ipynb b/Notebook_AJ.ipynb index c59dff1..19272b5 100644 --- a/Notebook_AJ.ipynb +++ b/Notebook_AJ.ipynb @@ -69,59 +69,6 @@ "fs.ls(BUCKET)" ] }, - { - "cell_type": "code", - "execution_count": 3, - "id": "d60f6b27-00b4-4655-9325-79169d1e68df", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bdc2324-data/1\n", - "['bdc2324-data/1/1campaign_stats.csv', 'bdc2324-data/1/1campaigns.csv', 'bdc2324-data/1/1categories.csv', 'bdc2324-data/1/1countries.csv', 'bdc2324-data/1/1currencies.csv', 'bdc2324-data/1/1customer_target_mappings.csv', 'bdc2324-data/1/1customersplus.csv', 'bdc2324-data/1/1event_types.csv', 'bdc2324-data/1/1events.csv', 'bdc2324-data/1/1facilities.csv', 'bdc2324-data/1/1link_stats.csv', 'bdc2324-data/1/1pricing_formulas.csv', 'bdc2324-data/1/1product_packs.csv', 'bdc2324-data/1/1products.csv', 'bdc2324-data/1/1products_groups.csv', 'bdc2324-data/1/1purchases.csv', 'bdc2324-data/1/1representation_category_capacities.csv', 'bdc2324-data/1/1representations.csv', 'bdc2324-data/1/1seasons.csv', 'bdc2324-data/1/1structure_tag_mappings.csv', 'bdc2324-data/1/1suppliers.csv', 'bdc2324-data/1/1tags.csv', 'bdc2324-data/1/1target_types.csv', 'bdc2324-data/1/1targets.csv', 'bdc2324-data/1/1tickets.csv', 'bdc2324-data/1/1type_of_categories.csv', 'bdc2324-data/1/1type_of_pricing_formulas.csv', 'bdc2324-data/1/1type_ofs.csv']\n", - "bdc2324-data/2\n", - "['bdc2324-data/2/2campaign_stats.csv', 'bdc2324-data/2/2campaigns.csv', 'bdc2324-data/2/2categories.csv', 'bdc2324-data/2/2contribution_sites.csv', 'bdc2324-data/2/2contributions.csv', 'bdc2324-data/2/2countries.csv', 'bdc2324-data/2/2currencies.csv', 'bdc2324-data/2/2customer_target_mappings.csv', 'bdc2324-data/2/2customersplus.csv', 'bdc2324-data/2/2event_types.csv', 'bdc2324-data/2/2events.csv', 'bdc2324-data/2/2facilities.csv', 'bdc2324-data/2/2link_stats.csv', 'bdc2324-data/2/2pricing_formulas.csv', 'bdc2324-data/2/2product_packs.csv', 'bdc2324-data/2/2products.csv', 'bdc2324-data/2/2products_groups.csv', 'bdc2324-data/2/2purchases.csv', 'bdc2324-data/2/2representation_category_capacities.csv', 'bdc2324-data/2/2representations.csv', 'bdc2324-data/2/2seasons.csv', 'bdc2324-data/2/2structure_tag_mappings.csv', 'bdc2324-data/2/2suppliers.csv', 'bdc2324-data/2/2tags.csv', 'bdc2324-data/2/2target_types.csv', 'bdc2324-data/2/2targets.csv', 'bdc2324-data/2/2tickets.csv']\n", - "bdc2324-data/3\n", - "['bdc2324-data/3/3campaign_stats.csv', 'bdc2324-data/3/3campaigns.csv', 'bdc2324-data/3/3categories.csv', 'bdc2324-data/3/3consumptions.csv', 'bdc2324-data/3/3contribution_sites.csv', 'bdc2324-data/3/3contributions.csv', 'bdc2324-data/3/3countries.csv', 'bdc2324-data/3/3currencies.csv', 'bdc2324-data/3/3customer_target_mappings.csv', 'bdc2324-data/3/3customersplus.csv', 'bdc2324-data/3/3event_types.csv', 'bdc2324-data/3/3events.csv', 'bdc2324-data/3/3facilities.csv', 'bdc2324-data/3/3link_stats.csv', 'bdc2324-data/3/3pricing_formulas.csv', 'bdc2324-data/3/3product_packs.csv', 'bdc2324-data/3/3products.csv', 'bdc2324-data/3/3products_groups.csv', 'bdc2324-data/3/3purchases.csv', 'bdc2324-data/3/3representation_category_capacities.csv', 'bdc2324-data/3/3representations.csv', 'bdc2324-data/3/3seasons.csv', 'bdc2324-data/3/3structure_tag_mappings.csv', 'bdc2324-data/3/3suppliers.csv', 'bdc2324-data/3/3tags.csv', 'bdc2324-data/3/3target_types.csv', 'bdc2324-data/3/3targets.csv', 'bdc2324-data/3/3tickets.csv']\n", - "bdc2324-data/4\n", - "['bdc2324-data/4/4campaign_stats.csv', 'bdc2324-data/4/4campaigns.csv', 'bdc2324-data/4/4categories.csv', 'bdc2324-data/4/4contribution_sites.csv', 'bdc2324-data/4/4contributions.csv', 'bdc2324-data/4/4countries.csv', 'bdc2324-data/4/4currencies.csv', 'bdc2324-data/4/4customer_target_mappings.csv', 'bdc2324-data/4/4customersplus.csv', 'bdc2324-data/4/4event_types.csv', 'bdc2324-data/4/4events.csv', 'bdc2324-data/4/4facilities.csv', 'bdc2324-data/4/4link_stats.csv', 'bdc2324-data/4/4pricing_formulas.csv', 'bdc2324-data/4/4product_packs.csv', 'bdc2324-data/4/4products.csv', 'bdc2324-data/4/4products_groups.csv', 'bdc2324-data/4/4purchases.csv', 'bdc2324-data/4/4representation_category_capacities.csv', 'bdc2324-data/4/4representations.csv', 'bdc2324-data/4/4seasons.csv', 'bdc2324-data/4/4structure_tag_mappings.csv', 'bdc2324-data/4/4suppliers.csv', 'bdc2324-data/4/4tags.csv', 'bdc2324-data/4/4target_types.csv', 'bdc2324-data/4/4targets.csv', 'bdc2324-data/4/4tickets.csv', 'bdc2324-data/4/4type_of_pricing_formulas.csv', 'bdc2324-data/4/4type_ofs.csv']\n", - "bdc2324-data/5\n", - "['bdc2324-data/5/5campaign_stats.csv', 'bdc2324-data/5/5campaigns.csv', 'bdc2324-data/5/5categories.csv', 'bdc2324-data/5/5consumptions.csv', 'bdc2324-data/5/5countries.csv', 'bdc2324-data/5/5currencies.csv', 'bdc2324-data/5/5customer_target_mappings.csv', 'bdc2324-data/5/5customersplus.csv', 'bdc2324-data/5/5event_types.csv', 'bdc2324-data/5/5events.csv', 'bdc2324-data/5/5facilities.csv', 'bdc2324-data/5/5link_stats.csv', 'bdc2324-data/5/5pricing_formulas.csv', 'bdc2324-data/5/5product_packs.csv', 'bdc2324-data/5/5products.csv', 'bdc2324-data/5/5products_groups.csv', 'bdc2324-data/5/5purchases.csv', 'bdc2324-data/5/5representation_category_capacities.csv', 'bdc2324-data/5/5representations.csv', 'bdc2324-data/5/5seasons.csv', 'bdc2324-data/5/5suppliers.csv', 'bdc2324-data/5/5target_types.csv', 'bdc2324-data/5/5targets.csv', 'bdc2324-data/5/5tickets.csv']\n", - "bdc2324-data/6\n", - "['bdc2324-data/6/6campaign_stats.csv', 'bdc2324-data/6/6campaigns.csv', 'bdc2324-data/6/6categories.csv', 'bdc2324-data/6/6consumptions.csv', 'bdc2324-data/6/6countries.csv', 'bdc2324-data/6/6currencies.csv', 'bdc2324-data/6/6customer_target_mappings.csv', 'bdc2324-data/6/6customersplus.csv', 'bdc2324-data/6/6event_types.csv', 'bdc2324-data/6/6events.csv', 'bdc2324-data/6/6facilities.csv', 'bdc2324-data/6/6link_stats.csv', 'bdc2324-data/6/6pricing_formulas.csv', 'bdc2324-data/6/6product_packs.csv', 'bdc2324-data/6/6products.csv', 'bdc2324-data/6/6products_groups.csv', 'bdc2324-data/6/6purchases.csv', 'bdc2324-data/6/6representation_category_capacities.csv', 'bdc2324-data/6/6representations.csv', 'bdc2324-data/6/6seasons.csv', 'bdc2324-data/6/6structure_tag_mappings.csv', 'bdc2324-data/6/6suppliers.csv', 'bdc2324-data/6/6tags.csv', 'bdc2324-data/6/6target_types.csv', 'bdc2324-data/6/6targets.csv', 'bdc2324-data/6/6tickets.csv', 'bdc2324-data/6/6type_of_pricing_formulas.csv', 'bdc2324-data/6/6type_ofs.csv']\n", - "bdc2324-data/7\n", - "['bdc2324-data/7/7campaign_stats.csv', 'bdc2324-data/7/7campaigns.csv', 'bdc2324-data/7/7categories.csv', 'bdc2324-data/7/7consumptions.csv', 'bdc2324-data/7/7countries.csv', 'bdc2324-data/7/7currencies.csv', 'bdc2324-data/7/7customer_target_mappings.csv', 'bdc2324-data/7/7customersplus.csv', 'bdc2324-data/7/7event_types.csv', 'bdc2324-data/7/7events.csv', 'bdc2324-data/7/7facilities.csv', 'bdc2324-data/7/7link_stats.csv', 'bdc2324-data/7/7pricing_formulas.csv', 'bdc2324-data/7/7product_packs.csv', 'bdc2324-data/7/7products.csv', 'bdc2324-data/7/7products_groups.csv', 'bdc2324-data/7/7purchases.csv', 'bdc2324-data/7/7representation_category_capacities.csv', 'bdc2324-data/7/7representation_types.csv', 'bdc2324-data/7/7representations.csv', 'bdc2324-data/7/7seasons.csv', 'bdc2324-data/7/7structure_tag_mappings.csv', 'bdc2324-data/7/7suppliers.csv', 'bdc2324-data/7/7tags.csv', 'bdc2324-data/7/7target_types.csv', 'bdc2324-data/7/7targets.csv', 'bdc2324-data/7/7tickets.csv', 'bdc2324-data/7/7type_of_categories.csv', 'bdc2324-data/7/7type_of_pricing_formulas.csv', 'bdc2324-data/7/7type_ofs.csv']\n", - "bdc2324-data/8\n", - "['bdc2324-data/8/8campaign_stats.csv', 'bdc2324-data/8/8campaigns.csv', 'bdc2324-data/8/8categories.csv', 'bdc2324-data/8/8countries.csv', 'bdc2324-data/8/8currencies.csv', 'bdc2324-data/8/8customer_target_mappings.csv', 'bdc2324-data/8/8customersplus.csv', 'bdc2324-data/8/8event_types.csv', 'bdc2324-data/8/8events.csv', 'bdc2324-data/8/8facilities.csv', 'bdc2324-data/8/8link_stats.csv', 'bdc2324-data/8/8pricing_formulas.csv', 'bdc2324-data/8/8product_packs.csv', 'bdc2324-data/8/8products.csv', 'bdc2324-data/8/8products_groups.csv', 'bdc2324-data/8/8purchases.csv', 'bdc2324-data/8/8representation_category_capacities.csv', 'bdc2324-data/8/8representations.csv', 'bdc2324-data/8/8seasons.csv', 'bdc2324-data/8/8suppliers.csv', 'bdc2324-data/8/8target_types.csv', 'bdc2324-data/8/8targets.csv', 'bdc2324-data/8/8tickets.csv', 'bdc2324-data/8/8type_of_categories.csv', 'bdc2324-data/8/8type_of_pricing_formulas.csv', 'bdc2324-data/8/8type_ofs.csv']\n", - "bdc2324-data/9\n", - "['bdc2324-data/9/9campaign_stats.csv', 'bdc2324-data/9/9campaigns.csv', 'bdc2324-data/9/9categories.csv', 'bdc2324-data/9/9countries.csv', 'bdc2324-data/9/9currencies.csv', 'bdc2324-data/9/9customer_target_mappings.csv', 'bdc2324-data/9/9customersplus.csv', 'bdc2324-data/9/9event_types.csv', 'bdc2324-data/9/9events.csv', 'bdc2324-data/9/9facilities.csv', 'bdc2324-data/9/9link_stats.csv', 'bdc2324-data/9/9pricing_formulas.csv', 'bdc2324-data/9/9product_packs.csv', 'bdc2324-data/9/9products.csv', 'bdc2324-data/9/9products_groups.csv', 'bdc2324-data/9/9purchases.csv', 'bdc2324-data/9/9representation_category_capacities.csv', 'bdc2324-data/9/9representations.csv', 'bdc2324-data/9/9seasons.csv', 'bdc2324-data/9/9suppliers.csv', 'bdc2324-data/9/9target_types.csv', 'bdc2324-data/9/9targets.csv', 'bdc2324-data/9/9tickets.csv']\n", - "bdc2324-data/10\n", - "['bdc2324-data/10/10campaign_stats.csv', 'bdc2324-data/10/10campaigns.csv', 'bdc2324-data/10/10categories.csv', 'bdc2324-data/10/10countries.csv', 'bdc2324-data/10/10currencies.csv', 'bdc2324-data/10/10customer_target_mappings.csv', 'bdc2324-data/10/10customersplus.csv', 'bdc2324-data/10/10event_types.csv', 'bdc2324-data/10/10events.csv', 'bdc2324-data/10/10facilities.csv', 'bdc2324-data/10/10link_stats.csv', 'bdc2324-data/10/10pricing_formulas.csv', 'bdc2324-data/10/10product_packs.csv', 'bdc2324-data/10/10products.csv', 'bdc2324-data/10/10products_groups.csv', 'bdc2324-data/10/10purchases.csv', 'bdc2324-data/10/10representation_category_capacities.csv', 'bdc2324-data/10/10representation_types.csv', 'bdc2324-data/10/10representations.csv', 'bdc2324-data/10/10seasons.csv', 'bdc2324-data/10/10suppliers.csv', 'bdc2324-data/10/10tags.csv', 'bdc2324-data/10/10target_types.csv', 'bdc2324-data/10/10targets.csv', 'bdc2324-data/10/10tickets.csv', 'bdc2324-data/10/10type_of_pricing_formulas.csv', 'bdc2324-data/10/10type_ofs.csv']\n", - "bdc2324-data/11\n", - "['bdc2324-data/11/11campaign_stats.csv', 'bdc2324-data/11/11campaigns.csv', 'bdc2324-data/11/11categories.csv', 'bdc2324-data/11/11countries.csv', 'bdc2324-data/11/11currencies.csv', 'bdc2324-data/11/11customer_target_mappings.csv', 'bdc2324-data/11/11customersplus.csv', 'bdc2324-data/11/11event_types.csv', 'bdc2324-data/11/11events.csv', 'bdc2324-data/11/11facilities.csv', 'bdc2324-data/11/11link_stats.csv', 'bdc2324-data/11/11pricing_formulas.csv', 'bdc2324-data/11/11product_packs.csv', 'bdc2324-data/11/11products.csv', 'bdc2324-data/11/11products_groups.csv', 'bdc2324-data/11/11purchases.csv', 'bdc2324-data/11/11representation_category_capacities.csv', 'bdc2324-data/11/11representations.csv', 'bdc2324-data/11/11seasons.csv', 'bdc2324-data/11/11structure_tag_mappings.csv', 'bdc2324-data/11/11suppliers.csv', 'bdc2324-data/11/11tags.csv', 'bdc2324-data/11/11target_types.csv', 'bdc2324-data/11/11targets.csv', 'bdc2324-data/11/11tickets.csv']\n", - "bdc2324-data/12\n", - "['bdc2324-data/12/12campaign_stats.csv', 'bdc2324-data/12/12campaigns.csv', 'bdc2324-data/12/12categories.csv', 'bdc2324-data/12/12consumptions.csv', 'bdc2324-data/12/12countries.csv', 'bdc2324-data/12/12currencies.csv', 'bdc2324-data/12/12customer_target_mappings.csv', 'bdc2324-data/12/12customersplus.csv', 'bdc2324-data/12/12event_types.csv', 'bdc2324-data/12/12events.csv', 'bdc2324-data/12/12facilities.csv', 'bdc2324-data/12/12link_stats.csv', 'bdc2324-data/12/12pricing_formulas.csv', 'bdc2324-data/12/12product_packs.csv', 'bdc2324-data/12/12products.csv', 'bdc2324-data/12/12products_groups.csv', 'bdc2324-data/12/12purchases.csv', 'bdc2324-data/12/12representation_category_capacities.csv', 'bdc2324-data/12/12representations.csv', 'bdc2324-data/12/12seasons.csv', 'bdc2324-data/12/12suppliers.csv', 'bdc2324-data/12/12target_types.csv', 'bdc2324-data/12/12targets.csv', 'bdc2324-data/12/12tickets.csv', 'bdc2324-data/12/12type_ofs.csv']\n", - "bdc2324-data/13\n", - "['bdc2324-data/13/13campaign_stats.csv', 'bdc2324-data/13/13campaigns.csv', 'bdc2324-data/13/13categories.csv', 'bdc2324-data/13/13countries.csv', 'bdc2324-data/13/13currencies.csv', 'bdc2324-data/13/13customer_target_mappings.csv', 'bdc2324-data/13/13customersplus.csv', 'bdc2324-data/13/13event_types.csv', 'bdc2324-data/13/13events.csv', 'bdc2324-data/13/13facilities.csv', 'bdc2324-data/13/13link_stats.csv', 'bdc2324-data/13/13pricing_formulas.csv', 'bdc2324-data/13/13product_packs.csv', 'bdc2324-data/13/13products.csv', 'bdc2324-data/13/13products_groups.csv', 'bdc2324-data/13/13purchases.csv', 'bdc2324-data/13/13representation_category_capacities.csv', 'bdc2324-data/13/13representation_types.csv', 'bdc2324-data/13/13representations.csv', 'bdc2324-data/13/13seasons.csv', 'bdc2324-data/13/13structure_tag_mappings.csv', 'bdc2324-data/13/13suppliers.csv', 'bdc2324-data/13/13tags.csv', 'bdc2324-data/13/13target_types.csv', 'bdc2324-data/13/13targets.csv', 'bdc2324-data/13/13tickets.csv']\n", - "bdc2324-data/14\n", - "['bdc2324-data/14/14campaign_stats.csv', 'bdc2324-data/14/14campaigns.csv', 'bdc2324-data/14/14categories.csv', 'bdc2324-data/14/14countries.csv', 'bdc2324-data/14/14currencies.csv', 'bdc2324-data/14/14customer_target_mappings.csv', 'bdc2324-data/14/14customersplus.csv', 'bdc2324-data/14/14event_types.csv', 'bdc2324-data/14/14events.csv', 'bdc2324-data/14/14facilities.csv', 'bdc2324-data/14/14link_stats.csv', 'bdc2324-data/14/14pricing_formulas.csv', 'bdc2324-data/14/14product_packs.csv', 'bdc2324-data/14/14products.csv', 'bdc2324-data/14/14products_groups.csv', 'bdc2324-data/14/14purchases.csv', 'bdc2324-data/14/14representation_category_capacities.csv', 'bdc2324-data/14/14representation_types.csv', 'bdc2324-data/14/14representations.csv', 'bdc2324-data/14/14seasons.csv', 'bdc2324-data/14/14suppliers.csv', 'bdc2324-data/14/14target_types.csv', 'bdc2324-data/14/14targets.csv', 'bdc2324-data/14/14tickets.csv', 'bdc2324-data/14/14type_of_categories.csv', 'bdc2324-data/14/14type_of_pricing_formulas.csv', 'bdc2324-data/14/14type_ofs.csv']\n", - "bdc2324-data/101\n", - "['bdc2324-data/101/101campaign_stats.csv', 'bdc2324-data/101/101campaigns.csv', 'bdc2324-data/101/101categories.csv', 'bdc2324-data/101/101contribution_sites.csv', 'bdc2324-data/101/101contributions.csv', 'bdc2324-data/101/101countries.csv', 'bdc2324-data/101/101currencies.csv', 'bdc2324-data/101/101customer_target_mappings.csv', 'bdc2324-data/101/101customersplus.csv', 'bdc2324-data/101/101event_types.csv', 'bdc2324-data/101/101events.csv', 'bdc2324-data/101/101facilities.csv', 'bdc2324-data/101/101link_stats.csv', 'bdc2324-data/101/101pricing_formulas.csv', 'bdc2324-data/101/101product_packs.csv', 'bdc2324-data/101/101products.csv', 'bdc2324-data/101/101products_groups.csv', 'bdc2324-data/101/101purchases.csv', 'bdc2324-data/101/101representation_category_capacities.csv', 'bdc2324-data/101/101representations.csv', 'bdc2324-data/101/101seasons.csv', 'bdc2324-data/101/101structure_tag_mappings.csv', 'bdc2324-data/101/101suppliers.csv', 'bdc2324-data/101/101tags.csv', 'bdc2324-data/101/101target_types.csv', 'bdc2324-data/101/101targets.csv', 'bdc2324-data/101/101tickets.csv', 'bdc2324-data/101/101tickets_1.csv', 'bdc2324-data/101/101type_of_pricing_formulas.csv', 'bdc2324-data/101/101type_ofs.csv']\n" - ] - } - ], - "source": [ - "# Liste des jeu de données par dossier\n", - "for i in range(1, 15):\n", - " FILE_PATH_S3 = BUCKET + \"/\" + str(i)\n", - " print(FILE_PATH_S3)\n", - " print(fs.ls(FILE_PATH_S3))\n", - "print(BUCKET + \"/101\")\n", - "print(fs.ls(BUCKET + \"/101\"))" - ] - }, { "cell_type": "code", "execution_count": 4, @@ -416,6 +363,440 @@ "source": [ "pd.DataFrame(customers_plus_1.isna().mean()*100)" ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "6f6ce60d-0912-497d-9108-330acccef394", + "metadata": {}, + "outputs": [], + "source": [ + "# Chargement de toutes les données\n", + "liste_base = ['customer_target_mappings', 'customersplus', 'target_types', 'tags', 'events', 'tickets', 'representations', 'purchases', 'products']\n", + "\n", + "for nom_base in liste_base:\n", + " FILE_PATH_S3 = 'bdc2324-data/11/11' + nom_base + '.csv'\n", + " with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " globals()[nom_base] = pd.read_csv(file_in, sep=\",\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "fa8ee17d-5092-40ac-8a0a-3790b016dd4e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlastnamefirstnamebirthdateemailstreet_idcreated_atupdated_atcivilityis_partner...tenant_idid_xcustomer_idpurchase_datetype_ofis_from_subscriptionamountis_full_pricestart_date_timeevent_name
0405082lastname405082NaNNaNNaN62023-01-12 06:30:31.197484+01:002023-01-12 06:30:31.197484+01:00NaNFalse...15569924234050822023-01-11 17:08:41+01:003False13.0False2023-02-06 20:00:00+01:00zaide
1405082lastname405082NaNNaNNaN62023-01-12 06:30:31.197484+01:002023-01-12 06:30:31.197484+01:00NaNFalse...15569924234050822023-01-11 17:08:41+01:003False13.0False2023-02-06 20:00:00+01:00zaide
2411168lastname411168NaNNaNNaN62023-03-17 06:30:35.431967+01:002023-03-17 06:30:35.431967+01:00NaNFalse...155610539344111682023-03-16 16:23:10+01:003False62.0False2023-03-19 16:00:00+01:00luisa miller
3411168lastname411168NaNNaNNaN62023-03-17 06:30:35.431967+01:002023-03-17 06:30:35.431967+01:00NaNFalse...155610539344111682023-03-16 16:23:10+01:003False62.0False2023-03-19 16:00:00+01:00luisa miller
44380lastname4380firstname4380NaNNaN12021-04-22 14:51:55.432952+02:002022-04-14 11:41:33.738500+02:00NaNFalse...1556118914143802020-11-26 13:12:53+01:003False51.3False2020-12-01 20:00:00+01:00iphigenie en tauride
..................................................................
31896419095lastname19095firstname190951979-07-16email1909562021-04-22 15:06:30.120537+02:002023-09-12 18:27:36.904104+02:00NaNFalse...15561090839190952019-05-19 21:18:36+02:001False4.5False2019-05-27 20:00:00+02:00entre femmes
31896519095lastname19095firstname190951979-07-16email1909562021-04-22 15:06:30.120537+02:002023-09-12 18:27:36.904104+02:00NaNFalse...15561090839190952019-05-19 21:18:36+02:001False4.5False2019-05-27 20:00:00+02:00entre femmes
31896619095lastname19095firstname190951979-07-16email1909562021-04-22 15:06:30.120537+02:002023-09-12 18:27:36.904104+02:00NaNFalse...15561090839190952019-05-19 21:18:36+02:001False4.5False2019-05-27 20:00:00+02:00entre femmes
31896719095lastname19095firstname190951979-07-16email1909562021-04-22 15:06:30.120537+02:002023-09-12 18:27:36.904104+02:00NaNFalse...15561244277190952019-12-31 11:04:07+01:001False5.5False2020-02-03 20:00:00+01:00a boire et a manger
31896819095lastname19095firstname190951979-07-16email1909562021-04-22 15:06:30.120537+02:002023-09-12 18:27:36.904104+02:00NaNFalse...15561244277190952019-12-31 11:04:07+01:001False5.5False2020-02-03 20:00:00+01:00a boire et a manger
\n", + "

318969 rows × 52 columns

\n", + "
" + ], + "text/plain": [ + " id lastname firstname birthdate email \\\n", + "0 405082 lastname405082 NaN NaN NaN \n", + "1 405082 lastname405082 NaN NaN NaN \n", + "2 411168 lastname411168 NaN NaN NaN \n", + "3 411168 lastname411168 NaN NaN NaN \n", + "4 4380 lastname4380 firstname4380 NaN NaN \n", + "... ... ... ... ... ... \n", + "318964 19095 lastname19095 firstname19095 1979-07-16 email19095 \n", + "318965 19095 lastname19095 firstname19095 1979-07-16 email19095 \n", + "318966 19095 lastname19095 firstname19095 1979-07-16 email19095 \n", + "318967 19095 lastname19095 firstname19095 1979-07-16 email19095 \n", + "318968 19095 lastname19095 firstname19095 1979-07-16 email19095 \n", + "\n", + " street_id created_at \\\n", + "0 6 2023-01-12 06:30:31.197484+01:00 \n", + "1 6 2023-01-12 06:30:31.197484+01:00 \n", + "2 6 2023-03-17 06:30:35.431967+01:00 \n", + "3 6 2023-03-17 06:30:35.431967+01:00 \n", + "4 1 2021-04-22 14:51:55.432952+02:00 \n", + "... ... ... \n", + "318964 6 2021-04-22 15:06:30.120537+02:00 \n", + "318965 6 2021-04-22 15:06:30.120537+02:00 \n", + "318966 6 2021-04-22 15:06:30.120537+02:00 \n", + "318967 6 2021-04-22 15:06:30.120537+02:00 \n", + "318968 6 2021-04-22 15:06:30.120537+02:00 \n", + "\n", + " updated_at civility is_partner ... \\\n", + "0 2023-01-12 06:30:31.197484+01:00 NaN False ... \n", + "1 2023-01-12 06:30:31.197484+01:00 NaN False ... \n", + "2 2023-03-17 06:30:35.431967+01:00 NaN False ... \n", + "3 2023-03-17 06:30:35.431967+01:00 NaN False ... \n", + "4 2022-04-14 11:41:33.738500+02:00 NaN False ... \n", + "... ... ... ... ... \n", + "318964 2023-09-12 18:27:36.904104+02:00 NaN False ... \n", + "318965 2023-09-12 18:27:36.904104+02:00 NaN False ... \n", + "318966 2023-09-12 18:27:36.904104+02:00 NaN False ... \n", + "318967 2023-09-12 18:27:36.904104+02:00 NaN False ... \n", + "318968 2023-09-12 18:27:36.904104+02:00 NaN False ... \n", + "\n", + " tenant_id id_x customer_id purchase_date type_of \\\n", + "0 1556 992423 405082 2023-01-11 17:08:41+01:00 3 \n", + "1 1556 992423 405082 2023-01-11 17:08:41+01:00 3 \n", + "2 1556 1053934 411168 2023-03-16 16:23:10+01:00 3 \n", + "3 1556 1053934 411168 2023-03-16 16:23:10+01:00 3 \n", + "4 1556 1189141 4380 2020-11-26 13:12:53+01:00 3 \n", + "... ... ... ... ... ... \n", + "318964 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n", + "318965 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n", + "318966 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n", + "318967 1556 1244277 19095 2019-12-31 11:04:07+01:00 1 \n", + "318968 1556 1244277 19095 2019-12-31 11:04:07+01:00 1 \n", + "\n", + " is_from_subscription amount is_full_price start_date_time \\\n", + "0 False 13.0 False 2023-02-06 20:00:00+01:00 \n", + "1 False 13.0 False 2023-02-06 20:00:00+01:00 \n", + "2 False 62.0 False 2023-03-19 16:00:00+01:00 \n", + "3 False 62.0 False 2023-03-19 16:00:00+01:00 \n", + "4 False 51.3 False 2020-12-01 20:00:00+01:00 \n", + "... ... ... ... ... \n", + "318964 False 4.5 False 2019-05-27 20:00:00+02:00 \n", + "318965 False 4.5 False 2019-05-27 20:00:00+02:00 \n", + "318966 False 4.5 False 2019-05-27 20:00:00+02:00 \n", + "318967 False 5.5 False 2020-02-03 20:00:00+01:00 \n", + "318968 False 5.5 False 2020-02-03 20:00:00+01:00 \n", + "\n", + " event_name \n", + "0 zaide \n", + "1 zaide \n", + "2 luisa miller \n", + "3 luisa miller \n", + "4 iphigenie en tauride \n", + "... ... \n", + "318964 entre femmes \n", + "318965 entre femmes \n", + "318966 entre femmes \n", + "318967 a boire et a manger \n", + "318968 a boire et a manger \n", + "\n", + "[318969 rows x 52 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Jointure\n", + "merge_1 = pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')[['id_x', 'customer_id','product_id', 'purchase_date', 'type_of', 'is_from_subscription']]\n", + "merge_2 = pd.merge(products, merge_1, left_on='id', right_on='product_id', how='inner')[['id_x', 'customer_id', 'representation_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price']]\n", + "merge_3 = pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')[['id_x', 'customer_id', 'event_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time']]\n", + "merge_4 = pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')[['id_x', 'customer_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time', 'name']]\n", + "merge_4 = merge_4.rename(columns={'name': 'event_name'})\n", + "df_customer_event = pd.merge(customersplus, merge_4, left_on = 'id', right_on = 'customer_id', how = 'inner')[['id_x', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time', 'event_name']]\n", + "df_customer_event" + ] } ], "metadata": {