diff --git a/Clean-Notebook.ipynb b/Clean-Notebook.ipynb
index be9a507..23550e0 100644
--- a/Clean-Notebook.ipynb
+++ b/Clean-Notebook.ipynb
@@ -51,52 +51,291 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 10,
"id": "699664b9-eee4-4f8d-a207-e524526560c5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "['bdc2324-data/2/2campaign_stats.csv',\n",
- " 'bdc2324-data/2/2campaigns.csv',\n",
- " 'bdc2324-data/2/2categories.csv',\n",
- " 'bdc2324-data/2/2contribution_sites.csv',\n",
- " 'bdc2324-data/2/2contributions.csv',\n",
- " 'bdc2324-data/2/2countries.csv',\n",
- " 'bdc2324-data/2/2currencies.csv',\n",
- " 'bdc2324-data/2/2customer_target_mappings.csv',\n",
- " 'bdc2324-data/2/2customersplus.csv',\n",
- " 'bdc2324-data/2/2event_types.csv',\n",
- " 'bdc2324-data/2/2events.csv',\n",
- " 'bdc2324-data/2/2facilities.csv',\n",
- " 'bdc2324-data/2/2link_stats.csv',\n",
- " 'bdc2324-data/2/2pricing_formulas.csv',\n",
- " 'bdc2324-data/2/2product_packs.csv',\n",
- " 'bdc2324-data/2/2products.csv',\n",
- " 'bdc2324-data/2/2products_groups.csv',\n",
- " 'bdc2324-data/2/2purchases.csv',\n",
- " 'bdc2324-data/2/2representation_category_capacities.csv',\n",
- " 'bdc2324-data/2/2representations.csv',\n",
- " 'bdc2324-data/2/2seasons.csv',\n",
- " 'bdc2324-data/2/2structure_tag_mappings.csv',\n",
- " 'bdc2324-data/2/2suppliers.csv',\n",
- " 'bdc2324-data/2/2tags.csv',\n",
- " 'bdc2324-data/2/2target_types.csv',\n",
- " 'bdc2324-data/2/2targets.csv',\n",
- " 'bdc2324-data/2/2tickets.csv']"
+ "['bdc2324-data/11/11campaign_stats.csv',\n",
+ " 'bdc2324-data/11/11campaigns.csv',\n",
+ " 'bdc2324-data/11/11categories.csv',\n",
+ " 'bdc2324-data/11/11countries.csv',\n",
+ " 'bdc2324-data/11/11currencies.csv',\n",
+ " 'bdc2324-data/11/11customer_target_mappings.csv',\n",
+ " 'bdc2324-data/11/11customersplus.csv',\n",
+ " 'bdc2324-data/11/11event_types.csv',\n",
+ " 'bdc2324-data/11/11events.csv',\n",
+ " 'bdc2324-data/11/11facilities.csv',\n",
+ " 'bdc2324-data/11/11link_stats.csv',\n",
+ " 'bdc2324-data/11/11pricing_formulas.csv',\n",
+ " 'bdc2324-data/11/11product_packs.csv',\n",
+ " 'bdc2324-data/11/11products.csv',\n",
+ " 'bdc2324-data/11/11products_groups.csv',\n",
+ " 'bdc2324-data/11/11purchases.csv',\n",
+ " 'bdc2324-data/11/11representation_category_capacities.csv',\n",
+ " 'bdc2324-data/11/11representations.csv',\n",
+ " 'bdc2324-data/11/11seasons.csv',\n",
+ " 'bdc2324-data/11/11structure_tag_mappings.csv',\n",
+ " 'bdc2324-data/11/11suppliers.csv',\n",
+ " 'bdc2324-data/11/11tags.csv',\n",
+ " 'bdc2324-data/11/11target_types.csv',\n",
+ " 'bdc2324-data/11/11targets.csv',\n",
+ " 'bdc2324-data/11/11tickets.csv']"
]
},
- "execution_count": 3,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "BUCKET = \"bdc2324-data/2\"\n",
+ "BUCKET = \"bdc2324-data/11\"\n",
"fs.ls(BUCKET)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "6d6201cd-a00b-4984-bcd8-72838717ad13",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load every table used below from BUCKET; each one is bound to a notebook-level name equal to its table name\n",
+ "liste_base = ['customer_target_mappings', 'customersplus', 'target_types', 'tags', 'events', 'tickets', 'representations', 'purchases', 'products']\n",
+ "# File names are prefixed with the last path component of the bucket (e.g. bdc2324-data/11/11tickets.csv)\n",
+ "for nom_base in liste_base:\n",
+ "    FILE_PATH_S3 = BUCKET + '/' + BUCKET.rsplit('/', 1)[-1] + nom_base + '.csv'\n",
+ "    with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
+ "        globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "afe548fe-d93c-4634-9f53-881404ec4c6c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id_x | \n",
+ " purchase_date | \n",
+ " type_of | \n",
+ " is_from_subscription | \n",
+ " amount | \n",
+ " is_full_price | \n",
+ " start_date_time | \n",
+ " event_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 992423 | \n",
+ " 2023-01-11 17:08:41+01:00 | \n",
+ " 3 | \n",
+ " False | \n",
+ " 13.0 | \n",
+ " False | \n",
+ " 2023-02-06 20:00:00+01:00 | \n",
+ " zaide | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 992423 | \n",
+ " 2023-01-11 17:08:41+01:00 | \n",
+ " 3 | \n",
+ " False | \n",
+ " 13.0 | \n",
+ " False | \n",
+ " 2023-02-06 20:00:00+01:00 | \n",
+ " zaide | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1053934 | \n",
+ " 2023-03-16 16:23:10+01:00 | \n",
+ " 3 | \n",
+ " False | \n",
+ " 62.0 | \n",
+ " False | \n",
+ " 2023-03-19 16:00:00+01:00 | \n",
+ " luisa miller | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1053934 | \n",
+ " 2023-03-16 16:23:10+01:00 | \n",
+ " 3 | \n",
+ " False | \n",
+ " 62.0 | \n",
+ " False | \n",
+ " 2023-03-19 16:00:00+01:00 | \n",
+ " luisa miller | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1189141 | \n",
+ " 2020-11-26 13:12:53+01:00 | \n",
+ " 3 | \n",
+ " False | \n",
+ " 51.3 | \n",
+ " False | \n",
+ " 2020-12-01 20:00:00+01:00 | \n",
+ " iphigenie en tauride | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 318964 | \n",
+ " 1090839 | \n",
+ " 2019-05-19 21:18:36+02:00 | \n",
+ " 1 | \n",
+ " False | \n",
+ " 4.5 | \n",
+ " False | \n",
+ " 2019-05-27 20:00:00+02:00 | \n",
+ " entre femmes | \n",
+ "
\n",
+ " \n",
+ " | 318965 | \n",
+ " 1090839 | \n",
+ " 2019-05-19 21:18:36+02:00 | \n",
+ " 1 | \n",
+ " False | \n",
+ " 4.5 | \n",
+ " False | \n",
+ " 2019-05-27 20:00:00+02:00 | \n",
+ " entre femmes | \n",
+ "
\n",
+ " \n",
+ " | 318966 | \n",
+ " 1090839 | \n",
+ " 2019-05-19 21:18:36+02:00 | \n",
+ " 1 | \n",
+ " False | \n",
+ " 4.5 | \n",
+ " False | \n",
+ " 2019-05-27 20:00:00+02:00 | \n",
+ " entre femmes | \n",
+ "
\n",
+ " \n",
+ " | 318967 | \n",
+ " 1244277 | \n",
+ " 2019-12-31 11:04:07+01:00 | \n",
+ " 1 | \n",
+ " False | \n",
+ " 5.5 | \n",
+ " False | \n",
+ " 2020-02-03 20:00:00+01:00 | \n",
+ " a boire et a manger | \n",
+ "
\n",
+ " \n",
+ " | 318968 | \n",
+ " 1244277 | \n",
+ " 2019-12-31 11:04:07+01:00 | \n",
+ " 1 | \n",
+ " False | \n",
+ " 5.5 | \n",
+ " False | \n",
+ " 2020-02-03 20:00:00+01:00 | \n",
+ " a boire et a manger | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
318969 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id_x purchase_date type_of is_from_subscription \\\n",
+ "0 992423 2023-01-11 17:08:41+01:00 3 False \n",
+ "1 992423 2023-01-11 17:08:41+01:00 3 False \n",
+ "2 1053934 2023-03-16 16:23:10+01:00 3 False \n",
+ "3 1053934 2023-03-16 16:23:10+01:00 3 False \n",
+ "4 1189141 2020-11-26 13:12:53+01:00 3 False \n",
+ "... ... ... ... ... \n",
+ "318964 1090839 2019-05-19 21:18:36+02:00 1 False \n",
+ "318965 1090839 2019-05-19 21:18:36+02:00 1 False \n",
+ "318966 1090839 2019-05-19 21:18:36+02:00 1 False \n",
+ "318967 1244277 2019-12-31 11:04:07+01:00 1 False \n",
+ "318968 1244277 2019-12-31 11:04:07+01:00 1 False \n",
+ "\n",
+ " amount is_full_price start_date_time event_name \n",
+ "0 13.0 False 2023-02-06 20:00:00+01:00 zaide \n",
+ "1 13.0 False 2023-02-06 20:00:00+01:00 zaide \n",
+ "2 62.0 False 2023-03-19 16:00:00+01:00 luisa miller \n",
+ "3 62.0 False 2023-03-19 16:00:00+01:00 luisa miller \n",
+ "4 51.3 False 2020-12-01 20:00:00+01:00 iphigenie en tauride \n",
+ "... ... ... ... ... \n",
+ "318964 4.5 False 2019-05-27 20:00:00+02:00 entre femmes \n",
+ "318965 4.5 False 2019-05-27 20:00:00+02:00 entre femmes \n",
+ "318966 4.5 False 2019-05-27 20:00:00+02:00 entre femmes \n",
+ "318967 5.5 False 2020-02-03 20:00:00+01:00 a boire et a manger \n",
+ "318968 5.5 False 2020-02-03 20:00:00+01:00 a boire et a manger \n",
+ "\n",
+ "[318969 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Joins, in order: purchases -> tickets -> products -> representations -> events -> customersplus\n",
+ "var_choosed = ['id_x', 'customer_id','product_id', 'purchase_date', 'type_of', 'is_from_subscription']\n",
+ "merge_1 = pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')[var_choosed]\n",
+ "# products adds amount, is_full_price and representation_id (kept only as the key of the next join)\n",
+ "var_choosed.extend(['amount', 'is_full_price', 'representation_id'])\n",
+ "merge_2 = pd.merge(products, merge_1, left_on='id', right_on='product_id', how='inner')[var_choosed]\n",
+ "# representations adds start_date_time and event_id (kept only as the key of the next join)\n",
+ "var_choosed.remove('representation_id')\n",
+ "var_choosed.extend(['start_date_time', 'event_id'])\n",
+ "merge_3 = pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')[var_choosed]\n",
+ "# customer_id is already selected since merge_1: listing it again would duplicate the column and make the final join key ambiguous\n",
+ "var_choosed.remove('event_id')\n",
+ "var_choosed.append('name')\n",
+ "merge_4 = pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')[var_choosed]\n",
+ "\n",
+ "# Rename the events column so its meaning stays clear in the final table\n",
+ "merge_4 = merge_4.rename(columns={'name': 'event_name'})\n",
+ "var_choosed[var_choosed.index('name')] = \"event_name\"\n",
+ "\n",
+ "# Final table: add the customer attributes from customersplus\n",
+ "var_choosed.extend(['age', 'gender', 'country', 'fidelity', 'profession'])\n",
+ "df_customer_event = pd.merge(customersplus, merge_4, left_on = 'id', right_on = 'customer_id', how = 'inner')[var_choosed]\n",
+ "df_customer_event"
+ ]
+ },
{
"cell_type": "markdown",
"id": "779da86b-ac61-4c61-88d2-fa1c0c19efce",
@@ -138,250 +377,11 @@
],
"source": [
"# Client\n",
- "FILE_PATH_S3 = 'bdc2324-data/11/11customer_target_mappings.csv'\n",
- "\n",
- "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
- " customer_target_mappings = pd.read_csv(file_in, sep=\",\")\n",
- "\n",
"print(customer_target_mappings.columns)\n",
"print(customer_target_mappings.shape)\n",
"customer_target_mappings.info()"
]
},
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "d22aa131-5069-43d4-a42e-24f38cc7240d",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['id', 'customer_id', 'target_id', 'created_at', 'updated_at', 'name',\n",
- " 'extra_field'],\n",
- " dtype='object')\n",
- "(124302, 7)\n",
- "\n",
- "RangeIndex: 124302 entries, 0 to 124301\n",
- "Data columns (total 7 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 124302 non-null int64 \n",
- " 1 customer_id 124302 non-null int64 \n",
- " 2 target_id 124302 non-null int64 \n",
- " 3 created_at 124296 non-null object \n",
- " 4 updated_at 124296 non-null object \n",
- " 5 name 0 non-null float64\n",
- " 6 extra_field 0 non-null float64\n",
- "dtypes: float64(2), int64(3), object(2)\n",
- "memory usage: 6.6+ MB\n"
- ]
- }
- ],
- "source": [
- "# Segmentation existante\n",
- "FILE_PATH_S3 = 'bdc2324-data/11/11customer_target_mappings.csv'\n",
- "\n",
- "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
- " customer_target_mappings = pd.read_csv(file_in, sep=\",\")\n",
- "\n",
- "print(customer_target_mappings.columns)\n",
- "print(customer_target_mappings.shape)\n",
- "customer_target_mappings.info()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "id": "967b20e2-5a30-4724-989f-b9e39c7c67e7",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " customer_id | \n",
- " target_id | \n",
- " created_at | \n",
- " updated_at | \n",
- " name | \n",
- " extra_field | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 793889 | \n",
- " 344151 | \n",
- " 101 | \n",
- " 2022-09-29 17:55:41.083666+02:00 | \n",
- " 2022-09-29 17:55:41.083666+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 793890 | \n",
- " 344152 | \n",
- " 101 | \n",
- " 2022-09-29 19:16:07.252114+02:00 | \n",
- " 2022-09-29 19:16:07.252114+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 793891 | \n",
- " 344153 | \n",
- " 101 | \n",
- " 2022-09-29 19:55:10.443450+02:00 | \n",
- " 2022-09-29 19:55:10.443450+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 793892 | \n",
- " 344154 | \n",
- " 101 | \n",
- " 2022-09-29 20:16:08.269407+02:00 | \n",
- " 2022-09-29 20:16:08.269407+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 793893 | \n",
- " 344155 | \n",
- " 101 | \n",
- " 2022-09-29 21:03:40.541998+02:00 | \n",
- " 2022-09-29 21:03:40.541998+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 124297 | \n",
- " 742001 | \n",
- " 329855 | \n",
- " 101 | \n",
- " 2022-07-11 18:17:09.607162+02:00 | \n",
- " 2022-07-11 18:17:09.607162+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 124298 | \n",
- " 742002 | \n",
- " 329856 | \n",
- " 101 | \n",
- " 2022-07-11 18:44:45.636248+02:00 | \n",
- " 2022-07-11 18:44:45.636248+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 124299 | \n",
- " 742000 | \n",
- " 329854 | \n",
- " 101 | \n",
- " 2022-07-11 17:46:48.914507+02:00 | \n",
- " 2022-07-11 17:46:48.914507+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 124300 | \n",
- " 742003 | \n",
- " 329857 | \n",
- " 134 | \n",
- " 2022-07-11 18:44:55.915889+02:00 | \n",
- " 2022-07-11 18:44:55.915889+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 124301 | \n",
- " 741996 | \n",
- " 329850 | \n",
- " 101 | \n",
- " 2022-07-11 16:52:37.227487+02:00 | \n",
- " 2022-07-11 16:52:37.227487+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- "
\n",
- "
124302 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " id customer_id target_id created_at \\\n",
- "0 793889 344151 101 2022-09-29 17:55:41.083666+02:00 \n",
- "1 793890 344152 101 2022-09-29 19:16:07.252114+02:00 \n",
- "2 793891 344153 101 2022-09-29 19:55:10.443450+02:00 \n",
- "3 793892 344154 101 2022-09-29 20:16:08.269407+02:00 \n",
- "4 793893 344155 101 2022-09-29 21:03:40.541998+02:00 \n",
- "... ... ... ... ... \n",
- "124297 742001 329855 101 2022-07-11 18:17:09.607162+02:00 \n",
- "124298 742002 329856 101 2022-07-11 18:44:45.636248+02:00 \n",
- "124299 742000 329854 101 2022-07-11 17:46:48.914507+02:00 \n",
- "124300 742003 329857 134 2022-07-11 18:44:55.915889+02:00 \n",
- "124301 741996 329850 101 2022-07-11 16:52:37.227487+02:00 \n",
- "\n",
- " updated_at name extra_field \n",
- "0 2022-09-29 17:55:41.083666+02:00 NaN NaN \n",
- "1 2022-09-29 19:16:07.252114+02:00 NaN NaN \n",
- "2 2022-09-29 19:55:10.443450+02:00 NaN NaN \n",
- "3 2022-09-29 20:16:08.269407+02:00 NaN NaN \n",
- "4 2022-09-29 21:03:40.541998+02:00 NaN NaN \n",
- "... ... ... ... \n",
- "124297 2022-07-11 18:17:09.607162+02:00 NaN NaN \n",
- "124298 2022-07-11 18:44:45.636248+02:00 NaN NaN \n",
- "124299 2022-07-11 17:46:48.914507+02:00 NaN NaN \n",
- "124300 2022-07-11 18:44:55.915889+02:00 NaN NaN \n",
- "124301 2022-07-11 16:52:37.227487+02:00 NaN NaN \n",
- "\n",
- "[124302 rows x 7 columns]"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "customer_target_mappings"
- ]
- },
{
"cell_type": "code",
"execution_count": 26,
@@ -454,11 +454,6 @@
],
"source": [
"# Segmentation existante\n",
- "FILE_PATH_S3 = 'bdc2324-data/11/11target_types.csv'\n",
- "\n",
- "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
- " target_types = pd.read_csv(file_in, sep=\",\")\n",
- "\n",
"print(target_types.columns)\n",
"print(target_types.shape)\n",
"target_types.info()"
@@ -3468,1838 +3463,6 @@
"source": [
"purchases"
]
- },
- {
- "cell_type": "code",
- "execution_count": 42,
- "id": "281c48da-e1a0-4298-b2e6-81f9fc6461aa",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id_x | \n",
- " purchase_date | \n",
- " customer_id | \n",
- " created_at_x | \n",
- " updated_at_x | \n",
- " number_x | \n",
- " identifier_x | \n",
- " id_y | \n",
- " number_y | \n",
- " created_at_y | \n",
- " updated_at_y | \n",
- " purchase_id | \n",
- " product_id | \n",
- " is_from_subscription | \n",
- " type_of | \n",
- " supplier_id | \n",
- " barcode | \n",
- " identifier_y | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 861763 | \n",
- " 2019-03-01 16:29:17+01:00 | \n",
- " 4966 | \n",
- " 2023-09-12 17:42:37.571646+02:00 | \n",
- " 2023-09-12 17:42:37.571646+02:00 | \n",
- " NaN | \n",
- " e1155cf26b34f792bdb23e49244d7264 | \n",
- " 2119082 | \n",
- " 1433_136_194_68356 | \n",
- " 2023-09-12 17:42:45.409056+02:00 | \n",
- " 2023-09-12 17:42:45.409056+02:00 | \n",
- " 861763 | \n",
- " 209879 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " 838d6101db2fc8bc80536d8b91b49859 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 861764 | \n",
- " 2019-03-01 16:29:19+01:00 | \n",
- " 4966 | \n",
- " 2023-09-12 17:42:37.572063+02:00 | \n",
- " 2023-09-12 17:42:37.572063+02:00 | \n",
- " NaN | \n",
- " e8b95cc6a1a8b103ffa39755ce3bfc4d | \n",
- " 2119081 | \n",
- " 1433_136_212_68356 | \n",
- " 2023-09-12 17:42:45.396336+02:00 | \n",
- " 2023-09-12 17:42:45.396336+02:00 | \n",
- " 861764 | \n",
- " 209879 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " f694c255855ce5643c6fcc7fed5e9237 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 861767 | \n",
- " 2019-03-01 16:33:01+01:00 | \n",
- " 405994 | \n",
- " 2023-09-12 17:42:37.573280+02:00 | \n",
- " 2023-09-12 17:42:37.573280+02:00 | \n",
- " NaN | \n",
- " 6edb259b88fc6f6ae82ede82defaef92 | \n",
- " 2119084 | \n",
- " 33158_158_297_68357 | \n",
- " 2023-09-12 17:42:45.410447+02:00 | \n",
- " 2023-09-12 17:42:45.410447+02:00 | \n",
- " 861767 | \n",
- " 209880 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " b7a3dd0794c0957c942d45b8913e5b96 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 861768 | \n",
- " 2019-03-01 16:33:03+01:00 | \n",
- " 405994 | \n",
- " 2023-09-12 17:42:37.573646+02:00 | \n",
- " 2023-09-12 17:42:37.573646+02:00 | \n",
- " NaN | \n",
- " 5d3fcb50784bada3731a967ddc9fbba8 | \n",
- " 2119085 | \n",
- " 33158_158_318_68357 | \n",
- " 2023-09-12 17:42:45.411059+02:00 | \n",
- " 2023-09-12 17:42:45.411059+02:00 | \n",
- " 861768 | \n",
- " 209880 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " d7ea7e443581ebe520dd13f6cad31af7 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 861769 | \n",
- " 2019-03-01 16:33:06+01:00 | \n",
- " 405994 | \n",
- " 2023-09-12 17:42:37.574034+02:00 | \n",
- " 2023-09-12 17:42:37.574034+02:00 | \n",
- " NaN | \n",
- " 5516d19b2331db9ad0b11f7e70299575 | \n",
- " 2119083 | \n",
- " 33158_158_343_68357 | \n",
- " 2023-09-12 17:42:45.409824+02:00 | \n",
- " 2023-09-12 17:42:45.409824+02:00 | \n",
- " 861769 | \n",
- " 209880 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " 8a8d938d66a4dc57bcb44c2773c6fdfa | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 318964 | \n",
- " 1285206 | \n",
- " 2023-10-19 22:14:55+02:00 | \n",
- " 354233 | \n",
- " 2023-10-21 04:50:44.397308+02:00 | \n",
- " 2023-10-21 04:50:44.397308+02:00 | \n",
- " NaN | \n",
- " 819dd5c8b312ee583335f32f481d782a | \n",
- " 2597564 | \n",
- " 70649_398_403_168652 | \n",
- " 2023-10-21 04:50:44.991960+02:00 | \n",
- " 2023-10-21 04:50:44.991960+02:00 | \n",
- " 1285206 | \n",
- " 270350 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " 56c452c39089f658ed74a06c96b78725 | \n",
- "
\n",
- " \n",
- " | 318965 | \n",
- " 1285209 | \n",
- " 2023-10-19 22:59:26+02:00 | \n",
- " 517001 | \n",
- " 2023-10-21 04:50:44.399870+02:00 | \n",
- " 2023-10-21 04:50:44.399870+02:00 | \n",
- " NaN | \n",
- " ef79fbeb3b80de3529df9c65cb4d4ca2 | \n",
- " 2597565 | \n",
- " 89203_398_1187_168656 | \n",
- " 2023-10-21 04:50:44.993354+02:00 | \n",
- " 2023-10-21 04:50:44.993354+02:00 | \n",
- " 1285209 | \n",
- " 268450 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " 5ef9912e7b533b8a1b2685db538df7d3 | \n",
- "
\n",
- " \n",
- " | 318966 | \n",
- " 1285209 | \n",
- " 2023-10-19 22:59:26+02:00 | \n",
- " 517001 | \n",
- " 2023-10-21 04:50:44.399870+02:00 | \n",
- " 2023-10-21 04:50:44.399870+02:00 | \n",
- " NaN | \n",
- " ef79fbeb3b80de3529df9c65cb4d4ca2 | \n",
- " 2597566 | \n",
- " 89203_398_1232_168655 | \n",
- " 2023-10-21 04:50:44.994301+02:00 | \n",
- " 2023-10-21 04:50:44.994301+02:00 | \n",
- " 1285209 | \n",
- " 272403 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " 9742a56e9ffbdfb0a31a541dc5ccb889 | \n",
- "
\n",
- " \n",
- " | 318967 | \n",
- " 1285209 | \n",
- " 2023-10-19 22:59:26+02:00 | \n",
- " 517001 | \n",
- " 2023-10-21 04:50:44.399870+02:00 | \n",
- " 2023-10-21 04:50:44.399870+02:00 | \n",
- " NaN | \n",
- " ef79fbeb3b80de3529df9c65cb4d4ca2 | \n",
- " 2597567 | \n",
- " 89203_398_1211_168655 | \n",
- " 2023-10-21 04:50:44.995318+02:00 | \n",
- " 2023-10-21 04:50:44.995318+02:00 | \n",
- " 1285209 | \n",
- " 272403 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " 56a9e032281d7a9c004da644818839cc | \n",
- "
\n",
- " \n",
- " | 318968 | \n",
- " 1285966 | \n",
- " 2023-10-21 21:47:20+02:00 | \n",
- " 517309 | \n",
- " 2023-10-23 03:43:16.458811+02:00 | \n",
- " 2023-10-23 03:43:16.458811+02:00 | \n",
- " NaN | \n",
- " 7e825dd352bc6a11ab81cb8068e325e6 | \n",
- " 2598260 | \n",
- " 89257_401_2652_168793 | \n",
- " 2023-10-23 03:43:16.856244+02:00 | \n",
- " 2023-10-23 03:43:16.856244+02:00 | \n",
- " 1285966 | \n",
- " 268428 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " 86d6c0c2720435206078ac4bbf4f74f1 | \n",
- "
\n",
- " \n",
- "
\n",
- "
318969 rows × 18 columns
\n",
- "
"
- ],
- "text/plain": [
- " id_x purchase_date customer_id \\\n",
- "0 861763 2019-03-01 16:29:17+01:00 4966 \n",
- "1 861764 2019-03-01 16:29:19+01:00 4966 \n",
- "2 861767 2019-03-01 16:33:01+01:00 405994 \n",
- "3 861768 2019-03-01 16:33:03+01:00 405994 \n",
- "4 861769 2019-03-01 16:33:06+01:00 405994 \n",
- "... ... ... ... \n",
- "318964 1285206 2023-10-19 22:14:55+02:00 354233 \n",
- "318965 1285209 2023-10-19 22:59:26+02:00 517001 \n",
- "318966 1285209 2023-10-19 22:59:26+02:00 517001 \n",
- "318967 1285209 2023-10-19 22:59:26+02:00 517001 \n",
- "318968 1285966 2023-10-21 21:47:20+02:00 517309 \n",
- "\n",
- " created_at_x updated_at_x \\\n",
- "0 2023-09-12 17:42:37.571646+02:00 2023-09-12 17:42:37.571646+02:00 \n",
- "1 2023-09-12 17:42:37.572063+02:00 2023-09-12 17:42:37.572063+02:00 \n",
- "2 2023-09-12 17:42:37.573280+02:00 2023-09-12 17:42:37.573280+02:00 \n",
- "3 2023-09-12 17:42:37.573646+02:00 2023-09-12 17:42:37.573646+02:00 \n",
- "4 2023-09-12 17:42:37.574034+02:00 2023-09-12 17:42:37.574034+02:00 \n",
- "... ... ... \n",
- "318964 2023-10-21 04:50:44.397308+02:00 2023-10-21 04:50:44.397308+02:00 \n",
- "318965 2023-10-21 04:50:44.399870+02:00 2023-10-21 04:50:44.399870+02:00 \n",
- "318966 2023-10-21 04:50:44.399870+02:00 2023-10-21 04:50:44.399870+02:00 \n",
- "318967 2023-10-21 04:50:44.399870+02:00 2023-10-21 04:50:44.399870+02:00 \n",
- "318968 2023-10-23 03:43:16.458811+02:00 2023-10-23 03:43:16.458811+02:00 \n",
- "\n",
- " number_x identifier_x id_y \\\n",
- "0 NaN e1155cf26b34f792bdb23e49244d7264 2119082 \n",
- "1 NaN e8b95cc6a1a8b103ffa39755ce3bfc4d 2119081 \n",
- "2 NaN 6edb259b88fc6f6ae82ede82defaef92 2119084 \n",
- "3 NaN 5d3fcb50784bada3731a967ddc9fbba8 2119085 \n",
- "4 NaN 5516d19b2331db9ad0b11f7e70299575 2119083 \n",
- "... ... ... ... \n",
- "318964 NaN 819dd5c8b312ee583335f32f481d782a 2597564 \n",
- "318965 NaN ef79fbeb3b80de3529df9c65cb4d4ca2 2597565 \n",
- "318966 NaN ef79fbeb3b80de3529df9c65cb4d4ca2 2597566 \n",
- "318967 NaN ef79fbeb3b80de3529df9c65cb4d4ca2 2597567 \n",
- "318968 NaN 7e825dd352bc6a11ab81cb8068e325e6 2598260 \n",
- "\n",
- " number_y created_at_y \\\n",
- "0 1433_136_194_68356 2023-09-12 17:42:45.409056+02:00 \n",
- "1 1433_136_212_68356 2023-09-12 17:42:45.396336+02:00 \n",
- "2 33158_158_297_68357 2023-09-12 17:42:45.410447+02:00 \n",
- "3 33158_158_318_68357 2023-09-12 17:42:45.411059+02:00 \n",
- "4 33158_158_343_68357 2023-09-12 17:42:45.409824+02:00 \n",
- "... ... ... \n",
- "318964 70649_398_403_168652 2023-10-21 04:50:44.991960+02:00 \n",
- "318965 89203_398_1187_168656 2023-10-21 04:50:44.993354+02:00 \n",
- "318966 89203_398_1232_168655 2023-10-21 04:50:44.994301+02:00 \n",
- "318967 89203_398_1211_168655 2023-10-21 04:50:44.995318+02:00 \n",
- "318968 89257_401_2652_168793 2023-10-23 03:43:16.856244+02:00 \n",
- "\n",
- " updated_at_y purchase_id product_id \\\n",
- "0 2023-09-12 17:42:45.409056+02:00 861763 209879 \n",
- "1 2023-09-12 17:42:45.396336+02:00 861764 209879 \n",
- "2 2023-09-12 17:42:45.410447+02:00 861767 209880 \n",
- "3 2023-09-12 17:42:45.411059+02:00 861768 209880 \n",
- "4 2023-09-12 17:42:45.409824+02:00 861769 209880 \n",
- "... ... ... ... \n",
- "318964 2023-10-21 04:50:44.991960+02:00 1285206 270350 \n",
- "318965 2023-10-21 04:50:44.993354+02:00 1285209 268450 \n",
- "318966 2023-10-21 04:50:44.994301+02:00 1285209 272403 \n",
- "318967 2023-10-21 04:50:44.995318+02:00 1285209 272403 \n",
- "318968 2023-10-23 03:43:16.856244+02:00 1285966 268428 \n",
- "\n",
- " is_from_subscription type_of supplier_id barcode \\\n",
- "0 False 1 1702 NaN \n",
- "1 False 1 1702 NaN \n",
- "2 False 1 1702 NaN \n",
- "3 False 1 1702 NaN \n",
- "4 False 1 1702 NaN \n",
- "... ... ... ... ... \n",
- "318964 False 1 1702 NaN \n",
- "318965 False 1 1702 NaN \n",
- "318966 False 1 1702 NaN \n",
- "318967 False 1 1702 NaN \n",
- "318968 False 1 1702 NaN \n",
- "\n",
- " identifier_y \n",
- "0 838d6101db2fc8bc80536d8b91b49859 \n",
- "1 f694c255855ce5643c6fcc7fed5e9237 \n",
- "2 b7a3dd0794c0957c942d45b8913e5b96 \n",
- "3 d7ea7e443581ebe520dd13f6cad31af7 \n",
- "4 8a8d938d66a4dc57bcb44c2773c6fdfa \n",
- "... ... \n",
- "318964 56c452c39089f658ed74a06c96b78725 \n",
- "318965 5ef9912e7b533b8a1b2685db538df7d3 \n",
- "318966 9742a56e9ffbdfb0a31a541dc5ccb889 \n",
- "318967 56a9e032281d7a9c004da644818839cc \n",
- "318968 86d6c0c2720435206078ac4bbf4f74f1 \n",
- "\n",
- "[318969 rows x 18 columns]"
- ]
- },
- "execution_count": 42,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 43,
- "id": "e8f340b3-7519-47e7-a8bb-c8d1b68ca683",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id_x | \n",
- " customer_id | \n",
- " product_id | \n",
- " purchase_date | \n",
- " type_of | \n",
- " is_from_subscription | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 861763 | \n",
- " 4966 | \n",
- " 209879 | \n",
- " 2019-03-01 16:29:17+01:00 | \n",
- " 1 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 861764 | \n",
- " 4966 | \n",
- " 209879 | \n",
- " 2019-03-01 16:29:19+01:00 | \n",
- " 1 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 861767 | \n",
- " 405994 | \n",
- " 209880 | \n",
- " 2019-03-01 16:33:01+01:00 | \n",
- " 1 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 861768 | \n",
- " 405994 | \n",
- " 209880 | \n",
- " 2019-03-01 16:33:03+01:00 | \n",
- " 1 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 861769 | \n",
- " 405994 | \n",
- " 209880 | \n",
- " 2019-03-01 16:33:06+01:00 | \n",
- " 1 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 318964 | \n",
- " 1285206 | \n",
- " 354233 | \n",
- " 270350 | \n",
- " 2023-10-19 22:14:55+02:00 | \n",
- " 1 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 318965 | \n",
- " 1285209 | \n",
- " 517001 | \n",
- " 268450 | \n",
- " 2023-10-19 22:59:26+02:00 | \n",
- " 1 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 318966 | \n",
- " 1285209 | \n",
- " 517001 | \n",
- " 272403 | \n",
- " 2023-10-19 22:59:26+02:00 | \n",
- " 1 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 318967 | \n",
- " 1285209 | \n",
- " 517001 | \n",
- " 272403 | \n",
- " 2023-10-19 22:59:26+02:00 | \n",
- " 1 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 318968 | \n",
- " 1285966 | \n",
- " 517309 | \n",
- " 268428 | \n",
- " 2023-10-21 21:47:20+02:00 | \n",
- " 1 | \n",
- " False | \n",
- "
\n",
- " \n",
- "
\n",
- "
318969 rows × 6 columns
\n",
- "
"
- ],
- "text/plain": [
- " id_x customer_id product_id purchase_date type_of \\\n",
- "0 861763 4966 209879 2019-03-01 16:29:17+01:00 1 \n",
- "1 861764 4966 209879 2019-03-01 16:29:19+01:00 1 \n",
- "2 861767 405994 209880 2019-03-01 16:33:01+01:00 1 \n",
- "3 861768 405994 209880 2019-03-01 16:33:03+01:00 1 \n",
- "4 861769 405994 209880 2019-03-01 16:33:06+01:00 1 \n",
- "... ... ... ... ... ... \n",
- "318964 1285206 354233 270350 2023-10-19 22:14:55+02:00 1 \n",
- "318965 1285209 517001 268450 2023-10-19 22:59:26+02:00 1 \n",
- "318966 1285209 517001 272403 2023-10-19 22:59:26+02:00 1 \n",
- "318967 1285209 517001 272403 2023-10-19 22:59:26+02:00 1 \n",
- "318968 1285966 517309 268428 2023-10-21 21:47:20+02:00 1 \n",
- "\n",
- " is_from_subscription \n",
- "0 False \n",
- "1 False \n",
- "2 False \n",
- "3 False \n",
- "4 False \n",
- "... ... \n",
- "318964 False \n",
- "318965 False \n",
- "318966 False \n",
- "318967 False \n",
- "318968 False \n",
- "\n",
- "[318969 rows x 6 columns]"
- ]
- },
- "execution_count": 43,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Jonction client et évenement\n",
- "merge_1 = pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')[['id_x', 'customer_id','product_id', 'purchase_date', 'type_of', 'is_from_subscription']]\n",
- "merge_1"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 46,
- "id": "a598b86c-4128-4e5c-ae38-52689f755fd5",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id_x | \n",
- " customer_id | \n",
- " representation_id | \n",
- " purchase_date | \n",
- " type_of | \n",
- " is_from_subscription | \n",
- " amount | \n",
- " is_full_price | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 1249847 | \n",
- " 7634 | \n",
- " 44332 | \n",
- " 2023-09-03 18:43:56+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 18.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 1249847 | \n",
- " 7634 | \n",
- " 44332 | \n",
- " 2023-09-03 18:43:56+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 18.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 1252665 | \n",
- " 426962 | \n",
- " 44332 | \n",
- " 2023-07-06 12:13:08+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 18.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 1252665 | \n",
- " 426962 | \n",
- " 44332 | \n",
- " 2023-07-06 12:13:08+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 18.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 1252671 | \n",
- " 426731 | \n",
- " 44332 | \n",
- " 2023-07-06 13:10:07+02:00 | \n",
- " 3 | \n",
- " False | \n",
- " 18.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 318964 | \n",
- " 1212797 | \n",
- " 11092 | \n",
- " 33810 | \n",
- " 2018-11-28 13:44:22+01:00 | \n",
- " 1 | \n",
- " False | \n",
- " 30.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 318965 | \n",
- " 1213476 | \n",
- " 25851 | \n",
- " 33810 | \n",
- " 2018-12-28 16:53:36+01:00 | \n",
- " 1 | \n",
- " False | \n",
- " 30.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 318966 | \n",
- " 1226039 | \n",
- " 26314 | \n",
- " 33810 | \n",
- " 2018-12-29 16:30:47+01:00 | \n",
- " 1 | \n",
- " False | \n",
- " 30.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 318967 | \n",
- " 1244276 | \n",
- " 3104 | \n",
- " 33810 | \n",
- " 2018-12-31 19:54:09+01:00 | \n",
- " 1 | \n",
- " False | \n",
- " 30.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 318968 | \n",
- " 1244285 | \n",
- " 86 | \n",
- " 33766 | \n",
- " 2019-12-31 13:02:47+01:00 | \n",
- " 3 | \n",
- " False | \n",
- " 21.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- "
\n",
- "
318969 rows × 8 columns
\n",
- "
"
- ],
- "text/plain": [
- " id_x customer_id representation_id purchase_date \\\n",
- "0 1249847 7634 44332 2023-09-03 18:43:56+02:00 \n",
- "1 1249847 7634 44332 2023-09-03 18:43:56+02:00 \n",
- "2 1252665 426962 44332 2023-07-06 12:13:08+02:00 \n",
- "3 1252665 426962 44332 2023-07-06 12:13:08+02:00 \n",
- "4 1252671 426731 44332 2023-07-06 13:10:07+02:00 \n",
- "... ... ... ... ... \n",
- "318964 1212797 11092 33810 2018-11-28 13:44:22+01:00 \n",
- "318965 1213476 25851 33810 2018-12-28 16:53:36+01:00 \n",
- "318966 1226039 26314 33810 2018-12-29 16:30:47+01:00 \n",
- "318967 1244276 3104 33810 2018-12-31 19:54:09+01:00 \n",
- "318968 1244285 86 33766 2019-12-31 13:02:47+01:00 \n",
- "\n",
- " type_of is_from_subscription amount is_full_price \n",
- "0 1 False 18.0 False \n",
- "1 1 False 18.0 False \n",
- "2 1 False 18.0 False \n",
- "3 1 False 18.0 False \n",
- "4 3 False 18.0 False \n",
- "... ... ... ... ... \n",
- "318964 1 False 30.0 False \n",
- "318965 1 False 30.0 False \n",
- "318966 1 False 30.0 False \n",
- "318967 1 False 30.0 False \n",
- "318968 3 False 21.0 False \n",
- "\n",
- "[318969 rows x 8 columns]"
- ]
- },
- "execution_count": 46,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "merge_2 = pd.merge(products, merge_1, left_on='id', right_on='product_id', how='inner')[['id_x', 'customer_id', 'representation_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price']]\n",
- "merge_2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 48,
- "id": "9d394f79-2615-448e-8ebd-074e225f1584",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " serial | \n",
- " event_id | \n",
- " created_at | \n",
- " updated_at | \n",
- " start_date_time | \n",
- " open | \n",
- " satisfaction | \n",
- " end_date_time | \n",
- " name | \n",
- " ... | \n",
- " extra_field | \n",
- " identifier | \n",
- " id_x | \n",
- " customer_id | \n",
- " representation_id | \n",
- " purchase_date | \n",
- " type_of | \n",
- " is_from_subscription | \n",
- " amount | \n",
- " is_full_price | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 44351 | \n",
- " NaN | \n",
- " 20371 | \n",
- " 2023-09-13 03:42:45.245879+02:00 | \n",
- " 2023-09-13 03:42:45.245879+02:00 | \n",
- " 2023-12-21 20:00:00+01:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " ... | \n",
- " NaN | \n",
- " 33520762e8cc28982e3841cbc2be8ce2 | \n",
- " 1293590 | \n",
- " 627 | \n",
- " 44351 | \n",
- " 2023-11-08 12:25:21+01:00 | \n",
- " 0 | \n",
- " False | \n",
- " 22.4 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 44351 | \n",
- " NaN | \n",
- " 20371 | \n",
- " 2023-09-13 03:42:45.245879+02:00 | \n",
- " 2023-09-13 03:42:45.245879+02:00 | \n",
- " 2023-12-21 20:00:00+01:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " ... | \n",
- " NaN | \n",
- " 33520762e8cc28982e3841cbc2be8ce2 | \n",
- " 1293590 | \n",
- " 627 | \n",
- " 44351 | \n",
- " 2023-11-08 12:25:21+01:00 | \n",
- " 0 | \n",
- " False | \n",
- " 22.4 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 44351 | \n",
- " NaN | \n",
- " 20371 | \n",
- " 2023-09-13 03:42:45.245879+02:00 | \n",
- " 2023-09-13 03:42:45.245879+02:00 | \n",
- " 2023-12-21 20:00:00+01:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " ... | \n",
- " NaN | \n",
- " 33520762e8cc28982e3841cbc2be8ce2 | \n",
- " 1293590 | \n",
- " 627 | \n",
- " 44351 | \n",
- " 2023-11-08 12:25:21+01:00 | \n",
- " 0 | \n",
- " False | \n",
- " 22.4 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 44351 | \n",
- " NaN | \n",
- " 20371 | \n",
- " 2023-09-13 03:42:45.245879+02:00 | \n",
- " 2023-09-13 03:42:45.245879+02:00 | \n",
- " 2023-12-21 20:00:00+01:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " ... | \n",
- " NaN | \n",
- " 33520762e8cc28982e3841cbc2be8ce2 | \n",
- " 1293590 | \n",
- " 627 | \n",
- " 44351 | \n",
- " 2023-11-08 12:25:21+01:00 | \n",
- " 0 | \n",
- " False | \n",
- " 22.4 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 44351 | \n",
- " NaN | \n",
- " 20371 | \n",
- " 2023-09-13 03:42:45.245879+02:00 | \n",
- " 2023-09-13 03:42:45.245879+02:00 | \n",
- " 2023-12-21 20:00:00+01:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " ... | \n",
- " NaN | \n",
- " 33520762e8cc28982e3841cbc2be8ce2 | \n",
- " 1293590 | \n",
- " 627 | \n",
- " 44351 | \n",
- " 2023-11-08 12:25:21+01:00 | \n",
- " 0 | \n",
- " False | \n",
- " 22.4 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 318964 | \n",
- " 33639 | \n",
- " NaN | \n",
- " 15533 | \n",
- " 2023-09-12 17:42:25.455708+02:00 | \n",
- " 2023-09-12 17:42:25.455708+02:00 | \n",
- " 2023-04-15 17:30:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " ... | \n",
- " NaN | \n",
- " fae68f1e09710ec8747957af6e22f61d | \n",
- " 1183026 | \n",
- " 15258 | \n",
- " 33639 | \n",
- " 2023-03-26 16:09:31+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 0.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 318965 | \n",
- " 33639 | \n",
- " NaN | \n",
- " 15533 | \n",
- " 2023-09-12 17:42:25.455708+02:00 | \n",
- " 2023-09-12 17:42:25.455708+02:00 | \n",
- " 2023-04-15 17:30:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " ... | \n",
- " NaN | \n",
- " fae68f1e09710ec8747957af6e22f61d | \n",
- " 1183026 | \n",
- " 15258 | \n",
- " 33639 | \n",
- " 2023-03-26 16:09:31+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 0.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 318966 | \n",
- " 33639 | \n",
- " NaN | \n",
- " 15533 | \n",
- " 2023-09-12 17:42:25.455708+02:00 | \n",
- " 2023-09-12 17:42:25.455708+02:00 | \n",
- " 2023-04-15 17:30:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " ... | \n",
- " NaN | \n",
- " fae68f1e09710ec8747957af6e22f61d | \n",
- " 1183026 | \n",
- " 15258 | \n",
- " 33639 | \n",
- " 2023-03-26 16:09:31+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 0.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 318967 | \n",
- " 33639 | \n",
- " NaN | \n",
- " 15533 | \n",
- " 2023-09-12 17:42:25.455708+02:00 | \n",
- " 2023-09-12 17:42:25.455708+02:00 | \n",
- " 2023-04-15 17:30:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " ... | \n",
- " NaN | \n",
- " fae68f1e09710ec8747957af6e22f61d | \n",
- " 1194433 | \n",
- " 412831 | \n",
- " 33639 | \n",
- " 2023-03-27 17:38:59+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 0.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 318968 | \n",
- " 33639 | \n",
- " NaN | \n",
- " 15533 | \n",
- " 2023-09-12 17:42:25.455708+02:00 | \n",
- " 2023-09-12 17:42:25.455708+02:00 | \n",
- " 2023-04-15 17:30:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " ... | \n",
- " NaN | \n",
- " fae68f1e09710ec8747957af6e22f61d | \n",
- " 1194433 | \n",
- " 412831 | \n",
- " 33639 | \n",
- " 2023-03-27 17:38:59+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 0.0 | \n",
- " False | \n",
- "
\n",
- " \n",
- "
\n",
- "
318969 rows × 24 columns
\n",
- "
"
- ],
- "text/plain": [
- " id serial event_id created_at \\\n",
- "0 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n",
- "1 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n",
- "2 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n",
- "3 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n",
- "4 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n",
- "... ... ... ... ... \n",
- "318964 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n",
- "318965 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n",
- "318966 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n",
- "318967 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n",
- "318968 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n",
- "\n",
- " updated_at start_date_time open \\\n",
- "0 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n",
- "1 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n",
- "2 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n",
- "3 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n",
- "4 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n",
- "... ... ... ... \n",
- "318964 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n",
- "318965 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n",
- "318966 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n",
- "318967 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n",
- "318968 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n",
- "\n",
- " satisfaction end_date_time name ... extra_field \\\n",
- "0 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n",
- "1 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n",
- "2 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n",
- "3 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n",
- "4 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n",
- "... ... ... ... ... ... \n",
- "318964 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n",
- "318965 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n",
- "318966 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n",
- "318967 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n",
- "318968 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n",
- "\n",
- " identifier id_x customer_id \\\n",
- "0 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n",
- "1 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n",
- "2 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n",
- "3 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n",
- "4 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n",
- "... ... ... ... \n",
- "318964 fae68f1e09710ec8747957af6e22f61d 1183026 15258 \n",
- "318965 fae68f1e09710ec8747957af6e22f61d 1183026 15258 \n",
- "318966 fae68f1e09710ec8747957af6e22f61d 1183026 15258 \n",
- "318967 fae68f1e09710ec8747957af6e22f61d 1194433 412831 \n",
- "318968 fae68f1e09710ec8747957af6e22f61d 1194433 412831 \n",
- "\n",
- " representation_id purchase_date type_of \\\n",
- "0 44351 2023-11-08 12:25:21+01:00 0 \n",
- "1 44351 2023-11-08 12:25:21+01:00 0 \n",
- "2 44351 2023-11-08 12:25:21+01:00 0 \n",
- "3 44351 2023-11-08 12:25:21+01:00 0 \n",
- "4 44351 2023-11-08 12:25:21+01:00 0 \n",
- "... ... ... ... \n",
- "318964 33639 2023-03-26 16:09:31+02:00 1 \n",
- "318965 33639 2023-03-26 16:09:31+02:00 1 \n",
- "318966 33639 2023-03-26 16:09:31+02:00 1 \n",
- "318967 33639 2023-03-27 17:38:59+02:00 1 \n",
- "318968 33639 2023-03-27 17:38:59+02:00 1 \n",
- "\n",
- " is_from_subscription amount is_full_price \n",
- "0 False 22.4 False \n",
- "1 False 22.4 False \n",
- "2 False 22.4 False \n",
- "3 False 22.4 False \n",
- "4 False 22.4 False \n",
- "... ... ... ... \n",
- "318964 False 0.0 False \n",
- "318965 False 0.0 False \n",
- "318966 False 0.0 False \n",
- "318967 False 0.0 False \n",
- "318968 False 0.0 False \n",
- "\n",
- "[318969 rows x 24 columns]"
- ]
- },
- "execution_count": 48,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 49,
- "id": "63bcbfad-fa20-425a-881f-ca9aa212c419",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id_x | \n",
- " customer_id | \n",
- " event_id | \n",
- " purchase_date | \n",
- " type_of | \n",
- " is_from_subscription | \n",
- " amount | \n",
- " is_full_price | \n",
- " start_date_time | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 1293590 | \n",
- " 627 | \n",
- " 20371 | \n",
- " 2023-11-08 12:25:21+01:00 | \n",
- " 0 | \n",
- " False | \n",
- " 22.4 | \n",
- " False | \n",
- " 2023-12-21 20:00:00+01:00 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 1293590 | \n",
- " 627 | \n",
- " 20371 | \n",
- " 2023-11-08 12:25:21+01:00 | \n",
- " 0 | \n",
- " False | \n",
- " 22.4 | \n",
- " False | \n",
- " 2023-12-21 20:00:00+01:00 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 1293590 | \n",
- " 627 | \n",
- " 20371 | \n",
- " 2023-11-08 12:25:21+01:00 | \n",
- " 0 | \n",
- " False | \n",
- " 22.4 | \n",
- " False | \n",
- " 2023-12-21 20:00:00+01:00 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 1293590 | \n",
- " 627 | \n",
- " 20371 | \n",
- " 2023-11-08 12:25:21+01:00 | \n",
- " 0 | \n",
- " False | \n",
- " 22.4 | \n",
- " False | \n",
- " 2023-12-21 20:00:00+01:00 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 1293590 | \n",
- " 627 | \n",
- " 20371 | \n",
- " 2023-11-08 12:25:21+01:00 | \n",
- " 0 | \n",
- " False | \n",
- " 22.4 | \n",
- " False | \n",
- " 2023-12-21 20:00:00+01:00 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 318964 | \n",
- " 1183026 | \n",
- " 15258 | \n",
- " 15533 | \n",
- " 2023-03-26 16:09:31+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 0.0 | \n",
- " False | \n",
- " 2023-04-15 17:30:00+02:00 | \n",
- "
\n",
- " \n",
- " | 318965 | \n",
- " 1183026 | \n",
- " 15258 | \n",
- " 15533 | \n",
- " 2023-03-26 16:09:31+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 0.0 | \n",
- " False | \n",
- " 2023-04-15 17:30:00+02:00 | \n",
- "
\n",
- " \n",
- " | 318966 | \n",
- " 1183026 | \n",
- " 15258 | \n",
- " 15533 | \n",
- " 2023-03-26 16:09:31+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 0.0 | \n",
- " False | \n",
- " 2023-04-15 17:30:00+02:00 | \n",
- "
\n",
- " \n",
- " | 318967 | \n",
- " 1194433 | \n",
- " 412831 | \n",
- " 15533 | \n",
- " 2023-03-27 17:38:59+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 0.0 | \n",
- " False | \n",
- " 2023-04-15 17:30:00+02:00 | \n",
- "
\n",
- " \n",
- " | 318968 | \n",
- " 1194433 | \n",
- " 412831 | \n",
- " 15533 | \n",
- " 2023-03-27 17:38:59+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 0.0 | \n",
- " False | \n",
- " 2023-04-15 17:30:00+02:00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
318969 rows × 9 columns
\n",
- "
"
- ],
- "text/plain": [
- " id_x customer_id event_id purchase_date type_of \\\n",
- "0 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n",
- "1 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n",
- "2 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n",
- "3 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n",
- "4 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n",
- "... ... ... ... ... ... \n",
- "318964 1183026 15258 15533 2023-03-26 16:09:31+02:00 1 \n",
- "318965 1183026 15258 15533 2023-03-26 16:09:31+02:00 1 \n",
- "318966 1183026 15258 15533 2023-03-26 16:09:31+02:00 1 \n",
- "318967 1194433 412831 15533 2023-03-27 17:38:59+02:00 1 \n",
- "318968 1194433 412831 15533 2023-03-27 17:38:59+02:00 1 \n",
- "\n",
- " is_from_subscription amount is_full_price start_date_time \n",
- "0 False 22.4 False 2023-12-21 20:00:00+01:00 \n",
- "1 False 22.4 False 2023-12-21 20:00:00+01:00 \n",
- "2 False 22.4 False 2023-12-21 20:00:00+01:00 \n",
- "3 False 22.4 False 2023-12-21 20:00:00+01:00 \n",
- "4 False 22.4 False 2023-12-21 20:00:00+01:00 \n",
- "... ... ... ... ... \n",
- "318964 False 0.0 False 2023-04-15 17:30:00+02:00 \n",
- "318965 False 0.0 False 2023-04-15 17:30:00+02:00 \n",
- "318966 False 0.0 False 2023-04-15 17:30:00+02:00 \n",
- "318967 False 0.0 False 2023-04-15 17:30:00+02:00 \n",
- "318968 False 0.0 False 2023-04-15 17:30:00+02:00 \n",
- "\n",
- "[318969 rows x 9 columns]"
- ]
- },
- "execution_count": 49,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "merge_3 = pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')[['id_x', 'customer_id', 'event_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time']]\n",
- "merge_3"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 51,
- "id": "db52559b-6562-439b-b16e-f5d8dc9bc891",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " created_at | \n",
- " updated_at | \n",
- " season_id | \n",
- " facility_id | \n",
- " name | \n",
- " event_type_id | \n",
- " manual_added | \n",
- " is_display | \n",
- " event_type_key_id | \n",
- " ... | \n",
- " identifier | \n",
- " id_x | \n",
- " customer_id | \n",
- " event_id | \n",
- " purchase_date | \n",
- " type_of | \n",
- " is_from_subscription | \n",
- " amount | \n",
- " is_full_price | \n",
- " start_date_time | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 20367 | \n",
- " 2023-09-13 03:42:45.214293+02:00 | \n",
- " 2023-09-13 03:54:30.086969+02:00 | \n",
- " 1865 | \n",
- " 1054 | \n",
- " marelle | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " ... | \n",
- " 26d1e9a4acad18b9cf79244334c86c93 | \n",
- " 1253614 | \n",
- " 432123 | \n",
- " 20367 | \n",
- " 2023-09-07 18:02:58+02:00 | \n",
- " 3 | \n",
- " False | \n",
- " 2.0 | \n",
- " False | \n",
- " 2023-11-29 14:30:00+01:00 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 20367 | \n",
- " 2023-09-13 03:42:45.214293+02:00 | \n",
- " 2023-09-13 03:54:30.086969+02:00 | \n",
- " 1865 | \n",
- " 1054 | \n",
- " marelle | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " ... | \n",
- " 26d1e9a4acad18b9cf79244334c86c93 | \n",
- " 1253614 | \n",
- " 432123 | \n",
- " 20367 | \n",
- " 2023-09-07 18:02:58+02:00 | \n",
- " 3 | \n",
- " False | \n",
- " 2.0 | \n",
- " False | \n",
- " 2023-11-29 14:30:00+01:00 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 20367 | \n",
- " 2023-09-13 03:42:45.214293+02:00 | \n",
- " 2023-09-13 03:54:30.086969+02:00 | \n",
- " 1865 | \n",
- " 1054 | \n",
- " marelle | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " ... | \n",
- " 26d1e9a4acad18b9cf79244334c86c93 | \n",
- " 1252930 | \n",
- " 431824 | \n",
- " 20367 | \n",
- " 2023-09-06 16:06:40+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 5.0 | \n",
- " False | \n",
- " 2023-11-29 14:30:00+01:00 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 20367 | \n",
- " 2023-09-13 03:42:45.214293+02:00 | \n",
- " 2023-09-13 03:54:30.086969+02:00 | \n",
- " 1865 | \n",
- " 1054 | \n",
- " marelle | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " ... | \n",
- " 26d1e9a4acad18b9cf79244334c86c93 | \n",
- " 1252931 | \n",
- " 431824 | \n",
- " 20367 | \n",
- " 2023-09-06 16:06:42+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 5.0 | \n",
- " False | \n",
- " 2023-11-29 14:30:00+01:00 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 20367 | \n",
- " 2023-09-13 03:42:45.214293+02:00 | \n",
- " 2023-09-13 03:54:30.086969+02:00 | \n",
- " 1865 | \n",
- " 1054 | \n",
- " marelle | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " ... | \n",
- " 26d1e9a4acad18b9cf79244334c86c93 | \n",
- " 1252932 | \n",
- " 431824 | \n",
- " 20367 | \n",
- " 2023-09-06 16:06:44+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 5.0 | \n",
- " False | \n",
- " 2023-11-29 14:30:00+01:00 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 318964 | \n",
- " 15439 | \n",
- " 2023-09-12 17:42:25.252747+02:00 | \n",
- " 2023-09-12 19:00:00.735990+02:00 | \n",
- " 1708 | \n",
- " 1054 | \n",
- " florilege | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " ... | \n",
- " 4f015946bcbd856aa573cadb7ac42b9f | \n",
- " 1206691 | \n",
- " 358863 | \n",
- " 15439 | \n",
- " 2023-03-28 17:53:40+02:00 | \n",
- " 3 | \n",
- " False | \n",
- " 4.0 | \n",
- " False | \n",
- " 2023-03-29 20:00:00+02:00 | \n",
- "
\n",
- " \n",
- " | 318965 | \n",
- " 15439 | \n",
- " 2023-09-12 17:42:25.252747+02:00 | \n",
- " 2023-09-12 19:00:00.735990+02:00 | \n",
- " 1708 | \n",
- " 1054 | \n",
- " florilege | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " ... | \n",
- " 4f015946bcbd856aa573cadb7ac42b9f | \n",
- " 1218071 | \n",
- " 413015 | \n",
- " 15439 | \n",
- " 2023-03-29 17:01:01+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 4.0 | \n",
- " False | \n",
- " 2023-03-29 20:00:00+02:00 | \n",
- "
\n",
- " \n",
- " | 318966 | \n",
- " 15439 | \n",
- " 2023-09-12 17:42:25.252747+02:00 | \n",
- " 2023-09-12 19:00:00.735990+02:00 | \n",
- " 1708 | \n",
- " 1054 | \n",
- " florilege | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " ... | \n",
- " 4f015946bcbd856aa573cadb7ac42b9f | \n",
- " 1218125 | \n",
- " 344045 | \n",
- " 15439 | \n",
- " 2023-03-29 18:20:05+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 4.0 | \n",
- " False | \n",
- " 2023-03-29 20:00:00+02:00 | \n",
- "
\n",
- " \n",
- " | 318967 | \n",
- " 15439 | \n",
- " 2023-09-12 17:42:25.252747+02:00 | \n",
- " 2023-09-12 19:00:00.735990+02:00 | \n",
- " 1708 | \n",
- " 1054 | \n",
- " florilege | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " ... | \n",
- " 4f015946bcbd856aa573cadb7ac42b9f | \n",
- " 1218185 | \n",
- " 381006 | \n",
- " 15439 | \n",
- " 2023-03-29 19:50:18+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 4.0 | \n",
- " False | \n",
- " 2023-03-29 20:00:00+02:00 | \n",
- "
\n",
- " \n",
- " | 318968 | \n",
- " 15439 | \n",
- " 2023-09-12 17:42:25.252747+02:00 | \n",
- " 2023-09-12 19:00:00.735990+02:00 | \n",
- " 1708 | \n",
- " 1054 | \n",
- " florilege | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " ... | \n",
- " 4f015946bcbd856aa573cadb7ac42b9f | \n",
- " 1239074 | \n",
- " 4512 | \n",
- " 15439 | \n",
- " 2023-01-31 16:14:27+01:00 | \n",
- " 1 | \n",
- " False | \n",
- " 4.0 | \n",
- " False | \n",
- " 2023-03-29 20:00:00+02:00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
318969 rows × 21 columns
\n",
- "
"
- ],
- "text/plain": [
- " id created_at \\\n",
- "0 20367 2023-09-13 03:42:45.214293+02:00 \n",
- "1 20367 2023-09-13 03:42:45.214293+02:00 \n",
- "2 20367 2023-09-13 03:42:45.214293+02:00 \n",
- "3 20367 2023-09-13 03:42:45.214293+02:00 \n",
- "4 20367 2023-09-13 03:42:45.214293+02:00 \n",
- "... ... ... \n",
- "318964 15439 2023-09-12 17:42:25.252747+02:00 \n",
- "318965 15439 2023-09-12 17:42:25.252747+02:00 \n",
- "318966 15439 2023-09-12 17:42:25.252747+02:00 \n",
- "318967 15439 2023-09-12 17:42:25.252747+02:00 \n",
- "318968 15439 2023-09-12 17:42:25.252747+02:00 \n",
- "\n",
- " updated_at season_id facility_id name \\\n",
- "0 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n",
- "1 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n",
- "2 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n",
- "3 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n",
- "4 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n",
- "... ... ... ... ... \n",
- "318964 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n",
- "318965 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n",
- "318966 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n",
- "318967 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n",
- "318968 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n",
- "\n",
- " event_type_id manual_added is_display event_type_key_id ... \\\n",
- "0 1055 False True 1055 ... \n",
- "1 1055 False True 1055 ... \n",
- "2 1055 False True 1055 ... \n",
- "3 1055 False True 1055 ... \n",
- "4 1055 False True 1055 ... \n",
- "... ... ... ... ... ... \n",
- "318964 1055 False True 1055 ... \n",
- "318965 1055 False True 1055 ... \n",
- "318966 1055 False True 1055 ... \n",
- "318967 1055 False True 1055 ... \n",
- "318968 1055 False True 1055 ... \n",
- "\n",
- " identifier id_x customer_id event_id \\\n",
- "0 26d1e9a4acad18b9cf79244334c86c93 1253614 432123 20367 \n",
- "1 26d1e9a4acad18b9cf79244334c86c93 1253614 432123 20367 \n",
- "2 26d1e9a4acad18b9cf79244334c86c93 1252930 431824 20367 \n",
- "3 26d1e9a4acad18b9cf79244334c86c93 1252931 431824 20367 \n",
- "4 26d1e9a4acad18b9cf79244334c86c93 1252932 431824 20367 \n",
- "... ... ... ... ... \n",
- "318964 4f015946bcbd856aa573cadb7ac42b9f 1206691 358863 15439 \n",
- "318965 4f015946bcbd856aa573cadb7ac42b9f 1218071 413015 15439 \n",
- "318966 4f015946bcbd856aa573cadb7ac42b9f 1218125 344045 15439 \n",
- "318967 4f015946bcbd856aa573cadb7ac42b9f 1218185 381006 15439 \n",
- "318968 4f015946bcbd856aa573cadb7ac42b9f 1239074 4512 15439 \n",
- "\n",
- " purchase_date type_of is_from_subscription amount \\\n",
- "0 2023-09-07 18:02:58+02:00 3 False 2.0 \n",
- "1 2023-09-07 18:02:58+02:00 3 False 2.0 \n",
- "2 2023-09-06 16:06:40+02:00 1 False 5.0 \n",
- "3 2023-09-06 16:06:42+02:00 1 False 5.0 \n",
- "4 2023-09-06 16:06:44+02:00 1 False 5.0 \n",
- "... ... ... ... ... \n",
- "318964 2023-03-28 17:53:40+02:00 3 False 4.0 \n",
- "318965 2023-03-29 17:01:01+02:00 1 False 4.0 \n",
- "318966 2023-03-29 18:20:05+02:00 1 False 4.0 \n",
- "318967 2023-03-29 19:50:18+02:00 1 False 4.0 \n",
- "318968 2023-01-31 16:14:27+01:00 1 False 4.0 \n",
- "\n",
- " is_full_price start_date_time \n",
- "0 False 2023-11-29 14:30:00+01:00 \n",
- "1 False 2023-11-29 14:30:00+01:00 \n",
- "2 False 2023-11-29 14:30:00+01:00 \n",
- "3 False 2023-11-29 14:30:00+01:00 \n",
- "4 False 2023-11-29 14:30:00+01:00 \n",
- "... ... ... \n",
- "318964 False 2023-03-29 20:00:00+02:00 \n",
- "318965 False 2023-03-29 20:00:00+02:00 \n",
- "318966 False 2023-03-29 20:00:00+02:00 \n",
- "318967 False 2023-03-29 20:00:00+02:00 \n",
- "318968 False 2023-03-29 20:00:00+02:00 \n",
- "\n",
- "[318969 rows x 21 columns]"
- ]
- },
- "execution_count": 51,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 54,
- "id": "d8ab2477-c199-4815-88d9-c5683e466772",
- "metadata": {},
- "outputs": [],
- "source": [
- "merge_4 = pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')[['id_x', 'customer_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time', 'name']]\n",
- "merge_4 = merge_4.rename(columns={'name': 'event_name'})"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c0917b77-6a73-4ae3-a58a-0bb7964f1406",
- "metadata": {},
- "outputs": [],
- "source": [
- "merge_5 = pd.merge(customersplus, merge_4, left_on = 'id', right_on = 'customer_id', how = "
- ]
}
],
"metadata": {
diff --git a/Notebook_AJ.ipynb b/Notebook_AJ.ipynb
index c59dff1..19272b5 100644
--- a/Notebook_AJ.ipynb
+++ b/Notebook_AJ.ipynb
@@ -69,59 +69,6 @@
"fs.ls(BUCKET)"
]
},
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "d60f6b27-00b4-4655-9325-79169d1e68df",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "bdc2324-data/1\n",
- "['bdc2324-data/1/1campaign_stats.csv', 'bdc2324-data/1/1campaigns.csv', 'bdc2324-data/1/1categories.csv', 'bdc2324-data/1/1countries.csv', 'bdc2324-data/1/1currencies.csv', 'bdc2324-data/1/1customer_target_mappings.csv', 'bdc2324-data/1/1customersplus.csv', 'bdc2324-data/1/1event_types.csv', 'bdc2324-data/1/1events.csv', 'bdc2324-data/1/1facilities.csv', 'bdc2324-data/1/1link_stats.csv', 'bdc2324-data/1/1pricing_formulas.csv', 'bdc2324-data/1/1product_packs.csv', 'bdc2324-data/1/1products.csv', 'bdc2324-data/1/1products_groups.csv', 'bdc2324-data/1/1purchases.csv', 'bdc2324-data/1/1representation_category_capacities.csv', 'bdc2324-data/1/1representations.csv', 'bdc2324-data/1/1seasons.csv', 'bdc2324-data/1/1structure_tag_mappings.csv', 'bdc2324-data/1/1suppliers.csv', 'bdc2324-data/1/1tags.csv', 'bdc2324-data/1/1target_types.csv', 'bdc2324-data/1/1targets.csv', 'bdc2324-data/1/1tickets.csv', 'bdc2324-data/1/1type_of_categories.csv', 'bdc2324-data/1/1type_of_pricing_formulas.csv', 'bdc2324-data/1/1type_ofs.csv']\n",
- "bdc2324-data/2\n",
- "['bdc2324-data/2/2campaign_stats.csv', 'bdc2324-data/2/2campaigns.csv', 'bdc2324-data/2/2categories.csv', 'bdc2324-data/2/2contribution_sites.csv', 'bdc2324-data/2/2contributions.csv', 'bdc2324-data/2/2countries.csv', 'bdc2324-data/2/2currencies.csv', 'bdc2324-data/2/2customer_target_mappings.csv', 'bdc2324-data/2/2customersplus.csv', 'bdc2324-data/2/2event_types.csv', 'bdc2324-data/2/2events.csv', 'bdc2324-data/2/2facilities.csv', 'bdc2324-data/2/2link_stats.csv', 'bdc2324-data/2/2pricing_formulas.csv', 'bdc2324-data/2/2product_packs.csv', 'bdc2324-data/2/2products.csv', 'bdc2324-data/2/2products_groups.csv', 'bdc2324-data/2/2purchases.csv', 'bdc2324-data/2/2representation_category_capacities.csv', 'bdc2324-data/2/2representations.csv', 'bdc2324-data/2/2seasons.csv', 'bdc2324-data/2/2structure_tag_mappings.csv', 'bdc2324-data/2/2suppliers.csv', 'bdc2324-data/2/2tags.csv', 'bdc2324-data/2/2target_types.csv', 'bdc2324-data/2/2targets.csv', 'bdc2324-data/2/2tickets.csv']\n",
- "bdc2324-data/3\n",
- "['bdc2324-data/3/3campaign_stats.csv', 'bdc2324-data/3/3campaigns.csv', 'bdc2324-data/3/3categories.csv', 'bdc2324-data/3/3consumptions.csv', 'bdc2324-data/3/3contribution_sites.csv', 'bdc2324-data/3/3contributions.csv', 'bdc2324-data/3/3countries.csv', 'bdc2324-data/3/3currencies.csv', 'bdc2324-data/3/3customer_target_mappings.csv', 'bdc2324-data/3/3customersplus.csv', 'bdc2324-data/3/3event_types.csv', 'bdc2324-data/3/3events.csv', 'bdc2324-data/3/3facilities.csv', 'bdc2324-data/3/3link_stats.csv', 'bdc2324-data/3/3pricing_formulas.csv', 'bdc2324-data/3/3product_packs.csv', 'bdc2324-data/3/3products.csv', 'bdc2324-data/3/3products_groups.csv', 'bdc2324-data/3/3purchases.csv', 'bdc2324-data/3/3representation_category_capacities.csv', 'bdc2324-data/3/3representations.csv', 'bdc2324-data/3/3seasons.csv', 'bdc2324-data/3/3structure_tag_mappings.csv', 'bdc2324-data/3/3suppliers.csv', 'bdc2324-data/3/3tags.csv', 'bdc2324-data/3/3target_types.csv', 'bdc2324-data/3/3targets.csv', 'bdc2324-data/3/3tickets.csv']\n",
- "bdc2324-data/4\n",
- "['bdc2324-data/4/4campaign_stats.csv', 'bdc2324-data/4/4campaigns.csv', 'bdc2324-data/4/4categories.csv', 'bdc2324-data/4/4contribution_sites.csv', 'bdc2324-data/4/4contributions.csv', 'bdc2324-data/4/4countries.csv', 'bdc2324-data/4/4currencies.csv', 'bdc2324-data/4/4customer_target_mappings.csv', 'bdc2324-data/4/4customersplus.csv', 'bdc2324-data/4/4event_types.csv', 'bdc2324-data/4/4events.csv', 'bdc2324-data/4/4facilities.csv', 'bdc2324-data/4/4link_stats.csv', 'bdc2324-data/4/4pricing_formulas.csv', 'bdc2324-data/4/4product_packs.csv', 'bdc2324-data/4/4products.csv', 'bdc2324-data/4/4products_groups.csv', 'bdc2324-data/4/4purchases.csv', 'bdc2324-data/4/4representation_category_capacities.csv', 'bdc2324-data/4/4representations.csv', 'bdc2324-data/4/4seasons.csv', 'bdc2324-data/4/4structure_tag_mappings.csv', 'bdc2324-data/4/4suppliers.csv', 'bdc2324-data/4/4tags.csv', 'bdc2324-data/4/4target_types.csv', 'bdc2324-data/4/4targets.csv', 'bdc2324-data/4/4tickets.csv', 'bdc2324-data/4/4type_of_pricing_formulas.csv', 'bdc2324-data/4/4type_ofs.csv']\n",
- "bdc2324-data/5\n",
- "['bdc2324-data/5/5campaign_stats.csv', 'bdc2324-data/5/5campaigns.csv', 'bdc2324-data/5/5categories.csv', 'bdc2324-data/5/5consumptions.csv', 'bdc2324-data/5/5countries.csv', 'bdc2324-data/5/5currencies.csv', 'bdc2324-data/5/5customer_target_mappings.csv', 'bdc2324-data/5/5customersplus.csv', 'bdc2324-data/5/5event_types.csv', 'bdc2324-data/5/5events.csv', 'bdc2324-data/5/5facilities.csv', 'bdc2324-data/5/5link_stats.csv', 'bdc2324-data/5/5pricing_formulas.csv', 'bdc2324-data/5/5product_packs.csv', 'bdc2324-data/5/5products.csv', 'bdc2324-data/5/5products_groups.csv', 'bdc2324-data/5/5purchases.csv', 'bdc2324-data/5/5representation_category_capacities.csv', 'bdc2324-data/5/5representations.csv', 'bdc2324-data/5/5seasons.csv', 'bdc2324-data/5/5suppliers.csv', 'bdc2324-data/5/5target_types.csv', 'bdc2324-data/5/5targets.csv', 'bdc2324-data/5/5tickets.csv']\n",
- "bdc2324-data/6\n",
- "['bdc2324-data/6/6campaign_stats.csv', 'bdc2324-data/6/6campaigns.csv', 'bdc2324-data/6/6categories.csv', 'bdc2324-data/6/6consumptions.csv', 'bdc2324-data/6/6countries.csv', 'bdc2324-data/6/6currencies.csv', 'bdc2324-data/6/6customer_target_mappings.csv', 'bdc2324-data/6/6customersplus.csv', 'bdc2324-data/6/6event_types.csv', 'bdc2324-data/6/6events.csv', 'bdc2324-data/6/6facilities.csv', 'bdc2324-data/6/6link_stats.csv', 'bdc2324-data/6/6pricing_formulas.csv', 'bdc2324-data/6/6product_packs.csv', 'bdc2324-data/6/6products.csv', 'bdc2324-data/6/6products_groups.csv', 'bdc2324-data/6/6purchases.csv', 'bdc2324-data/6/6representation_category_capacities.csv', 'bdc2324-data/6/6representations.csv', 'bdc2324-data/6/6seasons.csv', 'bdc2324-data/6/6structure_tag_mappings.csv', 'bdc2324-data/6/6suppliers.csv', 'bdc2324-data/6/6tags.csv', 'bdc2324-data/6/6target_types.csv', 'bdc2324-data/6/6targets.csv', 'bdc2324-data/6/6tickets.csv', 'bdc2324-data/6/6type_of_pricing_formulas.csv', 'bdc2324-data/6/6type_ofs.csv']\n",
- "bdc2324-data/7\n",
- "['bdc2324-data/7/7campaign_stats.csv', 'bdc2324-data/7/7campaigns.csv', 'bdc2324-data/7/7categories.csv', 'bdc2324-data/7/7consumptions.csv', 'bdc2324-data/7/7countries.csv', 'bdc2324-data/7/7currencies.csv', 'bdc2324-data/7/7customer_target_mappings.csv', 'bdc2324-data/7/7customersplus.csv', 'bdc2324-data/7/7event_types.csv', 'bdc2324-data/7/7events.csv', 'bdc2324-data/7/7facilities.csv', 'bdc2324-data/7/7link_stats.csv', 'bdc2324-data/7/7pricing_formulas.csv', 'bdc2324-data/7/7product_packs.csv', 'bdc2324-data/7/7products.csv', 'bdc2324-data/7/7products_groups.csv', 'bdc2324-data/7/7purchases.csv', 'bdc2324-data/7/7representation_category_capacities.csv', 'bdc2324-data/7/7representation_types.csv', 'bdc2324-data/7/7representations.csv', 'bdc2324-data/7/7seasons.csv', 'bdc2324-data/7/7structure_tag_mappings.csv', 'bdc2324-data/7/7suppliers.csv', 'bdc2324-data/7/7tags.csv', 'bdc2324-data/7/7target_types.csv', 'bdc2324-data/7/7targets.csv', 'bdc2324-data/7/7tickets.csv', 'bdc2324-data/7/7type_of_categories.csv', 'bdc2324-data/7/7type_of_pricing_formulas.csv', 'bdc2324-data/7/7type_ofs.csv']\n",
- "bdc2324-data/8\n",
- "['bdc2324-data/8/8campaign_stats.csv', 'bdc2324-data/8/8campaigns.csv', 'bdc2324-data/8/8categories.csv', 'bdc2324-data/8/8countries.csv', 'bdc2324-data/8/8currencies.csv', 'bdc2324-data/8/8customer_target_mappings.csv', 'bdc2324-data/8/8customersplus.csv', 'bdc2324-data/8/8event_types.csv', 'bdc2324-data/8/8events.csv', 'bdc2324-data/8/8facilities.csv', 'bdc2324-data/8/8link_stats.csv', 'bdc2324-data/8/8pricing_formulas.csv', 'bdc2324-data/8/8product_packs.csv', 'bdc2324-data/8/8products.csv', 'bdc2324-data/8/8products_groups.csv', 'bdc2324-data/8/8purchases.csv', 'bdc2324-data/8/8representation_category_capacities.csv', 'bdc2324-data/8/8representations.csv', 'bdc2324-data/8/8seasons.csv', 'bdc2324-data/8/8suppliers.csv', 'bdc2324-data/8/8target_types.csv', 'bdc2324-data/8/8targets.csv', 'bdc2324-data/8/8tickets.csv', 'bdc2324-data/8/8type_of_categories.csv', 'bdc2324-data/8/8type_of_pricing_formulas.csv', 'bdc2324-data/8/8type_ofs.csv']\n",
- "bdc2324-data/9\n",
- "['bdc2324-data/9/9campaign_stats.csv', 'bdc2324-data/9/9campaigns.csv', 'bdc2324-data/9/9categories.csv', 'bdc2324-data/9/9countries.csv', 'bdc2324-data/9/9currencies.csv', 'bdc2324-data/9/9customer_target_mappings.csv', 'bdc2324-data/9/9customersplus.csv', 'bdc2324-data/9/9event_types.csv', 'bdc2324-data/9/9events.csv', 'bdc2324-data/9/9facilities.csv', 'bdc2324-data/9/9link_stats.csv', 'bdc2324-data/9/9pricing_formulas.csv', 'bdc2324-data/9/9product_packs.csv', 'bdc2324-data/9/9products.csv', 'bdc2324-data/9/9products_groups.csv', 'bdc2324-data/9/9purchases.csv', 'bdc2324-data/9/9representation_category_capacities.csv', 'bdc2324-data/9/9representations.csv', 'bdc2324-data/9/9seasons.csv', 'bdc2324-data/9/9suppliers.csv', 'bdc2324-data/9/9target_types.csv', 'bdc2324-data/9/9targets.csv', 'bdc2324-data/9/9tickets.csv']\n",
- "bdc2324-data/10\n",
- "['bdc2324-data/10/10campaign_stats.csv', 'bdc2324-data/10/10campaigns.csv', 'bdc2324-data/10/10categories.csv', 'bdc2324-data/10/10countries.csv', 'bdc2324-data/10/10currencies.csv', 'bdc2324-data/10/10customer_target_mappings.csv', 'bdc2324-data/10/10customersplus.csv', 'bdc2324-data/10/10event_types.csv', 'bdc2324-data/10/10events.csv', 'bdc2324-data/10/10facilities.csv', 'bdc2324-data/10/10link_stats.csv', 'bdc2324-data/10/10pricing_formulas.csv', 'bdc2324-data/10/10product_packs.csv', 'bdc2324-data/10/10products.csv', 'bdc2324-data/10/10products_groups.csv', 'bdc2324-data/10/10purchases.csv', 'bdc2324-data/10/10representation_category_capacities.csv', 'bdc2324-data/10/10representation_types.csv', 'bdc2324-data/10/10representations.csv', 'bdc2324-data/10/10seasons.csv', 'bdc2324-data/10/10suppliers.csv', 'bdc2324-data/10/10tags.csv', 'bdc2324-data/10/10target_types.csv', 'bdc2324-data/10/10targets.csv', 'bdc2324-data/10/10tickets.csv', 'bdc2324-data/10/10type_of_pricing_formulas.csv', 'bdc2324-data/10/10type_ofs.csv']\n",
- "bdc2324-data/11\n",
- "['bdc2324-data/11/11campaign_stats.csv', 'bdc2324-data/11/11campaigns.csv', 'bdc2324-data/11/11categories.csv', 'bdc2324-data/11/11countries.csv', 'bdc2324-data/11/11currencies.csv', 'bdc2324-data/11/11customer_target_mappings.csv', 'bdc2324-data/11/11customersplus.csv', 'bdc2324-data/11/11event_types.csv', 'bdc2324-data/11/11events.csv', 'bdc2324-data/11/11facilities.csv', 'bdc2324-data/11/11link_stats.csv', 'bdc2324-data/11/11pricing_formulas.csv', 'bdc2324-data/11/11product_packs.csv', 'bdc2324-data/11/11products.csv', 'bdc2324-data/11/11products_groups.csv', 'bdc2324-data/11/11purchases.csv', 'bdc2324-data/11/11representation_category_capacities.csv', 'bdc2324-data/11/11representations.csv', 'bdc2324-data/11/11seasons.csv', 'bdc2324-data/11/11structure_tag_mappings.csv', 'bdc2324-data/11/11suppliers.csv', 'bdc2324-data/11/11tags.csv', 'bdc2324-data/11/11target_types.csv', 'bdc2324-data/11/11targets.csv', 'bdc2324-data/11/11tickets.csv']\n",
- "bdc2324-data/12\n",
- "['bdc2324-data/12/12campaign_stats.csv', 'bdc2324-data/12/12campaigns.csv', 'bdc2324-data/12/12categories.csv', 'bdc2324-data/12/12consumptions.csv', 'bdc2324-data/12/12countries.csv', 'bdc2324-data/12/12currencies.csv', 'bdc2324-data/12/12customer_target_mappings.csv', 'bdc2324-data/12/12customersplus.csv', 'bdc2324-data/12/12event_types.csv', 'bdc2324-data/12/12events.csv', 'bdc2324-data/12/12facilities.csv', 'bdc2324-data/12/12link_stats.csv', 'bdc2324-data/12/12pricing_formulas.csv', 'bdc2324-data/12/12product_packs.csv', 'bdc2324-data/12/12products.csv', 'bdc2324-data/12/12products_groups.csv', 'bdc2324-data/12/12purchases.csv', 'bdc2324-data/12/12representation_category_capacities.csv', 'bdc2324-data/12/12representations.csv', 'bdc2324-data/12/12seasons.csv', 'bdc2324-data/12/12suppliers.csv', 'bdc2324-data/12/12target_types.csv', 'bdc2324-data/12/12targets.csv', 'bdc2324-data/12/12tickets.csv', 'bdc2324-data/12/12type_ofs.csv']\n",
- "bdc2324-data/13\n",
- "['bdc2324-data/13/13campaign_stats.csv', 'bdc2324-data/13/13campaigns.csv', 'bdc2324-data/13/13categories.csv', 'bdc2324-data/13/13countries.csv', 'bdc2324-data/13/13currencies.csv', 'bdc2324-data/13/13customer_target_mappings.csv', 'bdc2324-data/13/13customersplus.csv', 'bdc2324-data/13/13event_types.csv', 'bdc2324-data/13/13events.csv', 'bdc2324-data/13/13facilities.csv', 'bdc2324-data/13/13link_stats.csv', 'bdc2324-data/13/13pricing_formulas.csv', 'bdc2324-data/13/13product_packs.csv', 'bdc2324-data/13/13products.csv', 'bdc2324-data/13/13products_groups.csv', 'bdc2324-data/13/13purchases.csv', 'bdc2324-data/13/13representation_category_capacities.csv', 'bdc2324-data/13/13representation_types.csv', 'bdc2324-data/13/13representations.csv', 'bdc2324-data/13/13seasons.csv', 'bdc2324-data/13/13structure_tag_mappings.csv', 'bdc2324-data/13/13suppliers.csv', 'bdc2324-data/13/13tags.csv', 'bdc2324-data/13/13target_types.csv', 'bdc2324-data/13/13targets.csv', 'bdc2324-data/13/13tickets.csv']\n",
- "bdc2324-data/14\n",
- "['bdc2324-data/14/14campaign_stats.csv', 'bdc2324-data/14/14campaigns.csv', 'bdc2324-data/14/14categories.csv', 'bdc2324-data/14/14countries.csv', 'bdc2324-data/14/14currencies.csv', 'bdc2324-data/14/14customer_target_mappings.csv', 'bdc2324-data/14/14customersplus.csv', 'bdc2324-data/14/14event_types.csv', 'bdc2324-data/14/14events.csv', 'bdc2324-data/14/14facilities.csv', 'bdc2324-data/14/14link_stats.csv', 'bdc2324-data/14/14pricing_formulas.csv', 'bdc2324-data/14/14product_packs.csv', 'bdc2324-data/14/14products.csv', 'bdc2324-data/14/14products_groups.csv', 'bdc2324-data/14/14purchases.csv', 'bdc2324-data/14/14representation_category_capacities.csv', 'bdc2324-data/14/14representation_types.csv', 'bdc2324-data/14/14representations.csv', 'bdc2324-data/14/14seasons.csv', 'bdc2324-data/14/14suppliers.csv', 'bdc2324-data/14/14target_types.csv', 'bdc2324-data/14/14targets.csv', 'bdc2324-data/14/14tickets.csv', 'bdc2324-data/14/14type_of_categories.csv', 'bdc2324-data/14/14type_of_pricing_formulas.csv', 'bdc2324-data/14/14type_ofs.csv']\n",
- "bdc2324-data/101\n",
- "['bdc2324-data/101/101campaign_stats.csv', 'bdc2324-data/101/101campaigns.csv', 'bdc2324-data/101/101categories.csv', 'bdc2324-data/101/101contribution_sites.csv', 'bdc2324-data/101/101contributions.csv', 'bdc2324-data/101/101countries.csv', 'bdc2324-data/101/101currencies.csv', 'bdc2324-data/101/101customer_target_mappings.csv', 'bdc2324-data/101/101customersplus.csv', 'bdc2324-data/101/101event_types.csv', 'bdc2324-data/101/101events.csv', 'bdc2324-data/101/101facilities.csv', 'bdc2324-data/101/101link_stats.csv', 'bdc2324-data/101/101pricing_formulas.csv', 'bdc2324-data/101/101product_packs.csv', 'bdc2324-data/101/101products.csv', 'bdc2324-data/101/101products_groups.csv', 'bdc2324-data/101/101purchases.csv', 'bdc2324-data/101/101representation_category_capacities.csv', 'bdc2324-data/101/101representations.csv', 'bdc2324-data/101/101seasons.csv', 'bdc2324-data/101/101structure_tag_mappings.csv', 'bdc2324-data/101/101suppliers.csv', 'bdc2324-data/101/101tags.csv', 'bdc2324-data/101/101target_types.csv', 'bdc2324-data/101/101targets.csv', 'bdc2324-data/101/101tickets.csv', 'bdc2324-data/101/101tickets_1.csv', 'bdc2324-data/101/101type_of_pricing_formulas.csv', 'bdc2324-data/101/101type_ofs.csv']\n"
- ]
- }
- ],
- "source": [
- "# Liste des jeu de données par dossier\n",
- "for i in range(1, 15):\n",
- " FILE_PATH_S3 = BUCKET + \"/\" + str(i)\n",
- " print(FILE_PATH_S3)\n",
- " print(fs.ls(FILE_PATH_S3))\n",
- "print(BUCKET + \"/101\")\n",
- "print(fs.ls(BUCKET + \"/101\"))"
- ]
- },
{
"cell_type": "code",
"execution_count": 4,
@@ -416,6 +363,440 @@
"source": [
"pd.DataFrame(customers_plus_1.isna().mean()*100)"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "6f6ce60d-0912-497d-9108-330acccef394",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load every table used by the joins below; each CSV is exposed as a global DataFrame named after its table.\n",
+ "liste_base = ['customer_target_mappings', 'customersplus', 'target_types', 'tags', 'events', 'tickets', 'representations', 'purchases', 'products']\n",
+ "DIRECTORY_ID = '11'  # bucket sub-folder to read from; file names carry the same id as a prefix\n",
+ "for nom_base in liste_base:\n",
+ "    FILE_PATH_S3 = f'bdc2324-data/{DIRECTORY_ID}/{DIRECTORY_ID}{nom_base}.csv'\n",
+ "    with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
+ "        globals()[nom_base] = pd.read_csv(file_in, sep=\",\")  # the join cell refers to these bare names (purchases, tickets, ...)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "fa8ee17d-5092-40ac-8a0a-3790b016dd4e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " lastname | \n",
+ " firstname | \n",
+ " birthdate | \n",
+ " email | \n",
+ " street_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " civility | \n",
+ " is_partner | \n",
+ " ... | \n",
+ " tenant_id | \n",
+ " id_x | \n",
+ " customer_id | \n",
+ " purchase_date | \n",
+ " type_of | \n",
+ " is_from_subscription | \n",
+ " amount | \n",
+ " is_full_price | \n",
+ " start_date_time | \n",
+ " event_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 405082 | \n",
+ " lastname405082 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 6 | \n",
+ " 2023-01-12 06:30:31.197484+01:00 | \n",
+ " 2023-01-12 06:30:31.197484+01:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " 1556 | \n",
+ " 992423 | \n",
+ " 405082 | \n",
+ " 2023-01-11 17:08:41+01:00 | \n",
+ " 3 | \n",
+ " False | \n",
+ " 13.0 | \n",
+ " False | \n",
+ " 2023-02-06 20:00:00+01:00 | \n",
+ " zaide | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 405082 | \n",
+ " lastname405082 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 6 | \n",
+ " 2023-01-12 06:30:31.197484+01:00 | \n",
+ " 2023-01-12 06:30:31.197484+01:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " 1556 | \n",
+ " 992423 | \n",
+ " 405082 | \n",
+ " 2023-01-11 17:08:41+01:00 | \n",
+ " 3 | \n",
+ " False | \n",
+ " 13.0 | \n",
+ " False | \n",
+ " 2023-02-06 20:00:00+01:00 | \n",
+ " zaide | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 411168 | \n",
+ " lastname411168 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 6 | \n",
+ " 2023-03-17 06:30:35.431967+01:00 | \n",
+ " 2023-03-17 06:30:35.431967+01:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " 1556 | \n",
+ " 1053934 | \n",
+ " 411168 | \n",
+ " 2023-03-16 16:23:10+01:00 | \n",
+ " 3 | \n",
+ " False | \n",
+ " 62.0 | \n",
+ " False | \n",
+ " 2023-03-19 16:00:00+01:00 | \n",
+ " luisa miller | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 411168 | \n",
+ " lastname411168 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 6 | \n",
+ " 2023-03-17 06:30:35.431967+01:00 | \n",
+ " 2023-03-17 06:30:35.431967+01:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " 1556 | \n",
+ " 1053934 | \n",
+ " 411168 | \n",
+ " 2023-03-16 16:23:10+01:00 | \n",
+ " 3 | \n",
+ " False | \n",
+ " 62.0 | \n",
+ " False | \n",
+ " 2023-03-19 16:00:00+01:00 | \n",
+ " luisa miller | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4380 | \n",
+ " lastname4380 | \n",
+ " firstname4380 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 2021-04-22 14:51:55.432952+02:00 | \n",
+ " 2022-04-14 11:41:33.738500+02:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " 1556 | \n",
+ " 1189141 | \n",
+ " 4380 | \n",
+ " 2020-11-26 13:12:53+01:00 | \n",
+ " 3 | \n",
+ " False | \n",
+ " 51.3 | \n",
+ " False | \n",
+ " 2020-12-01 20:00:00+01:00 | \n",
+ " iphigenie en tauride | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 318964 | \n",
+ " 19095 | \n",
+ " lastname19095 | \n",
+ " firstname19095 | \n",
+ " 1979-07-16 | \n",
+ " email19095 | \n",
+ " 6 | \n",
+ " 2021-04-22 15:06:30.120537+02:00 | \n",
+ " 2023-09-12 18:27:36.904104+02:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " 1556 | \n",
+ " 1090839 | \n",
+ " 19095 | \n",
+ " 2019-05-19 21:18:36+02:00 | \n",
+ " 1 | \n",
+ " False | \n",
+ " 4.5 | \n",
+ " False | \n",
+ " 2019-05-27 20:00:00+02:00 | \n",
+ " entre femmes | \n",
+ "
\n",
+ " \n",
+ " | 318965 | \n",
+ " 19095 | \n",
+ " lastname19095 | \n",
+ " firstname19095 | \n",
+ " 1979-07-16 | \n",
+ " email19095 | \n",
+ " 6 | \n",
+ " 2021-04-22 15:06:30.120537+02:00 | \n",
+ " 2023-09-12 18:27:36.904104+02:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " 1556 | \n",
+ " 1090839 | \n",
+ " 19095 | \n",
+ " 2019-05-19 21:18:36+02:00 | \n",
+ " 1 | \n",
+ " False | \n",
+ " 4.5 | \n",
+ " False | \n",
+ " 2019-05-27 20:00:00+02:00 | \n",
+ " entre femmes | \n",
+ "
\n",
+ " \n",
+ " | 318966 | \n",
+ " 19095 | \n",
+ " lastname19095 | \n",
+ " firstname19095 | \n",
+ " 1979-07-16 | \n",
+ " email19095 | \n",
+ " 6 | \n",
+ " 2021-04-22 15:06:30.120537+02:00 | \n",
+ " 2023-09-12 18:27:36.904104+02:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " 1556 | \n",
+ " 1090839 | \n",
+ " 19095 | \n",
+ " 2019-05-19 21:18:36+02:00 | \n",
+ " 1 | \n",
+ " False | \n",
+ " 4.5 | \n",
+ " False | \n",
+ " 2019-05-27 20:00:00+02:00 | \n",
+ " entre femmes | \n",
+ "
\n",
+ " \n",
+ " | 318967 | \n",
+ " 19095 | \n",
+ " lastname19095 | \n",
+ " firstname19095 | \n",
+ " 1979-07-16 | \n",
+ " email19095 | \n",
+ " 6 | \n",
+ " 2021-04-22 15:06:30.120537+02:00 | \n",
+ " 2023-09-12 18:27:36.904104+02:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " 1556 | \n",
+ " 1244277 | \n",
+ " 19095 | \n",
+ " 2019-12-31 11:04:07+01:00 | \n",
+ " 1 | \n",
+ " False | \n",
+ " 5.5 | \n",
+ " False | \n",
+ " 2020-02-03 20:00:00+01:00 | \n",
+ " a boire et a manger | \n",
+ "
\n",
+ " \n",
+ " | 318968 | \n",
+ " 19095 | \n",
+ " lastname19095 | \n",
+ " firstname19095 | \n",
+ " 1979-07-16 | \n",
+ " email19095 | \n",
+ " 6 | \n",
+ " 2021-04-22 15:06:30.120537+02:00 | \n",
+ " 2023-09-12 18:27:36.904104+02:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " 1556 | \n",
+ " 1244277 | \n",
+ " 19095 | \n",
+ " 2019-12-31 11:04:07+01:00 | \n",
+ " 1 | \n",
+ " False | \n",
+ " 5.5 | \n",
+ " False | \n",
+ " 2020-02-03 20:00:00+01:00 | \n",
+ " a boire et a manger | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
318969 rows × 52 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id lastname firstname birthdate email \\\n",
+ "0 405082 lastname405082 NaN NaN NaN \n",
+ "1 405082 lastname405082 NaN NaN NaN \n",
+ "2 411168 lastname411168 NaN NaN NaN \n",
+ "3 411168 lastname411168 NaN NaN NaN \n",
+ "4 4380 lastname4380 firstname4380 NaN NaN \n",
+ "... ... ... ... ... ... \n",
+ "318964 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
+ "318965 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
+ "318966 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
+ "318967 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
+ "318968 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
+ "\n",
+ " street_id created_at \\\n",
+ "0 6 2023-01-12 06:30:31.197484+01:00 \n",
+ "1 6 2023-01-12 06:30:31.197484+01:00 \n",
+ "2 6 2023-03-17 06:30:35.431967+01:00 \n",
+ "3 6 2023-03-17 06:30:35.431967+01:00 \n",
+ "4 1 2021-04-22 14:51:55.432952+02:00 \n",
+ "... ... ... \n",
+ "318964 6 2021-04-22 15:06:30.120537+02:00 \n",
+ "318965 6 2021-04-22 15:06:30.120537+02:00 \n",
+ "318966 6 2021-04-22 15:06:30.120537+02:00 \n",
+ "318967 6 2021-04-22 15:06:30.120537+02:00 \n",
+ "318968 6 2021-04-22 15:06:30.120537+02:00 \n",
+ "\n",
+ " updated_at civility is_partner ... \\\n",
+ "0 2023-01-12 06:30:31.197484+01:00 NaN False ... \n",
+ "1 2023-01-12 06:30:31.197484+01:00 NaN False ... \n",
+ "2 2023-03-17 06:30:35.431967+01:00 NaN False ... \n",
+ "3 2023-03-17 06:30:35.431967+01:00 NaN False ... \n",
+ "4 2022-04-14 11:41:33.738500+02:00 NaN False ... \n",
+ "... ... ... ... ... \n",
+ "318964 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
+ "318965 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
+ "318966 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
+ "318967 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
+ "318968 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
+ "\n",
+ " tenant_id id_x customer_id purchase_date type_of \\\n",
+ "0 1556 992423 405082 2023-01-11 17:08:41+01:00 3 \n",
+ "1 1556 992423 405082 2023-01-11 17:08:41+01:00 3 \n",
+ "2 1556 1053934 411168 2023-03-16 16:23:10+01:00 3 \n",
+ "3 1556 1053934 411168 2023-03-16 16:23:10+01:00 3 \n",
+ "4 1556 1189141 4380 2020-11-26 13:12:53+01:00 3 \n",
+ "... ... ... ... ... ... \n",
+ "318964 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n",
+ "318965 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n",
+ "318966 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n",
+ "318967 1556 1244277 19095 2019-12-31 11:04:07+01:00 1 \n",
+ "318968 1556 1244277 19095 2019-12-31 11:04:07+01:00 1 \n",
+ "\n",
+ " is_from_subscription amount is_full_price start_date_time \\\n",
+ "0 False 13.0 False 2023-02-06 20:00:00+01:00 \n",
+ "1 False 13.0 False 2023-02-06 20:00:00+01:00 \n",
+ "2 False 62.0 False 2023-03-19 16:00:00+01:00 \n",
+ "3 False 62.0 False 2023-03-19 16:00:00+01:00 \n",
+ "4 False 51.3 False 2020-12-01 20:00:00+01:00 \n",
+ "... ... ... ... ... \n",
+ "318964 False 4.5 False 2019-05-27 20:00:00+02:00 \n",
+ "318965 False 4.5 False 2019-05-27 20:00:00+02:00 \n",
+ "318966 False 4.5 False 2019-05-27 20:00:00+02:00 \n",
+ "318967 False 5.5 False 2020-02-03 20:00:00+01:00 \n",
+ "318968 False 5.5 False 2020-02-03 20:00:00+01:00 \n",
+ "\n",
+ " event_name \n",
+ "0 zaide \n",
+ "1 zaide \n",
+ "2 luisa miller \n",
+ "3 luisa miller \n",
+ "4 iphigenie en tauride \n",
+ "... ... \n",
+ "318964 entre femmes \n",
+ "318965 entre femmes \n",
+ "318966 entre femmes \n",
+ "318967 a boire et a manger \n",
+ "318968 a boire et a manger \n",
+ "\n",
+ "[318969 rows x 52 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Join chain: purchases -> tickets -> products -> representations -> events -> customers (inner joins, one row per ticket). 'id_x' is purchases.id (pandas suffixes the left frame's clashing 'id' column in the first merge).\n",
+ "merge_1 = pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')[['id_x', 'customer_id','product_id', 'purchase_date', 'type_of', 'is_from_subscription']]\n",
+ "merge_2 = pd.merge(products, merge_1, left_on='id', right_on='product_id', how='inner')[['id_x', 'customer_id', 'representation_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price']]\n",
+ "merge_3 = pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')[['id_x', 'customer_id', 'event_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time']]\n",
+ "merge_4 = pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')[['id_x', 'customer_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time', 'name']]\n",
+ "merge_4 = merge_4.rename(columns={'name': 'event_name'})  # 'name' comes from the events table\n",
+ "df_customer_event = pd.merge(customersplus, merge_4, left_on = 'id', right_on = 'customer_id', how = 'inner')[['id_x', 'customer_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time', 'event_name']]  # keep customer_id so each row stays attributable to a customer\n",
+ "df_customer_event"
+ ]
}
],
"metadata": {