update
This commit is contained in:
parent
f25b5b7922
commit
78664094c5
|
@ -1,6 +1,823 @@
|
||||||
{
|
{
|
||||||
"cells": [],
|
"cells": [
|
||||||
"metadata": {},
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "aa74dbe0-f974-4b5c-94f4-4dba9fbc64fa",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Business Data Challenge - Team 1"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "94c498e7-7c50-45f9-b3f4-a1ab19b7ccc4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "7a3b50ac-b1ff-4f3d-9938-e048fdc8e027",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Configuration de l'accès aux données"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "0b029d42-fb02-481e-a407-7e41886198a6",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"['bdc2324-data/1',\n",
|
||||||
|
" 'bdc2324-data/10',\n",
|
||||||
|
" 'bdc2324-data/101',\n",
|
||||||
|
" 'bdc2324-data/11',\n",
|
||||||
|
" 'bdc2324-data/12',\n",
|
||||||
|
" 'bdc2324-data/13',\n",
|
||||||
|
" 'bdc2324-data/14',\n",
|
||||||
|
" 'bdc2324-data/2',\n",
|
||||||
|
" 'bdc2324-data/3',\n",
|
||||||
|
" 'bdc2324-data/4',\n",
|
||||||
|
" 'bdc2324-data/5',\n",
|
||||||
|
" 'bdc2324-data/6',\n",
|
||||||
|
" 'bdc2324-data/7',\n",
|
||||||
|
" 'bdc2324-data/8',\n",
|
||||||
|
" 'bdc2324-data/9']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"import s3fs\n",
|
||||||
|
"# Create filesystem object\n",
|
||||||
|
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||||||
|
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
||||||
|
"\n",
|
||||||
|
"BUCKET = \"bdc2324-data\"\n",
|
||||||
|
"fs.ls(BUCKET)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "fbaf9aa7-ff70-4dbe-a969-b801c593510b",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Chargement des fichiers campaign_stats.csv\n",
|
||||||
|
"FILE_PATH_S3 = 'bdc2324-data/1/1campaign_stats.csv'\n",
|
||||||
|
"\n",
|
||||||
|
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||||||
|
" campaign_stats_1 = pd.read_csv(file_in, sep=\",\")\n",
|
||||||
|
"\n",
|
||||||
|
"FILE_PATH_S3 = 'bdc2324-data/2/2campaign_stats.csv'\n",
|
||||||
|
"\n",
|
||||||
|
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||||||
|
" campaign_stats_2 = pd.read_csv(file_in, sep=\",\")\n",
|
||||||
|
"\n",
|
||||||
|
"FILE_PATH_S3 = 'bdc2324-data/3/3campaign_stats.csv'\n",
|
||||||
|
"\n",
|
||||||
|
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||||||
|
" campaign_stats_3 = pd.read_csv(file_in, sep=\",\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"id": "1e0418bc-8e97-4a04-b7f3-bda3bef7d36e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Conversion des dates 'sent_at'\n",
|
||||||
|
"campaign_stats_1['sent_at'] = pd.to_datetime(campaign_stats_1['sent_at'], format = 'ISO8601', utc = True)\n",
|
||||||
|
"campaign_stats_2['sent_at'] = pd.to_datetime(campaign_stats_2['sent_at'], format = 'ISO8601', utc = True)\n",
|
||||||
|
"campaign_stats_3['sent_at'] = pd.to_datetime(campaign_stats_3['sent_at'], format = 'ISO8601', utc = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"id": "cc5c20ba-e827-4e5a-97a5-7f3947e0621c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"2023-11-09 18:10:45+00:00\n",
|
||||||
|
"2020-06-02 08:24:08+00:00\n",
|
||||||
|
"2023-10-12 01:39:48+00:00\n",
|
||||||
|
"2023-10-10 17:06:29+00:00\n",
|
||||||
|
"2023-11-01 09:20:48+00:00\n",
|
||||||
|
"2021-03-31 14:59:02+00:00\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Chaque unité correspond à une période ? --> Non, les dossiers ont juste pour but de réduire la taille des fichiers\n",
|
||||||
|
"print(campaign_stats_1['sent_at'].max())\n",
|
||||||
|
"print(campaign_stats_1['sent_at'].min())\n",
|
||||||
|
"\n",
|
||||||
|
"print(campaign_stats_2['sent_at'].max())\n",
|
||||||
|
"print(campaign_stats_2['sent_at'].min())\n",
|
||||||
|
"\n",
|
||||||
|
"print(campaign_stats_3['sent_at'].max())\n",
|
||||||
|
"print(campaign_stats_3['sent_at'].min())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "c75632df-b018-4bb8-a99d-83f15af94369",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"0 2021-03-28 16:01:09+00:00\n",
|
||||||
|
"1 2021-03-28 16:01:09+00:00\n",
|
||||||
|
"2 2021-03-28 16:00:59+00:00\n",
|
||||||
|
"3 2021-03-28 16:00:59+00:00\n",
|
||||||
|
"4 2021-03-28 16:01:06+00:00\n",
|
||||||
|
" ... \n",
|
||||||
|
"6214803 2023-10-23 09:32:33+00:00\n",
|
||||||
|
"6214804 2023-10-23 09:32:49+00:00\n",
|
||||||
|
"6214805 2023-10-23 09:33:28+00:00\n",
|
||||||
|
"6214806 2023-10-23 09:31:53+00:00\n",
|
||||||
|
"6214807 2023-10-23 09:33:54+00:00\n",
|
||||||
|
"Name: sent_at, Length: 6214808, dtype: datetime64[ns, UTC]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"campaign_stats_1['sent_at']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "f4c0c63e-0418-4cfe-a57d-7af57bca0c22",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Customersplus.csv"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"id": "d3bf880d-1065-4d5b-9954-1830aa5081af",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/tmp/ipykernel_1362/4118060109.py:9: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||||||
|
" customers_plus_2 = pd.read_csv(file_in, sep=\",\")\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"FILE_PATH_S3 = 'bdc2324-data/1/1customersplus.csv'\n",
|
||||||
|
"\n",
|
||||||
|
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||||||
|
" customers_plus_1 = pd.read_csv(file_in, sep=\",\")\n",
|
||||||
|
"\n",
|
||||||
|
"FILE_PATH_S3 = 'bdc2324-data/2/2customersplus.csv'\n",
|
||||||
|
"\n",
|
||||||
|
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||||||
|
" customers_plus_2 = pd.read_csv(file_in, sep=\",\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"id": "7368f381-db8e-4a4d-9fe2-5947eb55be58",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"Index(['id', 'lastname', 'firstname', 'birthdate', 'email', 'street_id',\n",
|
||||||
|
" 'created_at', 'updated_at', 'civility', 'is_partner', 'extra',\n",
|
||||||
|
" 'deleted_at', 'reference', 'gender', 'is_email_true', 'extra_field',\n",
|
||||||
|
" 'identifier', 'opt_in', 'structure_id', 'note', 'profession',\n",
|
||||||
|
" 'language', 'mcp_contact_id', 'need_reload', 'last_buying_date',\n",
|
||||||
|
" 'max_price', 'ticket_sum', 'average_price', 'fidelity',\n",
|
||||||
|
" 'average_purchase_delay', 'average_price_basket',\n",
|
||||||
|
" 'average_ticket_basket', 'total_price', 'preferred_category',\n",
|
||||||
|
" 'preferred_supplier', 'preferred_formula', 'purchase_count',\n",
|
||||||
|
" 'first_buying_date', 'last_visiting_date', 'zipcode', 'country', 'age',\n",
|
||||||
|
" 'tenant_id'],\n",
|
||||||
|
" dtype='object')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"customers_plus_1.columns"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "08091935-b159-47fa-806c-e1444f3b227e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"customers_plus_1.shape"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "9f8c8868-c1ac-4cee-af08-533d928f6764",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"customers_plus_1['id'].nunique()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "bf95daf2-4852-4718-b474-207a1ebd8ac4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"customers_plus_2['id'].nunique()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "1425c385-3216-4e4f-ae8f-a121624721ba",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"common_id = set(customers_plus_2['id']).intersection(customers_plus_1['id'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 61,
|
||||||
|
"id": "92533026-e27c-4f1f-81ca-64eda32a34c0",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"1"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 61,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"common_id = set(customers_plus_2['id']).intersection(customers_plus_1['id'])\n",
|
||||||
|
"# Exemple id commun = caractéristiques communes\n",
|
||||||
|
"print(customers_plus_2[customers_plus_2['id'] == list(common_id)[0]])\n",
|
||||||
|
"\n",
|
||||||
|
"print(customers_plus_1[customers_plus_1['id'] == list(common_id)[0]])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 49,
|
||||||
|
"id": "bf9ebc94-0ba6-443d-8e53-22477a6e79a7",
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"id 0.000000\n",
|
||||||
|
"lastname 43.461341\n",
|
||||||
|
"firstname 44.995588\n",
|
||||||
|
"birthdate 96.419870\n",
|
||||||
|
"email 8.622075\n",
|
||||||
|
"street_id 0.000000\n",
|
||||||
|
"created_at 0.000000\n",
|
||||||
|
"updated_at 0.000000\n",
|
||||||
|
"civility 100.000000\n",
|
||||||
|
"is_partner 0.000000\n",
|
||||||
|
"extra 100.000000\n",
|
||||||
|
"deleted_at 100.000000\n",
|
||||||
|
"reference 100.000000\n",
|
||||||
|
"gender 0.000000\n",
|
||||||
|
"is_email_true 0.000000\n",
|
||||||
|
"extra_field 100.000000\n",
|
||||||
|
"identifier 0.000000\n",
|
||||||
|
"opt_in 0.000000\n",
|
||||||
|
"structure_id 88.072380\n",
|
||||||
|
"note 99.403421\n",
|
||||||
|
"profession 95.913503\n",
|
||||||
|
"language 99.280945\n",
|
||||||
|
"mcp_contact_id 34.876141\n",
|
||||||
|
"need_reload 0.000000\n",
|
||||||
|
"last_buying_date 51.653431\n",
|
||||||
|
"max_price 51.653431\n",
|
||||||
|
"ticket_sum 0.000000\n",
|
||||||
|
"average_price 8.639195\n",
|
||||||
|
"fidelity 0.000000\n",
|
||||||
|
"average_purchase_delay 51.653431\n",
|
||||||
|
"average_price_basket 51.653431\n",
|
||||||
|
"average_ticket_basket 51.653431\n",
|
||||||
|
"total_price 43.014236\n",
|
||||||
|
"preferred_category 100.000000\n",
|
||||||
|
"preferred_supplier 100.000000\n",
|
||||||
|
"preferred_formula 100.000000\n",
|
||||||
|
"purchase_count 0.000000\n",
|
||||||
|
"first_buying_date 51.653431\n",
|
||||||
|
"last_visiting_date 100.000000\n",
|
||||||
|
"zipcode 71.176564\n",
|
||||||
|
"country 5.459418\n",
|
||||||
|
"age 96.419870\n",
|
||||||
|
"tenant_id 0.000000\n",
|
||||||
|
"dtype: float64\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"pd.DataFrame(customers_plus_1.isna().mean()*100)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"id": "6d62e73f-3925-490f-9fd4-d0e838903cb2",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Chargement des tables listées dans liste_base pour le dossier 11\n",
|
||||||
|
"liste_base = ['customer_target_mappings', 'customersplus', 'target_types', 'tags', 'events', 'tickets', 'representations', 'purchases', 'products']\n",
|
||||||
|
"\n",
|
||||||
|
"for nom_base in liste_base:\n",
|
||||||
|
" FILE_PATH_S3 = 'bdc2324-data/11/11' + nom_base + '.csv'\n",
|
||||||
|
" with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||||||
|
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"id": "12b24f1c-eb3e-45be-aaf3-b9273180caa3",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>id</th>\n",
|
||||||
|
" <th>lastname</th>\n",
|
||||||
|
" <th>firstname</th>\n",
|
||||||
|
" <th>birthdate</th>\n",
|
||||||
|
" <th>email</th>\n",
|
||||||
|
" <th>street_id</th>\n",
|
||||||
|
" <th>created_at</th>\n",
|
||||||
|
" <th>updated_at</th>\n",
|
||||||
|
" <th>civility</th>\n",
|
||||||
|
" <th>is_partner</th>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <th>tenant_id</th>\n",
|
||||||
|
" <th>id_x</th>\n",
|
||||||
|
" <th>customer_id</th>\n",
|
||||||
|
" <th>purchase_date</th>\n",
|
||||||
|
" <th>type_of</th>\n",
|
||||||
|
" <th>is_from_subscription</th>\n",
|
||||||
|
" <th>amount</th>\n",
|
||||||
|
" <th>is_full_price</th>\n",
|
||||||
|
" <th>start_date_time</th>\n",
|
||||||
|
" <th>event_name</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>405082</td>\n",
|
||||||
|
" <td>lastname405082</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>2023-01-12 06:30:31.197484+01:00</td>\n",
|
||||||
|
" <td>2023-01-12 06:30:31.197484+01:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>1556</td>\n",
|
||||||
|
" <td>992423</td>\n",
|
||||||
|
" <td>405082</td>\n",
|
||||||
|
" <td>2023-01-11 17:08:41+01:00</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>13.0</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>2023-02-06 20:00:00+01:00</td>\n",
|
||||||
|
" <td>zaide</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>405082</td>\n",
|
||||||
|
" <td>lastname405082</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>2023-01-12 06:30:31.197484+01:00</td>\n",
|
||||||
|
" <td>2023-01-12 06:30:31.197484+01:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>1556</td>\n",
|
||||||
|
" <td>992423</td>\n",
|
||||||
|
" <td>405082</td>\n",
|
||||||
|
" <td>2023-01-11 17:08:41+01:00</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>13.0</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>2023-02-06 20:00:00+01:00</td>\n",
|
||||||
|
" <td>zaide</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>411168</td>\n",
|
||||||
|
" <td>lastname411168</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>2023-03-17 06:30:35.431967+01:00</td>\n",
|
||||||
|
" <td>2023-03-17 06:30:35.431967+01:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>1556</td>\n",
|
||||||
|
" <td>1053934</td>\n",
|
||||||
|
" <td>411168</td>\n",
|
||||||
|
" <td>2023-03-16 16:23:10+01:00</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>62.0</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>2023-03-19 16:00:00+01:00</td>\n",
|
||||||
|
" <td>luisa miller</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>411168</td>\n",
|
||||||
|
" <td>lastname411168</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>2023-03-17 06:30:35.431967+01:00</td>\n",
|
||||||
|
" <td>2023-03-17 06:30:35.431967+01:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>1556</td>\n",
|
||||||
|
" <td>1053934</td>\n",
|
||||||
|
" <td>411168</td>\n",
|
||||||
|
" <td>2023-03-16 16:23:10+01:00</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>62.0</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>2023-03-19 16:00:00+01:00</td>\n",
|
||||||
|
" <td>luisa miller</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>4380</td>\n",
|
||||||
|
" <td>lastname4380</td>\n",
|
||||||
|
" <td>firstname4380</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>2021-04-22 14:51:55.432952+02:00</td>\n",
|
||||||
|
" <td>2022-04-14 11:41:33.738500+02:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>1556</td>\n",
|
||||||
|
" <td>1189141</td>\n",
|
||||||
|
" <td>4380</td>\n",
|
||||||
|
" <td>2020-11-26 13:12:53+01:00</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>51.3</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>2020-12-01 20:00:00+01:00</td>\n",
|
||||||
|
" <td>iphigenie en tauride</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>318964</th>\n",
|
||||||
|
" <td>19095</td>\n",
|
||||||
|
" <td>lastname19095</td>\n",
|
||||||
|
" <td>firstname19095</td>\n",
|
||||||
|
" <td>1979-07-16</td>\n",
|
||||||
|
" <td>email19095</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>2021-04-22 15:06:30.120537+02:00</td>\n",
|
||||||
|
" <td>2023-09-12 18:27:36.904104+02:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>1556</td>\n",
|
||||||
|
" <td>1090839</td>\n",
|
||||||
|
" <td>19095</td>\n",
|
||||||
|
" <td>2019-05-19 21:18:36+02:00</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>4.5</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>2019-05-27 20:00:00+02:00</td>\n",
|
||||||
|
" <td>entre femmes</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>318965</th>\n",
|
||||||
|
" <td>19095</td>\n",
|
||||||
|
" <td>lastname19095</td>\n",
|
||||||
|
" <td>firstname19095</td>\n",
|
||||||
|
" <td>1979-07-16</td>\n",
|
||||||
|
" <td>email19095</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>2021-04-22 15:06:30.120537+02:00</td>\n",
|
||||||
|
" <td>2023-09-12 18:27:36.904104+02:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>1556</td>\n",
|
||||||
|
" <td>1090839</td>\n",
|
||||||
|
" <td>19095</td>\n",
|
||||||
|
" <td>2019-05-19 21:18:36+02:00</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>4.5</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>2019-05-27 20:00:00+02:00</td>\n",
|
||||||
|
" <td>entre femmes</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>318966</th>\n",
|
||||||
|
" <td>19095</td>\n",
|
||||||
|
" <td>lastname19095</td>\n",
|
||||||
|
" <td>firstname19095</td>\n",
|
||||||
|
" <td>1979-07-16</td>\n",
|
||||||
|
" <td>email19095</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>2021-04-22 15:06:30.120537+02:00</td>\n",
|
||||||
|
" <td>2023-09-12 18:27:36.904104+02:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>1556</td>\n",
|
||||||
|
" <td>1090839</td>\n",
|
||||||
|
" <td>19095</td>\n",
|
||||||
|
" <td>2019-05-19 21:18:36+02:00</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>4.5</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>2019-05-27 20:00:00+02:00</td>\n",
|
||||||
|
" <td>entre femmes</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>318967</th>\n",
|
||||||
|
" <td>19095</td>\n",
|
||||||
|
" <td>lastname19095</td>\n",
|
||||||
|
" <td>firstname19095</td>\n",
|
||||||
|
" <td>1979-07-16</td>\n",
|
||||||
|
" <td>email19095</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>2021-04-22 15:06:30.120537+02:00</td>\n",
|
||||||
|
" <td>2023-09-12 18:27:36.904104+02:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>1556</td>\n",
|
||||||
|
" <td>1244277</td>\n",
|
||||||
|
" <td>19095</td>\n",
|
||||||
|
" <td>2019-12-31 11:04:07+01:00</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>5.5</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>2020-02-03 20:00:00+01:00</td>\n",
|
||||||
|
" <td>a boire et a manger</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>318968</th>\n",
|
||||||
|
" <td>19095</td>\n",
|
||||||
|
" <td>lastname19095</td>\n",
|
||||||
|
" <td>firstname19095</td>\n",
|
||||||
|
" <td>1979-07-16</td>\n",
|
||||||
|
" <td>email19095</td>\n",
|
||||||
|
" <td>6</td>\n",
|
||||||
|
" <td>2021-04-22 15:06:30.120537+02:00</td>\n",
|
||||||
|
" <td>2023-09-12 18:27:36.904104+02:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>1556</td>\n",
|
||||||
|
" <td>1244277</td>\n",
|
||||||
|
" <td>19095</td>\n",
|
||||||
|
" <td>2019-12-31 11:04:07+01:00</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>5.5</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>2020-02-03 20:00:00+01:00</td>\n",
|
||||||
|
" <td>a boire et a manger</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>318969 rows × 52 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" id lastname firstname birthdate email \\\n",
|
||||||
|
"0 405082 lastname405082 NaN NaN NaN \n",
|
||||||
|
"1 405082 lastname405082 NaN NaN NaN \n",
|
||||||
|
"2 411168 lastname411168 NaN NaN NaN \n",
|
||||||
|
"3 411168 lastname411168 NaN NaN NaN \n",
|
||||||
|
"4 4380 lastname4380 firstname4380 NaN NaN \n",
|
||||||
|
"... ... ... ... ... ... \n",
|
||||||
|
"318964 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
|
||||||
|
"318965 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
|
||||||
|
"318966 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
|
||||||
|
"318967 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
|
||||||
|
"318968 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
|
||||||
|
"\n",
|
||||||
|
" street_id created_at \\\n",
|
||||||
|
"0 6 2023-01-12 06:30:31.197484+01:00 \n",
|
||||||
|
"1 6 2023-01-12 06:30:31.197484+01:00 \n",
|
||||||
|
"2 6 2023-03-17 06:30:35.431967+01:00 \n",
|
||||||
|
"3 6 2023-03-17 06:30:35.431967+01:00 \n",
|
||||||
|
"4 1 2021-04-22 14:51:55.432952+02:00 \n",
|
||||||
|
"... ... ... \n",
|
||||||
|
"318964 6 2021-04-22 15:06:30.120537+02:00 \n",
|
||||||
|
"318965 6 2021-04-22 15:06:30.120537+02:00 \n",
|
||||||
|
"318966 6 2021-04-22 15:06:30.120537+02:00 \n",
|
||||||
|
"318967 6 2021-04-22 15:06:30.120537+02:00 \n",
|
||||||
|
"318968 6 2021-04-22 15:06:30.120537+02:00 \n",
|
||||||
|
"\n",
|
||||||
|
" updated_at civility is_partner ... \\\n",
|
||||||
|
"0 2023-01-12 06:30:31.197484+01:00 NaN False ... \n",
|
||||||
|
"1 2023-01-12 06:30:31.197484+01:00 NaN False ... \n",
|
||||||
|
"2 2023-03-17 06:30:35.431967+01:00 NaN False ... \n",
|
||||||
|
"3 2023-03-17 06:30:35.431967+01:00 NaN False ... \n",
|
||||||
|
"4 2022-04-14 11:41:33.738500+02:00 NaN False ... \n",
|
||||||
|
"... ... ... ... ... \n",
|
||||||
|
"318964 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
|
||||||
|
"318965 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
|
||||||
|
"318966 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
|
||||||
|
"318967 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
|
||||||
|
"318968 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
|
||||||
|
"\n",
|
||||||
|
" tenant_id id_x customer_id purchase_date type_of \\\n",
|
||||||
|
"0 1556 992423 405082 2023-01-11 17:08:41+01:00 3 \n",
|
||||||
|
"1 1556 992423 405082 2023-01-11 17:08:41+01:00 3 \n",
|
||||||
|
"2 1556 1053934 411168 2023-03-16 16:23:10+01:00 3 \n",
|
||||||
|
"3 1556 1053934 411168 2023-03-16 16:23:10+01:00 3 \n",
|
||||||
|
"4 1556 1189141 4380 2020-11-26 13:12:53+01:00 3 \n",
|
||||||
|
"... ... ... ... ... ... \n",
|
||||||
|
"318964 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n",
|
||||||
|
"318965 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n",
|
||||||
|
"318966 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n",
|
||||||
|
"318967 1556 1244277 19095 2019-12-31 11:04:07+01:00 1 \n",
|
||||||
|
"318968 1556 1244277 19095 2019-12-31 11:04:07+01:00 1 \n",
|
||||||
|
"\n",
|
||||||
|
" is_from_subscription amount is_full_price start_date_time \\\n",
|
||||||
|
"0 False 13.0 False 2023-02-06 20:00:00+01:00 \n",
|
||||||
|
"1 False 13.0 False 2023-02-06 20:00:00+01:00 \n",
|
||||||
|
"2 False 62.0 False 2023-03-19 16:00:00+01:00 \n",
|
||||||
|
"3 False 62.0 False 2023-03-19 16:00:00+01:00 \n",
|
||||||
|
"4 False 51.3 False 2020-12-01 20:00:00+01:00 \n",
|
||||||
|
"... ... ... ... ... \n",
|
||||||
|
"318964 False 4.5 False 2019-05-27 20:00:00+02:00 \n",
|
||||||
|
"318965 False 4.5 False 2019-05-27 20:00:00+02:00 \n",
|
||||||
|
"318966 False 4.5 False 2019-05-27 20:00:00+02:00 \n",
|
||||||
|
"318967 False 5.5 False 2020-02-03 20:00:00+01:00 \n",
|
||||||
|
"318968 False 5.5 False 2020-02-03 20:00:00+01:00 \n",
|
||||||
|
"\n",
|
||||||
|
" event_name \n",
|
||||||
|
"0 zaide \n",
|
||||||
|
"1 zaide \n",
|
||||||
|
"2 luisa miller \n",
|
||||||
|
"3 luisa miller \n",
|
||||||
|
"4 iphigenie en tauride \n",
|
||||||
|
"... ... \n",
|
||||||
|
"318964 entre femmes \n",
|
||||||
|
"318965 entre femmes \n",
|
||||||
|
"318966 entre femmes \n",
|
||||||
|
"318967 a boire et a manger \n",
|
||||||
|
"318968 a boire et a manger \n",
|
||||||
|
"\n",
|
||||||
|
"[318969 rows x 52 columns]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 12,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Jointures successives : purchases -> tickets -> products -> representations -> events -> customersplus\n",
|
||||||
|
"merge_1 = pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')[['id_x', 'customer_id','product_id', 'purchase_date', 'type_of', 'is_from_subscription']]\n",
|
||||||
|
"merge_2 = pd.merge(products, merge_1, left_on='id', right_on='product_id', how='inner')[['id_x', 'customer_id', 'representation_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price']]\n",
|
||||||
|
"merge_3 = pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')[['id_x', 'customer_id', 'event_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time']]\n",
|
||||||
|
"merge_4 = pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')[['id_x', 'customer_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time', 'name']]\n",
|
||||||
|
"merge_4 = merge_4.rename(columns={'name': 'event_name'})\n",
|
||||||
|
"df_customer_event = pd.merge(customersplus, merge_4, left_on = 'id', right_on = 'customer_id', how = 'inner')[['id_x', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time', 'event_name']]\n",
|
||||||
|
"df_customer_event"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.13"
|
||||||
|
}
|
||||||
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 5
|
"nbformat_minor": 5
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user