1216 lines
44 KiB
Plaintext
1216 lines
44 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"id": "5ce2ffc5-66b6-4709-9e2c-7a50f49d1361",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Imports: environment variables (os), S3 access (s3fs), dataframes (pandas), regular expressions (re)\n",
|
|||
|
"\n",
|
|||
|
"import os \n",
|
|||
|
"import s3fs\n",
|
|||
|
"import pandas as pd\n",
|
|||
|
"import re"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 35,
|
|||
|
"id": "f579ff01-f009-4fb1-ba79-0cb3ce58ab7f",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"['bdc2324-data/1',\n",
|
|||
|
" 'bdc2324-data/10',\n",
|
|||
|
" 'bdc2324-data/101',\n",
|
|||
|
" 'bdc2324-data/11',\n",
|
|||
|
" 'bdc2324-data/12',\n",
|
|||
|
" 'bdc2324-data/13',\n",
|
|||
|
" 'bdc2324-data/14',\n",
|
|||
|
" 'bdc2324-data/2',\n",
|
|||
|
" 'bdc2324-data/3',\n",
|
|||
|
" 'bdc2324-data/4',\n",
|
|||
|
" 'bdc2324-data/5',\n",
|
|||
|
" 'bdc2324-data/6',\n",
|
|||
|
" 'bdc2324-data/7',\n",
|
|||
|
" 'bdc2324-data/8',\n",
|
|||
|
" 'bdc2324-data/9']"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 35,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Build the S3 filesystem handle; the endpoint host is read from the environment\n",
|
|||
|
"BUCKET = \"bdc2324-data\"\n",
|
|||
|
"S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n",
|
|||
|
"\n",
|
|||
|
"fs = s3fs.S3FileSystem(client_kwargs={\"endpoint_url\": S3_ENDPOINT_URL})\n",
|
|||
|
"\n",
|
|||
|
"# Top-level entries of the bucket (one folder per client)\n",
|
|||
|
"fs.ls(BUCKET)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"id": "c8b2c797-271f-43ee-8823-d0aee5b8782d",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Client folder analysed in this notebook, named explicitly instead of being picked by\n",
|
|||
|
"# its position in the bucket listing (position 1 of fs.ls(BUCKET) was 'bdc2324-data/10').\n",
|
|||
|
"CLIENT_ID = \"10\"\n",
|
|||
|
"FILE_PATH_S3 = f\"{BUCKET}/{CLIENT_ID}\"\n",
|
|||
|
"\n",
|
|||
|
"# Every object stored under that client folder (one CSV per table)\n",
|
|||
|
"files_path_2 = fs.ls(FILE_PATH_S3)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 4,
|
|||
|
"id": "18cee687-1462-4169-9bfe-f39786135cdd",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Load the campaigns table of the selected client.\n",
|
|||
|
"# files_path_2 is the listing built by the previous cell; entry 1 is the '...campaigns.csv' file.\n",
|
|||
|
"# (The name files_path_1 used here before is not defined by any earlier cell, so the cell\n",
|
|||
|
"# raised NameError when the notebook was run from a fresh kernel.)\n",
|
|||
|
"with fs.open(files_path_2[1], mode=\"rb\") as file_in:\n",
|
|||
|
"    df_campaigns = pd.read_csv(file_in)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 5,
|
|||
|
"id": "33e8d14c-c649-4b9c-8290-4a2aa635f999",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>name</th>\n",
|
|||
|
" <th>service_id</th>\n",
|
|||
|
" <th>created_at</th>\n",
|
|||
|
" <th>updated_at</th>\n",
|
|||
|
" <th>process_id</th>\n",
|
|||
|
" <th>report_url</th>\n",
|
|||
|
" <th>category</th>\n",
|
|||
|
" <th>to_be_synced</th>\n",
|
|||
|
" <th>identifier</th>\n",
|
|||
|
" <th>sent_at</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1319613</td>\n",
|
|||
|
" <td>newsletter enseignants janvier 2022</td>\n",
|
|||
|
" <td>721</td>\n",
|
|||
|
" <td>2022-01-14 16:06:42.586321+01:00</td>\n",
|
|||
|
" <td>2022-02-03 14:17:27.112963+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>aba3b6fd5d186d28e06ff97135cade7f</td>\n",
|
|||
|
" <td>2022-01-14 00:00:00+01:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>1319586</td>\n",
|
|||
|
" <td>lsf_janvier_2022</td>\n",
|
|||
|
" <td>717</td>\n",
|
|||
|
" <td>2022-01-07 11:30:35.315895+01:00</td>\n",
|
|||
|
" <td>2022-02-03 14:17:27.116171+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>788d986905533aba051261497ecffcbb</td>\n",
|
|||
|
" <td>2022-01-07 00:00:00+01:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>1319282</td>\n",
|
|||
|
" <td>Invitation à déjeuner au Mucem | Vernissage « ...</td>\n",
|
|||
|
" <td>591</td>\n",
|
|||
|
" <td>2021-09-28 12:50:24.448752+02:00</td>\n",
|
|||
|
" <td>2022-02-03 14:17:27.119582+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>3493894fa4ea036cfc6433c3e2ee63b0</td>\n",
|
|||
|
" <td>2021-09-28 00:00:00+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>1319283</td>\n",
|
|||
|
" <td>Vacances de la Toussaint - centres des loisirs</td>\n",
|
|||
|
" <td>590</td>\n",
|
|||
|
" <td>2021-09-28 18:01:04.692073+02:00</td>\n",
|
|||
|
" <td>2022-02-03 14:17:27.124408+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>08b255a5d42b89b0585260b6f2360bdd</td>\n",
|
|||
|
" <td>2021-09-28 00:00:00+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>1319636</td>\n",
|
|||
|
" <td>ddcp_promo_md_livemag</td>\n",
|
|||
|
" <td>730</td>\n",
|
|||
|
" <td>2022-01-27 18:00:41.053069+01:00</td>\n",
|
|||
|
" <td>2022-02-03 14:17:27.127607+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>d5cfead94f5350c12c322b5b664544c1</td>\n",
|
|||
|
" <td>2022-01-27 00:00:00+01:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>952</th>\n",
|
|||
|
" <td>1320072</td>\n",
|
|||
|
" <td>dre_gaza0106</td>\n",
|
|||
|
" <td>881</td>\n",
|
|||
|
" <td>2022-05-26 09:01:35.523639+02:00</td>\n",
|
|||
|
" <td>2022-12-02 17:51:22.614046+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>7504adad8bb96320eb3afdd4df6e1f60</td>\n",
|
|||
|
" <td>2022-05-26 00:00:00+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>953</th>\n",
|
|||
|
" <td>661398</td>\n",
|
|||
|
" <td>DDCP Plan Bis 4 - Marketing direct - MJ5C</td>\n",
|
|||
|
" <td>183</td>\n",
|
|||
|
" <td>2021-06-18 10:30:01.259578+02:00</td>\n",
|
|||
|
" <td>2021-09-24 11:56:09.082785+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>cedebb6e872f539bef8c3f919874e9d7</td>\n",
|
|||
|
" <td>2020-07-27 00:00:00+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>954</th>\n",
|
|||
|
" <td>1320487</td>\n",
|
|||
|
" <td>Invitation portes ouvertes amitiés</td>\n",
|
|||
|
" <td>988</td>\n",
|
|||
|
" <td>2022-09-29 18:01:33.834090+02:00</td>\n",
|
|||
|
" <td>2022-12-02 17:51:23.258324+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>9908279ebbf1f9b250ba689db6a0222b</td>\n",
|
|||
|
" <td>2022-09-29 00:00:00+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>955</th>\n",
|
|||
|
" <td>906903</td>\n",
|
|||
|
" <td>DDCP PROMO La méditerranée des philosophes #3 ...</td>\n",
|
|||
|
" <td>310</td>\n",
|
|||
|
" <td>2021-07-19 14:07:16.177390+02:00</td>\n",
|
|||
|
" <td>2021-09-24 11:56:09.086101+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>06eb61b839a0cefee4967c67ccb099dc</td>\n",
|
|||
|
" <td>2020-12-23 00:00:00+01:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>956</th>\n",
|
|||
|
" <td>579313</td>\n",
|
|||
|
" <td>ddcp_promo_automation_manuel_pre_visit</td>\n",
|
|||
|
" <td>481</td>\n",
|
|||
|
" <td>2021-06-08 17:38:54.041310+02:00</td>\n",
|
|||
|
" <td>2021-09-24 11:56:09.089394+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>9461cce28ebe3e76fb4b931c35a169b0</td>\n",
|
|||
|
" <td>2021-06-08 00:00:00+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>957 rows × 11 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id name service_id \\\n",
|
|||
|
"0 1319613 newsletter enseignants janvier 2022 721 \n",
|
|||
|
"1 1319586 lsf_janvier_2022 717 \n",
|
|||
|
"2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n",
|
|||
|
"3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n",
|
|||
|
"4 1319636 ddcp_promo_md_livemag 730 \n",
|
|||
|
".. ... ... ... \n",
|
|||
|
"952 1320072 dre_gaza0106 881 \n",
|
|||
|
"953 661398 DDCP Plan Bis 4 - Marketing direct - MJ5C 183 \n",
|
|||
|
"954 1320487 Invitation portes ouvertes amitiés 988 \n",
|
|||
|
"955 906903 DDCP PROMO La méditerranée des philosophes #3 ... 310 \n",
|
|||
|
"956 579313 ddcp_promo_automation_manuel_pre_visit 481 \n",
|
|||
|
"\n",
|
|||
|
" created_at updated_at \\\n",
|
|||
|
"0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n",
|
|||
|
"1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n",
|
|||
|
"2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n",
|
|||
|
"3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n",
|
|||
|
"4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n",
|
|||
|
".. ... ... \n",
|
|||
|
"952 2022-05-26 09:01:35.523639+02:00 2022-12-02 17:51:22.614046+01:00 \n",
|
|||
|
"953 2021-06-18 10:30:01.259578+02:00 2021-09-24 11:56:09.082785+02:00 \n",
|
|||
|
"954 2022-09-29 18:01:33.834090+02:00 2022-12-02 17:51:23.258324+01:00 \n",
|
|||
|
"955 2021-07-19 14:07:16.177390+02:00 2021-09-24 11:56:09.086101+02:00 \n",
|
|||
|
"956 2021-06-08 17:38:54.041310+02:00 2021-09-24 11:56:09.089394+02:00 \n",
|
|||
|
"\n",
|
|||
|
" process_id report_url category to_be_synced \\\n",
|
|||
|
"0 NaN NaN 0.0 False \n",
|
|||
|
"1 NaN NaN 0.0 False \n",
|
|||
|
"2 NaN NaN 0.0 False \n",
|
|||
|
"3 NaN NaN 0.0 False \n",
|
|||
|
"4 NaN NaN 0.0 False \n",
|
|||
|
".. ... ... ... ... \n",
|
|||
|
"952 NaN NaN 0.0 False \n",
|
|||
|
"953 NaN NaN 0.0 False \n",
|
|||
|
"954 NaN NaN 0.0 False \n",
|
|||
|
"955 NaN NaN 0.0 False \n",
|
|||
|
"956 NaN NaN 0.0 False \n",
|
|||
|
"\n",
|
|||
|
" identifier sent_at \n",
|
|||
|
"0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n",
|
|||
|
"1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n",
|
|||
|
"2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n",
|
|||
|
"3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n",
|
|||
|
"4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 \n",
|
|||
|
".. ... ... \n",
|
|||
|
"952 7504adad8bb96320eb3afdd4df6e1f60 2022-05-26 00:00:00+02:00 \n",
|
|||
|
"953 cedebb6e872f539bef8c3f919874e9d7 2020-07-27 00:00:00+02:00 \n",
|
|||
|
"954 9908279ebbf1f9b250ba689db6a0222b 2022-09-29 00:00:00+02:00 \n",
|
|||
|
"955 06eb61b839a0cefee4967c67ccb099dc 2020-12-23 00:00:00+01:00 \n",
|
|||
|
"956 9461cce28ebe3e76fb4b931c35a169b0 2021-06-08 00:00:00+02:00 \n",
|
|||
|
"\n",
|
|||
|
"[957 rows x 11 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 5,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df_campaigns"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 39,
|
|||
|
"id": "b04f39e7-7d53-4734-b125-4dc1843172d6",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"['bdc2324-data', '10', '10campaign_stats.csv']"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 39,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"files_path_2[0].split(\"/\")[1]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 74,
|
|||
|
"id": "d9bd97df-67bf-48ef-812a-975deb890163",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"/tmp/ipykernel_521/1596461036.py:11: DtypeWarning: Columns (19,20,33,34,35,39) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
|||
|
" df = pd.read_csv(file_in)\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Load every CSV of the selected client folder into a dataframe named df{client}_{table}\n",
|
|||
|
"\n",
|
|||
|
"files_path = files_path_2\n",
|
|||
|
"\n",
|
|||
|
"client_number = files_path[0].split(\"/\")[1]\n",
|
|||
|
"df_prefix = \"df\" + str(client_number) + \"_\"\n",
|
|||
|
"\n",
|
|||
|
"# Object keys look like {bucket}/{client}/{client}{table}.csv; group 3 captures the table name\n",
|
|||
|
"table_name_pattern = re.compile(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$')\n",
|
|||
|
"\n",
|
|||
|
"# Same frames keyed by name, for code that prefers a dict lookup over global variables\n",
|
|||
|
"dataframes = {}\n",
|
|||
|
"\n",
|
|||
|
"for current_path in files_path:\n",
|
|||
|
"    found = table_name_pattern.search(current_path)\n",
|
|||
|
"    if found is None:\n",
|
|||
|
"        # Unexpected object name: report it instead of failing on None.group(3)\n",
|
|||
|
"        print(f\"skipped (unexpected file name): {current_path}\")\n",
|
|||
|
"        continue\n",
|
|||
|
"    nom_dataframe = df_prefix + found.group(3)\n",
|
|||
|
"    with fs.open(current_path, mode=\"rb\") as file_in:\n",
|
|||
|
"        # low_memory=False infers each column dtype from the whole file, which avoids the\n",
|
|||
|
"        # mixed-types DtypeWarning this cell emitted with the default chunked parsing\n",
|
|||
|
"        df = pd.read_csv(file_in, low_memory=False)\n",
|
|||
|
"    dataframes[nom_dataframe] = df\n",
|
|||
|
"    # Keep the df10_tickets-style global names that later cells read directly\n",
|
|||
|
"    globals()[nom_dataframe] = df"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 78,
|
|||
|
"id": "7f46e38e-413c-48cb-a171-eb6bc7219d9c",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"client number :10\n",
|
|||
|
"prefix used : df10_\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(f\"client number :{client_number}\")\n",
|
|||
|
"print(f\"prefix used : {df_prefix}\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 79,
|
|||
|
"id": "bdfd388c-7971-4f4d-99ef-c5b0435a4567",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"['bdc2324-data/10/10campaign_stats.csv',\n",
|
|||
|
" 'bdc2324-data/10/10campaigns.csv',\n",
|
|||
|
" 'bdc2324-data/10/10categories.csv',\n",
|
|||
|
" 'bdc2324-data/10/10countries.csv',\n",
|
|||
|
" 'bdc2324-data/10/10currencies.csv',\n",
|
|||
|
" 'bdc2324-data/10/10customer_target_mappings.csv',\n",
|
|||
|
" 'bdc2324-data/10/10customersplus.csv',\n",
|
|||
|
" 'bdc2324-data/10/10event_types.csv',\n",
|
|||
|
" 'bdc2324-data/10/10events.csv',\n",
|
|||
|
" 'bdc2324-data/10/10facilities.csv',\n",
|
|||
|
" 'bdc2324-data/10/10link_stats.csv',\n",
|
|||
|
" 'bdc2324-data/10/10pricing_formulas.csv',\n",
|
|||
|
" 'bdc2324-data/10/10product_packs.csv',\n",
|
|||
|
" 'bdc2324-data/10/10products.csv',\n",
|
|||
|
" 'bdc2324-data/10/10products_groups.csv',\n",
|
|||
|
" 'bdc2324-data/10/10purchases.csv',\n",
|
|||
|
" 'bdc2324-data/10/10representation_category_capacities.csv',\n",
|
|||
|
" 'bdc2324-data/10/10representation_types.csv',\n",
|
|||
|
" 'bdc2324-data/10/10representations.csv',\n",
|
|||
|
" 'bdc2324-data/10/10seasons.csv',\n",
|
|||
|
" 'bdc2324-data/10/10suppliers.csv',\n",
|
|||
|
" 'bdc2324-data/10/10tags.csv',\n",
|
|||
|
" 'bdc2324-data/10/10target_types.csv',\n",
|
|||
|
" 'bdc2324-data/10/10targets.csv',\n",
|
|||
|
" 'bdc2324-data/10/10tickets.csv',\n",
|
|||
|
" 'bdc2324-data/10/10type_of_pricing_formulas.csv',\n",
|
|||
|
" 'bdc2324-data/10/10type_ofs.csv']"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 79,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"files_path_2"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 80,
|
|||
|
"id": "e7bd02dc-1925-46ff-9d59-231d18f9f4f1",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>number</th>\n",
|
|||
|
" <th>created_at</th>\n",
|
|||
|
" <th>updated_at</th>\n",
|
|||
|
" <th>purchase_id</th>\n",
|
|||
|
" <th>product_id</th>\n",
|
|||
|
" <th>is_from_subscription</th>\n",
|
|||
|
" <th>type_of</th>\n",
|
|||
|
" <th>supplier_id</th>\n",
|
|||
|
" <th>barcode</th>\n",
|
|||
|
" <th>identifier</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1799177</td>\n",
|
|||
|
" <td>60_0_0_0_1_k-5</td>\n",
|
|||
|
" <td>2021-12-29 07:27:27.868513+01:00</td>\n",
|
|||
|
" <td>2021-12-29 07:27:27.868513+01:00</td>\n",
|
|||
|
" <td>409613</td>\n",
|
|||
|
" <td>321683</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>56c3db5a02c87af7e525676092cb7c4a</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>1799178</td>\n",
|
|||
|
" <td>71_0_0_0_1_k-5</td>\n",
|
|||
|
" <td>2021-12-29 07:27:27.976380+01:00</td>\n",
|
|||
|
" <td>2021-12-29 07:27:27.976380+01:00</td>\n",
|
|||
|
" <td>409613</td>\n",
|
|||
|
" <td>321684</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1ecad1dc6b42b4cdb75784dd9dcd9d5c</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>1799179</td>\n",
|
|||
|
" <td>93_0_0_0_1_k-5</td>\n",
|
|||
|
" <td>2021-12-29 07:27:27.978719+01:00</td>\n",
|
|||
|
" <td>2021-12-29 07:27:27.978719+01:00</td>\n",
|
|||
|
" <td>409613</td>\n",
|
|||
|
" <td>321685</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>b3d207bdb47bcdb27a52f6bae0db7ec2</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>1799180</td>\n",
|
|||
|
" <td>103_0_0_0_1_k-5</td>\n",
|
|||
|
" <td>2021-12-29 07:27:27.984621+01:00</td>\n",
|
|||
|
" <td>2021-12-29 07:27:27.984621+01:00</td>\n",
|
|||
|
" <td>409613</td>\n",
|
|||
|
" <td>321686</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>10df9591b617cc177516e9ddf91ddae3</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>1799181</td>\n",
|
|||
|
" <td>107_0_3_2_1_h-1</td>\n",
|
|||
|
" <td>2021-12-29 07:27:27.988602+01:00</td>\n",
|
|||
|
" <td>2021-12-29 07:27:27.988602+01:00</td>\n",
|
|||
|
" <td>409613</td>\n",
|
|||
|
" <td>321687</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>3a8c7d5882fe9f20f0f59c8d90c9873c</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>492309</th>\n",
|
|||
|
" <td>3252232</td>\n",
|
|||
|
" <td>336359</td>\n",
|
|||
|
" <td>2023-03-10 01:31:52.543375+01:00</td>\n",
|
|||
|
" <td>2023-03-10 01:31:52.543375+01:00</td>\n",
|
|||
|
" <td>710062</td>\n",
|
|||
|
" <td>572547</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>fc96f582931209501ed186d709664980</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>492310</th>\n",
|
|||
|
" <td>3252233</td>\n",
|
|||
|
" <td>336360</td>\n",
|
|||
|
" <td>2023-03-10 01:31:52.543869+01:00</td>\n",
|
|||
|
" <td>2023-03-10 01:31:52.543869+01:00</td>\n",
|
|||
|
" <td>710062</td>\n",
|
|||
|
" <td>572547</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>d4ccfb00a9b22b62654bbf98b4d9a5a5</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>492311</th>\n",
|
|||
|
" <td>3252234</td>\n",
|
|||
|
" <td>336361</td>\n",
|
|||
|
" <td>2023-03-10 01:31:52.545783+01:00</td>\n",
|
|||
|
" <td>2023-03-10 01:31:52.545783+01:00</td>\n",
|
|||
|
" <td>710062</td>\n",
|
|||
|
" <td>572547</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>d5f76662d6571b8eaceaf19c781fa514</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>492312</th>\n",
|
|||
|
" <td>3252235</td>\n",
|
|||
|
" <td>336362</td>\n",
|
|||
|
" <td>2023-03-10 01:31:52.547043+01:00</td>\n",
|
|||
|
" <td>2023-03-10 01:31:52.547043+01:00</td>\n",
|
|||
|
" <td>710062</td>\n",
|
|||
|
" <td>572547</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>093225db5cd5e06cc8e06242b4cbba37</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>492313</th>\n",
|
|||
|
" <td>3252236</td>\n",
|
|||
|
" <td>336363</td>\n",
|
|||
|
" <td>2023-03-10 01:31:52.548311+01:00</td>\n",
|
|||
|
" <td>2023-03-10 01:31:52.548311+01:00</td>\n",
|
|||
|
" <td>710062</td>\n",
|
|||
|
" <td>572547</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>9bace0d0cd7a5ec559aca8ac8bf67700</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>492314 rows × 11 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id number created_at \\\n",
|
|||
|
"0 1799177 60_0_0_0_1_k-5 2021-12-29 07:27:27.868513+01:00 \n",
|
|||
|
"1 1799178 71_0_0_0_1_k-5 2021-12-29 07:27:27.976380+01:00 \n",
|
|||
|
"2 1799179 93_0_0_0_1_k-5 2021-12-29 07:27:27.978719+01:00 \n",
|
|||
|
"3 1799180 103_0_0_0_1_k-5 2021-12-29 07:27:27.984621+01:00 \n",
|
|||
|
"4 1799181 107_0_3_2_1_h-1 2021-12-29 07:27:27.988602+01:00 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"492309 3252232 336359 2023-03-10 01:31:52.543375+01:00 \n",
|
|||
|
"492310 3252233 336360 2023-03-10 01:31:52.543869+01:00 \n",
|
|||
|
"492311 3252234 336361 2023-03-10 01:31:52.545783+01:00 \n",
|
|||
|
"492312 3252235 336362 2023-03-10 01:31:52.547043+01:00 \n",
|
|||
|
"492313 3252236 336363 2023-03-10 01:31:52.548311+01:00 \n",
|
|||
|
"\n",
|
|||
|
" updated_at purchase_id product_id \\\n",
|
|||
|
"0 2021-12-29 07:27:27.868513+01:00 409613 321683 \n",
|
|||
|
"1 2021-12-29 07:27:27.976380+01:00 409613 321684 \n",
|
|||
|
"2 2021-12-29 07:27:27.978719+01:00 409613 321685 \n",
|
|||
|
"3 2021-12-29 07:27:27.984621+01:00 409613 321686 \n",
|
|||
|
"4 2021-12-29 07:27:27.988602+01:00 409613 321687 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"492309 2023-03-10 01:31:52.543375+01:00 710062 572547 \n",
|
|||
|
"492310 2023-03-10 01:31:52.543869+01:00 710062 572547 \n",
|
|||
|
"492311 2023-03-10 01:31:52.545783+01:00 710062 572547 \n",
|
|||
|
"492312 2023-03-10 01:31:52.547043+01:00 710062 572547 \n",
|
|||
|
"492313 2023-03-10 01:31:52.548311+01:00 710062 572547 \n",
|
|||
|
"\n",
|
|||
|
" is_from_subscription type_of supplier_id barcode \\\n",
|
|||
|
"0 False 1 2 NaN \n",
|
|||
|
"1 False 1 2 NaN \n",
|
|||
|
"2 False 1 2 NaN \n",
|
|||
|
"3 False 1 2 NaN \n",
|
|||
|
"4 False 1 2 NaN \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"492309 False 1 2 NaN \n",
|
|||
|
"492310 False 1 2 NaN \n",
|
|||
|
"492311 False 1 2 NaN \n",
|
|||
|
"492312 False 1 2 NaN \n",
|
|||
|
"492313 False 1 2 NaN \n",
|
|||
|
"\n",
|
|||
|
" identifier \n",
|
|||
|
"0 56c3db5a02c87af7e525676092cb7c4a \n",
|
|||
|
"1 1ecad1dc6b42b4cdb75784dd9dcd9d5c \n",
|
|||
|
"2 b3d207bdb47bcdb27a52f6bae0db7ec2 \n",
|
|||
|
"3 10df9591b617cc177516e9ddf91ddae3 \n",
|
|||
|
"4 3a8c7d5882fe9f20f0f59c8d90c9873c \n",
|
|||
|
"... ... \n",
|
|||
|
"492309 fc96f582931209501ed186d709664980 \n",
|
|||
|
"492310 d4ccfb00a9b22b62654bbf98b4d9a5a5 \n",
|
|||
|
"492311 d5f76662d6571b8eaceaf19c781fa514 \n",
|
|||
|
"492312 093225db5cd5e06cc8e06242b4cbba37 \n",
|
|||
|
"492313 9bace0d0cd7a5ec559aca8ac8bf67700 \n",
|
|||
|
"\n",
|
|||
|
"[492314 rows x 11 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 80,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# example : get the table \n",
|
|||
|
"\n",
|
|||
|
"df10_tickets"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"id": "48ae6de5-2353-4fa8-a2a8-20da3b77e2ff",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'\\nfor i in range(len(files_path_1)) :\\n current_path = files_path_1[i]\\n nom_dataframe = \"df\" + re.search(r\\'/([^/]+)\\\\.csv$\\', current_path).group(1)\\n df = globals()[nom_dataframe]\\n print(nom_dataframe)\\n print(df.head(20))\\n'"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# loop to have a look at dataframes from file 1\n",
|
|||
|
"\n",
|
|||
|
"\"\"\"\n",
|
|||
|
"for i in range(len(files_path_1)) :\n",
|
|||
|
" current_path = files_path_1[i]\n",
|
|||
|
" nom_dataframe = \"df\" + re.search(r'/([^/]+)\\.csv$', current_path).group(1)\n",
|
|||
|
" df = globals()[nom_dataframe]\n",
|
|||
|
" print(nom_dataframe)\n",
|
|||
|
" print(df.head(20))\n",
|
|||
|
"\"\"\""
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "d72166db-dcef-45bd-9f8c-7cb2ee6bcbde",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Beginning of the exploratory analysis of dataframes"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 82,
|
|||
|
"id": "17966ab2-9038-4dd6-a59c-7739ee05c964",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>lastname</th>\n",
|
|||
|
" <th>firstname</th>\n",
|
|||
|
" <th>birthdate</th>\n",
|
|||
|
" <th>email</th>\n",
|
|||
|
" <th>street_id</th>\n",
|
|||
|
" <th>created_at</th>\n",
|
|||
|
" <th>updated_at</th>\n",
|
|||
|
" <th>civility</th>\n",
|
|||
|
" <th>is_partner</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>preferred_category</th>\n",
|
|||
|
" <th>preferred_supplier</th>\n",
|
|||
|
" <th>preferred_formula</th>\n",
|
|||
|
" <th>purchase_count</th>\n",
|
|||
|
" <th>first_buying_date</th>\n",
|
|||
|
" <th>last_visiting_date</th>\n",
|
|||
|
" <th>zipcode</th>\n",
|
|||
|
" <th>country</th>\n",
|
|||
|
" <th>age</th>\n",
|
|||
|
" <th>tenant_id</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>821538</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>email821538</td>\n",
|
|||
|
" <td>139</td>\n",
|
|||
|
" <td>2023-07-14 11:43:34.261637+02:00</td>\n",
|
|||
|
" <td>2023-07-14 11:43:34.261637+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>875</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>809126</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>email809126</td>\n",
|
|||
|
" <td>1063</td>\n",
|
|||
|
" <td>2023-05-04 17:17:24.456829+02:00</td>\n",
|
|||
|
" <td>2023-05-04 17:17:24.456829+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>fr</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>875</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>11005</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1063</td>\n",
|
|||
|
" <td>2017-07-06 03:01:57.242998+02:00</td>\n",
|
|||
|
" <td>2018-11-12 18:01:18.283492+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>zone tarif 1</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>invite rp</td>\n",
|
|||
|
" <td>14</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>fr</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>875</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>17663</td>\n",
|
|||
|
" <td>lastname17663</td>\n",
|
|||
|
" <td>firstname17663</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>12731</td>\n",
|
|||
|
" <td>2018-09-23 02:39:17.778100+02:00</td>\n",
|
|||
|
" <td>2018-09-23 02:39:17.778100+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>zone tarif 1</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>detaxe</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>44220</td>\n",
|
|||
|
" <td>fr</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>875</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>38100</td>\n",
|
|||
|
" <td>lastname38100</td>\n",
|
|||
|
" <td>firstname38100</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>12395</td>\n",
|
|||
|
" <td>2019-02-11 11:05:58.581121+01:00</td>\n",
|
|||
|
" <td>2022-12-06 23:15:33.485866+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>44100</td>\n",
|
|||
|
" <td>fr</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>875</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>98789</th>\n",
|
|||
|
" <td>766266</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>email766266</td>\n",
|
|||
|
" <td>139</td>\n",
|
|||
|
" <td>2022-12-06 18:26:04.142337+01:00</td>\n",
|
|||
|
" <td>2023-05-03 18:01:01.799141+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>875</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>98790</th>\n",
|
|||
|
" <td>766336</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>email766336</td>\n",
|
|||
|
" <td>139</td>\n",
|
|||
|
" <td>2022-12-06 18:28:49.139502+01:00</td>\n",
|
|||
|
" <td>2022-12-06 23:15:33.485866+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>875</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>98791</th>\n",
|
|||
|
" <td>766348</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>email766348</td>\n",
|
|||
|
" <td>139</td>\n",
|
|||
|
" <td>2022-12-06 18:28:51.140745+01:00</td>\n",
|
|||
|
" <td>2022-12-06 23:15:33.485866+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>875</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>98792</th>\n",
|
|||
|
" <td>766363</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>email766363</td>\n",
|
|||
|
" <td>139</td>\n",
|
|||
|
" <td>2022-12-06 18:29:44.081056+01:00</td>\n",
|
|||
|
" <td>2022-12-06 23:15:33.485866+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>875</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>98793</th>\n",
|
|||
|
" <td>766366</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>email766366</td>\n",
|
|||
|
" <td>139</td>\n",
|
|||
|
" <td>2022-12-06 18:29:44.934174+01:00</td>\n",
|
|||
|
" <td>2022-12-06 23:15:33.485866+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>875</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>98794 rows × 43 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id lastname firstname birthdate email \\\n",
|
|||
|
"0 821538 NaN NaN NaN email821538 \n",
|
|||
|
"1 809126 NaN NaN NaN email809126 \n",
|
|||
|
"2 11005 NaN NaN NaN NaN \n",
|
|||
|
"3 17663 lastname17663 firstname17663 NaN NaN \n",
|
|||
|
"4 38100 lastname38100 firstname38100 NaN NaN \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"98789 766266 NaN NaN NaN email766266 \n",
|
|||
|
"98790 766336 NaN NaN NaN email766336 \n",
|
|||
|
"98791 766348 NaN NaN NaN email766348 \n",
|
|||
|
"98792 766363 NaN NaN NaN email766363 \n",
|
|||
|
"98793 766366 NaN NaN NaN email766366 \n",
|
|||
|
"\n",
|
|||
|
" street_id created_at \\\n",
|
|||
|
"0 139 2023-07-14 11:43:34.261637+02:00 \n",
|
|||
|
"1 1063 2023-05-04 17:17:24.456829+02:00 \n",
|
|||
|
"2 1063 2017-07-06 03:01:57.242998+02:00 \n",
|
|||
|
"3 12731 2018-09-23 02:39:17.778100+02:00 \n",
|
|||
|
"4 12395 2019-02-11 11:05:58.581121+01:00 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"98789 139 2022-12-06 18:26:04.142337+01:00 \n",
|
|||
|
"98790 139 2022-12-06 18:28:49.139502+01:00 \n",
|
|||
|
"98791 139 2022-12-06 18:28:51.140745+01:00 \n",
|
|||
|
"98792 139 2022-12-06 18:29:44.081056+01:00 \n",
|
|||
|
"98793 139 2022-12-06 18:29:44.934174+01:00 \n",
|
|||
|
"\n",
|
|||
|
" updated_at civility is_partner ... \\\n",
|
|||
|
"0 2023-07-14 11:43:34.261637+02:00 NaN False ... \n",
|
|||
|
"1 2023-05-04 17:17:24.456829+02:00 NaN False ... \n",
|
|||
|
"2 2018-11-12 18:01:18.283492+01:00 NaN False ... \n",
|
|||
|
"3 2018-09-23 02:39:17.778100+02:00 NaN False ... \n",
|
|||
|
"4 2022-12-06 23:15:33.485866+01:00 NaN False ... \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"98789 2023-05-03 18:01:01.799141+02:00 NaN False ... \n",
|
|||
|
"98790 2022-12-06 23:15:33.485866+01:00 NaN False ... \n",
|
|||
|
"98791 2022-12-06 23:15:33.485866+01:00 NaN False ... \n",
|
|||
|
"98792 2022-12-06 23:15:33.485866+01:00 NaN False ... \n",
|
|||
|
"98793 2022-12-06 23:15:33.485866+01:00 NaN False ... \n",
|
|||
|
"\n",
|
|||
|
" preferred_category preferred_supplier preferred_formula \\\n",
|
|||
|
"0 NaN NaN NaN \n",
|
|||
|
"1 NaN NaN NaN \n",
|
|||
|
"2 zone tarif 1 NaN invite rp \n",
|
|||
|
"3 zone tarif 1 NaN detaxe \n",
|
|||
|
"4 NaN NaN NaN \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"98789 NaN NaN NaN \n",
|
|||
|
"98790 NaN NaN NaN \n",
|
|||
|
"98791 NaN NaN NaN \n",
|
|||
|
"98792 NaN NaN NaN \n",
|
|||
|
"98793 NaN NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" purchase_count first_buying_date last_visiting_date zipcode country \\\n",
|
|||
|
"0 0 NaN NaN NaN NaN \n",
|
|||
|
"1 0 NaN NaN NaN fr \n",
|
|||
|
"2 14 NaN NaN NaN fr \n",
|
|||
|
"3 1 NaN NaN 44220 fr \n",
|
|||
|
"4 1 NaN NaN 44100 fr \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"98789 0 NaN NaN NaN NaN \n",
|
|||
|
"98790 0 NaN NaN NaN NaN \n",
|
|||
|
"98791 0 NaN NaN NaN NaN \n",
|
|||
|
"98792 0 NaN NaN NaN NaN \n",
|
|||
|
"98793 0 NaN NaN NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" age tenant_id \n",
|
|||
|
"0 NaN 875 \n",
|
|||
|
"1 NaN 875 \n",
|
|||
|
"2 NaN 875 \n",
|
|||
|
"3 NaN 875 \n",
|
|||
|
"4 NaN 875 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"98789 NaN 875 \n",
|
|||
|
"98790 NaN 875 \n",
|
|||
|
"98791 NaN 875 \n",
|
|||
|
"98792 NaN 875 \n",
|
|||
|
"98793 NaN 875 \n",
|
|||
|
"\n",
|
|||
|
"[98794 rows x 43 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 82,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df10_0customersplus"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "932812b1-7a24-4f2d-ae48-7fe8e06b9f62",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# how many missing values ?\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.11.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 5
|
|||
|
}
|