2024-01-10 19:19:51 +01:00
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "56b3d44e-1e3f-4726-9916-0f9af107860e",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"# Business Data Challenge - Team 1"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-13 14:14:11 +01:00
|
|
|
|
"execution_count": 1,
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"id": "15103481-8d74-404c-aa09-7601fe7730da",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import pandas as pd\n",
|
2024-01-13 10:38:10 +01:00
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
"import os\n",
|
|
|
|
|
"import s3fs\n",
|
|
|
|
|
"import re"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "c3bb0d13-34b2-4e1c-9985-468cd87c5a0e",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Configuration de l'accès aux données"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-13 14:14:11 +01:00
|
|
|
|
"execution_count": 2,
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"id": "5d83bb1a-d341-446e-91f6-1c428607f6d4",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Create filesystem object\n",
|
|
|
|
|
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
2024-01-13 10:38:10 +01:00
|
|
|
|
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "f99da24f-0d93-4618-92bc-3ba81dc0445c",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
2024-01-13 10:38:10 +01:00
|
|
|
|
"# Exemple sur Company 1"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "9d74b68f-ba07-4a15-9a27-dae931762d70",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Chargement données"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-13 14:14:11 +01:00
|
|
|
|
"execution_count": 3,
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"id": "699664b9-eee4-4f8d-a207-e524526560c5",
|
|
|
|
|
"metadata": {},
|
2024-01-13 10:38:10 +01:00
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"BUCKET = \"bdc2324-data/1\"\n",
|
|
|
|
|
"liste_database = fs.ls(BUCKET)"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-02-04 16:02:01 +01:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 39,
|
|
|
|
|
"id": "aaf64d60-bf92-470c-8210-d09abd6a653e",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"['bdc2324-data/1/1campaign_stats.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1campaigns.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1categories.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1countries.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1currencies.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1customer_target_mappings.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1customersplus.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1event_types.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1events.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1facilities.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1link_stats.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1pricing_formulas.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1product_packs.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1products.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1products_groups.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1purchases.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1representation_category_capacities.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1representations.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1seasons.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1structure_tag_mappings.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1suppliers.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1tags.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1target_types.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1targets.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1tickets.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1type_of_categories.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n",
|
|
|
|
|
" 'bdc2324-data/1/1type_ofs.csv']"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 39,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"liste_database"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-01-13 10:38:10 +01:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"execution_count": 4,
|
2024-01-13 10:38:10 +01:00
|
|
|
|
"id": "0cb92854-903b-4efd-ac1b-197e29f044b4",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"['bdc2324-data/1/1purchases.csv', 'bdc2324-data/1/1suppliers.csv', 'bdc2324-data/1/1tickets.csv', 'bdc2324-data/1/1type_ofs.csv']\n"
|
2024-01-13 10:38:10 +01:00
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-01-13 14:14:11 +01:00
|
|
|
|
"liste_database_select = ['suppliers', 'ticket', 'purchase', 'consumption', 'type_ofs']\n",
|
2024-01-13 10:38:10 +01:00
|
|
|
|
"\n",
|
|
|
|
|
"# Filtrer la liste pour les éléments contenant au moins un élément de la liste à tester\n",
|
|
|
|
|
"liste_database_filtered = [element for element in liste_database if any(element_part in element for element_part in liste_database_select)]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Afficher le résultat\n",
|
|
|
|
|
"print(liste_database_filtered)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"execution_count": 5,
|
2024-01-13 10:38:10 +01:00
|
|
|
|
"id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"/tmp/ipykernel_9792/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
2024-01-13 10:38:10 +01:00
|
|
|
|
" df = pd.read_csv(file_in)\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"# loop to create dataframes from liste\n",
|
|
|
|
|
"files_path = liste_database\n",
|
2024-01-13 10:38:10 +01:00
|
|
|
|
"\n",
|
|
|
|
|
"client_number = files_path[0].split(\"/\")[1]\n",
|
|
|
|
|
"df_prefix = \"df\" + str(client_number) + \"_\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"for i in range(len(files_path)) :\n",
|
|
|
|
|
" current_path = files_path[i]\n",
|
|
|
|
|
" with fs.open(current_path, mode=\"rb\") as file_in:\n",
|
|
|
|
|
" df = pd.read_csv(file_in)\n",
|
|
|
|
|
" # the pattern of the name is df1xxx\n",
|
|
|
|
|
" nom_dataframe = df_prefix + re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', current_path).group(3)\n",
|
|
|
|
|
" globals()[nom_dataframe] = df"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-01-13 14:47:24 +01:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "e908f516-2a74-45d6-8492-7dcdc3afbe1f",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"metadata": {
|
|
|
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
|
|
|
},
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"source": [
|
|
|
|
|
"## tickets.csv"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 6,
|
|
|
|
|
"id": "14f4158e-c9c0-4beb-826a-5e0f949434a4",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>id</th>\n",
|
|
|
|
|
" <th>number</th>\n",
|
|
|
|
|
" <th>created_at</th>\n",
|
|
|
|
|
" <th>updated_at</th>\n",
|
|
|
|
|
" <th>purchase_id</th>\n",
|
|
|
|
|
" <th>product_id</th>\n",
|
|
|
|
|
" <th>is_from_subscription</th>\n",
|
|
|
|
|
" <th>type_of</th>\n",
|
|
|
|
|
" <th>supplier_id</th>\n",
|
|
|
|
|
" <th>barcode</th>\n",
|
|
|
|
|
" <th>identifier</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>13070859</td>\n",
|
|
|
|
|
" <td>13593002661288</td>\n",
|
|
|
|
|
" <td>2021-12-28 20:47:10.320641+01:00</td>\n",
|
|
|
|
|
" <td>2022-02-14 18:46:53.614229+01:00</td>\n",
|
|
|
|
|
" <td>5107462</td>\n",
|
|
|
|
|
" <td>225251</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>b6ad7fc36f33b5e05f58c7fca06688a6</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>13070860</td>\n",
|
|
|
|
|
" <td>13593002661399</td>\n",
|
|
|
|
|
" <td>2021-12-28 20:47:10.321037+01:00</td>\n",
|
|
|
|
|
" <td>2022-02-14 18:46:53.614761+01:00</td>\n",
|
|
|
|
|
" <td>5107462</td>\n",
|
|
|
|
|
" <td>224914</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>b0903af480266f27802fe5c38c277c9e</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>13070861</td>\n",
|
|
|
|
|
" <td>13593002661419</td>\n",
|
|
|
|
|
" <td>2021-12-28 20:47:10.321629+01:00</td>\n",
|
|
|
|
|
" <td>2022-02-14 18:46:53.615521+01:00</td>\n",
|
|
|
|
|
" <td>5107462</td>\n",
|
|
|
|
|
" <td>224914</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>64ca12b7e26a65b90335c0702ea0faba</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>13070862</td>\n",
|
|
|
|
|
" <td>13593002661508</td>\n",
|
|
|
|
|
" <td>2021-12-28 20:47:10.322029+01:00</td>\n",
|
|
|
|
|
" <td>2022-02-14 18:46:53.616000+01:00</td>\n",
|
|
|
|
|
" <td>5107462</td>\n",
|
|
|
|
|
" <td>224914</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>5ac2f8150aa9f3a6b1599df08cc2f0c7</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>13070863</td>\n",
|
|
|
|
|
" <td>13593002661689</td>\n",
|
|
|
|
|
" <td>2021-12-28 20:47:10.322449+01:00</td>\n",
|
|
|
|
|
" <td>2022-02-14 18:46:53.616447+01:00</td>\n",
|
|
|
|
|
" <td>5107462</td>\n",
|
|
|
|
|
" <td>224914</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>dfe30081bae020d12094279926136b9c</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1826667</th>\n",
|
|
|
|
|
" <td>20662815</td>\n",
|
|
|
|
|
" <td>13593016154390</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:34.935983+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:34.935983+01:00</td>\n",
|
|
|
|
|
" <td>8007697</td>\n",
|
|
|
|
|
" <td>405689</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>dba9aa428f843b79ae69dfacfe8fc579</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1826668</th>\n",
|
|
|
|
|
" <td>20662816</td>\n",
|
|
|
|
|
" <td>13593016154501</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:34.937038+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:34.937038+01:00</td>\n",
|
|
|
|
|
" <td>8007698</td>\n",
|
|
|
|
|
" <td>403658</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>93f1fcfc6ba4fa68f92eb4b4a619fcf0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1826669</th>\n",
|
|
|
|
|
" <td>20662817</td>\n",
|
|
|
|
|
" <td>13593016154680</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:34.938224+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:34.938224+01:00</td>\n",
|
|
|
|
|
" <td>8007698</td>\n",
|
|
|
|
|
" <td>403658</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>c8bbbd25df2c158767ceef42c3237f23</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1826670</th>\n",
|
|
|
|
|
" <td>20662818</td>\n",
|
|
|
|
|
" <td>13593016154899</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:34.939328+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:34.939328+01:00</td>\n",
|
|
|
|
|
" <td>8007699</td>\n",
|
|
|
|
|
" <td>403658</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>738f0a8b5088b5056bc3b32eff2dca1f</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1826671</th>\n",
|
|
|
|
|
" <td>20662819</td>\n",
|
|
|
|
|
" <td>13593016154988</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:34.940680+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:34.940680+01:00</td>\n",
|
|
|
|
|
" <td>8007699</td>\n",
|
|
|
|
|
" <td>403658</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>4c5a6195434377380b4e6ae63b2e9cf6</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>1826672 rows × 11 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" id number created_at \\\n",
|
|
|
|
|
"0 13070859 13593002661288 2021-12-28 20:47:10.320641+01:00 \n",
|
|
|
|
|
"1 13070860 13593002661399 2021-12-28 20:47:10.321037+01:00 \n",
|
|
|
|
|
"2 13070861 13593002661419 2021-12-28 20:47:10.321629+01:00 \n",
|
|
|
|
|
"3 13070862 13593002661508 2021-12-28 20:47:10.322029+01:00 \n",
|
|
|
|
|
"4 13070863 13593002661689 2021-12-28 20:47:10.322449+01:00 \n",
|
|
|
|
|
"... ... ... ... \n",
|
|
|
|
|
"1826667 20662815 13593016154390 2023-11-09 07:51:34.935983+01:00 \n",
|
|
|
|
|
"1826668 20662816 13593016154501 2023-11-09 07:51:34.937038+01:00 \n",
|
|
|
|
|
"1826669 20662817 13593016154680 2023-11-09 07:51:34.938224+01:00 \n",
|
|
|
|
|
"1826670 20662818 13593016154899 2023-11-09 07:51:34.939328+01:00 \n",
|
|
|
|
|
"1826671 20662819 13593016154988 2023-11-09 07:51:34.940680+01:00 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" updated_at purchase_id product_id \\\n",
|
|
|
|
|
"0 2022-02-14 18:46:53.614229+01:00 5107462 225251 \n",
|
|
|
|
|
"1 2022-02-14 18:46:53.614761+01:00 5107462 224914 \n",
|
|
|
|
|
"2 2022-02-14 18:46:53.615521+01:00 5107462 224914 \n",
|
|
|
|
|
"3 2022-02-14 18:46:53.616000+01:00 5107462 224914 \n",
|
|
|
|
|
"4 2022-02-14 18:46:53.616447+01:00 5107462 224914 \n",
|
|
|
|
|
"... ... ... ... \n",
|
|
|
|
|
"1826667 2023-11-09 07:51:34.935983+01:00 8007697 405689 \n",
|
|
|
|
|
"1826668 2023-11-09 07:51:34.937038+01:00 8007698 403658 \n",
|
|
|
|
|
"1826669 2023-11-09 07:51:34.938224+01:00 8007698 403658 \n",
|
|
|
|
|
"1826670 2023-11-09 07:51:34.939328+01:00 8007699 403658 \n",
|
|
|
|
|
"1826671 2023-11-09 07:51:34.940680+01:00 8007699 403658 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" is_from_subscription type_of supplier_id barcode \\\n",
|
|
|
|
|
"0 False 1 3 NaN \n",
|
|
|
|
|
"1 False 1 3 NaN \n",
|
|
|
|
|
"2 False 1 3 NaN \n",
|
|
|
|
|
"3 False 1 3 NaN \n",
|
|
|
|
|
"4 False 1 3 NaN \n",
|
|
|
|
|
"... ... ... ... ... \n",
|
|
|
|
|
"1826667 False 1 3 NaN \n",
|
|
|
|
|
"1826668 False 1 3 NaN \n",
|
|
|
|
|
"1826669 False 1 3 NaN \n",
|
|
|
|
|
"1826670 False 1 3 NaN \n",
|
|
|
|
|
"1826671 False 1 3 NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" identifier \n",
|
|
|
|
|
"0 b6ad7fc36f33b5e05f58c7fca06688a6 \n",
|
|
|
|
|
"1 b0903af480266f27802fe5c38c277c9e \n",
|
|
|
|
|
"2 64ca12b7e26a65b90335c0702ea0faba \n",
|
|
|
|
|
"3 5ac2f8150aa9f3a6b1599df08cc2f0c7 \n",
|
|
|
|
|
"4 dfe30081bae020d12094279926136b9c \n",
|
|
|
|
|
"... ... \n",
|
|
|
|
|
"1826667 dba9aa428f843b79ae69dfacfe8fc579 \n",
|
|
|
|
|
"1826668 93f1fcfc6ba4fa68f92eb4b4a619fcf0 \n",
|
|
|
|
|
"1826669 c8bbbd25df2c158767ceef42c3237f23 \n",
|
|
|
|
|
"1826670 738f0a8b5088b5056bc3b32eff2dca1f \n",
|
|
|
|
|
"1826671 4c5a6195434377380b4e6ae63b2e9cf6 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[1826672 rows x 11 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 6,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_tickets"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 7,
|
|
|
|
|
"id": "f3c35394-b586-4ae4-b5ab-b03bb01bb618",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
|
|
"RangeIndex: 1826672 entries, 0 to 1826671\n",
|
|
|
|
|
"Data columns (total 11 columns):\n",
|
|
|
|
|
" # Column Dtype \n",
|
|
|
|
|
"--- ------ ----- \n",
|
|
|
|
|
" 0 id int64 \n",
|
|
|
|
|
" 1 number object \n",
|
|
|
|
|
" 2 created_at object \n",
|
|
|
|
|
" 3 updated_at object \n",
|
|
|
|
|
" 4 purchase_id int64 \n",
|
|
|
|
|
" 5 product_id int64 \n",
|
|
|
|
|
" 6 is_from_subscription bool \n",
|
|
|
|
|
" 7 type_of int64 \n",
|
|
|
|
|
" 8 supplier_id int64 \n",
|
|
|
|
|
" 9 barcode float64\n",
|
|
|
|
|
" 10 identifier object \n",
|
|
|
|
|
"dtypes: bool(1), float64(1), int64(5), object(4)\n",
|
|
|
|
|
"memory usage: 141.1+ MB\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_tickets.info()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 8,
|
|
|
|
|
"id": "c1b42769-03c7-4785-92ce-5e1e6b41908d",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"id 0.0\n",
|
|
|
|
|
"number 0.0\n",
|
|
|
|
|
"created_at 0.0\n",
|
|
|
|
|
"updated_at 0.0\n",
|
|
|
|
|
"purchase_id 0.0\n",
|
|
|
|
|
"product_id 0.0\n",
|
|
|
|
|
"is_from_subscription 0.0\n",
|
|
|
|
|
"type_of 0.0\n",
|
|
|
|
|
"supplier_id 0.0\n",
|
|
|
|
|
"barcode 100.0\n",
|
|
|
|
|
"identifier 0.0\n",
|
|
|
|
|
"dtype: float64"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 8,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_tickets.isna().sum()/len(df1_tickets)*100"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 9,
|
|
|
|
|
"id": "42896791-2d93-4725-a50b-6c7cbe535ec7",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"/tmp/ipykernel_619/232847087.py:3: SettingWithCopyWarning: \n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
|
|
|
" df1_tickets_clean.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Selection des variables\n",
|
|
|
|
|
"df1_tickets_clean = df1_tickets[['id', 'purchase_id', 'product_id', 'is_from_subscription', 'type_of', 'supplier_id']]\n",
|
|
|
|
|
"df1_tickets_clean.rename(columns = {'id' : 'ticket_id'}, inplace = True)"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-01-13 10:38:10 +01:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "78453f3c-4f89-44ed-a6c6-2a7443b72b52",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"metadata": {
|
|
|
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
|
|
|
},
|
2024-01-13 10:38:10 +01:00
|
|
|
|
"source": [
|
|
|
|
|
"## suppliers.csv"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"execution_count": 10,
|
2024-01-13 10:38:10 +01:00
|
|
|
|
"id": "2e0dada0-9457-484c-aa55-77e44613ecca",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>id</th>\n",
|
|
|
|
|
" <th>name</th>\n",
|
|
|
|
|
" <th>manually_added</th>\n",
|
|
|
|
|
" <th>label</th>\n",
|
|
|
|
|
" <th>itr</th>\n",
|
|
|
|
|
" <th>updated_at</th>\n",
|
|
|
|
|
" <th>created_at</th>\n",
|
|
|
|
|
" <th>commission</th>\n",
|
|
|
|
|
" <th>identifier</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>1617</td>\n",
|
|
|
|
|
" <td>j4 administration</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2021-07-29 09:21:37.325772+02:00</td>\n",
|
|
|
|
|
" <td>2021-07-29 09:21:37.325772+02:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>5958b2a060ac3e31678b438892a1bd2e</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>8</td>\n",
|
|
|
|
|
" <td>non défini</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:16:35.329062+02:00</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:16:35.329062+02:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>52ff3466787b4d538407372e5f7afe0f</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
" <td>vad</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.896992+02:00</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.896992+02:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>1225483c97b36018cab2bea14ab78ea6</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>fort saint jean</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.833073+02:00</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.833073+02:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>001b9b4a524fe407150b8235b304d4ec</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>j4</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.888993+02:00</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.888993+02:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>6a0cf6edf20060344b465706b61719aa</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>5</th>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>revendeur</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.900987+02:00</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.900987+02:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>931239d4acb6214d7e5c98edecfb4916</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>6</th>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.893097+02:00</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.893097+02:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>bde8f2ccff510df8572d3214d86b837d</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>7</th>\n",
|
|
|
|
|
" <td>6</td>\n",
|
|
|
|
|
" <td>ccr</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.904974+02:00</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.904974+02:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>b48ec279411f7dbbb68393c61a9724d9</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>8</th>\n",
|
|
|
|
|
" <td>7</td>\n",
|
|
|
|
|
" <td>dab</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.908970+02:00</td>\n",
|
|
|
|
|
" <td>2020-09-03 13:11:23.908970+02:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>11c6d471fa4e354e62e684d293694202</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" id name manually_added label itr \\\n",
|
|
|
|
|
"0 1617 j4 administration False NaN NaN \n",
|
|
|
|
|
"1 8 non défini False NaN NaN \n",
|
|
|
|
|
"2 4 vad False NaN NaN \n",
|
|
|
|
|
"3 1 fort saint jean False NaN NaN \n",
|
|
|
|
|
"4 2 j4 False NaN NaN \n",
|
|
|
|
|
"5 5 revendeur False NaN NaN \n",
|
|
|
|
|
"6 3 vente en ligne False NaN NaN \n",
|
|
|
|
|
"7 6 ccr False NaN NaN \n",
|
|
|
|
|
"8 7 dab False NaN NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" updated_at created_at \\\n",
|
|
|
|
|
"0 2021-07-29 09:21:37.325772+02:00 2021-07-29 09:21:37.325772+02:00 \n",
|
|
|
|
|
"1 2020-09-03 13:16:35.329062+02:00 2020-09-03 13:16:35.329062+02:00 \n",
|
|
|
|
|
"2 2020-09-03 13:11:23.896992+02:00 2020-09-03 13:11:23.896992+02:00 \n",
|
|
|
|
|
"3 2020-09-03 13:11:23.833073+02:00 2020-09-03 13:11:23.833073+02:00 \n",
|
|
|
|
|
"4 2020-09-03 13:11:23.888993+02:00 2020-09-03 13:11:23.888993+02:00 \n",
|
|
|
|
|
"5 2020-09-03 13:11:23.900987+02:00 2020-09-03 13:11:23.900987+02:00 \n",
|
|
|
|
|
"6 2020-09-03 13:11:23.893097+02:00 2020-09-03 13:11:23.893097+02:00 \n",
|
|
|
|
|
"7 2020-09-03 13:11:23.904974+02:00 2020-09-03 13:11:23.904974+02:00 \n",
|
|
|
|
|
"8 2020-09-03 13:11:23.908970+02:00 2020-09-03 13:11:23.908970+02:00 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" commission identifier \n",
|
|
|
|
|
"0 NaN 5958b2a060ac3e31678b438892a1bd2e \n",
|
|
|
|
|
"1 NaN 52ff3466787b4d538407372e5f7afe0f \n",
|
|
|
|
|
"2 NaN 1225483c97b36018cab2bea14ab78ea6 \n",
|
|
|
|
|
"3 NaN 001b9b4a524fe407150b8235b304d4ec \n",
|
|
|
|
|
"4 NaN 6a0cf6edf20060344b465706b61719aa \n",
|
|
|
|
|
"5 NaN 931239d4acb6214d7e5c98edecfb4916 \n",
|
|
|
|
|
"6 NaN bde8f2ccff510df8572d3214d86b837d \n",
|
|
|
|
|
"7 NaN b48ec279411f7dbbb68393c61a9724d9 \n",
|
|
|
|
|
"8 NaN 11c6d471fa4e354e62e684d293694202 "
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"execution_count": 10,
|
2024-01-13 10:38:10 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_suppliers"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"execution_count": 11,
|
2024-01-13 10:38:10 +01:00
|
|
|
|
"id": "b583be02-ab60-4e14-9325-0204f203a1af",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
|
|
"RangeIndex: 9 entries, 0 to 8\n",
|
|
|
|
|
"Data columns (total 9 columns):\n",
|
|
|
|
|
" # Column Non-Null Count Dtype \n",
|
|
|
|
|
"--- ------ -------------- ----- \n",
|
|
|
|
|
" 0 id 9 non-null int64 \n",
|
|
|
|
|
" 1 name 9 non-null object \n",
|
|
|
|
|
" 2 manually_added 9 non-null bool \n",
|
|
|
|
|
" 3 label 0 non-null float64\n",
|
|
|
|
|
" 4 itr 0 non-null float64\n",
|
|
|
|
|
" 5 updated_at 9 non-null object \n",
|
|
|
|
|
" 6 created_at 9 non-null object \n",
|
|
|
|
|
" 7 commission 0 non-null float64\n",
|
|
|
|
|
" 8 identifier 9 non-null object \n",
|
|
|
|
|
"dtypes: bool(1), float64(3), int64(1), object(4)\n",
|
|
|
|
|
"memory usage: 713.0+ bytes\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_suppliers.info()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"execution_count": 12,
|
2024-01-13 10:38:10 +01:00
|
|
|
|
"id": "6d7f338e-e4d3-422b-9cdc-dec967c0b28e",
|
|
|
|
|
"metadata": {},
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"id 0.0\n",
|
|
|
|
|
"name 0.0\n",
|
|
|
|
|
"manually_added 0.0\n",
|
|
|
|
|
"label 100.0\n",
|
|
|
|
|
"itr 100.0\n",
|
|
|
|
|
"updated_at 0.0\n",
|
|
|
|
|
"created_at 0.0\n",
|
|
|
|
|
"commission 100.0\n",
|
|
|
|
|
"identifier 0.0\n",
|
|
|
|
|
"dtype: float64"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 12,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"source": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"df1_suppliers.isna().sum()/len(df1_suppliers)*100"
|
2024-01-13 10:38:10 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
2024-01-13 14:14:11 +01:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"execution_count": 13,
|
2024-01-13 14:14:11 +01:00
|
|
|
|
"id": "3c645ab7-16bf-4054-9ae2-15a8c32e29c6",
|
|
|
|
|
"metadata": {},
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"/tmp/ipykernel_619/302783287.py:3: SettingWithCopyWarning: \n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
|
|
|
" df1_suppliers_clean.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"source": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"# Selection des variables\n",
|
|
|
|
|
"df1_suppliers_clean = df1_suppliers[['id', 'name']]\n",
|
|
|
|
|
"df1_suppliers_clean.rename(columns = {'name' : 'supplier_name'}, inplace = True)"
|
2024-01-13 10:38:10 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"execution_count": 14,
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"id": "4de7e2e2-6da4-4618-8444-b524399c5493",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>id</th>\n",
|
|
|
|
|
" <th>supplier_name</th>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>1617</td>\n",
|
|
|
|
|
" <td>j4 administration</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>8</td>\n",
|
|
|
|
|
" <td>non défini</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
" <td>vad</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>1</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>fort saint jean</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>j4</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>5</th>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>revendeur</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>6</th>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" <td>3</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>vente en ligne</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>7</th>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" <td>6</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>ccr</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>8</th>\n",
|
|
|
|
|
" <td>7</td>\n",
|
|
|
|
|
" <td>dab</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" id supplier_name\n",
|
|
|
|
|
"0 1617 j4 administration\n",
|
|
|
|
|
"1 8 non défini\n",
|
|
|
|
|
"2 4 vad\n",
|
|
|
|
|
"3 1 fort saint jean\n",
|
|
|
|
|
"4 2 j4\n",
|
|
|
|
|
"5 5 revendeur\n",
|
|
|
|
|
"6 3 vente en ligne\n",
|
|
|
|
|
"7 6 ccr\n",
|
|
|
|
|
"8 7 dab"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"execution_count": 14,
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"df1_suppliers_clean"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "0a6df975-c7fc-45bc-92af-a0bdab17d795",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"metadata": {
|
|
|
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
|
|
|
},
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"source": [
|
|
|
|
|
"## type_ofs.csv"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"execution_count": 15,
|
|
|
|
|
"id": "a02f6594-3e91-4e87-bbb6-649c28d4f7e9",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>id</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>name</th>\n",
|
|
|
|
|
" <th>children</th>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" <th>created_at</th>\n",
|
|
|
|
|
" <th>updated_at</th>\n",
|
|
|
|
|
" <th>identifier</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>1</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>Atelier</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2021-01-05 11:55:51.188106+01:00</td>\n",
|
|
|
|
|
" <td>2021-01-05 11:55:51.188106+01:00</td>\n",
|
|
|
|
|
" <td>623ec4067827558b28972cf39fe81ee7</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>Billet en nombre</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2021-01-11 12:13:19.286301+01:00</td>\n",
|
|
|
|
|
" <td>2021-01-11 12:13:19.286301+01:00</td>\n",
|
|
|
|
|
" <td>a53d313a97296ee37caa066dbfe7a45c</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>Groupe</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2021-01-11 12:19:22.842917+01:00</td>\n",
|
|
|
|
|
" <td>2021-01-11 12:19:22.842917+01:00</td>\n",
|
|
|
|
|
" <td>1ab143efc3b85acbbc752fe8eb2b0b86</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
" <td>Revendeur</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2021-01-12 12:34:20.481236+01:00</td>\n",
|
|
|
|
|
" <td>2021-01-12 12:34:20.481236+01:00</td>\n",
|
|
|
|
|
" <td>8b332723366a07e1eef5f1c92f9ae067</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>Cinéma scolaire</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2021-01-25 19:16:05.141719+01:00</td>\n",
|
|
|
|
|
" <td>2021-01-25 19:16:05.141719+01:00</td>\n",
|
|
|
|
|
" <td>a12e62cb4c4f47e7406bd8fbff2bfe30</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>5</th>\n",
|
|
|
|
|
" <td>6</td>\n",
|
|
|
|
|
" <td>Musée famille</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2021-01-25 19:23:06.692627+01:00</td>\n",
|
|
|
|
|
" <td>2021-01-25 19:23:06.692627+01:00</td>\n",
|
|
|
|
|
" <td>1ec6c19283111ccb3ed67f52d414470e</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>6</th>\n",
|
|
|
|
|
" <td>7</td>\n",
|
|
|
|
|
" <td>Spectacle famille</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2021-01-25 19:28:21.390016+01:00</td>\n",
|
|
|
|
|
" <td>2021-01-25 19:28:21.390016+01:00</td>\n",
|
|
|
|
|
" <td>05e2104f1b74ced229c06847d6e91938</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>7</th>\n",
|
|
|
|
|
" <td>8</td>\n",
|
|
|
|
|
" <td>Masterclass</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2021-01-25 19:31:05.076904+01:00</td>\n",
|
|
|
|
|
" <td>2021-01-25 19:31:05.076904+01:00</td>\n",
|
|
|
|
|
" <td>9cc946edfb25e11b4282f58db16e6ae9</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>8</th>\n",
|
|
|
|
|
" <td>9</td>\n",
|
|
|
|
|
" <td>Spectacle</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2021-01-25 19:38:41.260535+01:00</td>\n",
|
|
|
|
|
" <td>2021-01-25 19:38:41.260535+01:00</td>\n",
|
|
|
|
|
" <td>d88321c347f0e0ab101184cdf25c94bf</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>9</th>\n",
|
|
|
|
|
" <td>10</td>\n",
|
|
|
|
|
" <td>Cinema</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2021-02-05 11:12:31.932576+01:00</td>\n",
|
|
|
|
|
" <td>2021-02-05 11:12:31.932576+01:00</td>\n",
|
|
|
|
|
" <td>0870fef2bfcd5b30a12e4f5c7f4aaba7</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>10</th>\n",
|
|
|
|
|
" <td>11</td>\n",
|
|
|
|
|
" <td>Musee</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2021-02-05 11:52:05.468207+01:00</td>\n",
|
|
|
|
|
" <td>2021-02-05 11:52:05.468207+01:00</td>\n",
|
|
|
|
|
" <td>8ba8934454cc62c7cdb3eb6e1b39df0c</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>11</th>\n",
|
|
|
|
|
" <td>12</td>\n",
|
|
|
|
|
" <td>Tarifs plein</td>\n",
|
|
|
|
|
" <td>category</td>\n",
|
|
|
|
|
" <td>2023-03-13 11:31:50.528331+01:00</td>\n",
|
|
|
|
|
" <td>2023-03-13 11:31:50.528331+01:00</td>\n",
|
|
|
|
|
" <td>a6969df76efc15d157be48e87a7bcf9a</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" id name children created_at \\\n",
|
|
|
|
|
"0 1 Atelier pricing_formula 2021-01-05 11:55:51.188106+01:00 \n",
|
|
|
|
|
"1 2 Billet en nombre pricing_formula 2021-01-11 12:13:19.286301+01:00 \n",
|
|
|
|
|
"2 3 Groupe pricing_formula 2021-01-11 12:19:22.842917+01:00 \n",
|
|
|
|
|
"3 4 Revendeur pricing_formula 2021-01-12 12:34:20.481236+01:00 \n",
|
|
|
|
|
"4 5 Cinéma scolaire pricing_formula 2021-01-25 19:16:05.141719+01:00 \n",
|
|
|
|
|
"5 6 Musée famille pricing_formula 2021-01-25 19:23:06.692627+01:00 \n",
|
|
|
|
|
"6 7 Spectacle famille pricing_formula 2021-01-25 19:28:21.390016+01:00 \n",
|
|
|
|
|
"7 8 Masterclass pricing_formula 2021-01-25 19:31:05.076904+01:00 \n",
|
|
|
|
|
"8 9 Spectacle pricing_formula 2021-01-25 19:38:41.260535+01:00 \n",
|
|
|
|
|
"9 10 Cinema pricing_formula 2021-02-05 11:12:31.932576+01:00 \n",
|
|
|
|
|
"10 11 Musee pricing_formula 2021-02-05 11:52:05.468207+01:00 \n",
|
|
|
|
|
"11 12 Tarifs plein category 2023-03-13 11:31:50.528331+01:00 \n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" updated_at identifier \n",
|
|
|
|
|
"0 2021-01-05 11:55:51.188106+01:00 623ec4067827558b28972cf39fe81ee7 \n",
|
|
|
|
|
"1 2021-01-11 12:13:19.286301+01:00 a53d313a97296ee37caa066dbfe7a45c \n",
|
|
|
|
|
"2 2021-01-11 12:19:22.842917+01:00 1ab143efc3b85acbbc752fe8eb2b0b86 \n",
|
|
|
|
|
"3 2021-01-12 12:34:20.481236+01:00 8b332723366a07e1eef5f1c92f9ae067 \n",
|
|
|
|
|
"4 2021-01-25 19:16:05.141719+01:00 a12e62cb4c4f47e7406bd8fbff2bfe30 \n",
|
|
|
|
|
"5 2021-01-25 19:23:06.692627+01:00 1ec6c19283111ccb3ed67f52d414470e \n",
|
|
|
|
|
"6 2021-01-25 19:28:21.390016+01:00 05e2104f1b74ced229c06847d6e91938 \n",
|
|
|
|
|
"7 2021-01-25 19:31:05.076904+01:00 9cc946edfb25e11b4282f58db16e6ae9 \n",
|
|
|
|
|
"8 2021-01-25 19:38:41.260535+01:00 d88321c347f0e0ab101184cdf25c94bf \n",
|
|
|
|
|
"9 2021-02-05 11:12:31.932576+01:00 0870fef2bfcd5b30a12e4f5c7f4aaba7 \n",
|
|
|
|
|
"10 2021-02-05 11:52:05.468207+01:00 8ba8934454cc62c7cdb3eb6e1b39df0c \n",
|
|
|
|
|
"11 2023-03-13 11:31:50.528331+01:00 a6969df76efc15d157be48e87a7bcf9a "
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"execution_count": 15,
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"df1_type_ofs"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"execution_count": 16,
|
|
|
|
|
"id": "e9c8d32b-22f4-4581-8af7-31cc1c31fa0e",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"RangeIndex: 12 entries, 0 to 11\n",
|
|
|
|
|
"Data columns (total 6 columns):\n",
|
|
|
|
|
" # Column Non-Null Count Dtype \n",
|
|
|
|
|
"--- ------ -------------- ----- \n",
|
|
|
|
|
" 0 id 12 non-null int64 \n",
|
|
|
|
|
" 1 name 12 non-null object\n",
|
|
|
|
|
" 2 children 12 non-null object\n",
|
|
|
|
|
" 3 created_at 12 non-null object\n",
|
|
|
|
|
" 4 updated_at 12 non-null object\n",
|
|
|
|
|
" 5 identifier 12 non-null object\n",
|
|
|
|
|
"dtypes: int64(1), object(5)\n",
|
|
|
|
|
"memory usage: 704.0+ bytes\n"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"df1_type_ofs.info()"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"execution_count": 17,
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"id": "cbb5e614-1fe5-4da0-bca0-8a242e0885da",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"/tmp/ipykernel_619/81842251.py:3: SettingWithCopyWarning: \n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
|
|
|
" df1_type_ofs_clean.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Selection des variables\n",
|
|
|
|
|
"df1_type_ofs_clean = df1_type_ofs[['id', 'name', 'children']]\n",
|
|
|
|
|
"df1_type_ofs_clean.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "676a9869-9a8b-4cd2-8b1c-0644b5229c72",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"metadata": {
|
|
|
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
|
|
|
},
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"source": [
|
|
|
|
|
"## purchases.csv"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"execution_count": 18,
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"id": "f8d36b72-f8e7-45e5-b4fa-e0803493fd3c",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"scrolled": true
|
|
|
|
|
},
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>id</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>purchase_date</th>\n",
|
|
|
|
|
" <th>customer_id</th>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" <th>created_at</th>\n",
|
|
|
|
|
" <th>updated_at</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>number</th>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" <th>identifier</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>5145662</td>\n",
|
|
|
|
|
" <td>2019-07-17 11:17:53+02:00</td>\n",
|
|
|
|
|
" <td>6632</td>\n",
|
|
|
|
|
" <td>2021-12-28 20:48:51.569237+01:00</td>\n",
|
|
|
|
|
" <td>2021-12-28 20:48:51.569237+01:00</td>\n",
|
|
|
|
|
" <td>fa80c83b29a268b45728c910a8afcf79</td>\n",
|
|
|
|
|
" <td>82877c41df26f832eb823a83acd1a172</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>4941642</td>\n",
|
|
|
|
|
" <td>2018-10-31 11:59:00+01:00</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" <td>1</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>2021-12-28 20:31:48.196681+01:00</td>\n",
|
|
|
|
|
" <td>2022-03-03 17:52:21.958861+01:00</td>\n",
|
|
|
|
|
" <td>597b6c06adfe6acc539b29b657b80da0</td>\n",
|
|
|
|
|
" <td>e7102ebe65526c427245533ebabe66e5</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>5088860</td>\n",
|
|
|
|
|
" <td>2018-10-31 12:45:12+01:00</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>2021-12-28 20:46:34.703542+01:00</td>\n",
|
|
|
|
|
" <td>2021-12-28 20:46:34.703542+01:00</td>\n",
|
|
|
|
|
" <td>4a7f6baaf9be6a99e3fead7f7e981fa8</td>\n",
|
|
|
|
|
" <td>af75c4ae53d1b6957875538355b162e1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>5088862</td>\n",
|
|
|
|
|
" <td>2018-10-31 13:07:12+01:00</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>2021-12-28 20:46:34.704773+01:00</td>\n",
|
|
|
|
|
" <td>2021-12-28 20:46:34.704773+01:00</td>\n",
|
|
|
|
|
" <td>1d83dfad44b73070d1c6d5875d0edd2d</td>\n",
|
|
|
|
|
" <td>4b2fe34659b177209b07270ae1043b40</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>5088863</td>\n",
|
|
|
|
|
" <td>2018-10-31 13:08:50+01:00</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>2021-12-28 20:46:34.705453+01:00</td>\n",
|
|
|
|
|
" <td>2021-12-28 20:46:34.705453+01:00</td>\n",
|
|
|
|
|
" <td>7bfe2bc9c1670c973d0960e3fd408cf8</td>\n",
|
|
|
|
|
" <td>b115f04a99b94df9e4a32185844f0998</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>742245</th>\n",
|
|
|
|
|
" <td>8007695</td>\n",
|
|
|
|
|
" <td>2023-11-08 17:51:19+01:00</td>\n",
|
|
|
|
|
" <td>1256133</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:33.920187+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:33.920187+01:00</td>\n",
|
|
|
|
|
" <td>99ad774dedbad43feb73514765d2f0ba</td>\n",
|
|
|
|
|
" <td>d68558180b4bf2e8a945724843655775</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>742246</th>\n",
|
|
|
|
|
" <td>8007696</td>\n",
|
|
|
|
|
" <td>2023-11-08 18:17:51+01:00</td>\n",
|
|
|
|
|
" <td>1256134</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:33.921967+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:33.921967+01:00</td>\n",
|
|
|
|
|
" <td>c1511614c511c5f95980172690179102</td>\n",
|
|
|
|
|
" <td>f5102d910a7731091f239ad7b0df35b4</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>742247</th>\n",
|
|
|
|
|
" <td>8007697</td>\n",
|
|
|
|
|
" <td>2023-11-08 18:23:54+01:00</td>\n",
|
|
|
|
|
" <td>1256135</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:33.923034+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:33.923034+01:00</td>\n",
|
|
|
|
|
" <td>33b64b39cc53428b4f17d65ff5b93104</td>\n",
|
|
|
|
|
" <td>e2b917626be60cc2c3207cc037fe69e4</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>742248</th>\n",
|
|
|
|
|
" <td>8007698</td>\n",
|
|
|
|
|
" <td>2023-11-08 19:32:18+01:00</td>\n",
|
|
|
|
|
" <td>1256136</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:33.924135+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:33.924135+01:00</td>\n",
|
|
|
|
|
" <td>9ae0b129e704b3d9c093ce9c7c4e5039</td>\n",
|
|
|
|
|
" <td>5bfa23236c31f8562c3a0233c1b53b31</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>742249</th>\n",
|
|
|
|
|
" <td>8007699</td>\n",
|
|
|
|
|
" <td>2023-11-08 20:30:28+01:00</td>\n",
|
|
|
|
|
" <td>1256137</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:33.925382+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 07:51:33.925382+01:00</td>\n",
|
|
|
|
|
" <td>d31ced089c2b1f90479257a4686f9306</td>\n",
|
|
|
|
|
" <td>d86b1e0de3ff01eaf04fbcd031ac5fef</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"<p>742250 rows × 7 columns</p>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" id purchase_date customer_id \\\n",
|
|
|
|
|
"0 5145662 2019-07-17 11:17:53+02:00 6632 \n",
|
|
|
|
|
"1 4941642 2018-10-31 11:59:00+01:00 1 \n",
|
|
|
|
|
"2 5088860 2018-10-31 12:45:12+01:00 1 \n",
|
|
|
|
|
"3 5088862 2018-10-31 13:07:12+01:00 1 \n",
|
|
|
|
|
"4 5088863 2018-10-31 13:08:50+01:00 1 \n",
|
|
|
|
|
"... ... ... ... \n",
|
|
|
|
|
"742245 8007695 2023-11-08 17:51:19+01:00 1256133 \n",
|
|
|
|
|
"742246 8007696 2023-11-08 18:17:51+01:00 1256134 \n",
|
|
|
|
|
"742247 8007697 2023-11-08 18:23:54+01:00 1256135 \n",
|
|
|
|
|
"742248 8007698 2023-11-08 19:32:18+01:00 1256136 \n",
|
|
|
|
|
"742249 8007699 2023-11-08 20:30:28+01:00 1256137 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" created_at updated_at \\\n",
|
|
|
|
|
"0 2021-12-28 20:48:51.569237+01:00 2021-12-28 20:48:51.569237+01:00 \n",
|
|
|
|
|
"1 2021-12-28 20:31:48.196681+01:00 2022-03-03 17:52:21.958861+01:00 \n",
|
|
|
|
|
"2 2021-12-28 20:46:34.703542+01:00 2021-12-28 20:46:34.703542+01:00 \n",
|
|
|
|
|
"3 2021-12-28 20:46:34.704773+01:00 2021-12-28 20:46:34.704773+01:00 \n",
|
|
|
|
|
"4 2021-12-28 20:46:34.705453+01:00 2021-12-28 20:46:34.705453+01:00 \n",
|
|
|
|
|
"... ... ... \n",
|
|
|
|
|
"742245 2023-11-09 07:51:33.920187+01:00 2023-11-09 07:51:33.920187+01:00 \n",
|
|
|
|
|
"742246 2023-11-09 07:51:33.921967+01:00 2023-11-09 07:51:33.921967+01:00 \n",
|
|
|
|
|
"742247 2023-11-09 07:51:33.923034+01:00 2023-11-09 07:51:33.923034+01:00 \n",
|
|
|
|
|
"742248 2023-11-09 07:51:33.924135+01:00 2023-11-09 07:51:33.924135+01:00 \n",
|
|
|
|
|
"742249 2023-11-09 07:51:33.925382+01:00 2023-11-09 07:51:33.925382+01:00 \n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" number identifier \n",
|
|
|
|
|
"0 fa80c83b29a268b45728c910a8afcf79 82877c41df26f832eb823a83acd1a172 \n",
|
|
|
|
|
"1 597b6c06adfe6acc539b29b657b80da0 e7102ebe65526c427245533ebabe66e5 \n",
|
|
|
|
|
"2 4a7f6baaf9be6a99e3fead7f7e981fa8 af75c4ae53d1b6957875538355b162e1 \n",
|
|
|
|
|
"3 1d83dfad44b73070d1c6d5875d0edd2d 4b2fe34659b177209b07270ae1043b40 \n",
|
|
|
|
|
"4 7bfe2bc9c1670c973d0960e3fd408cf8 b115f04a99b94df9e4a32185844f0998 \n",
|
|
|
|
|
"... ... ... \n",
|
|
|
|
|
"742245 99ad774dedbad43feb73514765d2f0ba d68558180b4bf2e8a945724843655775 \n",
|
|
|
|
|
"742246 c1511614c511c5f95980172690179102 f5102d910a7731091f239ad7b0df35b4 \n",
|
|
|
|
|
"742247 33b64b39cc53428b4f17d65ff5b93104 e2b917626be60cc2c3207cc037fe69e4 \n",
|
|
|
|
|
"742248 9ae0b129e704b3d9c093ce9c7c4e5039 5bfa23236c31f8562c3a0233c1b53b31 \n",
|
|
|
|
|
"742249 d31ced089c2b1f90479257a4686f9306 d86b1e0de3ff01eaf04fbcd031ac5fef \n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"[742250 rows x 7 columns]"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"execution_count": 18,
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"df1_purchases"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"execution_count": 19,
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"id": "3f266a9d-6eee-4b27-b6cc-d401bc2fa0b8",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"RangeIndex: 742250 entries, 0 to 742249\n",
|
|
|
|
|
"Data columns (total 7 columns):\n",
|
|
|
|
|
" # Column Non-Null Count Dtype \n",
|
|
|
|
|
"--- ------ -------------- ----- \n",
|
|
|
|
|
" 0 id 742250 non-null int64 \n",
|
|
|
|
|
" 1 purchase_date 742250 non-null object\n",
|
|
|
|
|
" 2 customer_id 742250 non-null int64 \n",
|
|
|
|
|
" 3 created_at 742250 non-null object\n",
|
|
|
|
|
" 4 updated_at 742250 non-null object\n",
|
|
|
|
|
" 5 number 742250 non-null object\n",
|
|
|
|
|
" 6 identifier 742250 non-null object\n",
|
|
|
|
|
"dtypes: int64(2), object(5)\n",
|
|
|
|
|
"memory usage: 39.6+ MB\n"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"df1_purchases.info()"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"execution_count": 20,
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"id": "8b24ccbc-ccf0-4722-8cd9-8ee8aa90d1fd",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"metadata": {},
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"outputs": [],
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"source": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"# Nettoyage purchase_date\n",
|
|
|
|
|
"df1_purchases['purchase_date'] = pd.to_datetime(df1_purchases['purchase_date'], utc = True)\n",
|
|
|
|
|
"df1_purchases['purchase_date'] = pd.to_datetime(df1_purchases['purchase_date'], format = 'ISO8601')"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"execution_count": 21,
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"id": "27d18584-228f-4698-85d6-4d23151ea5ed",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"RangeIndex: 742250 entries, 0 to 742249\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"Data columns (total 7 columns):\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" # Column Non-Null Count Dtype \n",
|
|
|
|
|
"--- ------ -------------- ----- \n",
|
|
|
|
|
" 0 id 742250 non-null int64 \n",
|
|
|
|
|
" 1 purchase_date 742250 non-null datetime64[ns, UTC]\n",
|
|
|
|
|
" 2 customer_id 742250 non-null int64 \n",
|
|
|
|
|
" 3 created_at 742250 non-null object \n",
|
|
|
|
|
" 4 updated_at 742250 non-null object \n",
|
|
|
|
|
" 5 number 742250 non-null object \n",
|
|
|
|
|
" 6 identifier 742250 non-null object \n",
|
|
|
|
|
"dtypes: datetime64[ns, UTC](1), int64(2), object(4)\n",
|
|
|
|
|
"memory usage: 39.6+ MB\n"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"df1_purchases.info()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"execution_count": 22,
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"id": "ea22e3a2-2b25-481d-8ebc-194e11a06cd9",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Selection des variables\n",
|
|
|
|
|
"df1_purchases_clean = df1_purchases[['id', 'purchase_date', 'customer_id']]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "53227600-c1c5-48aa-9f5d-db5a23a8a22a",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Fusion de l'ensemble des données billétiques"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"execution_count": 23,
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"id": "e0b8b47a-b321-4a79-823c-36a131a78ac7",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Fusion avec fournisseurs\n",
|
|
|
|
|
"df1_ticket_information = pd.merge(df1_tickets_clean, df1_suppliers_clean, left_on = 'supplier_id', right_on = 'id', how = 'inner')\n",
|
|
|
|
|
"df1_ticket_information.drop(['supplier_id', 'id'], axis = 1, inplace=True)\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"# Fusion avec type de tickets\n",
|
|
|
|
|
"df1_ticket_information = pd.merge(df1_ticket_information, df1_type_ofs_clean, left_on = 'type_of', right_on = 'id', how = 'inner')\n",
|
|
|
|
|
"df1_ticket_information.drop(['type_of', 'id'], axis = 1, inplace=True)\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"# Fusion avec achats\n",
|
|
|
|
|
"df1_ticket_information = pd.merge(df1_ticket_information, df1_purchases_clean, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n",
|
|
|
|
|
"df1_ticket_information.drop(['purchase_id', 'id'], axis = 1, inplace=True)"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"execution_count": 24,
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"id": "7572e6e7-f28d-43ba-b045-b9fa09e68e1d",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"metadata": {
|
|
|
|
|
"scrolled": true
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>ticket_id</th>\n",
|
|
|
|
|
" <th>product_id</th>\n",
|
|
|
|
|
" <th>is_from_subscription</th>\n",
|
|
|
|
|
" <th>supplier_name</th>\n",
|
|
|
|
|
" <th>type_of_ticket_name</th>\n",
|
|
|
|
|
" <th>children</th>\n",
|
|
|
|
|
" <th>purchase_date</th>\n",
|
|
|
|
|
" <th>customer_id</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>13070859</td>\n",
|
|
|
|
|
" <td>225251</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>Atelier</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|
|
|
|
" <td>48187</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>13070860</td>\n",
|
|
|
|
|
" <td>224914</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>Atelier</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|
|
|
|
" <td>48187</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>13070861</td>\n",
|
|
|
|
|
" <td>224914</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>Atelier</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|
|
|
|
" <td>48187</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>13070862</td>\n",
|
|
|
|
|
" <td>224914</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>Atelier</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|
|
|
|
" <td>48187</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>13070863</td>\n",
|
|
|
|
|
" <td>224914</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>Atelier</td>\n",
|
|
|
|
|
" <td>pricing_formula</td>\n",
|
|
|
|
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|
|
|
|
" <td>48187</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1826667</th>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>20662815</td>\n",
|
|
|
|
|
" <td>405689</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>Atelier</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" <td>pricing_formula</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>2023-11-08 17:23:54+00:00</td>\n",
|
|
|
|
|
" <td>1256135</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1826668</th>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>20662816</td>\n",
|
|
|
|
|
" <td>403658</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>Atelier</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" <td>pricing_formula</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>2023-11-08 18:32:18+00:00</td>\n",
|
|
|
|
|
" <td>1256136</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1826669</th>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>20662817</td>\n",
|
|
|
|
|
" <td>403658</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>Atelier</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" <td>pricing_formula</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>2023-11-08 18:32:18+00:00</td>\n",
|
|
|
|
|
" <td>1256136</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1826670</th>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>20662818</td>\n",
|
|
|
|
|
" <td>403658</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>Atelier</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" <td>pricing_formula</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>2023-11-08 19:30:28+00:00</td>\n",
|
|
|
|
|
" <td>1256137</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1826671</th>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>20662819</td>\n",
|
|
|
|
|
" <td>403658</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>Atelier</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" <td>pricing_formula</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>2023-11-08 19:30:28+00:00</td>\n",
|
|
|
|
|
" <td>1256137</td>\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>1826672 rows × 8 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" ticket_id product_id is_from_subscription supplier_name \\\n",
|
|
|
|
|
"0 13070859 225251 False vente en ligne \n",
|
|
|
|
|
"1 13070860 224914 False vente en ligne \n",
|
|
|
|
|
"2 13070861 224914 False vente en ligne \n",
|
|
|
|
|
"3 13070862 224914 False vente en ligne \n",
|
|
|
|
|
"4 13070863 224914 False vente en ligne \n",
|
|
|
|
|
"... ... ... ... ... \n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"1826667 20662815 405689 False vente en ligne \n",
|
|
|
|
|
"1826668 20662816 403658 False vente en ligne \n",
|
|
|
|
|
"1826669 20662817 403658 False vente en ligne \n",
|
|
|
|
|
"1826670 20662818 403658 False vente en ligne \n",
|
|
|
|
|
"1826671 20662819 403658 False vente en ligne \n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"\n",
|
|
|
|
|
" type_of_ticket_name children purchase_date \\\n",
|
|
|
|
|
"0 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
|
|
|
|
|
"1 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
|
|
|
|
|
"2 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
|
|
|
|
|
"3 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
|
|
|
|
|
"4 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
|
|
|
|
|
"... ... ... ... \n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"1826667 Atelier pricing_formula 2023-11-08 17:23:54+00:00 \n",
|
|
|
|
|
"1826668 Atelier pricing_formula 2023-11-08 18:32:18+00:00 \n",
|
|
|
|
|
"1826669 Atelier pricing_formula 2023-11-08 18:32:18+00:00 \n",
|
|
|
|
|
"1826670 Atelier pricing_formula 2023-11-08 19:30:28+00:00 \n",
|
|
|
|
|
"1826671 Atelier pricing_formula 2023-11-08 19:30:28+00:00 \n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"\n",
|
|
|
|
|
" customer_id \n",
|
|
|
|
|
"0 48187 \n",
|
|
|
|
|
"1 48187 \n",
|
|
|
|
|
"2 48187 \n",
|
|
|
|
|
"3 48187 \n",
|
|
|
|
|
"4 48187 \n",
|
|
|
|
|
"... ... \n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"1826667 1256135 \n",
|
|
|
|
|
"1826668 1256136 \n",
|
|
|
|
|
"1826669 1256136 \n",
|
|
|
|
|
"1826670 1256137 \n",
|
|
|
|
|
"1826671 1256137 \n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"\n",
|
|
|
|
|
"[1826672 rows x 8 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 24,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_ticket_information"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "ad2d0059-76d3-44b9-b0eb-0b0ca4d4ba75",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"# Utilisation de fonctions"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"execution_count": 50,
|
|
|
|
|
"id": "d237be96-8c86-4a91-b7a1-487e87a16c3d",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"def cleaning_date(df, column_name):\n",
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
" Nettoie la colonne spécifiée du DataFrame en convertissant les valeurs en datetime avec le format ISO8601.\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" Parameters:\n",
|
|
|
|
|
" - df: DataFrame\n",
|
|
|
|
|
" Le DataFrame contenant la colonne à nettoyer.\n",
|
|
|
|
|
" - column_name: str\n",
|
|
|
|
|
" Le nom de la colonne à nettoyer.\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" Returns:\n",
|
|
|
|
|
" - DataFrame\n",
|
|
|
|
|
" Le DataFrame modifié avec la colonne nettoyée.\n",
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
|
|
|
|
|
" return df"
|
2024-01-14 17:38:16 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"execution_count": 51,
|
|
|
|
|
"id": "c1afe322-ff41-4760-819e-0195fed5b27d",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"metadata": {},
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"name": "stdout",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
|
|
"RangeIndex: 20 entries, 0 to 19\n",
|
|
|
|
|
"Data columns (total 2 columns):\n",
|
|
|
|
|
" # Column Non-Null Count Dtype \n",
|
|
|
|
|
"--- ------ -------------- ----- \n",
|
|
|
|
|
" 0 opened_at 8 non-null object \n",
|
|
|
|
|
" 1 opened_at_clean 8 non-null datetime64[ns, UTC]\n",
|
|
|
|
|
"dtypes: datetime64[ns, UTC](1), object(1)\n",
|
|
|
|
|
"memory usage: 448.0+ bytes\n"
|
2024-01-14 17:38:16 +01:00
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"# Créer un DataFrame exemple\n",
|
|
|
|
|
"df_not_clean = df1_campaign_stats[['opened_at']].head(20)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Appliquer la fonction pour nettoyer la colonne 'purchase_date' de manière vectorisée\n",
|
|
|
|
|
"df_clean = cleaning_date(df_not_clean, 'opened_at')\n",
|
|
|
|
|
"df_clean.rename(columns = {'opened_at' : 'opened_at_clean'}, inplace = True)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"test = pd.concat([df1_campaign_stats[['opened_at']].head(20), df_clean], axis=1)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"test.info()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "27ecf058-23eb-4018-abbd-68c4ebe7c786",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Nettoyage, selection et fusion"
|
2024-01-14 17:38:16 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"execution_count": 23,
|
|
|
|
|
"id": "d887898c-6a21-41ed-901d-4d6fdbca5372",
|
|
|
|
|
"metadata": {},
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>ticket_id</th>\n",
|
|
|
|
|
" <th>product_id</th>\n",
|
|
|
|
|
" <th>is_from_subscription</th>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <th>type_of</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>supplier_name</th>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" <th>purchase_date</th>\n",
|
|
|
|
|
" <th>customer_id</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>13070859</td>\n",
|
|
|
|
|
" <td>225251</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>1</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|
|
|
|
" <td>48187</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>13070860</td>\n",
|
|
|
|
|
" <td>224914</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>1</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|
|
|
|
" <td>48187</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>13070861</td>\n",
|
|
|
|
|
" <td>224914</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>1</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|
|
|
|
" <td>48187</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>13070862</td>\n",
|
|
|
|
|
" <td>224914</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>1</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|
|
|
|
" <td>48187</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>13070863</td>\n",
|
|
|
|
|
" <td>224914</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>1</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|
|
|
|
" <td>48187</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>1826667</th>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>20662815</td>\n",
|
|
|
|
|
" <td>405689</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>2023-11-08 17:23:54+00:00</td>\n",
|
|
|
|
|
" <td>1256135</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>1826668</th>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>20662816</td>\n",
|
|
|
|
|
" <td>403658</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>2023-11-08 18:32:18+00:00</td>\n",
|
|
|
|
|
" <td>1256136</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>1826669</th>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>20662817</td>\n",
|
|
|
|
|
" <td>403658</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>2023-11-08 18:32:18+00:00</td>\n",
|
|
|
|
|
" <td>1256136</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>1826670</th>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>20662818</td>\n",
|
|
|
|
|
" <td>403658</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>2023-11-08 19:30:28+00:00</td>\n",
|
|
|
|
|
" <td>1256137</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <th>1826671</th>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>20662819</td>\n",
|
|
|
|
|
" <td>403658</td>\n",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
" <td>False</td>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>vente en ligne</td>\n",
|
|
|
|
|
" <td>2023-11-08 19:30:28+00:00</td>\n",
|
|
|
|
|
" <td>1256137</td>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"<p>1826672 rows × 7 columns</p>\n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" ticket_id product_id is_from_subscription type_of supplier_name \\\n",
|
|
|
|
|
"0 13070859 225251 False 1 vente en ligne \n",
|
|
|
|
|
"1 13070860 224914 False 1 vente en ligne \n",
|
|
|
|
|
"2 13070861 224914 False 1 vente en ligne \n",
|
|
|
|
|
"3 13070862 224914 False 1 vente en ligne \n",
|
|
|
|
|
"4 13070863 224914 False 1 vente en ligne \n",
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
"1826667 20662815 405689 False 1 vente en ligne \n",
|
|
|
|
|
"1826668 20662816 403658 False 1 vente en ligne \n",
|
|
|
|
|
"1826669 20662817 403658 False 1 vente en ligne \n",
|
|
|
|
|
"1826670 20662818 403658 False 1 vente en ligne \n",
|
|
|
|
|
"1826671 20662819 403658 False 1 vente en ligne \n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
" purchase_date customer_id \n",
|
|
|
|
|
"0 2018-12-28 14:47:50+00:00 48187 \n",
|
|
|
|
|
"1 2018-12-28 14:47:50+00:00 48187 \n",
|
|
|
|
|
"2 2018-12-28 14:47:50+00:00 48187 \n",
|
|
|
|
|
"3 2018-12-28 14:47:50+00:00 48187 \n",
|
|
|
|
|
"4 2018-12-28 14:47:50+00:00 48187 \n",
|
|
|
|
|
"... ... ... \n",
|
|
|
|
|
"1826667 2023-11-08 17:23:54+00:00 1256135 \n",
|
|
|
|
|
"1826668 2023-11-08 18:32:18+00:00 1256136 \n",
|
|
|
|
|
"1826669 2023-11-08 18:32:18+00:00 1256136 \n",
|
|
|
|
|
"1826670 2023-11-08 19:30:28+00:00 1256137 \n",
|
|
|
|
|
"1826671 2023-11-08 19:30:28+00:00 1256137 \n",
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"\n",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"[1826672 rows x 7 columns]"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"execution_count": 23,
|
2024-01-10 19:19:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"df1_ticket_information"
|
2024-01-10 19:19:51 +01:00
|
|
|
|
]
|
2024-01-13 14:47:24 +01:00
|
|
|
|
},
|
2024-02-04 16:02:01 +01:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 14,
|
|
|
|
|
"id": "ac9a6373-c1c6-46b5-873b-dc22f17bcbdb",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
|
|
"RangeIndex: 1826672 entries, 0 to 1826671\n",
|
|
|
|
|
"Data columns (total 7 columns):\n",
|
|
|
|
|
" # Column Dtype \n",
|
|
|
|
|
"--- ------ ----- \n",
|
|
|
|
|
" 0 ticket_id int64 \n",
|
|
|
|
|
" 1 product_id int64 \n",
|
|
|
|
|
" 2 is_from_subscription bool \n",
|
|
|
|
|
" 3 type_of int64 \n",
|
|
|
|
|
" 4 supplier_name object \n",
|
|
|
|
|
" 5 purchase_date datetime64[ns, UTC]\n",
|
|
|
|
|
" 6 customer_id int64 \n",
|
|
|
|
|
"dtypes: bool(1), datetime64[ns, UTC](1), int64(4), object(1)\n",
|
|
|
|
|
"memory usage: 85.4+ MB\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_ticket_information.info()"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-01-14 17:38:16 +01:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"id": "b1719943-89eb-4ba0-a107-2f96d5d01ec9",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"# Customer information"
|
2024-01-14 17:38:16 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "a2132ee2-3f22-45fd-b65b-72689c8b672c",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"## Target area"
|
2024-01-14 17:38:16 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"execution_count": 60,
|
|
|
|
|
"id": "da5d4708-7147-4cc8-8686-52d4bcba5a7a",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"metadata": {},
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"/tmp/ipykernel_619/2625134041.py:3: SettingWithCopyWarning: \n",
|
|
|
|
|
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
|
|
|
" df1_targets_clean.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"source": [
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"# Target.csv cleaning\n",
|
|
|
|
|
"df1_targets_clean = df1_targets[[\"id\", \"target_type_id\", \"name\"]]\n",
|
|
|
|
|
"df1_targets_clean.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# target_type cleaning\n",
|
|
|
|
|
"df1_target_types_clean = df1_target_types[[\"id\",\"is_import\",\"name\"]].add_prefix(\"target_type_\")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"#customer_target_mappings cleaning\n",
|
|
|
|
|
"df1_customer_target_mappings_clean = df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Merge target et target_type\n",
|
|
|
|
|
"df1_targets_full = pd.merge(df1_targets_clean, df1_target_types_clean, left_on='target_type_id', right_on='target_type_id', how='inner')\n",
|
|
|
|
|
"df1_targets_full.drop(['target_type_id'], axis = 1, inplace=True)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Merge\n",
|
|
|
|
|
"df1_targets_full = pd.merge(df1_customer_target_mappings_clean, df1_targets_full, left_on='target_id', right_on='target_id', how='inner')\n",
|
|
|
|
|
"df1_targets_full.drop(['target_id'], axis = 1, inplace=True)"
|
2024-01-14 17:38:16 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"execution_count": 62,
|
|
|
|
|
"id": "b4fa5fe3-ce8e-4b0a-af94-fb468d241bad",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"text/plain": [
|
|
|
|
|
"id 5.080902\n",
|
|
|
|
|
"dtype: float64"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 62,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_targets_test = df1_targets_full[['id', 'customer_id']].groupby(['customer_id']).count()\n",
|
|
|
|
|
"len(df1_targets_test[df1_targets_test['id'] > 1]) / len(df1_targets_test)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# 99,6% des 151 000 client visés sont catégorisés plusieurs fois et en moyenne 5 fois... \n",
|
|
|
|
|
"df1_targets_test.mean()\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 57,
|
|
|
|
|
"id": "8072bbb7-1360-4882-bb2b-2f43b6beea0d",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"scrolled": true
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>id</th>\n",
|
|
|
|
|
" <th>customer_id</th>\n",
|
|
|
|
|
" <th>target_name</th>\n",
|
|
|
|
|
" <th>target_type_is_import</th>\n",
|
|
|
|
|
" <th>target_type_name</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>8793</th>\n",
|
|
|
|
|
" <td>4584599</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>consentement optin jeune public</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>13249</th>\n",
|
|
|
|
|
" <td>4567465</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>DDCP rentrée culturelle 2023</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>21424</th>\n",
|
|
|
|
|
" <td>4544805</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>spectateurs cine dimanche_cine concert_2122</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>21665</th>\n",
|
|
|
|
|
" <td>4544911</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>DDCP Cine 2023</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>22811</th>\n",
|
|
|
|
|
" <td>4545766</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>DDCP OLBJ! 2023</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>57305</th>\n",
|
|
|
|
|
" <td>4457909</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>ddcp_promo_visiteurs occasionnels_musee_8mois</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_dynamic_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>58843</th>\n",
|
|
|
|
|
" <td>3688872</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>DDCP promo livemag</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>66813</th>\n",
|
|
|
|
|
" <td>4313646</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>DDCP spectateurs Classique mais pas que 2022</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>68367</th>\n",
|
|
|
|
|
" <td>4547662</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>ddcp_promo_musee_au moins 3 achats_dps8mois</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_dynamic_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>77320</th>\n",
|
|
|
|
|
" <td>4285520</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>DDCP spectateurs Iminente</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>84350</th>\n",
|
|
|
|
|
" <td>4037805</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>DDCP spectateurs Marseille Jazz 18-19-21</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>85383</th>\n",
|
|
|
|
|
" <td>4569504</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>DDCP rendez-vous de septembre offre spéciale</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>92868</th>\n",
|
|
|
|
|
" <td>4433064</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>ddcp_promo_plein air_ateliers_jardins</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>99670</th>\n",
|
|
|
|
|
" <td>3858684</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>Acid Arab</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>105477</th>\n",
|
|
|
|
|
" <td>4321810</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>Arenametrix_bascule tel vers sib</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>169513</th>\n",
|
|
|
|
|
" <td>3697992</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>ddcp_achats billets nb dps 19052021</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>214421</th>\n",
|
|
|
|
|
" <td>2925324</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>consentement optout scolaires</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>234546</th>\n",
|
|
|
|
|
" <td>4575957</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>Portrait de Leila shahid</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>259808</th>\n",
|
|
|
|
|
" <td>3722259</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>consentement optin b2b</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>274380</th>\n",
|
|
|
|
|
" <td>4510423</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>DDCP_marseille_jazz_2023</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>307511</th>\n",
|
|
|
|
|
" <td>5174466</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>ddcp actoral 21-22</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>357509</th>\n",
|
|
|
|
|
" <td>4442526</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>ddcp musique barvalo</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>392920</th>\n",
|
|
|
|
|
" <td>4390642</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>ddcp_md_promo_spectateurs theatre contempo</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>449620</th>\n",
|
|
|
|
|
" <td>4411897</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>FORMATION _ acheteurs optin last year</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_dynamic_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>503809</th>\n",
|
|
|
|
|
" <td>4734591</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>consentement optin mediation specialisee</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>651222</th>\n",
|
|
|
|
|
" <td>3554426</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>consentement optin b2c</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>654246</th>\n",
|
|
|
|
|
" <td>5182212</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>DDCP spectateurs Festival de Marseille 2023</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>654395</th>\n",
|
|
|
|
|
" <td>5182456</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>rencontres_echelle_spectateurs_2021_2023</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>manual_static_filter</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" id customer_id target_name \\\n",
|
|
|
|
|
"8793 4584599 1 consentement optin jeune public \n",
|
|
|
|
|
"13249 4567465 1 DDCP rentrée culturelle 2023 \n",
|
|
|
|
|
"21424 4544805 1 spectateurs cine dimanche_cine concert_2122 \n",
|
|
|
|
|
"21665 4544911 1 DDCP Cine 2023 \n",
|
|
|
|
|
"22811 4545766 1 DDCP OLBJ! 2023 \n",
|
|
|
|
|
"57305 4457909 1 ddcp_promo_visiteurs occasionnels_musee_8mois \n",
|
|
|
|
|
"58843 3688872 1 DDCP promo livemag \n",
|
|
|
|
|
"66813 4313646 1 DDCP spectateurs Classique mais pas que 2022 \n",
|
|
|
|
|
"68367 4547662 1 ddcp_promo_musee_au moins 3 achats_dps8mois \n",
|
|
|
|
|
"77320 4285520 1 DDCP spectateurs Iminente \n",
|
|
|
|
|
"84350 4037805 1 DDCP spectateurs Marseille Jazz 18-19-21 \n",
|
|
|
|
|
"85383 4569504 1 DDCP rendez-vous de septembre offre spéciale \n",
|
|
|
|
|
"92868 4433064 1 ddcp_promo_plein air_ateliers_jardins \n",
|
|
|
|
|
"99670 3858684 1 Acid Arab \n",
|
|
|
|
|
"105477 4321810 1 Arenametrix_bascule tel vers sib \n",
|
|
|
|
|
"169513 3697992 1 ddcp_achats billets nb dps 19052021 \n",
|
|
|
|
|
"214421 2925324 1 consentement optout scolaires \n",
|
|
|
|
|
"234546 4575957 1 Portrait de Leila shahid \n",
|
|
|
|
|
"259808 3722259 1 consentement optin b2b \n",
|
|
|
|
|
"274380 4510423 1 DDCP_marseille_jazz_2023 \n",
|
|
|
|
|
"307511 5174466 1 ddcp actoral 21-22 \n",
|
|
|
|
|
"357509 4442526 1 ddcp musique barvalo \n",
|
|
|
|
|
"392920 4390642 1 ddcp_md_promo_spectateurs theatre contempo \n",
|
|
|
|
|
"449620 4411897 1 FORMATION _ acheteurs optin last year \n",
|
|
|
|
|
"503809 4734591 1 consentement optin mediation specialisee \n",
|
|
|
|
|
"651222 3554426 1 consentement optin b2c \n",
|
|
|
|
|
"654246 5182212 1 DDCP spectateurs Festival de Marseille 2023 \n",
|
|
|
|
|
"654395 5182456 1 rencontres_echelle_spectateurs_2021_2023 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" target_type_is_import target_type_name \n",
|
|
|
|
|
"8793 False manual_static_filter \n",
|
|
|
|
|
"13249 False manual_static_filter \n",
|
|
|
|
|
"21424 False manual_static_filter \n",
|
|
|
|
|
"21665 False manual_static_filter \n",
|
|
|
|
|
"22811 False manual_static_filter \n",
|
|
|
|
|
"57305 False manual_dynamic_filter \n",
|
|
|
|
|
"58843 False manual_static_filter \n",
|
|
|
|
|
"66813 False manual_static_filter \n",
|
|
|
|
|
"68367 False manual_dynamic_filter \n",
|
|
|
|
|
"77320 False manual_static_filter \n",
|
|
|
|
|
"84350 False manual_static_filter \n",
|
|
|
|
|
"85383 False manual_static_filter \n",
|
|
|
|
|
"92868 False manual_static_filter \n",
|
|
|
|
|
"99670 False manual_static_filter \n",
|
|
|
|
|
"105477 False manual_static_filter \n",
|
|
|
|
|
"169513 False manual_static_filter \n",
|
|
|
|
|
"214421 False manual_static_filter \n",
|
|
|
|
|
"234546 False manual_static_filter \n",
|
|
|
|
|
"259808 False manual_static_filter \n",
|
|
|
|
|
"274380 False manual_static_filter \n",
|
|
|
|
|
"307511 False manual_static_filter \n",
|
|
|
|
|
"357509 False manual_static_filter \n",
|
|
|
|
|
"392920 False manual_static_filter \n",
|
|
|
|
|
"449620 False manual_dynamic_filter \n",
|
|
|
|
|
"503809 False manual_static_filter \n",
|
|
|
|
|
"651222 False manual_static_filter \n",
|
|
|
|
|
"654246 False manual_static_filter \n",
|
|
|
|
|
"654395 False manual_static_filter "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 57,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_targets_full[df1_targets_full['customer_id'] == 1]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "2f665824-a026-4acd-8358-b408a61854b4",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Campaign area"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 52,
|
|
|
|
|
"id": "5d05203c-ea30-4208-a29f-fef7737c672e",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"/tmp/ipykernel_9792/1967867975.py:15: SettingWithCopyWarning: \n",
|
|
|
|
|
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
|
|
|
|
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
|
|
|
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
|
|
|
|
|
"/tmp/ipykernel_9792/1967867975.py:15: SettingWithCopyWarning: \n",
|
|
|
|
|
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
|
|
|
|
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
|
|
|
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
|
|
|
|
|
"/tmp/ipykernel_9792/1967867975.py:15: SettingWithCopyWarning: \n",
|
|
|
|
|
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
|
|
|
|
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
|
|
|
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# campaign_stats cleaning \n",
|
|
|
|
|
"df1_campaign_stats_clean = df1_campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]]\n",
|
|
|
|
|
"cleaning_date(df1_campaign_stats_clean, 'opened_at')\n",
|
|
|
|
|
"cleaning_date(df1_campaign_stats_clean, 'sent_at')\n",
|
|
|
|
|
"cleaning_date(df1_campaign_stats_clean, 'delivered_at')\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# campaigns cleaning\n",
|
|
|
|
|
"df1_campaigns_clean = df1_campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\")\n",
|
|
|
|
|
"cleaning_date(df1_campaigns_clean, 'campaign_sent_at')\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Merge \n",
|
|
|
|
|
"df1_campaigns_full = pd.merge(df1_campaign_stats_clean, df1_campaigns_clean, on = \"campaign_id\", how = \"left\")\n",
|
|
|
|
|
"df1_campaigns_full.drop(['campaign_id'], axis = 1, inplace=True)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 53,
|
|
|
|
|
"id": "8ac634cf-2a30-4ccc-a34d-0fd401a49aaa",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
|
|
"RangeIndex: 6214808 entries, 0 to 6214807\n",
|
|
|
|
|
"Data columns (total 8 columns):\n",
|
|
|
|
|
" # Column Dtype \n",
|
|
|
|
|
"--- ------ ----- \n",
|
|
|
|
|
" 0 id int64 \n",
|
|
|
|
|
" 1 customer_id int64 \n",
|
|
|
|
|
" 2 opened_at datetime64[ns, UTC]\n",
|
|
|
|
|
" 3 sent_at datetime64[ns, UTC]\n",
|
|
|
|
|
" 4 delivered_at datetime64[ns, UTC]\n",
|
|
|
|
|
" 5 campaign_name object \n",
|
|
|
|
|
" 6 campaign_service_id int64 \n",
|
|
|
|
|
" 7 campaign_sent_at datetime64[ns, UTC]\n",
|
|
|
|
|
"dtypes: datetime64[ns, UTC](4), int64(3), object(1)\n",
|
|
|
|
|
"memory usage: 379.3+ MB\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_campaigns_full.info()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 56,
|
|
|
|
|
"id": "7d22cdd5-2060-4922-8e04-27b613d4ee27",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>id</th>\n",
|
|
|
|
|
" <th>customer_id</th>\n",
|
|
|
|
|
" <th>opened_at</th>\n",
|
|
|
|
|
" <th>sent_at</th>\n",
|
|
|
|
|
" <th>delivered_at</th>\n",
|
|
|
|
|
" <th>campaign_name</th>\n",
|
|
|
|
|
" <th>campaign_service_id</th>\n",
|
|
|
|
|
" <th>campaign_sent_at</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>19793</td>\n",
|
|
|
|
|
" <td>112597</td>\n",
|
|
|
|
|
" <td>NaT</td>\n",
|
|
|
|
|
" <td>2021-03-28 16:01:09+00:00</td>\n",
|
|
|
|
|
" <td>2021-03-28 16:24:18+00:00</td>\n",
|
|
|
|
|
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
|
|
|
|
|
" <td>404</td>\n",
|
|
|
|
|
" <td>2021-03-27 23:00:00+00:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>14211</td>\n",
|
|
|
|
|
" <td>113666</td>\n",
|
|
|
|
|
" <td>NaT</td>\n",
|
|
|
|
|
" <td>2021-03-28 16:01:09+00:00</td>\n",
|
|
|
|
|
" <td>2021-03-28 16:21:02+00:00</td>\n",
|
|
|
|
|
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
|
|
|
|
|
" <td>404</td>\n",
|
|
|
|
|
" <td>2021-03-27 23:00:00+00:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>13150</td>\n",
|
|
|
|
|
" <td>280561</td>\n",
|
|
|
|
|
" <td>NaT</td>\n",
|
|
|
|
|
" <td>2021-03-28 16:00:59+00:00</td>\n",
|
|
|
|
|
" <td>2021-03-28 16:08:45+00:00</td>\n",
|
|
|
|
|
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
|
|
|
|
|
" <td>404</td>\n",
|
|
|
|
|
" <td>2021-03-27 23:00:00+00:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>7073</td>\n",
|
|
|
|
|
" <td>101007</td>\n",
|
|
|
|
|
" <td>2021-03-28 18:11:06+00:00</td>\n",
|
|
|
|
|
" <td>2021-03-28 16:00:59+00:00</td>\n",
|
|
|
|
|
" <td>2021-03-28 16:09:47+00:00</td>\n",
|
|
|
|
|
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
|
|
|
|
|
" <td>404</td>\n",
|
|
|
|
|
" <td>2021-03-27 23:00:00+00:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>5175</td>\n",
|
|
|
|
|
" <td>103972</td>\n",
|
|
|
|
|
" <td>NaT</td>\n",
|
|
|
|
|
" <td>2021-03-28 16:01:06+00:00</td>\n",
|
|
|
|
|
" <td>2021-03-28 16:05:03+00:00</td>\n",
|
|
|
|
|
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
|
|
|
|
|
" <td>404</td>\n",
|
|
|
|
|
" <td>2021-03-27 23:00:00+00:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>6214803</th>\n",
|
|
|
|
|
" <td>8302994</td>\n",
|
|
|
|
|
" <td>266155</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:43:25+00:00</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:32:33+00:00</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:32:34+00:00</td>\n",
|
|
|
|
|
" <td>dre_nov_2023</td>\n",
|
|
|
|
|
" <td>1318</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:31:17+00:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>6214804</th>\n",
|
|
|
|
|
" <td>8303307</td>\n",
|
|
|
|
|
" <td>21355</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:44:02+00:00</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:32:49+00:00</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:32:49+00:00</td>\n",
|
|
|
|
|
" <td>dre_nov_2023</td>\n",
|
|
|
|
|
" <td>1318</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:31:17+00:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>6214805</th>\n",
|
|
|
|
|
" <td>8304346</td>\n",
|
|
|
|
|
" <td>21849</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:45:52+00:00</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:33:28+00:00</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:33:29+00:00</td>\n",
|
|
|
|
|
" <td>dre_nov_2023</td>\n",
|
|
|
|
|
" <td>1318</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:31:17+00:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>6214806</th>\n",
|
|
|
|
|
" <td>8302037</td>\n",
|
|
|
|
|
" <td>667789</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:47:32+00:00</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:31:53+00:00</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:31:54+00:00</td>\n",
|
|
|
|
|
" <td>dre_nov_2023</td>\n",
|
|
|
|
|
" <td>1318</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:31:17+00:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>6214807</th>\n",
|
|
|
|
|
" <td>8304939</td>\n",
|
|
|
|
|
" <td>294154</td>\n",
|
|
|
|
|
" <td>NaT</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:33:54+00:00</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:33:55+00:00</td>\n",
|
|
|
|
|
" <td>dre_nov_2023</td>\n",
|
|
|
|
|
" <td>1318</td>\n",
|
|
|
|
|
" <td>2023-10-23 09:31:17+00:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>6214808 rows × 8 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" id customer_id opened_at \\\n",
|
|
|
|
|
"0 19793 112597 NaT \n",
|
|
|
|
|
"1 14211 113666 NaT \n",
|
|
|
|
|
"2 13150 280561 NaT \n",
|
|
|
|
|
"3 7073 101007 2021-03-28 18:11:06+00:00 \n",
|
|
|
|
|
"4 5175 103972 NaT \n",
|
|
|
|
|
"... ... ... ... \n",
|
|
|
|
|
"6214803 8302994 266155 2023-10-23 09:43:25+00:00 \n",
|
|
|
|
|
"6214804 8303307 21355 2023-10-23 09:44:02+00:00 \n",
|
|
|
|
|
"6214805 8304346 21849 2023-10-23 09:45:52+00:00 \n",
|
|
|
|
|
"6214806 8302037 667789 2023-10-23 09:47:32+00:00 \n",
|
|
|
|
|
"6214807 8304939 294154 NaT \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" sent_at delivered_at \\\n",
|
|
|
|
|
"0 2021-03-28 16:01:09+00:00 2021-03-28 16:24:18+00:00 \n",
|
|
|
|
|
"1 2021-03-28 16:01:09+00:00 2021-03-28 16:21:02+00:00 \n",
|
|
|
|
|
"2 2021-03-28 16:00:59+00:00 2021-03-28 16:08:45+00:00 \n",
|
|
|
|
|
"3 2021-03-28 16:00:59+00:00 2021-03-28 16:09:47+00:00 \n",
|
|
|
|
|
"4 2021-03-28 16:01:06+00:00 2021-03-28 16:05:03+00:00 \n",
|
|
|
|
|
"... ... ... \n",
|
|
|
|
|
"6214803 2023-10-23 09:32:33+00:00 2023-10-23 09:32:34+00:00 \n",
|
|
|
|
|
"6214804 2023-10-23 09:32:49+00:00 2023-10-23 09:32:49+00:00 \n",
|
|
|
|
|
"6214805 2023-10-23 09:33:28+00:00 2023-10-23 09:33:29+00:00 \n",
|
|
|
|
|
"6214806 2023-10-23 09:31:53+00:00 2023-10-23 09:31:54+00:00 \n",
|
|
|
|
|
"6214807 2023-10-23 09:33:54+00:00 2023-10-23 09:33:55+00:00 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" campaign_name campaign_service_id \\\n",
|
|
|
|
|
"0 Le Mucem chez vous, gardons le lien #22 404 \n",
|
|
|
|
|
"1 Le Mucem chez vous, gardons le lien #22 404 \n",
|
|
|
|
|
"2 Le Mucem chez vous, gardons le lien #22 404 \n",
|
|
|
|
|
"3 Le Mucem chez vous, gardons le lien #22 404 \n",
|
|
|
|
|
"4 Le Mucem chez vous, gardons le lien #22 404 \n",
|
|
|
|
|
"... ... ... \n",
|
|
|
|
|
"6214803 dre_nov_2023 1318 \n",
|
|
|
|
|
"6214804 dre_nov_2023 1318 \n",
|
|
|
|
|
"6214805 dre_nov_2023 1318 \n",
|
|
|
|
|
"6214806 dre_nov_2023 1318 \n",
|
|
|
|
|
"6214807 dre_nov_2023 1318 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" campaign_sent_at \n",
|
|
|
|
|
"0 2021-03-27 23:00:00+00:00 \n",
|
|
|
|
|
"1 2021-03-27 23:00:00+00:00 \n",
|
|
|
|
|
"2 2021-03-27 23:00:00+00:00 \n",
|
|
|
|
|
"3 2021-03-27 23:00:00+00:00 \n",
|
|
|
|
|
"4 2021-03-27 23:00:00+00:00 \n",
|
|
|
|
|
"... ... \n",
|
|
|
|
|
"6214803 2023-10-23 09:31:17+00:00 \n",
|
|
|
|
|
"6214804 2023-10-23 09:31:17+00:00 \n",
|
|
|
|
|
"6214805 2023-10-23 09:31:17+00:00 \n",
|
|
|
|
|
"6214806 2023-10-23 09:31:17+00:00 \n",
|
|
|
|
|
"6214807 2023-10-23 09:31:17+00:00 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[6214808 rows x 8 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 56,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_campaigns_information"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "0a5b24f0-4bca-4cde-a6ba-eb130b38cac4",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Link area"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 37,
|
|
|
|
|
"id": "bc63bc4e-6cc1-4d35-9635-faf55339e186",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>id</th>\n",
|
|
|
|
|
" <th>name</th>\n",
|
|
|
|
|
" <th>service_id</th>\n",
|
|
|
|
|
" <th>created_at</th>\n",
|
|
|
|
|
" <th>updated_at</th>\n",
|
|
|
|
|
" <th>process_id</th>\n",
|
|
|
|
|
" <th>report_url</th>\n",
|
|
|
|
|
" <th>category</th>\n",
|
|
|
|
|
" <th>to_be_synced</th>\n",
|
|
|
|
|
" <th>identifier</th>\n",
|
|
|
|
|
" <th>sent_at</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>1319613</td>\n",
|
|
|
|
|
" <td>newsletter enseignants janvier 2022</td>\n",
|
|
|
|
|
" <td>721</td>\n",
|
|
|
|
|
" <td>2022-01-14 16:06:42.586321+01:00</td>\n",
|
|
|
|
|
" <td>2022-02-03 14:17:27.112963+01:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>aba3b6fd5d186d28e06ff97135cade7f</td>\n",
|
|
|
|
|
" <td>2022-01-14 00:00:00+01:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>1319586</td>\n",
|
|
|
|
|
" <td>lsf_janvier_2022</td>\n",
|
|
|
|
|
" <td>717</td>\n",
|
|
|
|
|
" <td>2022-01-07 11:30:35.315895+01:00</td>\n",
|
|
|
|
|
" <td>2022-02-03 14:17:27.116171+01:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>788d986905533aba051261497ecffcbb</td>\n",
|
|
|
|
|
" <td>2022-01-07 00:00:00+01:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>1319282</td>\n",
|
|
|
|
|
" <td>Invitation à déjeuner au Mucem | Vernissage « ...</td>\n",
|
|
|
|
|
" <td>591</td>\n",
|
|
|
|
|
" <td>2021-09-28 12:50:24.448752+02:00</td>\n",
|
|
|
|
|
" <td>2022-02-03 14:17:27.119582+01:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>3493894fa4ea036cfc6433c3e2ee63b0</td>\n",
|
|
|
|
|
" <td>2021-09-28 00:00:00+02:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>1319283</td>\n",
|
|
|
|
|
" <td>Vacances de la Toussaint - centres des loisirs</td>\n",
|
|
|
|
|
" <td>590</td>\n",
|
|
|
|
|
" <td>2021-09-28 18:01:04.692073+02:00</td>\n",
|
|
|
|
|
" <td>2022-02-03 14:17:27.124408+01:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>08b255a5d42b89b0585260b6f2360bdd</td>\n",
|
|
|
|
|
" <td>2021-09-28 00:00:00+02:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>1319636</td>\n",
|
|
|
|
|
" <td>ddcp_promo_md_livemag</td>\n",
|
|
|
|
|
" <td>730</td>\n",
|
|
|
|
|
" <td>2022-01-27 18:00:41.053069+01:00</td>\n",
|
|
|
|
|
" <td>2022-02-03 14:17:27.127607+01:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>d5cfead94f5350c12c322b5b664544c1</td>\n",
|
|
|
|
|
" <td>2022-01-27 00:00:00+01:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>952</th>\n",
|
|
|
|
|
" <td>1320072</td>\n",
|
|
|
|
|
" <td>dre_gaza0106</td>\n",
|
|
|
|
|
" <td>881</td>\n",
|
|
|
|
|
" <td>2022-05-26 09:01:35.523639+02:00</td>\n",
|
|
|
|
|
" <td>2022-12-02 17:51:22.614046+01:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>7504adad8bb96320eb3afdd4df6e1f60</td>\n",
|
|
|
|
|
" <td>2022-05-26 00:00:00+02:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>953</th>\n",
|
|
|
|
|
" <td>661398</td>\n",
|
|
|
|
|
" <td>DDCP Plan Bis 4 - Marketing direct - MJ5C</td>\n",
|
|
|
|
|
" <td>183</td>\n",
|
|
|
|
|
" <td>2021-06-18 10:30:01.259578+02:00</td>\n",
|
|
|
|
|
" <td>2021-09-24 11:56:09.082785+02:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>cedebb6e872f539bef8c3f919874e9d7</td>\n",
|
|
|
|
|
" <td>2020-07-27 00:00:00+02:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>954</th>\n",
|
|
|
|
|
" <td>1320487</td>\n",
|
|
|
|
|
" <td>Invitation portes ouvertes amitiés</td>\n",
|
|
|
|
|
" <td>988</td>\n",
|
|
|
|
|
" <td>2022-09-29 18:01:33.834090+02:00</td>\n",
|
|
|
|
|
" <td>2022-12-02 17:51:23.258324+01:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>9908279ebbf1f9b250ba689db6a0222b</td>\n",
|
|
|
|
|
" <td>2022-09-29 00:00:00+02:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>955</th>\n",
|
|
|
|
|
" <td>906903</td>\n",
|
|
|
|
|
" <td>DDCP PROMO La méditerranée des philosophes #3 ...</td>\n",
|
|
|
|
|
" <td>310</td>\n",
|
|
|
|
|
" <td>2021-07-19 14:07:16.177390+02:00</td>\n",
|
|
|
|
|
" <td>2021-09-24 11:56:09.086101+02:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>06eb61b839a0cefee4967c67ccb099dc</td>\n",
|
|
|
|
|
" <td>2020-12-23 00:00:00+01:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>956</th>\n",
|
|
|
|
|
" <td>579313</td>\n",
|
|
|
|
|
" <td>ddcp_promo_automation_manuel_pre_visit</td>\n",
|
|
|
|
|
" <td>481</td>\n",
|
|
|
|
|
" <td>2021-06-08 17:38:54.041310+02:00</td>\n",
|
|
|
|
|
" <td>2021-09-24 11:56:09.089394+02:00</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>9461cce28ebe3e76fb4b931c35a169b0</td>\n",
|
|
|
|
|
" <td>2021-06-08 00:00:00+02:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>957 rows × 11 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" id name service_id \\\n",
|
|
|
|
|
"0 1319613 newsletter enseignants janvier 2022 721 \n",
|
|
|
|
|
"1 1319586 lsf_janvier_2022 717 \n",
|
|
|
|
|
"2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n",
|
|
|
|
|
"3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n",
|
|
|
|
|
"4 1319636 ddcp_promo_md_livemag 730 \n",
|
|
|
|
|
".. ... ... ... \n",
|
|
|
|
|
"952 1320072 dre_gaza0106 881 \n",
|
|
|
|
|
"953 661398 DDCP Plan Bis 4 - Marketing direct - MJ5C 183 \n",
|
|
|
|
|
"954 1320487 Invitation portes ouvertes amitiés 988 \n",
|
|
|
|
|
"955 906903 DDCP PROMO La méditerranée des philosophes #3 ... 310 \n",
|
|
|
|
|
"956 579313 ddcp_promo_automation_manuel_pre_visit 481 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" created_at updated_at \\\n",
|
|
|
|
|
"0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n",
|
|
|
|
|
"1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n",
|
|
|
|
|
"2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n",
|
|
|
|
|
"3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n",
|
|
|
|
|
"4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n",
|
|
|
|
|
".. ... ... \n",
|
|
|
|
|
"952 2022-05-26 09:01:35.523639+02:00 2022-12-02 17:51:22.614046+01:00 \n",
|
|
|
|
|
"953 2021-06-18 10:30:01.259578+02:00 2021-09-24 11:56:09.082785+02:00 \n",
|
|
|
|
|
"954 2022-09-29 18:01:33.834090+02:00 2022-12-02 17:51:23.258324+01:00 \n",
|
|
|
|
|
"955 2021-07-19 14:07:16.177390+02:00 2021-09-24 11:56:09.086101+02:00 \n",
|
|
|
|
|
"956 2021-06-08 17:38:54.041310+02:00 2021-09-24 11:56:09.089394+02:00 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" process_id report_url category to_be_synced \\\n",
|
|
|
|
|
"0 NaN NaN 0.0 False \n",
|
|
|
|
|
"1 NaN NaN 0.0 False \n",
|
|
|
|
|
"2 NaN NaN 0.0 False \n",
|
|
|
|
|
"3 NaN NaN 0.0 False \n",
|
|
|
|
|
"4 NaN NaN 0.0 False \n",
|
|
|
|
|
".. ... ... ... ... \n",
|
|
|
|
|
"952 NaN NaN 0.0 False \n",
|
|
|
|
|
"953 NaN NaN 0.0 False \n",
|
|
|
|
|
"954 NaN NaN 0.0 False \n",
|
|
|
|
|
"955 NaN NaN 0.0 False \n",
|
|
|
|
|
"956 NaN NaN 0.0 False \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" identifier sent_at \n",
|
|
|
|
|
"0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n",
|
|
|
|
|
"1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n",
|
|
|
|
|
"2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n",
|
|
|
|
|
"3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n",
|
|
|
|
|
"4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 \n",
|
|
|
|
|
".. ... ... \n",
|
|
|
|
|
"952 7504adad8bb96320eb3afdd4df6e1f60 2022-05-26 00:00:00+02:00 \n",
|
|
|
|
|
"953 cedebb6e872f539bef8c3f919874e9d7 2020-07-27 00:00:00+02:00 \n",
|
|
|
|
|
"954 9908279ebbf1f9b250ba689db6a0222b 2022-09-29 00:00:00+02:00 \n",
|
|
|
|
|
"955 06eb61b839a0cefee4967c67ccb099dc 2020-12-23 00:00:00+01:00 \n",
|
|
|
|
|
"956 9461cce28ebe3e76fb4b931c35a169b0 2021-06-08 00:00:00+02:00 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[957 rows x 11 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 37,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_campaigns"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 38,
|
|
|
|
|
"id": "c19b321f-65f9-4d6c-8c1f-edb2eb9d70e7",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>id</th>\n",
|
|
|
|
|
" <th>clicked_at</th>\n",
|
|
|
|
|
" <th>link_id</th>\n",
|
|
|
|
|
" <th>customer_id</th>\n",
|
|
|
|
|
" <th>created_at</th>\n",
|
|
|
|
|
" <th>updated_at</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>2021-03-26 16:30:36+01:00</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>284033</td>\n",
|
|
|
|
|
" <td>2021-03-26 15:30:37.050161+01:00</td>\n",
|
|
|
|
|
" <td>2021-03-26 15:30:37.050161+01:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>2021-03-26 17:16:34+01:00</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>119768</td>\n",
|
|
|
|
|
" <td>2021-03-26 16:16:34.950871+01:00</td>\n",
|
|
|
|
|
" <td>2021-03-26 16:16:34.950871+01:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>272</td>\n",
|
|
|
|
|
" <td>2021-03-28 20:03:32+02:00</td>\n",
|
|
|
|
|
" <td>42</td>\n",
|
|
|
|
|
" <td>113105</td>\n",
|
|
|
|
|
" <td>2021-03-28 18:03:32.736394+02:00</td>\n",
|
|
|
|
|
" <td>2021-03-28 18:03:32.736394+02:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
" <td>2021-03-26 17:43:19+01:00</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>272280</td>\n",
|
|
|
|
|
" <td>2021-03-26 16:43:19.338321+01:00</td>\n",
|
|
|
|
|
" <td>2021-03-26 16:43:19.338321+01:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>2021-03-26 17:46:00+01:00</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>105095</td>\n",
|
|
|
|
|
" <td>2021-03-26 16:46:00.502945+01:00</td>\n",
|
|
|
|
|
" <td>2021-03-26 16:46:00.502945+01:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>151046</th>\n",
|
|
|
|
|
" <td>243553</td>\n",
|
|
|
|
|
" <td>2023-11-09 16:34:27+01:00</td>\n",
|
|
|
|
|
" <td>14666</td>\n",
|
|
|
|
|
" <td>998</td>\n",
|
|
|
|
|
" <td>2023-11-09 15:34:29.425425+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 15:34:29.425425+01:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>151047</th>\n",
|
|
|
|
|
" <td>243554</td>\n",
|
|
|
|
|
" <td>2023-11-09 16:34:35+01:00</td>\n",
|
|
|
|
|
" <td>14670</td>\n",
|
|
|
|
|
" <td>998</td>\n",
|
|
|
|
|
" <td>2023-11-09 15:34:37.505505+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 15:34:37.505505+01:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>151048</th>\n",
|
|
|
|
|
" <td>243559</td>\n",
|
|
|
|
|
" <td>2023-11-09 16:51:15+01:00</td>\n",
|
|
|
|
|
" <td>14686</td>\n",
|
|
|
|
|
" <td>82923</td>\n",
|
|
|
|
|
" <td>2023-11-09 15:51:17.439518+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 15:51:17.439518+01:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>151049</th>\n",
|
|
|
|
|
" <td>243561</td>\n",
|
|
|
|
|
" <td>2023-11-09 16:59:42+01:00</td>\n",
|
|
|
|
|
" <td>14677</td>\n",
|
|
|
|
|
" <td>82923</td>\n",
|
|
|
|
|
" <td>2023-11-09 15:59:44.030922+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 15:59:44.030922+01:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>151050</th>\n",
|
|
|
|
|
" <td>243564</td>\n",
|
|
|
|
|
" <td>2023-11-09 17:16:41+01:00</td>\n",
|
|
|
|
|
" <td>14691</td>\n",
|
|
|
|
|
" <td>1254355</td>\n",
|
|
|
|
|
" <td>2023-11-09 16:16:43.012932+01:00</td>\n",
|
|
|
|
|
" <td>2023-11-09 16:16:43.012932+01:00</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>151051 rows × 6 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" id clicked_at link_id customer_id \\\n",
|
|
|
|
|
"0 1 2021-03-26 16:30:36+01:00 1 284033 \n",
|
|
|
|
|
"1 2 2021-03-26 17:16:34+01:00 2 119768 \n",
|
|
|
|
|
"2 272 2021-03-28 20:03:32+02:00 42 113105 \n",
|
|
|
|
|
"3 4 2021-03-26 17:43:19+01:00 3 272280 \n",
|
|
|
|
|
"4 5 2021-03-26 17:46:00+01:00 3 105095 \n",
|
|
|
|
|
"... ... ... ... ... \n",
|
|
|
|
|
"151046 243553 2023-11-09 16:34:27+01:00 14666 998 \n",
|
|
|
|
|
"151047 243554 2023-11-09 16:34:35+01:00 14670 998 \n",
|
|
|
|
|
"151048 243559 2023-11-09 16:51:15+01:00 14686 82923 \n",
|
|
|
|
|
"151049 243561 2023-11-09 16:59:42+01:00 14677 82923 \n",
|
|
|
|
|
"151050 243564 2023-11-09 17:16:41+01:00 14691 1254355 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" created_at updated_at \n",
|
|
|
|
|
"0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n",
|
|
|
|
|
"1 2021-03-26 16:16:34.950871+01:00 2021-03-26 16:16:34.950871+01:00 \n",
|
|
|
|
|
"2 2021-03-28 18:03:32.736394+02:00 2021-03-28 18:03:32.736394+02:00 \n",
|
|
|
|
|
"3 2021-03-26 16:43:19.338321+01:00 2021-03-26 16:43:19.338321+01:00 \n",
|
|
|
|
|
"4 2021-03-26 16:46:00.502945+01:00 2021-03-26 16:46:00.502945+01:00 \n",
|
|
|
|
|
"... ... ... \n",
|
|
|
|
|
"151046 2023-11-09 15:34:29.425425+01:00 2023-11-09 15:34:29.425425+01:00 \n",
|
|
|
|
|
"151047 2023-11-09 15:34:37.505505+01:00 2023-11-09 15:34:37.505505+01:00 \n",
|
|
|
|
|
"151048 2023-11-09 15:51:17.439518+01:00 2023-11-09 15:51:17.439518+01:00 \n",
|
|
|
|
|
"151049 2023-11-09 15:59:44.030922+01:00 2023-11-09 15:59:44.030922+01:00 \n",
|
|
|
|
|
"151050 2023-11-09 16:16:43.012932+01:00 2023-11-09 16:16:43.012932+01:00 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[151051 rows x 6 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 38,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_link_stats"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "96ea2523-38dc-47ef-a49e-2c2d9ad0b1c6",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Exploration variables"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 28,
|
|
|
|
|
"id": "aaa41688-ea7e-4dba-851c-1f0b0ec43c71",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Fonction d'exploration pour suppliers.csv = label itr et commission inconnues\n",
|
|
|
|
|
"def suppliers_exploration(suppliers = None) : \n",
|
|
|
|
|
" \n",
|
|
|
|
|
" # Taux de NaN pour ces colonnes\n",
|
|
|
|
|
" label_na = suppliers['label'].isna().sum()/len(suppliers)*100\n",
|
|
|
|
|
" itr_na = suppliers['itr'].isna().sum()/len(suppliers)*100\n",
|
|
|
|
|
" commission_na = suppliers['commission'].isna().sum()/len(suppliers)*100\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" suppliers_desc = pd.DataFrame({'nb_suppliers' : [suppliers['name'].nunique()],\n",
|
|
|
|
|
" 'label_na' : [label_na],\n",
|
|
|
|
|
" 'itr_na' : [itr_na],\n",
|
|
|
|
|
" 'commission_na' : [commission_na]})\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" return suppliers_desc"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 29,
|
|
|
|
|
"id": "2fecc2e1-113f-46ed-9065-0b9ee416166e",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_suppliers_desc = suppliers_exploration(suppliers = df1_suppliers)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 30,
|
|
|
|
|
"id": "55f6170a-36fb-4efb-9810-f982883660cf",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
2024-01-14 17:38:16 +01:00
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>nb_suppliers</th>\n",
|
|
|
|
|
" <th>label_na</th>\n",
|
|
|
|
|
" <th>itr_na</th>\n",
|
|
|
|
|
" <th>commission_na</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>9</td>\n",
|
|
|
|
|
" <td>100.0</td>\n",
|
|
|
|
|
" <td>100.0</td>\n",
|
|
|
|
|
" <td>100.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" nb_suppliers label_na itr_na commission_na\n",
|
|
|
|
|
"0 9 100.0 100.0 100.0"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"execution_count": 30,
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df1_suppliers_desc"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"execution_count": 31,
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"id": "0030fd02-09e3-42f5-9c83-290458a38c29",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"BUCKET = \"bdc2324-data\"\n",
|
|
|
|
|
"liste_folders = fs.ls(BUCKET)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"liste_files = []\n",
|
|
|
|
|
"for company_folder in liste_folders : \n",
|
|
|
|
|
" liste_files.extend(fs.ls(company_folder))"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"execution_count": 32,
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"id": "6b1736d1-8fd7-4fcc-9431-b8bf0c7b4f2b",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"['bdc2324-data/1/1suppliers.csv', 'bdc2324-data/10/10suppliers.csv', 'bdc2324-data/101/101suppliers.csv', 'bdc2324-data/11/11suppliers.csv', 'bdc2324-data/12/12suppliers.csv', 'bdc2324-data/13/13suppliers.csv', 'bdc2324-data/14/14suppliers.csv', 'bdc2324-data/2/2suppliers.csv', 'bdc2324-data/3/3suppliers.csv', 'bdc2324-data/4/4suppliers.csv', 'bdc2324-data/5/5suppliers.csv', 'bdc2324-data/6/6suppliers.csv', 'bdc2324-data/7/7suppliers.csv', 'bdc2324-data/8/8suppliers.csv', 'bdc2324-data/9/9suppliers.csv']\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"liste_database_select = ['suppliers']\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Filtrer la liste pour les éléments contenant au moins un élément de la liste à tester\n",
|
|
|
|
|
"liste_suppliers = [element for element in liste_files if any(element_part in element for element_part in liste_database_select)]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Afficher le résultat\n",
|
|
|
|
|
"print(liste_suppliers)"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-01-13 14:47:24 +01:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-02-04 16:02:01 +01:00
|
|
|
|
"execution_count": 33,
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"id": "226b694b-0b00-4167-b69f-3178902254eb",
|
2024-01-13 14:47:24 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
2024-01-14 17:38:16 +01:00
|
|
|
|
"source": [
|
|
|
|
|
"# loop to create dataframes from file 2\n",
|
|
|
|
|
"def database_loading(database_name = None):\n",
|
|
|
|
|
" files_path = database_name\n",
|
|
|
|
|
" \n",
|
|
|
|
|
" client_number = files_path[0].split(\"/\")[1]\n",
|
|
|
|
|
" df_prefix = \"df\" + str(client_number) + \"_\"\n",
|
|
|
|
|
" \n",
|
|
|
|
|
" for i in range(len(files_path)) :\n",
|
|
|
|
|
" current_path = files_path[i]\n",
|
|
|
|
|
" with fs.open(current_path, mode=\"rb\") as file_in:\n",
|
|
|
|
|
" df = pd.read_csv(file_in)\n",
|
|
|
|
|
" # the pattern of the name is df1xxx\n",
|
|
|
|
|
" nom_dataframe = df_prefix + re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', current_path).group(3)\n",
|
|
|
|
|
" globals()[nom_dataframe] = df\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" "
|
|
|
|
|
]
|
2024-01-10 19:19:51 +01:00
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3 (ipykernel)",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.10.13"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
}
|