2881 lines
106 KiB
Plaintext
2881 lines
106 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c4205b5d-e052-4863-a46b-20e4757052a7",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Business Data Challenge - Team 1"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "ae3af8e6-ced8-4994-8877-fa98d4297cc0",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "dd3184e7-54a1-4463-af42-5850d9517a41",
|
||
"metadata": {},
|
||
"source": [
|
||
"Configuration de l'accès aux données"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"id": "b6035982-9ff4-4013-9792-2d50e10db3d1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['bdc2324-data/1/1campaign_stats.csv',\n",
|
||
" 'bdc2324-data/1/1campaigns.csv',\n",
|
||
" 'bdc2324-data/1/1categories.csv',\n",
|
||
" 'bdc2324-data/1/1countries.csv',\n",
|
||
" 'bdc2324-data/1/1currencies.csv',\n",
|
||
" 'bdc2324-data/1/1customer_target_mappings.csv',\n",
|
||
" 'bdc2324-data/1/1customersplus.csv',\n",
|
||
" 'bdc2324-data/1/1event_types.csv',\n",
|
||
" 'bdc2324-data/1/1events.csv',\n",
|
||
" 'bdc2324-data/1/1facilities.csv',\n",
|
||
" 'bdc2324-data/1/1link_stats.csv',\n",
|
||
" 'bdc2324-data/1/1pricing_formulas.csv',\n",
|
||
" 'bdc2324-data/1/1product_packs.csv',\n",
|
||
" 'bdc2324-data/1/1products.csv',\n",
|
||
" 'bdc2324-data/1/1products_groups.csv',\n",
|
||
" 'bdc2324-data/1/1purchases.csv',\n",
|
||
" 'bdc2324-data/1/1representation_category_capacities.csv',\n",
|
||
" 'bdc2324-data/1/1representations.csv',\n",
|
||
" 'bdc2324-data/1/1seasons.csv',\n",
|
||
" 'bdc2324-data/1/1structure_tag_mappings.csv',\n",
|
||
" 'bdc2324-data/1/1suppliers.csv',\n",
|
||
" 'bdc2324-data/1/1tags.csv',\n",
|
||
" 'bdc2324-data/1/1target_types.csv',\n",
|
||
" 'bdc2324-data/1/1targets.csv',\n",
|
||
" 'bdc2324-data/1/1tickets.csv',\n",
|
||
" 'bdc2324-data/1/1type_of_categories.csv',\n",
|
||
" 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n",
|
||
" 'bdc2324-data/1/1type_ofs.csv']"
|
||
]
|
||
},
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import os\n",
|
||
"import s3fs\n",
|
||
"# Create filesystem object\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
||
"\n",
|
||
"BUCKET = \"bdc2324-data/1\"\n",
|
||
"fs.ls(BUCKET)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "b86c935d-124f-453f-80dd-83ea6770d09c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"dic_base=['campaign_stats','campaigns','categories','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets','type_of_categories','type_of_pricing_formulas','type_ofs']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "f6d0b27c-0ecd-406b-b042-6c3802dd68fd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_425/1008972637.py:5: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"dic_base=['campaign_stats','campaigns','categories','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets','type_of_categories','type_of_pricing_formulas','type_ofs']\n",
|
||
"for nom_base in dic_base:\n",
|
||
" FILE_PATH_S3_fanta = 'bdc2324-data/1/1' + nom_base + '.csv'\n",
|
||
" with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
|
||
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "2a6b5e22-3370-457f-83b7-dd1e13663229",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'bdc2324-data/1/1type_ofs.csv'"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"FILE_PATH_S3_fanta"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "79012186-ea51-4252-843e-36a9bbe3847e",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Analyse exploratoire "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "1a365f29-4766-47d8-9796-24a5271867b2",
|
||
"metadata": {},
|
||
"source": [
|
||
"## I. Base type_of_pricing_formulas"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "bcc14f93-2289-44eb-816b-a51049b258df",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Detection des valeur manquantes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "raw",
|
||
"id": "ab2ec4c4-9d38-4aeb-8202-9116df3cdd66",
|
||
"metadata": {},
|
||
"source": [
|
||
"dic_prod_princing=['type_of_pricing_formulas','products_groups','pricing_formulas','product_packs','products']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "88759b4a-2633-478d-abce-29abeac376d1",
|
||
"metadata": {},
|
||
"source": [
|
||
"def verifier_donnees_manquantes(base):\n",
|
||
" donnees_manquantes = base.isna().sum()\n",
|
||
" print(\"Données manquantes pour la base :\")\n",
|
||
" print(donnees_manquantes)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "df3075b4-1490-4cf2-a3fe-c6d4e2144ae3",
|
||
"metadata": {},
|
||
"source": [
|
||
"for nom_base in dic_prod_princing:\n",
|
||
" verifier_donnees_manquantes(nom_base)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "e0c67c01-e837-4772-b070-d1be0d895a36",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id 0\n",
|
||
"type_of_id 0\n",
|
||
"pricing_formula_id 0\n",
|
||
"created_at 0\n",
|
||
"updated_at 0\n",
|
||
"identifier 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#detection des Nan d\n",
|
||
"\n",
|
||
"type_of_pricing_formulas.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "83a6a48d-effe-4537-b4bb-d5a540b610f1",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#variable retenu:[[\"id\",\"type_of_id\",\"pricing_formula_id\"]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "3eaffaa6-1164-4ee9-a671-8b5eb3df797d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>type_of_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>127</td>\n",
|
||
" <td>2021-01-05 11:55:51.226960+01:00</td>\n",
|
||
" <td>2021-01-05 11:55:51.226960+01:00</td>\n",
|
||
" <td>cf2918b25e6dcf8c30798ca05c8ec8ed</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2425</td>\n",
|
||
" <td>2021-01-05 11:55:51.235606+01:00</td>\n",
|
||
" <td>2021-01-05 11:55:51.235606+01:00</td>\n",
|
||
" <td>2c8ee3f7c1487d792b6c946314e681f2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2937</td>\n",
|
||
" <td>2021-01-05 11:55:51.240114+01:00</td>\n",
|
||
" <td>2021-01-05 11:55:51.240114+01:00</td>\n",
|
||
" <td>44e55c85e4eb59b3c3c01c137a6b25fc</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>48</td>\n",
|
||
" <td>2021-01-05 11:55:51.244638+01:00</td>\n",
|
||
" <td>2021-01-05 11:55:51.244638+01:00</td>\n",
|
||
" <td>ee3bb93b7e2217cd86a49d547fedf6c6</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>2021-01-05 11:55:51.249409+01:00</td>\n",
|
||
" <td>2021-01-05 11:55:51.249409+01:00</td>\n",
|
||
" <td>ae701668574f1a653d2b21ddfd250620</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>563</th>\n",
|
||
" <td>564</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>6656</td>\n",
|
||
" <td>2022-02-18 16:15:58.872249+01:00</td>\n",
|
||
" <td>2022-02-18 16:15:58.872249+01:00</td>\n",
|
||
" <td>f669824cdca9de9697f07ff3ba365a8d</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>564</th>\n",
|
||
" <td>565</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>6607</td>\n",
|
||
" <td>2022-02-18 16:15:59.231018+01:00</td>\n",
|
||
" <td>2022-02-18 16:15:59.231018+01:00</td>\n",
|
||
" <td>6421c8146a598758139153b0e7b921ea</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>565</th>\n",
|
||
" <td>566</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>6700</td>\n",
|
||
" <td>2022-02-18 16:15:59.724812+01:00</td>\n",
|
||
" <td>2022-02-18 16:15:59.724812+01:00</td>\n",
|
||
" <td>6823f6d4d80b322fbfb8b83545a9f96d</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>566</th>\n",
|
||
" <td>567</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>8118</td>\n",
|
||
" <td>2022-02-18 16:16:00.163381+01:00</td>\n",
|
||
" <td>2022-02-18 16:16:00.163381+01:00</td>\n",
|
||
" <td>35cfc12584b4d1b94795d97fd0aa56e8</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>567</th>\n",
|
||
" <td>569</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>48157</td>\n",
|
||
" <td>2023-03-13 11:30:29.480161+01:00</td>\n",
|
||
" <td>2023-03-13 11:30:29.480161+01:00</td>\n",
|
||
" <td>55863541f33fd229ac9b54d9ec1f4874</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>568 rows × 6 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id type_of_id pricing_formula_id created_at \\\n",
|
||
"0 1 1 127 2021-01-05 11:55:51.226960+01:00 \n",
|
||
"1 2 1 2425 2021-01-05 11:55:51.235606+01:00 \n",
|
||
"2 3 1 2937 2021-01-05 11:55:51.240114+01:00 \n",
|
||
"3 4 1 48 2021-01-05 11:55:51.244638+01:00 \n",
|
||
"4 5 1 7 2021-01-05 11:55:51.249409+01:00 \n",
|
||
".. ... ... ... ... \n",
|
||
"563 564 4 6656 2022-02-18 16:15:58.872249+01:00 \n",
|
||
"564 565 4 6607 2022-02-18 16:15:59.231018+01:00 \n",
|
||
"565 566 4 6700 2022-02-18 16:15:59.724812+01:00 \n",
|
||
"566 567 4 8118 2022-02-18 16:16:00.163381+01:00 \n",
|
||
"567 569 7 48157 2023-03-13 11:30:29.480161+01:00 \n",
|
||
"\n",
|
||
" updated_at identifier \n",
|
||
"0 2021-01-05 11:55:51.226960+01:00 cf2918b25e6dcf8c30798ca05c8ec8ed \n",
|
||
"1 2021-01-05 11:55:51.235606+01:00 2c8ee3f7c1487d792b6c946314e681f2 \n",
|
||
"2 2021-01-05 11:55:51.240114+01:00 44e55c85e4eb59b3c3c01c137a6b25fc \n",
|
||
"3 2021-01-05 11:55:51.244638+01:00 ee3bb93b7e2217cd86a49d547fedf6c6 \n",
|
||
"4 2021-01-05 11:55:51.249409+01:00 ae701668574f1a653d2b21ddfd250620 \n",
|
||
".. ... ... \n",
|
||
"563 2022-02-18 16:15:58.872249+01:00 f669824cdca9de9697f07ff3ba365a8d \n",
|
||
"564 2022-02-18 16:15:59.231018+01:00 6421c8146a598758139153b0e7b921ea \n",
|
||
"565 2022-02-18 16:15:59.724812+01:00 6823f6d4d80b322fbfb8b83545a9f96d \n",
|
||
"566 2022-02-18 16:16:00.163381+01:00 35cfc12584b4d1b94795d97fd0aa56e8 \n",
|
||
"567 2023-03-13 11:30:29.480161+01:00 55863541f33fd229ac9b54d9ec1f4874 \n",
|
||
"\n",
|
||
"[568 rows x 6 columns]"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"type_of_pricing_formulas"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "57298669-8d55-40d5-a5aa-4c5df984eec7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"type_of_id int64\n",
|
||
"pricing_formula_id int64\n",
|
||
"created_at object\n",
|
||
"updated_at object\n",
|
||
"identifier object\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#type des variables\n",
|
||
"\n",
|
||
"type_of_pricing_formulas.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "c11850cb-8833-44c0-a11d-9695d620a42b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>type_of_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
"Empty DataFrame\n",
|
||
"Columns: [id, type_of_id, pricing_formula_id, created_at, updated_at, identifier]\n",
|
||
"Index: []"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#Identification des doublons\n",
|
||
"type_of_pricing_formulas.loc[type_of_pricing_formulas['id'].duplicated(keep=False),:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "7a40de03-5e18-4d3d-a0f8-da960c29fad8",
|
||
"metadata": {},
|
||
"source": [
|
||
"## II.products_groups"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "89909175-6734-4e8e-8632-d6f8ca812388",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id 0\n",
|
||
"percent_price 0\n",
|
||
"max_price 0\n",
|
||
"min_price 0\n",
|
||
"category_id 0\n",
|
||
"pricing_formula_id 0\n",
|
||
"representation_id 0\n",
|
||
"created_at 0\n",
|
||
"updated_at 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#detection des Nan \n",
|
||
"\n",
|
||
"products_groups.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e0518684-c83c-4f0a-89ea-d7dcfd60051d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#variable retenu:[[\"id\",\"percent_price\",\"max_price\",\"min_price\",\"category_id\",\"pricing_formula_id\",\"representation_id\"]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "6a187170-96c4-48d2-9568-b270f67e2c27",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"percent_price float64\n",
|
||
"max_price float64\n",
|
||
"min_price float64\n",
|
||
"category_id int64\n",
|
||
"pricing_formula_id int64\n",
|
||
"representation_id int64\n",
|
||
"created_at object\n",
|
||
"updated_at object\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#type des variables\n",
|
||
"\n",
|
||
"products_groups.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "2fba2cb0-a6a4-43b2-a854-3be07939c28b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>percent_price</th>\n",
|
||
" <th>max_price</th>\n",
|
||
" <th>min_price</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
"Empty DataFrame\n",
|
||
"Columns: [id, percent_price, max_price, min_price, category_id, pricing_formula_id, representation_id, created_at, updated_at]\n",
|
||
"Index: []"
|
||
]
|
||
},
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#Identification des doublons\n",
|
||
"products_groups.loc[products_groups[['id','pricing_formula_id','representation_id']].duplicated(keep=False),:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "5312ac13-8fbd-4c3f-a98a-8c28f079a599",
|
||
"metadata": {},
|
||
"source": [
|
||
"## III.pricing_formulas"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "3383a773-0817-4b23-84e7-8d5d0c74b179",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>41909</td>\n",
|
||
" <td>visite mécènes 1h30</td>\n",
|
||
" <td>2022-07-08 07:08:26.802266+02:00</td>\n",
|
||
" <td>2022-07-08 07:08:26.802266+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>21d4b0043c12b21952b0797d140991a1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>502</td>\n",
|
||
" <td>entree mucem tp( expo picasso)</td>\n",
|
||
" <td>2020-09-03 13:43:59.816765+02:00</td>\n",
|
||
" <td>2022-02-18 15:57:55.792581+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>223b09e6c3f1f75dbf8df019af97a555</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>504</td>\n",
|
||
" <td>nombre de personnes cinema</td>\n",
|
||
" <td>2020-09-03 13:43:59.818198+02:00</td>\n",
|
||
" <td>2021-01-25 19:16:05.187114+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>ba33b7b6d225a75d713a356b49c4d915</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>117</td>\n",
|
||
" <td>spectacle tarif e famille tr</td>\n",
|
||
" <td>2020-09-03 13:21:21.400249+02:00</td>\n",
|
||
" <td>2023-03-13 11:30:29.525335+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>a00b61ad933518856f86e63ca91a5750</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1496</td>\n",
|
||
" <td>billet nb famille mecene 1a</td>\n",
|
||
" <td>2020-09-03 14:29:33.320952+02:00</td>\n",
|
||
" <td>2021-01-25 19:23:06.816402+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>7f6013803c242253a5ccde80f780984f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>551</th>\n",
|
||
" <td>529</td>\n",
|
||
" <td>billet nb expo gr</td>\n",
|
||
" <td>2020-09-03 13:43:59.835944+02:00</td>\n",
|
||
" <td>2022-02-18 15:57:55.792581+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>7d888e42abe101fc8b21dc88948c8b74</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>552</th>\n",
|
||
" <td>3153</td>\n",
|
||
" <td>nb pers visite scolaire rep</td>\n",
|
||
" <td>2020-09-03 16:32:37.068864+02:00</td>\n",
|
||
" <td>2022-02-18 15:57:55.792581+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3cf21731c25eee650d5b232ee4780563</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>553</th>\n",
|
||
" <td>5847</td>\n",
|
||
" <td>visite scolaire rep1h00</td>\n",
|
||
" <td>2021-06-09 18:10:49.742531+02:00</td>\n",
|
||
" <td>2022-02-18 15:55:03.576236+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>a7bb5a6892d55f0d5ee4ce5786ae5fc6</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>554</th>\n",
|
||
" <td>5840</td>\n",
|
||
" <td>france billet - entree ts</td>\n",
|
||
" <td>2021-06-09 18:10:49.737576+02:00</td>\n",
|
||
" <td>2022-02-18 16:16:00.199543+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4c53016fc65847646f600eff853593e5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>555</th>\n",
|
||
" <td>5863</td>\n",
|
||
" <td>france billet - entree tp</td>\n",
|
||
" <td>2021-06-09 18:12:49.269924+02:00</td>\n",
|
||
" <td>2022-02-18 16:16:00.199543+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>90e642c0e1ef6bc9f2bc43089798de00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>556 rows × 6 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 41909 visite mécènes 1h30 2022-07-08 07:08:26.802266+02:00 \n",
|
||
"1 502 entree mucem tp( expo picasso) 2020-09-03 13:43:59.816765+02:00 \n",
|
||
"2 504 nombre de personnes cinema 2020-09-03 13:43:59.818198+02:00 \n",
|
||
"3 117 spectacle tarif e famille tr 2020-09-03 13:21:21.400249+02:00 \n",
|
||
"4 1496 billet nb famille mecene 1a 2020-09-03 14:29:33.320952+02:00 \n",
|
||
".. ... ... ... \n",
|
||
"551 529 billet nb expo gr 2020-09-03 13:43:59.835944+02:00 \n",
|
||
"552 3153 nb pers visite scolaire rep 2020-09-03 16:32:37.068864+02:00 \n",
|
||
"553 5847 visite scolaire rep1h00 2021-06-09 18:10:49.742531+02:00 \n",
|
||
"554 5840 france billet - entree ts 2021-06-09 18:10:49.737576+02:00 \n",
|
||
"555 5863 france billet - entree tp 2021-06-09 18:12:49.269924+02:00 \n",
|
||
"\n",
|
||
" updated_at extra_field \\\n",
|
||
"0 2022-07-08 07:08:26.802266+02:00 NaN \n",
|
||
"1 2022-02-18 15:57:55.792581+01:00 NaN \n",
|
||
"2 2021-01-25 19:16:05.187114+01:00 NaN \n",
|
||
"3 2023-03-13 11:30:29.525335+01:00 NaN \n",
|
||
"4 2021-01-25 19:23:06.816402+01:00 NaN \n",
|
||
".. ... ... \n",
|
||
"551 2022-02-18 15:57:55.792581+01:00 NaN \n",
|
||
"552 2022-02-18 15:57:55.792581+01:00 NaN \n",
|
||
"553 2022-02-18 15:55:03.576236+01:00 NaN \n",
|
||
"554 2022-02-18 16:16:00.199543+01:00 NaN \n",
|
||
"555 2022-02-18 16:16:00.199543+01:00 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 21d4b0043c12b21952b0797d140991a1 \n",
|
||
"1 223b09e6c3f1f75dbf8df019af97a555 \n",
|
||
"2 ba33b7b6d225a75d713a356b49c4d915 \n",
|
||
"3 a00b61ad933518856f86e63ca91a5750 \n",
|
||
"4 7f6013803c242253a5ccde80f780984f \n",
|
||
".. ... \n",
|
||
"551 7d888e42abe101fc8b21dc88948c8b74 \n",
|
||
"552 3cf21731c25eee650d5b232ee4780563 \n",
|
||
"553 a7bb5a6892d55f0d5ee4ce5786ae5fc6 \n",
|
||
"554 4c53016fc65847646f600eff853593e5 \n",
|
||
"555 90e642c0e1ef6bc9f2bc43089798de00 \n",
|
||
"\n",
|
||
"[556 rows x 6 columns]"
|
||
]
|
||
},
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pricing_formulas"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "d8130c73-6c5f-45b1-93ae-db7679c8ca56",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id 0.0\n",
|
||
"name 0.0\n",
|
||
"created_at 0.0\n",
|
||
"updated_at 0.0\n",
|
||
"extra_field 1.0\n",
|
||
"identifier 0.0\n",
|
||
"dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#detection des Nan \n",
|
||
"\n",
|
||
"pricing_formulas.isna().sum()/pricing_formulas.shape[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "9f2909c1-bc6a-443f-a077-84f6ce6b7ab5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#variable retenu: [[\"id\",\"name\"]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "44f1dbfd-c3cf-464b-9877-f37fcc61da92",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"name object\n",
|
||
"created_at object\n",
|
||
"updated_at object\n",
|
||
"extra_field float64\n",
|
||
"identifier object\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#type des variables\n",
|
||
"\n",
|
||
"pricing_formulas.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"id": "6784b41b-da74-4fae-832e-16641ae710c1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
"Empty DataFrame\n",
|
||
"Columns: [id, name, created_at, updated_at, extra_field, identifier]\n",
|
||
"Index: []"
|
||
]
|
||
},
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#Identification des doublons\n",
|
||
"pricing_formulas.loc[pricing_formulas[['id']].duplicated(keep=False),:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "2145b0a4-b73d-4530-8c12-a78b1cf86eae",
|
||
"metadata": {},
|
||
"source": [
|
||
"## IV. product_packs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"id": "e36b07a7-4f0b-4711-86a0-12a1d8158eef",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id 0.0\n",
|
||
"name 1.0\n",
|
||
"type_of 0.0\n",
|
||
"created_at 0.0\n",
|
||
"updated_at 0.0\n",
|
||
"identifier 0.0\n",
|
||
"dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 17,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#detection des Nan \n",
|
||
"\n",
|
||
"product_packs.isna().sum()/product_packs.shape[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e0887a01-51ea-4034-84fe-dc4dbf2ad949",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#variable retenu:[[\"id\",\"name\"]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"id": "8707396a-f86b-476d-a9f9-c39f8de1d02e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"name float64\n",
|
||
"type_of int64\n",
|
||
"created_at object\n",
|
||
"updated_at object\n",
|
||
"identifier object\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#type des variables\n",
|
||
"\n",
|
||
"product_packs.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"id": "4b102bd3-924b-43da-8915-be7664c23f97",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>type_of</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
"Empty DataFrame\n",
|
||
"Columns: [id, name, type_of, created_at, updated_at, identifier]\n",
|
||
"Index: []"
|
||
]
|
||
},
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#Identification des doublons\n",
|
||
"product_packs.loc[product_packs[['id']].duplicated(keep=False),:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "cfe0c525-896b-4731-b38e-306ff6ea0c65",
|
||
"metadata": {},
|
||
"source": [
|
||
"## V.products"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"id": "968beb24-f70c-4eb6-8b1e-4b04bc7fe9c9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id 0.0\n",
|
||
"amount 0.0\n",
|
||
"is_full_price 0.0\n",
|
||
"representation_id 0.0\n",
|
||
"pricing_formula_id 0.0\n",
|
||
"created_at 0.0\n",
|
||
"updated_at 0.0\n",
|
||
"category_id 0.0\n",
|
||
"apply_price 0.0\n",
|
||
"products_group_id 0.0\n",
|
||
"product_pack_id 0.0\n",
|
||
"extra_field 1.0\n",
|
||
"amount_consumption 1.0\n",
|
||
"identifier 0.0\n",
|
||
"dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#detection des Nan \n",
|
||
"\n",
|
||
"products.isna().sum()/products.shape[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"id": "15bc6ac6-67e8-4e2c-9641-7ee8bb2581a3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"amount float64\n",
|
||
"is_full_price bool\n",
|
||
"representation_id int64\n",
|
||
"pricing_formula_id int64\n",
|
||
"created_at object\n",
|
||
"updated_at object\n",
|
||
"category_id int64\n",
|
||
"apply_price float64\n",
|
||
"products_group_id int64\n",
|
||
"product_pack_id int64\n",
|
||
"extra_field float64\n",
|
||
"amount_consumption float64\n",
|
||
"identifier object\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 21,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#type des variables\n",
|
||
"\n",
|
||
"products.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"id": "7daa4f1a-e429-4daf-a2e1-1e311b487e09",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#dic_prod_princing=['type_of_pricing_formulas','products_groups','pricing_formulas','product_packs','products']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"id": "dc12b746-6708-4708-826a-acb5a8e665a1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>41909</td>\n",
|
||
" <td>visite mécènes 1h30</td>\n",
|
||
" <td>2022-07-08 07:08:26.802266+02:00</td>\n",
|
||
" <td>2022-07-08 07:08:26.802266+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>21d4b0043c12b21952b0797d140991a1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>502</td>\n",
|
||
" <td>entree mucem tp( expo picasso)</td>\n",
|
||
" <td>2020-09-03 13:43:59.816765+02:00</td>\n",
|
||
" <td>2022-02-18 15:57:55.792581+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>223b09e6c3f1f75dbf8df019af97a555</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>504</td>\n",
|
||
" <td>nombre de personnes cinema</td>\n",
|
||
" <td>2020-09-03 13:43:59.818198+02:00</td>\n",
|
||
" <td>2021-01-25 19:16:05.187114+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>ba33b7b6d225a75d713a356b49c4d915</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>117</td>\n",
|
||
" <td>spectacle tarif e famille tr</td>\n",
|
||
" <td>2020-09-03 13:21:21.400249+02:00</td>\n",
|
||
" <td>2023-03-13 11:30:29.525335+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>a00b61ad933518856f86e63ca91a5750</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1496</td>\n",
|
||
" <td>billet nb famille mecene 1a</td>\n",
|
||
" <td>2020-09-03 14:29:33.320952+02:00</td>\n",
|
||
" <td>2021-01-25 19:23:06.816402+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>7f6013803c242253a5ccde80f780984f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>551</th>\n",
|
||
" <td>529</td>\n",
|
||
" <td>billet nb expo gr</td>\n",
|
||
" <td>2020-09-03 13:43:59.835944+02:00</td>\n",
|
||
" <td>2022-02-18 15:57:55.792581+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>7d888e42abe101fc8b21dc88948c8b74</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>552</th>\n",
|
||
" <td>3153</td>\n",
|
||
" <td>nb pers visite scolaire rep</td>\n",
|
||
" <td>2020-09-03 16:32:37.068864+02:00</td>\n",
|
||
" <td>2022-02-18 15:57:55.792581+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3cf21731c25eee650d5b232ee4780563</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>553</th>\n",
|
||
" <td>5847</td>\n",
|
||
" <td>visite scolaire rep1h00</td>\n",
|
||
" <td>2021-06-09 18:10:49.742531+02:00</td>\n",
|
||
" <td>2022-02-18 15:55:03.576236+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>a7bb5a6892d55f0d5ee4ce5786ae5fc6</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>554</th>\n",
|
||
" <td>5840</td>\n",
|
||
" <td>france billet - entree ts</td>\n",
|
||
" <td>2021-06-09 18:10:49.737576+02:00</td>\n",
|
||
" <td>2022-02-18 16:16:00.199543+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4c53016fc65847646f600eff853593e5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>555</th>\n",
|
||
" <td>5863</td>\n",
|
||
" <td>france billet - entree tp</td>\n",
|
||
" <td>2021-06-09 18:12:49.269924+02:00</td>\n",
|
||
" <td>2022-02-18 16:16:00.199543+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>90e642c0e1ef6bc9f2bc43089798de00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>556 rows × 6 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 41909 visite mécènes 1h30 2022-07-08 07:08:26.802266+02:00 \n",
|
||
"1 502 entree mucem tp( expo picasso) 2020-09-03 13:43:59.816765+02:00 \n",
|
||
"2 504 nombre de personnes cinema 2020-09-03 13:43:59.818198+02:00 \n",
|
||
"3 117 spectacle tarif e famille tr 2020-09-03 13:21:21.400249+02:00 \n",
|
||
"4 1496 billet nb famille mecene 1a 2020-09-03 14:29:33.320952+02:00 \n",
|
||
".. ... ... ... \n",
|
||
"551 529 billet nb expo gr 2020-09-03 13:43:59.835944+02:00 \n",
|
||
"552 3153 nb pers visite scolaire rep 2020-09-03 16:32:37.068864+02:00 \n",
|
||
"553 5847 visite scolaire rep1h00 2021-06-09 18:10:49.742531+02:00 \n",
|
||
"554 5840 france billet - entree ts 2021-06-09 18:10:49.737576+02:00 \n",
|
||
"555 5863 france billet - entree tp 2021-06-09 18:12:49.269924+02:00 \n",
|
||
"\n",
|
||
" updated_at extra_field \\\n",
|
||
"0 2022-07-08 07:08:26.802266+02:00 NaN \n",
|
||
"1 2022-02-18 15:57:55.792581+01:00 NaN \n",
|
||
"2 2021-01-25 19:16:05.187114+01:00 NaN \n",
|
||
"3 2023-03-13 11:30:29.525335+01:00 NaN \n",
|
||
"4 2021-01-25 19:23:06.816402+01:00 NaN \n",
|
||
".. ... ... \n",
|
||
"551 2022-02-18 15:57:55.792581+01:00 NaN \n",
|
||
"552 2022-02-18 15:57:55.792581+01:00 NaN \n",
|
||
"553 2022-02-18 15:55:03.576236+01:00 NaN \n",
|
||
"554 2022-02-18 16:16:00.199543+01:00 NaN \n",
|
||
"555 2022-02-18 16:16:00.199543+01:00 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 21d4b0043c12b21952b0797d140991a1 \n",
|
||
"1 223b09e6c3f1f75dbf8df019af97a555 \n",
|
||
"2 ba33b7b6d225a75d713a356b49c4d915 \n",
|
||
"3 a00b61ad933518856f86e63ca91a5750 \n",
|
||
"4 7f6013803c242253a5ccde80f780984f \n",
|
||
".. ... \n",
|
||
"551 7d888e42abe101fc8b21dc88948c8b74 \n",
|
||
"552 3cf21731c25eee650d5b232ee4780563 \n",
|
||
"553 a7bb5a6892d55f0d5ee4ce5786ae5fc6 \n",
|
||
"554 4c53016fc65847646f600eff853593e5 \n",
|
||
"555 90e642c0e1ef6bc9f2bc43089798de00 \n",
|
||
"\n",
|
||
"[556 rows x 6 columns]"
|
||
]
|
||
},
|
||
"execution_count": 26,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pricing_formulas"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "46aad10f-8530-410e-872b-bb253c553a46",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Jointure entre les bases"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "a4c3edd1-6d58-4c57-b3e4-0ef3529f6b8c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#dic_prod_princing=['type_of_pricing_formulas','products_groups','pricing_formulas','product_packs','products']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 74,
|
||
"id": "eac537e1-bbad-45bc-a85c-12b675da1088",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#Merge1 entre products et pricing_formulas\n",
|
||
"base1=products.merge(pricing_formulas, how='left', left_on= 'pricing_formula_id', right_on= 'id', suffixes = (\"_products\", \"_pricing_formula\"))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 78,
|
||
"id": "75be3a30-3114-432d-87d6-697533c3c871",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Merge 2: attach products_groups to base1. The foreign key held by products is\n",
|
||
"# products_group_id (-> products_groups.id); the pricing-formula id must not be used as the join key.\n",
|
||
"base2=base1.merge(products_groups, how='left', left_on= 'products_group_id', right_on= 'id', suffixes = (\"_merge2\", \"_product_group\"))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 81,
|
||
"id": "34a169c6-07a8-4ac3-a9e1-d7e7461f7310",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#Merge3 entre base2 et type_of_pricing_formulas\n",
|
||
"base3=base2.merge(type_of_pricing_formulas, how='left', left_on= 'id_pricing_formula', right_on= 'pricing_formula_id', suffixes = (\"_merge3\", \"_type_of_pricing_f\"))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 89,
|
||
"id": "f44f40d2-5304-4931-b7e6-fcc06b2657b6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#Merge4 entre base3 et product_packs\n",
|
||
"df_product_pricing=base3.merge(product_packs, how='left', left_on= 'product_pack_id', right_on= 'id', suffixes = (\"_merge4\", \"_product_pack\"))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 90,
|
||
"id": "a28772c3-7bc1-46b4-acc8-1388dc60ec98",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id_products</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>representation_id_merge2</th>\n",
|
||
" <th>pricing_formula_id_merge2</th>\n",
|
||
" <th>created_at_products</th>\n",
|
||
" <th>updated_at_products</th>\n",
|
||
" <th>category_id_merge2</th>\n",
|
||
" <th>apply_price</th>\n",
|
||
" <th>products_group_id</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>created_at_type_of_pricing_f</th>\n",
|
||
" <th>updated_at_type_of_pricing_f</th>\n",
|
||
" <th>identifier_merge4</th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name_product_pack</th>\n",
|
||
" <th>type_of</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>identifier_product_pack</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10682</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>914</td>\n",
|
||
" <td>114</td>\n",
|
||
" <td>2020-09-03 14:09:43.119798+02:00</td>\n",
|
||
" <td>2020-09-03 14:09:43.119798+02:00</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10655</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>114.0</td>\n",
|
||
" <td>2021-02-15 17:02:27.395376+01:00</td>\n",
|
||
" <td>2021-02-15 17:02:27.395376+01:00</td>\n",
|
||
" <td>3706121eb9f43b635bef1433c06f679c</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
|
||
" <td>a764b4bf13a360c7ac2a35ec4ca96c95</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>478</td>\n",
|
||
" <td>9.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>273</td>\n",
|
||
" <td>131</td>\n",
|
||
" <td>2020-09-03 13:21:22.711773+02:00</td>\n",
|
||
" <td>2020-09-03 13:21:22.711773+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>471</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>131.0</td>\n",
|
||
" <td>2021-02-05 11:52:05.923905+01:00</td>\n",
|
||
" <td>2021-02-05 11:52:05.923905+01:00</td>\n",
|
||
" <td>0aceb248607671792298436004b95275</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
|
||
" <td>a764b4bf13a360c7ac2a35ec4ca96c95</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>20873</td>\n",
|
||
" <td>11.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>275</td>\n",
|
||
" <td>137</td>\n",
|
||
" <td>2020-09-03 14:46:33.589030+02:00</td>\n",
|
||
" <td>2020-09-03 14:46:33.589030+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>20825</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>137.0</td>\n",
|
||
" <td>2021-02-05 11:52:05.939898+01:00</td>\n",
|
||
" <td>2021-02-05 11:52:05.939898+01:00</td>\n",
|
||
" <td>93002d4637331edd81ffc28b6e8e89c0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
|
||
" <td>a764b4bf13a360c7ac2a35ec4ca96c95</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>157142</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>82519</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>2022-01-28 19:29:23.525722+01:00</td>\n",
|
||
" <td>2022-01-28 19:29:23.525722+01:00</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>156773</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>2021-02-05 11:52:06.107939+01:00</td>\n",
|
||
" <td>2021-02-05 11:52:06.107939+01:00</td>\n",
|
||
" <td>7d0b25bdfff9f366da8be820608c8191</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
|
||
" <td>a764b4bf13a360c7ac2a35ec4ca96c95</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1341</td>\n",
|
||
" <td>8.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>93</td>\n",
|
||
" <td>2020-09-03 13:29:30.773089+02:00</td>\n",
|
||
" <td>2020-09-03 13:29:30.773089+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1175</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>93.0</td>\n",
|
||
" <td>2021-02-05 11:52:06.004162+01:00</td>\n",
|
||
" <td>2021-02-05 11:52:06.004162+01:00</td>\n",
|
||
" <td>1dbb0795e8f47cb75ba7cdb08c06be5f</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
|
||
" <td>a764b4bf13a360c7ac2a35ec4ca96c95</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 41 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id_products amount is_full_price representation_id_merge2 \\\n",
|
||
"0 10682 9.0 False 914 \n",
|
||
"1 478 9.5 False 273 \n",
|
||
"2 20873 11.5 False 275 \n",
|
||
"3 157142 8.0 False 82519 \n",
|
||
"4 1341 8.5 False 9 \n",
|
||
"\n",
|
||
" pricing_formula_id_merge2 created_at_products \\\n",
|
||
"0 114 2020-09-03 14:09:43.119798+02:00 \n",
|
||
"1 131 2020-09-03 13:21:22.711773+02:00 \n",
|
||
"2 137 2020-09-03 14:46:33.589030+02:00 \n",
|
||
"3 9 2022-01-28 19:29:23.525722+01:00 \n",
|
||
"4 93 2020-09-03 13:29:30.773089+02:00 \n",
|
||
"\n",
|
||
" updated_at_products category_id_merge2 apply_price \\\n",
|
||
"0 2020-09-03 14:09:43.119798+02:00 41 0.0 \n",
|
||
"1 2020-09-03 13:21:22.711773+02:00 1 0.0 \n",
|
||
"2 2020-09-03 14:46:33.589030+02:00 1 0.0 \n",
|
||
"3 2022-01-28 19:29:23.525722+01:00 5 0.0 \n",
|
||
"4 2020-09-03 13:29:30.773089+02:00 1 0.0 \n",
|
||
"\n",
|
||
" products_group_id ... pricing_formula_id \\\n",
|
||
"0 10655 ... 114.0 \n",
|
||
"1 471 ... 131.0 \n",
|
||
"2 20825 ... 137.0 \n",
|
||
"3 156773 ... 9.0 \n",
|
||
"4 1175 ... 93.0 \n",
|
||
"\n",
|
||
" created_at_type_of_pricing_f updated_at_type_of_pricing_f \\\n",
|
||
"0 2021-02-15 17:02:27.395376+01:00 2021-02-15 17:02:27.395376+01:00 \n",
|
||
"1 2021-02-05 11:52:05.923905+01:00 2021-02-05 11:52:05.923905+01:00 \n",
|
||
"2 2021-02-05 11:52:05.939898+01:00 2021-02-05 11:52:05.939898+01:00 \n",
|
||
"3 2021-02-05 11:52:06.107939+01:00 2021-02-05 11:52:06.107939+01:00 \n",
|
||
"4 2021-02-05 11:52:06.004162+01:00 2021-02-05 11:52:06.004162+01:00 \n",
|
||
"\n",
|
||
" identifier_merge4 id name_product_pack type_of \\\n",
|
||
"0 3706121eb9f43b635bef1433c06f679c 1 NaN 0 \n",
|
||
"1 0aceb248607671792298436004b95275 1 NaN 0 \n",
|
||
"2 93002d4637331edd81ffc28b6e8e89c0 1 NaN 0 \n",
|
||
"3 7d0b25bdfff9f366da8be820608c8191 1 NaN 0 \n",
|
||
"4 1dbb0795e8f47cb75ba7cdb08c06be5f 1 NaN 0 \n",
|
||
"\n",
|
||
" created_at updated_at \\\n",
|
||
"0 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n",
|
||
"1 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n",
|
||
"2 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n",
|
||
"3 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n",
|
||
"4 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n",
|
||
"\n",
|
||
" identifier_product_pack \n",
|
||
"0 a764b4bf13a360c7ac2a35ec4ca96c95 \n",
|
||
"1 a764b4bf13a360c7ac2a35ec4ca96c95 \n",
|
||
"2 a764b4bf13a360c7ac2a35ec4ca96c95 \n",
|
||
"3 a764b4bf13a360c7ac2a35ec4ca96c95 \n",
|
||
"4 a764b4bf13a360c7ac2a35ec4ca96c95 \n",
|
||
"\n",
|
||
"[5 rows x 41 columns]"
|
||
]
|
||
},
|
||
"execution_count": 90,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_product_pricing.head(5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "03442997-806f-4285-a139-3bad46bb4522",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "d22a0d75-53c5-4b54-9060-c9e7c307fb13",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"BUCKET = \"bdc2324-data\"\n",
|
||
"directory_path = '2'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "7c229dad-6ebd-4f43-99f1-fb330dc29466",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['bdc2324-data/2/2campaign_stats.csv',\n",
|
||
" 'bdc2324-data/2/2campaigns.csv',\n",
|
||
" 'bdc2324-data/2/2categories.csv',\n",
|
||
" 'bdc2324-data/2/2contribution_sites.csv',\n",
|
||
" 'bdc2324-data/2/2contributions.csv',\n",
|
||
" 'bdc2324-data/2/2countries.csv',\n",
|
||
" 'bdc2324-data/2/2currencies.csv',\n",
|
||
" 'bdc2324-data/2/2customer_target_mappings.csv',\n",
|
||
" 'bdc2324-data/2/2customersplus.csv',\n",
|
||
" 'bdc2324-data/2/2event_types.csv',\n",
|
||
" 'bdc2324-data/2/2events.csv',\n",
|
||
" 'bdc2324-data/2/2facilities.csv',\n",
|
||
" 'bdc2324-data/2/2link_stats.csv',\n",
|
||
" 'bdc2324-data/2/2pricing_formulas.csv',\n",
|
||
" 'bdc2324-data/2/2product_packs.csv',\n",
|
||
" 'bdc2324-data/2/2products.csv',\n",
|
||
" 'bdc2324-data/2/2products_groups.csv',\n",
|
||
" 'bdc2324-data/2/2purchases.csv',\n",
|
||
" 'bdc2324-data/2/2representation_category_capacities.csv',\n",
|
||
" 'bdc2324-data/2/2representations.csv',\n",
|
||
" 'bdc2324-data/2/2seasons.csv',\n",
|
||
" 'bdc2324-data/2/2structure_tag_mappings.csv',\n",
|
||
" 'bdc2324-data/2/2suppliers.csv',\n",
|
||
" 'bdc2324-data/2/2tags.csv',\n",
|
||
" 'bdc2324-data/2/2target_types.csv',\n",
|
||
" 'bdc2324-data/2/2targets.csv',\n",
|
||
" 'bdc2324-data/2/2tickets.csv']"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"BUCKET = \"bdc2324-data/2\"\n",
|
||
"fs.ls(BUCKET)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "df3d3548-3d76-4f07-afa1-e240932bc1c7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"dic_base_ent2=['campaign_stats','campaigns','categories','contribution_sites','contributions','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "90f8d5fc-43f3-4f36-b8cc-89a41785f032",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_425/673681459.py:5: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"dic_base_ent2=['campaign_stats','campaigns','categories','contribution_sites','contributions','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets']\n",
|
||
"# Load every table of dataset 2 from S3 into a module-level DataFrame named after the table.\n",
|
||
"# low_memory=False makes pandas infer each column's dtype from the whole file, which avoids the\n",
|
||
"# mixed-type DtypeWarning raised by chunked inference.\n",
|
||
"for nom_base in dic_base_ent2:\n",
|
||
"    FILE_PATH_S3_fanta = 'bdc2324-data/2/2' + nom_base + '.csv'\n",
|
||
"    with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
|
||
"        globals()[nom_base] = pd.read_csv(file_in, sep=\",\", low_memory=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "3e39a584-e02b-41b2-831c-33b920e298e9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"27"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(dic_base_ent2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "2b6c6f65",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"def calculer_proportion_valeurs_manquantes_et_exporter(databases, fichier_sortie='proportion_valeurs_manquantes.xlsx'):\n",
|
||
" \"\"\"\n",
|
||
" Calculer la proportion de valeurs manquantes pour chaque variable dans chaque base de données et exporter les résultats dans un fichier Excel.\n",
|
||
"\n",
|
||
" Paramètres:\n",
|
||
" - databases (dict): Un dictionnaire où les clés sont les noms des bases de données et les valeurs sont les DataFrames pandas.\n",
|
||
" - fichier_sortie (str): Le chemin du fichier Excel de sortie.\n",
|
||
"\n",
|
||
" Retourne:\n",
|
||
" - Rien (None) : les résultats sont écrits dans un fichier Excel où chaque onglet représente une base de données différente avec la proportion de valeurs manquantes pour chaque variable.\n",
|
||
" \"\"\"\n",
|
||
" with pd.ExcelWriter(fichier_sortie) as writer:\n",
|
||
" for nom_db, df in databases.items():\n",
|
||
" # Calculer la proportion de valeurs manquantes pour chaque colonne\n",
|
||
" proportion_manquantes = df.isnull().mean()\n",
|
||
" # Convertir en DataFrame pour un meilleur affichage\n",
|
||
" resultats_df = pd.DataFrame(proportion_manquantes, columns=['ProportionValeursManquantes'])\n",
|
||
" resultats_df['ProportionValeursManquantes'] = resultats_df['ProportionValeursManquantes'].map(lambda x: f\"{x:.2%}\")\n",
|
||
" # Écrire le DataFrame dans un onglet du fichier Excel\n",
|
||
" resultats_df.to_excel(writer, sheet_name=nom_db)\n",
|
||
"\n",
|
||
" print(f\"Les résultats ont été exportés dans le fichier '{fichier_sortie}'.\")\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "06759646-9419-4841-b12f-bbfceb417f3a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Fonction qui calcule la proportion de valeurs manquantes\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"def calculer_proportion_valeurs_manquantes(databases):\n",
|
||
" \"\"\"\n",
|
||
" Calculer la proportion de valeurs manquantes pour chaque variable dans chaque base de données.\n",
|
||
"\n",
|
||
" Paramètres:\n",
|
||
" - databases (dict): Un dictionnaire où les clés sont les noms des bases de données et les valeurs sont les DataFrames pandas.\n",
|
||
"\n",
|
||
" Retourne:\n",
|
||
" - Un dictionnaire où les clés sont les noms des bases de données et les valeurs sont des DataFrames avec la proportion de valeurs manquantes pour chaque variable.\n",
|
||
" \"\"\"\n",
|
||
" resultats = {}\n",
|
||
" for nom_db, df in databases.items():\n",
|
||
" # Calculer la proportion de valeurs manquantes pour chaque colonne\n",
|
||
" proportion_manquantes = df.isnull().mean()\n",
|
||
" # Convertir en DataFrame pour un meilleur affichage\n",
|
||
" resultats_df = pd.DataFrame(proportion_manquantes, columns=['ProportionValeursManquantes'])\n",
|
||
" resultats_df['ProportionValeursManquantes'] = resultats_df['ProportionValeursManquantes'].map(lambda x: f\"{x:.2%}\")\n",
|
||
" # Ajouter le résultat au dictionnaire\n",
|
||
" resultats[nom_db] = resultats_df\n",
|
||
" return resultats"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "0960daa8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Base de données: Base1\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"campaign_id 0.00%\n",
|
||
"customer_id 0.00%\n",
|
||
"opened_at 68.67%\n",
|
||
"sent_at 0.00%\n",
|
||
"delivered_at 1.61%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"\n",
|
||
"Base de données: Base2\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"name 0.00%\n",
|
||
"service_id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"process_id 100.00%\n",
|
||
"report_url 100.00%\n",
|
||
"category 0.00%\n",
|
||
"to_be_synced 0.00%\n",
|
||
"identifier 0.00%\n",
|
||
"sent_at 0.00%\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Exemple d'utilisation\n",
|
||
"\n",
|
||
"databases = {'Base1': campaign_stats, 'Base2': campaigns}\n",
|
||
"\n",
|
||
"resultats = calculer_proportion_valeurs_manquantes(databases)\n",
|
||
"\n",
|
||
"for nom_db, resultat in resultats.items():\n",
|
||
" print(f\"Base de données: {nom_db}\")\n",
|
||
" print(resultat)\n",
|
||
" print()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "77dc02bb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Base de données: campaign_stats\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"campaign_id 0.00%\n",
|
||
"customer_id 0.00%\n",
|
||
"opened_at 68.67%\n",
|
||
"sent_at 0.00%\n",
|
||
"delivered_at 1.61%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"\n",
|
||
"Base de données: campaigns\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"name 0.00%\n",
|
||
"service_id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"process_id 100.00%\n",
|
||
"report_url 100.00%\n",
|
||
"category 0.00%\n",
|
||
"to_be_synced 0.00%\n",
|
||
"identifier 0.00%\n",
|
||
"sent_at 0.00%\n",
|
||
"\n",
|
||
"Base de données: categories\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"name 100.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"extra_field 100.00%\n",
|
||
"quota 100.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: contribution_sites\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"identifier 0.00%\n",
|
||
"facility_id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"\n",
|
||
"Base de données: contributions\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"sent_at 0.00%\n",
|
||
"software 100.00%\n",
|
||
"satisfaction 39.65%\n",
|
||
"extra_field 100.00%\n",
|
||
"customer_id 0.00%\n",
|
||
"contribution_site_id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: countries\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"name 1.63%\n",
|
||
"code 0.41%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: currencies\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"name 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: customer_target_mappings\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"customer_id 0.00%\n",
|
||
"target_id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"name 100.00%\n",
|
||
"extra_field 100.00%\n",
|
||
"\n",
|
||
"Base de données: customersplus\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"lastname 0.02%\n",
|
||
"firstname 0.01%\n",
|
||
"birthdate 96.75%\n",
|
||
"email 1.05%\n",
|
||
"street_id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"civility 100.00%\n",
|
||
"is_partner 0.00%\n",
|
||
"extra 100.00%\n",
|
||
"deleted_at 100.00%\n",
|
||
"reference 100.00%\n",
|
||
"gender 0.00%\n",
|
||
"is_email_true 0.00%\n",
|
||
"extra_field 100.00%\n",
|
||
"identifier 0.00%\n",
|
||
"opt_in 0.00%\n",
|
||
"structure_id 97.57%\n",
|
||
"note 97.84%\n",
|
||
"profession 100.00%\n",
|
||
"language 46.16%\n",
|
||
"mcp_contact_id 100.00%\n",
|
||
"need_reload 0.00%\n",
|
||
"last_buying_date 12.58%\n",
|
||
"max_price 12.58%\n",
|
||
"ticket_sum 0.00%\n",
|
||
"average_price 12.58%\n",
|
||
"fidelity 0.00%\n",
|
||
"average_purchase_delay 12.58%\n",
|
||
"average_price_basket 12.58%\n",
|
||
"average_ticket_basket 12.58%\n",
|
||
"total_price 0.00%\n",
|
||
"preferred_category 100.00%\n",
|
||
"preferred_supplier 100.00%\n",
|
||
"preferred_formula 100.00%\n",
|
||
"purchase_count 0.00%\n",
|
||
"first_buying_date 12.58%\n",
|
||
"last_visiting_date 100.00%\n",
|
||
"zipcode 98.80%\n",
|
||
"country 97.64%\n",
|
||
"age 96.75%\n",
|
||
"tenant_id 0.00%\n",
|
||
"\n",
|
||
"Base de données: event_types\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"name 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"fidelity_delay 0.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: events\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"season_id 0.00%\n",
|
||
"facility_id 0.00%\n",
|
||
"name 0.00%\n",
|
||
"event_type_id 0.00%\n",
|
||
"manual_added 0.00%\n",
|
||
"is_display 0.00%\n",
|
||
"event_type_key_id 0.00%\n",
|
||
"facility_key_id 0.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: facilities\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"name 50.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"street_id 0.00%\n",
|
||
"fixed_capacity 100.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: link_stats\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"clicked_at 0.00%\n",
|
||
"link_id 0.00%\n",
|
||
"customer_id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"\n",
|
||
"Base de données: pricing_formulas\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"name 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"extra_field 100.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: product_packs\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"name 100.00%\n",
|
||
"type_of 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: products\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"amount 0.00%\n",
|
||
"is_full_price 0.00%\n",
|
||
"representation_id 0.00%\n",
|
||
"pricing_formula_id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"category_id 0.00%\n",
|
||
"apply_price 0.00%\n",
|
||
"products_group_id 0.00%\n",
|
||
"product_pack_id 0.00%\n",
|
||
"extra_field 100.00%\n",
|
||
"amount_consumption 100.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: products_groups\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"percent_price 0.00%\n",
|
||
"max_price 0.00%\n",
|
||
"min_price 0.00%\n",
|
||
"category_id 0.00%\n",
|
||
"pricing_formula_id 0.00%\n",
|
||
"representation_id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"\n",
|
||
"Base de données: purchases\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"purchase_date 0.00%\n",
|
||
"customer_id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"number 0.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: representation_category_capacities\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"representation_id 0.00%\n",
|
||
"category_id 0.00%\n",
|
||
"expected_filling 100.00%\n",
|
||
"max_filling 100.00%\n",
|
||
"\n",
|
||
"Base de données: representations\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"serial 100.00%\n",
|
||
"event_id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"start_date_time 0.00%\n",
|
||
"open 0.00%\n",
|
||
"satisfaction 100.00%\n",
|
||
"end_date_time 0.00%\n",
|
||
"name 100.00%\n",
|
||
"is_display 0.00%\n",
|
||
"representation_type_id 100.00%\n",
|
||
"expected_filling 100.00%\n",
|
||
"max_filling 100.00%\n",
|
||
"extra_field 100.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: seasons\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"name 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"start_date_time 100.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: structure_tag_mappings\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"structure_id 0.00%\n",
|
||
"tag_id 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"\n",
|
||
"Base de données: suppliers\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"name 20.00%\n",
|
||
"manually_added 0.00%\n",
|
||
"label 100.00%\n",
|
||
"itr 100.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"commission 100.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: tags\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"name 50.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: target_types\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"is_import 25.00%\n",
|
||
"name 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n",
|
||
"Base de données: targets\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"target_type_id 0.00%\n",
|
||
"name 5.26%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"\n",
|
||
"Base de données: tickets\n",
|
||
" ProportionValeursManquantes\n",
|
||
"id 0.00%\n",
|
||
"number 0.00%\n",
|
||
"created_at 0.00%\n",
|
||
"updated_at 0.00%\n",
|
||
"purchase_id 0.00%\n",
|
||
"product_id 0.00%\n",
|
||
"is_from_subscription 0.00%\n",
|
||
"type_of 0.00%\n",
|
||
"supplier_id 0.00%\n",
|
||
"barcode 100.00%\n",
|
||
"identifier 0.00%\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Exemple d'utilisation\n",
|
||
"dict={'campaign_stats': campaign_stats,\n",
|
||
" 'campaigns': campaigns,\n",
|
||
" 'categories': categories,\n",
|
||
" 'contribution_sites': contribution_sites,\n",
|
||
" 'contributions': contributions,\n",
|
||
" 'countries': countries,\n",
|
||
" 'currencies': currencies,\n",
|
||
" 'customer_target_mappings': customer_target_mappings,\n",
|
||
" 'customersplus': customersplus,\n",
|
||
" 'event_types': event_types,\n",
|
||
" 'events': events,\n",
|
||
" 'facilities': facilities,\n",
|
||
" 'link_stats': link_stats,\n",
|
||
" 'pricing_formulas': pricing_formulas,\n",
|
||
" 'product_packs': product_packs,\n",
|
||
" 'products': products,\n",
|
||
" 'products_groups': products_groups,\n",
|
||
" 'purchases': purchases,\n",
|
||
" 'representation_category_capacities': representation_category_capacities,\n",
|
||
" 'representations': representations,\n",
|
||
" 'seasons': seasons,\n",
|
||
" 'structure_tag_mappings': structure_tag_mappings,\n",
|
||
" 'suppliers': suppliers,\n",
|
||
" 'tags': tags,\n",
|
||
" 'target_types': target_types,\n",
|
||
" 'targets': targets,\n",
|
||
" 'tickets': tickets}\n",
|
||
"\n",
|
||
"resultats = calculer_proportion_valeurs_manquantes(dict)\n",
|
||
"\n",
|
||
"for nom_db, resultat in resultats.items():\n",
|
||
" print(f\"Base de données: {nom_db}\")\n",
|
||
" print(resultat)\n",
|
||
" print()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "60be9271",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Same computation as above, but the results are exported to an Excel file"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "955fe358",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"def calculer_proportion_valeurs_manquantes_et_exporter(databases, fichier_sortie='proportion_valeurs_manquantes.xlsx'):\n",
|
||
" \"\"\"\n",
|
||
" Calculer la proportion de valeurs manquantes pour chaque variable dans chaque base de données et exporter les résultats dans un fichier Excel.\n",
|
||
"\n",
|
||
" Paramètres:\n",
|
||
" - databases (dict): Un dictionnaire où les clés sont les noms des bases de données et les valeurs sont les DataFrames pandas.\n",
|
||
" - fichier_sortie (str): Le chemin du fichier Excel de sortie.\n",
|
||
"\n",
|
||
" Retourne:\n",
|
||
" - Un fichier Excel où chaque onglet représente une base de données différente avec la proportion de valeurs manquantes pour chaque variable.\n",
|
||
" \"\"\"\n",
|
||
" with pd.ExcelWriter(fichier_sortie) as writer:\n",
|
||
" for nom_db, df in databases.items():\n",
|
||
" # Calculer la proportion de valeurs manquantes pour chaque colonne\n",
|
||
" proportion_manquantes = df.isnull().mean()\n",
|
||
" # Convertir en DataFrame pour un meilleur affichage\n",
|
||
" resultats_df = pd.DataFrame(proportion_manquantes, columns=['ProportionValeursManquantes'])\n",
|
||
" resultats_df['ProportionValeursManquantes'] = resultats_df['ProportionValeursManquantes'].map(lambda x: f\"{x:.2%}\")\n",
|
||
" # Écrire le DataFrame dans un onglet du fichier Excel\n",
|
||
" resultats_df.to_excel(writer, sheet_name=nom_db)\n",
|
||
"\n",
|
||
" print(f\"Les résultats ont été exportés dans le fichier '{fichier_sortie}'.\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "7897b689",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Exemple d'utilisation\n",
|
||
"dict={'campaign_stats': campaign_stats,\n",
|
||
" 'campaigns': campaigns,\n",
|
||
" 'categories': categories,\n",
|
||
" 'contribution_sites': contribution_sites,\n",
|
||
" 'contributions': contributions,\n",
|
||
" 'countries': countries,\n",
|
||
" 'currencies': currencies,\n",
|
||
" 'customer_target_mappings': customer_target_mappings,\n",
|
||
" 'customersplus': customersplus,\n",
|
||
" 'event_types': event_types,\n",
|
||
" 'events': events,\n",
|
||
" 'facilities': facilities,\n",
|
||
" 'link_stats': link_stats,\n",
|
||
" 'pricing_formulas': pricing_formulas,\n",
|
||
" 'product_packs': product_packs,\n",
|
||
" 'products': products,\n",
|
||
" 'products_groups': products_groups,\n",
|
||
" 'purchases': purchases,\n",
|
||
" 'representation_category_capacities': representation_category_capacities,\n",
|
||
" 'representations': representations,\n",
|
||
" 'seasons': seasons,\n",
|
||
" 'structure_tag_mappings': structure_tag_mappings,\n",
|
||
" 'suppliers': suppliers,\n",
|
||
" 'tags': tags,\n",
|
||
" 'target_types': target_types,\n",
|
||
" 'targets': targets,\n",
|
||
" 'tickets': tickets}\n",
|
||
"\n",
|
||
"calculer_proportion_valeurs_manquantes_et_exporter(dict, 'proportion_valeurs_manquantes_ent1.xlsx')\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "514273f4",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Entreprise 3"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"id": "69b8f59a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['bdc2324-data/3/3campaign_stats.csv',\n",
|
||
" 'bdc2324-data/3/3campaigns.csv',\n",
|
||
" 'bdc2324-data/3/3categories.csv',\n",
|
||
" 'bdc2324-data/3/3consumptions.csv',\n",
|
||
" 'bdc2324-data/3/3contribution_sites.csv',\n",
|
||
" 'bdc2324-data/3/3contributions.csv',\n",
|
||
" 'bdc2324-data/3/3countries.csv',\n",
|
||
" 'bdc2324-data/3/3currencies.csv',\n",
|
||
" 'bdc2324-data/3/3customer_target_mappings.csv',\n",
|
||
" 'bdc2324-data/3/3customersplus.csv',\n",
|
||
" 'bdc2324-data/3/3event_types.csv',\n",
|
||
" 'bdc2324-data/3/3events.csv',\n",
|
||
" 'bdc2324-data/3/3facilities.csv',\n",
|
||
" 'bdc2324-data/3/3link_stats.csv',\n",
|
||
" 'bdc2324-data/3/3pricing_formulas.csv',\n",
|
||
" 'bdc2324-data/3/3product_packs.csv',\n",
|
||
" 'bdc2324-data/3/3products.csv',\n",
|
||
" 'bdc2324-data/3/3products_groups.csv',\n",
|
||
" 'bdc2324-data/3/3purchases.csv',\n",
|
||
" 'bdc2324-data/3/3representation_category_capacities.csv',\n",
|
||
" 'bdc2324-data/3/3representations.csv',\n",
|
||
" 'bdc2324-data/3/3seasons.csv',\n",
|
||
" 'bdc2324-data/3/3structure_tag_mappings.csv',\n",
|
||
" 'bdc2324-data/3/3suppliers.csv',\n",
|
||
" 'bdc2324-data/3/3tags.csv',\n",
|
||
" 'bdc2324-data/3/3target_types.csv',\n",
|
||
" 'bdc2324-data/3/3targets.csv',\n",
|
||
" 'bdc2324-data/3/3tickets.csv']"
|
||
]
|
||
},
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"BUCKET = \"bdc2324-data/3\"\n",
|
||
"fs.ls(BUCKET)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "8a842d0b-f341-4752-b624-3a339ef0fe1e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Load the temporary purchasing-behaviour data from the project bucket\n",
|
||
"BUCKET = \"projet-bdc2324-team1\"\n",
|
||
"FILE_KEY_S3 = \"0_Temp/Company 1 - Purchasing behaviour.csv\"\n",
|
||
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
|
||
"\n",
|
||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||
" tickets_kpi = pd.read_csv(file_in, sep=\",\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"id": "9b4c005f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"dic_base_ent3=['campaign_stats','campaigns','categories','consumptions','contribution_sites','contributions','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"id": "aae542d6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_425/4241072101.py:5: DtypeWarning: Columns (19,20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")\n",
|
||
"/tmp/ipykernel_425/4241072101.py:5: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"dic_base_ent3=['campaign_stats','campaigns','categories','consumptions','contribution_sites','contributions','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets']\n",
|
||
"for nom_base in dic_base_ent2:\n",
|
||
" FILE_PATH_S3_fanta = 'bdc2324-data/3/3' + nom_base + '.csv'\n",
|
||
" with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
|
||
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"id": "907c650c-df7e-4e5c-b3cb-6595be061e99",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>fidelity_delay</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>60873</td>\n",
|
||
" <td>journees des plantes</td>\n",
|
||
" <td>2022-09-13 17:42:18.040557+02:00</td>\n",
|
||
" <td>2022-09-13 17:42:18.040557+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>58568a64f69dd864539e7a682b03ef3a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>60876</td>\n",
|
||
" <td>parking</td>\n",
|
||
" <td>2022-09-13 17:42:18.043821+02:00</td>\n",
|
||
" <td>2022-09-13 17:42:18.043821+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>3ac156eead4ae6b40e9c498d532b4448</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>60997</td>\n",
|
||
" <td>pass arc</td>\n",
|
||
" <td>2022-09-13 18:04:38.812389+02:00</td>\n",
|
||
" <td>2022-09-13 18:04:38.812389+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>ddffce8b0a072d76a766097b34208482</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>61233</td>\n",
|
||
" <td>paris pass museum</td>\n",
|
||
" <td>2022-09-13 18:53:15.878739+02:00</td>\n",
|
||
" <td>2022-09-13 18:53:15.878739+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>394a376e43e498dccf8a448004f2aa84</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>60911</td>\n",
|
||
" <td>spectacle noel</td>\n",
|
||
" <td>2022-09-13 17:48:50.549760+02:00</td>\n",
|
||
" <td>2022-09-13 17:48:50.549760+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>a2b0f7c330d4c8d0338e6edf5ce4c81a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>61007</td>\n",
|
||
" <td>domaine + spect noel</td>\n",
|
||
" <td>2022-09-13 18:07:25.121513+02:00</td>\n",
|
||
" <td>2022-09-13 18:07:25.121513+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>c65e9028b505dcfa71acef4e15f6e7be</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>62374</td>\n",
|
||
" <td>patrivia</td>\n",
|
||
" <td>2022-09-14 02:08:56.789118+02:00</td>\n",
|
||
" <td>2022-09-14 02:08:56.789118+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>8db1f5c8774a8cf07542249278d7d778</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>72615</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2023-08-14 06:20:26.491399+02:00</td>\n",
|
||
" <td>2023-08-14 06:20:26.491399+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>d41d8cd98f00b204e9800998ecf8427e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>61011</td>\n",
|
||
" <td>prestation annexe</td>\n",
|
||
" <td>2022-09-13 18:07:25.126517+02:00</td>\n",
|
||
" <td>2022-09-13 18:07:25.126517+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>2588ceebb05d3329f334687b8647887e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>60877</td>\n",
|
||
" <td>minibus</td>\n",
|
||
" <td>2022-09-13 17:42:18.045141+02:00</td>\n",
|
||
" <td>2022-09-13 17:42:18.045141+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>397361c9c0dc82d911aa931223bd7a4e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>61708</td>\n",
|
||
" <td>location espace</td>\n",
|
||
" <td>2022-09-13 21:52:57.785694+02:00</td>\n",
|
||
" <td>2022-09-13 21:52:57.785694+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>3738beebf604a960e016ffad6db1df74</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>11</th>\n",
|
||
" <td>60931</td>\n",
|
||
" <td>exposition</td>\n",
|
||
" <td>2022-09-13 17:52:36.164774+02:00</td>\n",
|
||
" <td>2022-09-13 17:52:36.164774+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>69033f19d53294c467d0fb7d3a4ad868</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>12</th>\n",
|
||
" <td>60871</td>\n",
|
||
" <td>domaine</td>\n",
|
||
" <td>2022-09-13 17:42:18.037831+02:00</td>\n",
|
||
" <td>2022-09-13 17:42:18.037831+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>b81285860b791e63dee94559f0a9e8e4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>60878</td>\n",
|
||
" <td>visite guidee</td>\n",
|
||
" <td>2022-09-13 17:42:18.046593+02:00</td>\n",
|
||
" <td>2022-09-13 17:42:18.046593+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>3474ed518d4fa7b86719680e70039ac2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>14</th>\n",
|
||
" <td>60875</td>\n",
|
||
" <td>parc</td>\n",
|
||
" <td>2022-09-13 17:42:18.042824+02:00</td>\n",
|
||
" <td>2022-09-13 17:42:18.042824+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>41c16554f8160b3bd2a6b3809f309e27</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>60870</td>\n",
|
||
" <td>spectacle saison</td>\n",
|
||
" <td>2022-09-13 17:42:18.028836+02:00</td>\n",
|
||
" <td>2022-09-13 17:42:18.028836+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>7afdec36b66f94b67e813ac7d092ea0c</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16</th>\n",
|
||
" <td>60872</td>\n",
|
||
" <td>domaine + spect saison</td>\n",
|
||
" <td>2022-09-13 17:42:18.039515+02:00</td>\n",
|
||
" <td>2022-09-13 17:42:18.039515+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>ccd5dffaa070e9c7885ff3e9149f12f0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>17</th>\n",
|
||
" <td>60874</td>\n",
|
||
" <td>supplement spectacle</td>\n",
|
||
" <td>2022-09-13 17:42:18.041862+02:00</td>\n",
|
||
" <td>2022-09-13 17:42:18.041862+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>093d3e6b14b1ad33908a18522d02886b</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>18</th>\n",
|
||
" <td>61203</td>\n",
|
||
" <td>pique nique en blanc</td>\n",
|
||
" <td>2022-09-13 18:44:16.813045+02:00</td>\n",
|
||
" <td>2022-09-13 18:44:16.813045+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>ff9a87979d4a564a1d2a1055a1aa186e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>61427</td>\n",
|
||
" <td>restauration</td>\n",
|
||
" <td>2022-09-13 19:26:41.906836+02:00</td>\n",
|
||
" <td>2022-09-13 19:26:41.906836+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>f1bcd494fa3171bf042e62c311157547</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>20</th>\n",
|
||
" <td>60879</td>\n",
|
||
" <td>animation culturelle</td>\n",
|
||
" <td>2022-09-13 17:42:18.047567+02:00</td>\n",
|
||
" <td>2022-09-13 17:42:18.047567+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>27b0b018dfa7301abffcb243a876e4c4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>21</th>\n",
|
||
" <td>61270</td>\n",
|
||
" <td>ecurie</td>\n",
|
||
" <td>2022-09-13 18:57:52.734356+02:00</td>\n",
|
||
" <td>2022-09-13 18:57:52.734356+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>a21e22a0d924104179b25069de927909</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>22</th>\n",
|
||
" <td>60929</td>\n",
|
||
" <td>domaine + spect ete</td>\n",
|
||
" <td>2022-09-13 17:52:36.162726+02:00</td>\n",
|
||
" <td>2022-09-13 17:52:36.162726+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>8bdc1d1d5fba2317af8c4d733d8206d7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>23</th>\n",
|
||
" <td>60910</td>\n",
|
||
" <td>spectacle ete</td>\n",
|
||
" <td>2022-09-13 17:48:50.548826+02:00</td>\n",
|
||
" <td>2022-09-13 17:48:50.548826+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>44878d1fd6c7fe384274861294c59017</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 60873 journees des plantes 2022-09-13 17:42:18.040557+02:00 \n",
|
||
"1 60876 parking 2022-09-13 17:42:18.043821+02:00 \n",
|
||
"2 60997 pass arc 2022-09-13 18:04:38.812389+02:00 \n",
|
||
"3 61233 paris pass museum 2022-09-13 18:53:15.878739+02:00 \n",
|
||
"4 60911 spectacle noel 2022-09-13 17:48:50.549760+02:00 \n",
|
||
"5 61007 domaine + spect noel 2022-09-13 18:07:25.121513+02:00 \n",
|
||
"6 62374 patrivia 2022-09-14 02:08:56.789118+02:00 \n",
|
||
"7 72615 NaN 2023-08-14 06:20:26.491399+02:00 \n",
|
||
"8 61011 prestation annexe 2022-09-13 18:07:25.126517+02:00 \n",
|
||
"9 60877 minibus 2022-09-13 17:42:18.045141+02:00 \n",
|
||
"10 61708 location espace 2022-09-13 21:52:57.785694+02:00 \n",
|
||
"11 60931 exposition 2022-09-13 17:52:36.164774+02:00 \n",
|
||
"12 60871 domaine 2022-09-13 17:42:18.037831+02:00 \n",
|
||
"13 60878 visite guidee 2022-09-13 17:42:18.046593+02:00 \n",
|
||
"14 60875 parc 2022-09-13 17:42:18.042824+02:00 \n",
|
||
"15 60870 spectacle saison 2022-09-13 17:42:18.028836+02:00 \n",
|
||
"16 60872 domaine + spect saison 2022-09-13 17:42:18.039515+02:00 \n",
|
||
"17 60874 supplement spectacle 2022-09-13 17:42:18.041862+02:00 \n",
|
||
"18 61203 pique nique en blanc 2022-09-13 18:44:16.813045+02:00 \n",
|
||
"19 61427 restauration 2022-09-13 19:26:41.906836+02:00 \n",
|
||
"20 60879 animation culturelle 2022-09-13 17:42:18.047567+02:00 \n",
|
||
"21 61270 ecurie 2022-09-13 18:57:52.734356+02:00 \n",
|
||
"22 60929 domaine + spect ete 2022-09-13 17:52:36.162726+02:00 \n",
|
||
"23 60910 spectacle ete 2022-09-13 17:48:50.548826+02:00 \n",
|
||
"\n",
|
||
" updated_at fidelity_delay \\\n",
|
||
"0 2022-09-13 17:42:18.040557+02:00 36 \n",
|
||
"1 2022-09-13 17:42:18.043821+02:00 36 \n",
|
||
"2 2022-09-13 18:04:38.812389+02:00 36 \n",
|
||
"3 2022-09-13 18:53:15.878739+02:00 36 \n",
|
||
"4 2022-09-13 17:48:50.549760+02:00 36 \n",
|
||
"5 2022-09-13 18:07:25.121513+02:00 36 \n",
|
||
"6 2022-09-14 02:08:56.789118+02:00 36 \n",
|
||
"7 2023-08-14 06:20:26.491399+02:00 36 \n",
|
||
"8 2022-09-13 18:07:25.126517+02:00 36 \n",
|
||
"9 2022-09-13 17:42:18.045141+02:00 36 \n",
|
||
"10 2022-09-13 21:52:57.785694+02:00 36 \n",
|
||
"11 2022-09-13 17:52:36.164774+02:00 36 \n",
|
||
"12 2022-09-13 17:42:18.037831+02:00 36 \n",
|
||
"13 2022-09-13 17:42:18.046593+02:00 36 \n",
|
||
"14 2022-09-13 17:42:18.042824+02:00 36 \n",
|
||
"15 2022-09-13 17:42:18.028836+02:00 36 \n",
|
||
"16 2022-09-13 17:42:18.039515+02:00 36 \n",
|
||
"17 2022-09-13 17:42:18.041862+02:00 36 \n",
|
||
"18 2022-09-13 18:44:16.813045+02:00 36 \n",
|
||
"19 2022-09-13 19:26:41.906836+02:00 36 \n",
|
||
"20 2022-09-13 17:42:18.047567+02:00 36 \n",
|
||
"21 2022-09-13 18:57:52.734356+02:00 36 \n",
|
||
"22 2022-09-13 17:52:36.162726+02:00 36 \n",
|
||
"23 2022-09-13 17:48:50.548826+02:00 36 \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 58568a64f69dd864539e7a682b03ef3a \n",
|
||
"1 3ac156eead4ae6b40e9c498d532b4448 \n",
|
||
"2 ddffce8b0a072d76a766097b34208482 \n",
|
||
"3 394a376e43e498dccf8a448004f2aa84 \n",
|
||
"4 a2b0f7c330d4c8d0338e6edf5ce4c81a \n",
|
||
"5 c65e9028b505dcfa71acef4e15f6e7be \n",
|
||
"6 8db1f5c8774a8cf07542249278d7d778 \n",
|
||
"7 d41d8cd98f00b204e9800998ecf8427e \n",
|
||
"8 2588ceebb05d3329f334687b8647887e \n",
|
||
"9 397361c9c0dc82d911aa931223bd7a4e \n",
|
||
"10 3738beebf604a960e016ffad6db1df74 \n",
|
||
"11 69033f19d53294c467d0fb7d3a4ad868 \n",
|
||
"12 b81285860b791e63dee94559f0a9e8e4 \n",
|
||
"13 3474ed518d4fa7b86719680e70039ac2 \n",
|
||
"14 41c16554f8160b3bd2a6b3809f309e27 \n",
|
||
"15 7afdec36b66f94b67e813ac7d092ea0c \n",
|
||
"16 ccd5dffaa070e9c7885ff3e9149f12f0 \n",
|
||
"17 093d3e6b14b1ad33908a18522d02886b \n",
|
||
"18 ff9a87979d4a564a1d2a1055a1aa186e \n",
|
||
"19 f1bcd494fa3171bf042e62c311157547 \n",
|
||
"20 27b0b018dfa7301abffcb243a876e4c4 \n",
|
||
"21 a21e22a0d924104179b25069de927909 \n",
|
||
"22 8bdc1d1d5fba2317af8c4d733d8206d7 \n",
|
||
"23 44878d1fd6c7fe384274861294c59017 "
|
||
]
|
||
},
|
||
"execution_count": 21,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"event_types"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"id": "617f6e54-c2a4-4627-9b94-9644a4dace71",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"entreprise_base=['bdc2324-data/1', 'bdc2324-data/2', 'bdc2324-data/3', 'bdc2324-data/4', 'bdc2324-data/5', 'bdc2324-data/6', 'bdc2324-data/7', 'bdc2324-data/8','bdc2324-data/9','bdc2324-data/10','bdc2324-data/11','bdc2324-data/12','bdc2324-data/13','bdc2324-data/14','bdc2324-data/101']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "b4ce5f9e-7358-4c09-871b-45989f142e25",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "7a0b081c-e0ad-49a8-af25-78b2dc3cdcb8",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|