BDC-team-1/Traitement_Fanta.ipynb

1721 lines
56 KiB
Plaintext
Raw Permalink Normal View History

2024-01-12 15:38:45 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "c4205b5d-e052-4863-a46b-20e4757052a7",
"metadata": {},
"source": [
"# Business Data Challenge - Team 1"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 1,
2024-01-12 15:38:45 +01:00
"id": "ae3af8e6-ced8-4994-8877-fa98d4297cc0",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"id": "dd3184e7-54a1-4463-af42-5850d9517a41",
"metadata": {},
"source": [
"Configuration de l'accès aux données"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 2,
2024-01-12 15:38:45 +01:00
"id": "b6035982-9ff4-4013-9792-2d50e10db3d1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['bdc2324-data/1/1campaign_stats.csv',\n",
" 'bdc2324-data/1/1campaigns.csv',\n",
" 'bdc2324-data/1/1categories.csv',\n",
" 'bdc2324-data/1/1countries.csv',\n",
" 'bdc2324-data/1/1currencies.csv',\n",
" 'bdc2324-data/1/1customer_target_mappings.csv',\n",
" 'bdc2324-data/1/1customersplus.csv',\n",
" 'bdc2324-data/1/1event_types.csv',\n",
" 'bdc2324-data/1/1events.csv',\n",
" 'bdc2324-data/1/1facilities.csv',\n",
" 'bdc2324-data/1/1link_stats.csv',\n",
" 'bdc2324-data/1/1pricing_formulas.csv',\n",
" 'bdc2324-data/1/1product_packs.csv',\n",
" 'bdc2324-data/1/1products.csv',\n",
" 'bdc2324-data/1/1products_groups.csv',\n",
" 'bdc2324-data/1/1purchases.csv',\n",
" 'bdc2324-data/1/1representation_category_capacities.csv',\n",
" 'bdc2324-data/1/1representations.csv',\n",
" 'bdc2324-data/1/1seasons.csv',\n",
" 'bdc2324-data/1/1structure_tag_mappings.csv',\n",
" 'bdc2324-data/1/1suppliers.csv',\n",
" 'bdc2324-data/1/1tags.csv',\n",
" 'bdc2324-data/1/1target_types.csv',\n",
" 'bdc2324-data/1/1targets.csv',\n",
" 'bdc2324-data/1/1tickets.csv',\n",
" 'bdc2324-data/1/1type_of_categories.csv',\n",
" 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n",
" 'bdc2324-data/1/1type_ofs.csv']"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 2,
2024-01-12 15:38:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"import s3fs\n",
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"bdc2324-data/1\"\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 3,
2024-01-12 15:38:45 +01:00
"id": "b86c935d-124f-453f-80dd-83ea6770d09c",
"metadata": {},
"outputs": [],
"source": [
"dic_base=['campaign_stats','campaigns','categories','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets','type_of_categories','type_of_pricing_formulas','type_ofs']"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 4,
2024-01-12 15:38:45 +01:00
"id": "f6d0b27c-0ecd-406b-b042-6c3802dd68fd",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-01-15 19:58:25 +01:00
"/tmp/ipykernel_447/1008972637.py:5: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
2024-01-12 15:38:45 +01:00
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")\n"
]
}
],
"source": [
"dic_base=['campaign_stats','campaigns','categories','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets','type_of_categories','type_of_pricing_formulas','type_ofs']\n",
"for nom_base in dic_base:\n",
" FILE_PATH_S3_fanta = 'bdc2324-data/1/1' + nom_base + '.csv'\n",
" with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 5,
2024-01-12 15:38:45 +01:00
"id": "2a6b5e22-3370-457f-83b7-dd1e13663229",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'bdc2324-data/1/1type_ofs.csv'"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 5,
2024-01-12 15:38:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"FILE_PATH_S3_fanta"
]
},
2024-01-15 01:45:40 +01:00
{
"cell_type": "markdown",
"id": "79012186-ea51-4252-843e-36a9bbe3847e",
"metadata": {},
"source": [
"# Analyse exploratoire "
]
},
{
"cell_type": "markdown",
"id": "1a365f29-4766-47d8-9796-24a5271867b2",
"metadata": {},
"source": [
"## I. Base type_of_pricing_formulas"
]
},
2024-01-12 15:38:45 +01:00
{
"cell_type": "markdown",
"id": "bcc14f93-2289-44eb-816b-a51049b258df",
"metadata": {},
"source": [
"## Detection des valeur manquantes"
]
},
{
2024-01-15 01:45:40 +01:00
"cell_type": "raw",
"id": "ab2ec4c4-9d38-4aeb-8202-9116df3cdd66",
2024-01-12 15:38:45 +01:00
"metadata": {},
"source": [
"dic_prod_princing=['type_of_pricing_formulas','products_groups','pricing_formulas','product_packs','products']"
]
},
{
2024-01-15 01:45:40 +01:00
"cell_type": "markdown",
"id": "88759b4a-2633-478d-abce-29abeac376d1",
2024-01-12 15:38:45 +01:00
"metadata": {},
"source": [
"def verifier_donnees_manquantes(base):\n",
" donnees_manquantes = base.isna().sum()\n",
" print(\"Données manquantes pour la base :\")\n",
" print(donnees_manquantes)"
]
},
2024-01-15 01:45:40 +01:00
{
"cell_type": "markdown",
"id": "df3075b4-1490-4cf2-a3fe-c6d4e2144ae3",
"metadata": {},
"source": [
"for nom_base in dic_prod_princing:\n",
" verifier_donnees_manquantes(nom_base)"
]
},
2024-01-12 15:38:45 +01:00
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 6,
2024-01-15 01:45:40 +01:00
"id": "e0c67c01-e837-4772-b070-d1be0d895a36",
2024-01-12 15:38:45 +01:00
"metadata": {},
"outputs": [
{
2024-01-15 01:45:40 +01:00
"data": {
"text/plain": [
"id 0\n",
"type_of_id 0\n",
"pricing_formula_id 0\n",
"created_at 0\n",
"updated_at 0\n",
"identifier 0\n",
"dtype: int64"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 6,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
2024-01-12 15:38:45 +01:00
}
],
"source": [
2024-01-15 01:45:40 +01:00
"#detection des Nan d\n",
"\n",
"type_of_pricing_formulas.isna().sum()"
]
},
2024-01-15 21:10:39 +01:00
{
"cell_type": "code",
"execution_count": null,
"id": "83a6a48d-effe-4537-b4bb-d5a540b610f1",
"metadata": {},
"outputs": [],
"source": [
"#variable retenu:[[\"id\",\"type_of_id\",\"pricing_formula_id\"]]"
]
},
2024-01-15 01:45:40 +01:00
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 7,
2024-01-15 01:45:40 +01:00
"id": "3eaffaa6-1164-4ee9-a671-8b5eb3df797d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>type_of_id</th>\n",
" <th>pricing_formula_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>identifier</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>127</td>\n",
" <td>2021-01-05 11:55:51.226960+01:00</td>\n",
" <td>2021-01-05 11:55:51.226960+01:00</td>\n",
" <td>cf2918b25e6dcf8c30798ca05c8ec8ed</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2425</td>\n",
" <td>2021-01-05 11:55:51.235606+01:00</td>\n",
" <td>2021-01-05 11:55:51.235606+01:00</td>\n",
" <td>2c8ee3f7c1487d792b6c946314e681f2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2937</td>\n",
" <td>2021-01-05 11:55:51.240114+01:00</td>\n",
" <td>2021-01-05 11:55:51.240114+01:00</td>\n",
" <td>44e55c85e4eb59b3c3c01c137a6b25fc</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>48</td>\n",
" <td>2021-01-05 11:55:51.244638+01:00</td>\n",
" <td>2021-01-05 11:55:51.244638+01:00</td>\n",
" <td>ee3bb93b7e2217cd86a49d547fedf6c6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>2021-01-05 11:55:51.249409+01:00</td>\n",
" <td>2021-01-05 11:55:51.249409+01:00</td>\n",
" <td>ae701668574f1a653d2b21ddfd250620</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>563</th>\n",
" <td>564</td>\n",
" <td>4</td>\n",
" <td>6656</td>\n",
" <td>2022-02-18 16:15:58.872249+01:00</td>\n",
" <td>2022-02-18 16:15:58.872249+01:00</td>\n",
" <td>f669824cdca9de9697f07ff3ba365a8d</td>\n",
" </tr>\n",
" <tr>\n",
" <th>564</th>\n",
" <td>565</td>\n",
" <td>4</td>\n",
" <td>6607</td>\n",
" <td>2022-02-18 16:15:59.231018+01:00</td>\n",
" <td>2022-02-18 16:15:59.231018+01:00</td>\n",
" <td>6421c8146a598758139153b0e7b921ea</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565</th>\n",
" <td>566</td>\n",
" <td>4</td>\n",
" <td>6700</td>\n",
" <td>2022-02-18 16:15:59.724812+01:00</td>\n",
" <td>2022-02-18 16:15:59.724812+01:00</td>\n",
" <td>6823f6d4d80b322fbfb8b83545a9f96d</td>\n",
" </tr>\n",
" <tr>\n",
" <th>566</th>\n",
" <td>567</td>\n",
" <td>4</td>\n",
" <td>8118</td>\n",
" <td>2022-02-18 16:16:00.163381+01:00</td>\n",
" <td>2022-02-18 16:16:00.163381+01:00</td>\n",
" <td>35cfc12584b4d1b94795d97fd0aa56e8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>567</th>\n",
" <td>569</td>\n",
" <td>7</td>\n",
" <td>48157</td>\n",
" <td>2023-03-13 11:30:29.480161+01:00</td>\n",
" <td>2023-03-13 11:30:29.480161+01:00</td>\n",
" <td>55863541f33fd229ac9b54d9ec1f4874</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>568 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" id type_of_id pricing_formula_id created_at \\\n",
"0 1 1 127 2021-01-05 11:55:51.226960+01:00 \n",
"1 2 1 2425 2021-01-05 11:55:51.235606+01:00 \n",
"2 3 1 2937 2021-01-05 11:55:51.240114+01:00 \n",
"3 4 1 48 2021-01-05 11:55:51.244638+01:00 \n",
"4 5 1 7 2021-01-05 11:55:51.249409+01:00 \n",
".. ... ... ... ... \n",
"563 564 4 6656 2022-02-18 16:15:58.872249+01:00 \n",
"564 565 4 6607 2022-02-18 16:15:59.231018+01:00 \n",
"565 566 4 6700 2022-02-18 16:15:59.724812+01:00 \n",
"566 567 4 8118 2022-02-18 16:16:00.163381+01:00 \n",
"567 569 7 48157 2023-03-13 11:30:29.480161+01:00 \n",
"\n",
" updated_at identifier \n",
"0 2021-01-05 11:55:51.226960+01:00 cf2918b25e6dcf8c30798ca05c8ec8ed \n",
"1 2021-01-05 11:55:51.235606+01:00 2c8ee3f7c1487d792b6c946314e681f2 \n",
"2 2021-01-05 11:55:51.240114+01:00 44e55c85e4eb59b3c3c01c137a6b25fc \n",
"3 2021-01-05 11:55:51.244638+01:00 ee3bb93b7e2217cd86a49d547fedf6c6 \n",
"4 2021-01-05 11:55:51.249409+01:00 ae701668574f1a653d2b21ddfd250620 \n",
".. ... ... \n",
"563 2022-02-18 16:15:58.872249+01:00 f669824cdca9de9697f07ff3ba365a8d \n",
"564 2022-02-18 16:15:59.231018+01:00 6421c8146a598758139153b0e7b921ea \n",
"565 2022-02-18 16:15:59.724812+01:00 6823f6d4d80b322fbfb8b83545a9f96d \n",
"566 2022-02-18 16:16:00.163381+01:00 35cfc12584b4d1b94795d97fd0aa56e8 \n",
"567 2023-03-13 11:30:29.480161+01:00 55863541f33fd229ac9b54d9ec1f4874 \n",
"\n",
"[568 rows x 6 columns]"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 7,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type_of_pricing_formulas"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 8,
2024-01-15 01:45:40 +01:00
"id": "57298669-8d55-40d5-a5aa-4c5df984eec7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id int64\n",
"type_of_id int64\n",
"pricing_formula_id int64\n",
"created_at object\n",
"updated_at object\n",
"identifier object\n",
"dtype: object"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 8,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#type des variables\n",
"\n",
"type_of_pricing_formulas.dtypes"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 9,
2024-01-15 01:45:40 +01:00
"id": "c11850cb-8833-44c0-a11d-9695d620a42b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>type_of_id</th>\n",
" <th>pricing_formula_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>identifier</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [id, type_of_id, pricing_formula_id, created_at, updated_at, identifier]\n",
"Index: []"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 9,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Identification des doublons\n",
"type_of_pricing_formulas.loc[type_of_pricing_formulas['id'].duplicated(keep=False),:]"
]
},
{
"cell_type": "markdown",
"id": "7a40de03-5e18-4d3d-a0f8-da960c29fad8",
"metadata": {},
"source": [
"## II.products_groups"
2024-01-12 15:38:45 +01:00
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 10,
2024-01-15 01:45:40 +01:00
"id": "89909175-6734-4e8e-8632-d6f8ca812388",
2024-01-12 15:38:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id 0\n",
2024-01-15 01:45:40 +01:00
"percent_price 0\n",
"max_price 0\n",
"min_price 0\n",
"category_id 0\n",
2024-01-12 15:38:45 +01:00
"pricing_formula_id 0\n",
2024-01-15 01:45:40 +01:00
"representation_id 0\n",
2024-01-12 15:38:45 +01:00
"created_at 0\n",
"updated_at 0\n",
"dtype: int64"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 10,
2024-01-12 15:38:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-01-15 01:45:40 +01:00
"#detection des Nan \n",
"\n",
"products_groups.isna().sum()"
]
},
2024-01-15 21:10:39 +01:00
{
"cell_type": "code",
"execution_count": null,
"id": "e0518684-c83c-4f0a-89ea-d7dcfd60051d",
"metadata": {},
"outputs": [],
"source": [
"#variable retenu:[[\"id\",\"percent_price\",\"max_price\",\"min_price\",\"category_id\",\"pricing_formula_id\",\"representation_id\"]]"
]
},
2024-01-15 01:45:40 +01:00
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 11,
2024-01-15 01:45:40 +01:00
"id": "6a187170-96c4-48d2-9568-b270f67e2c27",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id int64\n",
"percent_price float64\n",
"max_price float64\n",
"min_price float64\n",
"category_id int64\n",
"pricing_formula_id int64\n",
"representation_id int64\n",
"created_at object\n",
"updated_at object\n",
"dtype: object"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 11,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#type des variables\n",
"\n",
"products_groups.dtypes"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 12,
2024-01-15 01:45:40 +01:00
"id": "2fba2cb0-a6a4-43b2-a854-3be07939c28b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>percent_price</th>\n",
" <th>max_price</th>\n",
" <th>min_price</th>\n",
" <th>category_id</th>\n",
" <th>pricing_formula_id</th>\n",
" <th>representation_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [id, percent_price, max_price, min_price, category_id, pricing_formula_id, representation_id, created_at, updated_at]\n",
"Index: []"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 12,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Identification des doublons\n",
"products_groups.loc[products_groups[['id','pricing_formula_id','representation_id']].duplicated(keep=False),:]"
]
},
{
"cell_type": "markdown",
"id": "5312ac13-8fbd-4c3f-a98a-8c28f079a599",
"metadata": {},
"source": [
"## III.pricing_formulas"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 13,
2024-01-15 01:45:40 +01:00
"id": "3383a773-0817-4b23-84e7-8d5d0c74b179",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>extra_field</th>\n",
" <th>identifier</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>41909</td>\n",
" <td>visite mécènes 1h30</td>\n",
" <td>2022-07-08 07:08:26.802266+02:00</td>\n",
" <td>2022-07-08 07:08:26.802266+02:00</td>\n",
" <td>NaN</td>\n",
" <td>21d4b0043c12b21952b0797d140991a1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>502</td>\n",
" <td>entree mucem tp( expo picasso)</td>\n",
" <td>2020-09-03 13:43:59.816765+02:00</td>\n",
" <td>2022-02-18 15:57:55.792581+01:00</td>\n",
" <td>NaN</td>\n",
" <td>223b09e6c3f1f75dbf8df019af97a555</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>504</td>\n",
" <td>nombre de personnes cinema</td>\n",
" <td>2020-09-03 13:43:59.818198+02:00</td>\n",
" <td>2021-01-25 19:16:05.187114+01:00</td>\n",
" <td>NaN</td>\n",
" <td>ba33b7b6d225a75d713a356b49c4d915</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>117</td>\n",
" <td>spectacle tarif e famille tr</td>\n",
" <td>2020-09-03 13:21:21.400249+02:00</td>\n",
" <td>2023-03-13 11:30:29.525335+01:00</td>\n",
" <td>NaN</td>\n",
" <td>a00b61ad933518856f86e63ca91a5750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1496</td>\n",
" <td>billet nb famille mecene 1a</td>\n",
" <td>2020-09-03 14:29:33.320952+02:00</td>\n",
" <td>2021-01-25 19:23:06.816402+01:00</td>\n",
" <td>NaN</td>\n",
" <td>7f6013803c242253a5ccde80f780984f</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>551</th>\n",
" <td>529</td>\n",
" <td>billet nb expo gr</td>\n",
" <td>2020-09-03 13:43:59.835944+02:00</td>\n",
" <td>2022-02-18 15:57:55.792581+01:00</td>\n",
" <td>NaN</td>\n",
" <td>7d888e42abe101fc8b21dc88948c8b74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>552</th>\n",
" <td>3153</td>\n",
" <td>nb pers visite scolaire rep</td>\n",
" <td>2020-09-03 16:32:37.068864+02:00</td>\n",
" <td>2022-02-18 15:57:55.792581+01:00</td>\n",
" <td>NaN</td>\n",
" <td>3cf21731c25eee650d5b232ee4780563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>553</th>\n",
" <td>5847</td>\n",
" <td>visite scolaire rep1h00</td>\n",
" <td>2021-06-09 18:10:49.742531+02:00</td>\n",
" <td>2022-02-18 15:55:03.576236+01:00</td>\n",
" <td>NaN</td>\n",
" <td>a7bb5a6892d55f0d5ee4ce5786ae5fc6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>554</th>\n",
" <td>5840</td>\n",
" <td>france billet - entree ts</td>\n",
" <td>2021-06-09 18:10:49.737576+02:00</td>\n",
" <td>2022-02-18 16:16:00.199543+01:00</td>\n",
" <td>NaN</td>\n",
" <td>4c53016fc65847646f600eff853593e5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>555</th>\n",
" <td>5863</td>\n",
" <td>france billet - entree tp</td>\n",
" <td>2021-06-09 18:12:49.269924+02:00</td>\n",
" <td>2022-02-18 16:16:00.199543+01:00</td>\n",
" <td>NaN</td>\n",
" <td>90e642c0e1ef6bc9f2bc43089798de00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>556 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" id name created_at \\\n",
"0 41909 visite mécènes 1h30 2022-07-08 07:08:26.802266+02:00 \n",
"1 502 entree mucem tp( expo picasso) 2020-09-03 13:43:59.816765+02:00 \n",
"2 504 nombre de personnes cinema 2020-09-03 13:43:59.818198+02:00 \n",
"3 117 spectacle tarif e famille tr 2020-09-03 13:21:21.400249+02:00 \n",
"4 1496 billet nb famille mecene 1a 2020-09-03 14:29:33.320952+02:00 \n",
".. ... ... ... \n",
"551 529 billet nb expo gr 2020-09-03 13:43:59.835944+02:00 \n",
"552 3153 nb pers visite scolaire rep 2020-09-03 16:32:37.068864+02:00 \n",
"553 5847 visite scolaire rep1h00 2021-06-09 18:10:49.742531+02:00 \n",
"554 5840 france billet - entree ts 2021-06-09 18:10:49.737576+02:00 \n",
"555 5863 france billet - entree tp 2021-06-09 18:12:49.269924+02:00 \n",
"\n",
" updated_at extra_field \\\n",
"0 2022-07-08 07:08:26.802266+02:00 NaN \n",
"1 2022-02-18 15:57:55.792581+01:00 NaN \n",
"2 2021-01-25 19:16:05.187114+01:00 NaN \n",
"3 2023-03-13 11:30:29.525335+01:00 NaN \n",
"4 2021-01-25 19:23:06.816402+01:00 NaN \n",
".. ... ... \n",
"551 2022-02-18 15:57:55.792581+01:00 NaN \n",
"552 2022-02-18 15:57:55.792581+01:00 NaN \n",
"553 2022-02-18 15:55:03.576236+01:00 NaN \n",
"554 2022-02-18 16:16:00.199543+01:00 NaN \n",
"555 2022-02-18 16:16:00.199543+01:00 NaN \n",
"\n",
" identifier \n",
"0 21d4b0043c12b21952b0797d140991a1 \n",
"1 223b09e6c3f1f75dbf8df019af97a555 \n",
"2 ba33b7b6d225a75d713a356b49c4d915 \n",
"3 a00b61ad933518856f86e63ca91a5750 \n",
"4 7f6013803c242253a5ccde80f780984f \n",
".. ... \n",
"551 7d888e42abe101fc8b21dc88948c8b74 \n",
"552 3cf21731c25eee650d5b232ee4780563 \n",
"553 a7bb5a6892d55f0d5ee4ce5786ae5fc6 \n",
"554 4c53016fc65847646f600eff853593e5 \n",
"555 90e642c0e1ef6bc9f2bc43089798de00 \n",
"\n",
"[556 rows x 6 columns]"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 13,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pricing_formulas"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 14,
2024-01-15 01:45:40 +01:00
"id": "d8130c73-6c5f-45b1-93ae-db7679c8ca56",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id 0.0\n",
"name 0.0\n",
"created_at 0.0\n",
"updated_at 0.0\n",
"extra_field 1.0\n",
"identifier 0.0\n",
"dtype: float64"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 14,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#detection des Nan \n",
"\n",
"pricing_formulas.isna().sum()/pricing_formulas.shape[0]"
]
},
2024-01-15 21:10:39 +01:00
{
"cell_type": "code",
"execution_count": null,
"id": "9f2909c1-bc6a-443f-a077-84f6ce6b7ab5",
"metadata": {},
"outputs": [],
"source": [
"#variable retenu: [[\"id\",\"name\"]]"
]
},
2024-01-15 01:45:40 +01:00
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 15,
2024-01-15 01:45:40 +01:00
"id": "44f1dbfd-c3cf-464b-9877-f37fcc61da92",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id int64\n",
"name object\n",
"created_at object\n",
"updated_at object\n",
"extra_field float64\n",
"identifier object\n",
"dtype: object"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 15,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#type des variables\n",
"\n",
"pricing_formulas.dtypes"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 16,
2024-01-15 01:45:40 +01:00
"id": "6784b41b-da74-4fae-832e-16641ae710c1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>extra_field</th>\n",
" <th>identifier</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [id, name, created_at, updated_at, extra_field, identifier]\n",
"Index: []"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 16,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Identification des doublons\n",
"pricing_formulas.loc[pricing_formulas[['id']].duplicated(keep=False),:]"
]
},
{
"cell_type": "markdown",
"id": "2145b0a4-b73d-4530-8c12-a78b1cf86eae",
"metadata": {},
"source": [
"## IV. product_packs"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 17,
2024-01-15 01:45:40 +01:00
"id": "e36b07a7-4f0b-4711-86a0-12a1d8158eef",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id 0.0\n",
"name 1.0\n",
"type_of 0.0\n",
"created_at 0.0\n",
"updated_at 0.0\n",
"identifier 0.0\n",
"dtype: float64"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 17,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#detection des Nan \n",
"\n",
"product_packs.isna().sum()/product_packs.shape[0]"
]
},
2024-01-15 21:10:39 +01:00
{
"cell_type": "code",
"execution_count": null,
"id": "e0887a01-51ea-4034-84fe-dc4dbf2ad949",
"metadata": {},
"outputs": [],
"source": [
"#variable retenu:[[\"id\",\"name\"]]"
]
},
2024-01-15 01:45:40 +01:00
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 18,
2024-01-15 01:45:40 +01:00
"id": "8707396a-f86b-476d-a9f9-c39f8de1d02e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id int64\n",
"name float64\n",
"type_of int64\n",
"created_at object\n",
"updated_at object\n",
"identifier object\n",
"dtype: object"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 18,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#type des variables\n",
"\n",
"product_packs.dtypes"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 19,
2024-01-15 01:45:40 +01:00
"id": "4b102bd3-924b-43da-8915-be7664c23f97",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>type_of</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>identifier</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [id, name, type_of, created_at, updated_at, identifier]\n",
"Index: []"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 19,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Identification des doublons\n",
"product_packs.loc[product_packs[['id']].duplicated(keep=False),:]"
]
},
{
"cell_type": "markdown",
"id": "cfe0c525-896b-4731-b38e-306ff6ea0c65",
"metadata": {},
"source": [
"## V.products"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 20,
2024-01-15 01:45:40 +01:00
"id": "968beb24-f70c-4eb6-8b1e-4b04bc7fe9c9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id 0.0\n",
"amount 0.0\n",
"is_full_price 0.0\n",
"representation_id 0.0\n",
"pricing_formula_id 0.0\n",
"created_at 0.0\n",
"updated_at 0.0\n",
"category_id 0.0\n",
"apply_price 0.0\n",
"products_group_id 0.0\n",
"product_pack_id 0.0\n",
"extra_field 1.0\n",
"amount_consumption 1.0\n",
"identifier 0.0\n",
"dtype: float64"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 20,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#detection des Nan \n",
"\n",
"products.isna().sum()/products.shape[0]"
]
},
{
"cell_type": "code",
2024-01-15 19:58:25 +01:00
"execution_count": 21,
2024-01-15 01:45:40 +01:00
"id": "15bc6ac6-67e8-4e2c-9641-7ee8bb2581a3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id int64\n",
"amount float64\n",
"is_full_price bool\n",
"representation_id int64\n",
"pricing_formula_id int64\n",
"created_at object\n",
"updated_at object\n",
"category_id int64\n",
"apply_price float64\n",
"products_group_id int64\n",
"product_pack_id int64\n",
"extra_field float64\n",
"amount_consumption float64\n",
"identifier object\n",
"dtype: object"
]
},
2024-01-15 19:58:25 +01:00
"execution_count": 21,
2024-01-15 01:45:40 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#type des variables\n",
"\n",
"products.dtypes"
]
},
2024-01-15 19:58:25 +01:00
{
"cell_type": "code",
"execution_count": 22,
"id": "7daa4f1a-e429-4daf-a2e1-1e311b487e09",
"metadata": {},
"outputs": [],
"source": [
"#dic_prod_princing=['type_of_pricing_formulas','products_groups','pricing_formulas','product_packs','products']"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "dc12b746-6708-4708-826a-acb5a8e665a1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>extra_field</th>\n",
" <th>identifier</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>41909</td>\n",
" <td>visite mécènes 1h30</td>\n",
" <td>2022-07-08 07:08:26.802266+02:00</td>\n",
" <td>2022-07-08 07:08:26.802266+02:00</td>\n",
" <td>NaN</td>\n",
" <td>21d4b0043c12b21952b0797d140991a1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>502</td>\n",
" <td>entree mucem tp( expo picasso)</td>\n",
" <td>2020-09-03 13:43:59.816765+02:00</td>\n",
" <td>2022-02-18 15:57:55.792581+01:00</td>\n",
" <td>NaN</td>\n",
" <td>223b09e6c3f1f75dbf8df019af97a555</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>504</td>\n",
" <td>nombre de personnes cinema</td>\n",
" <td>2020-09-03 13:43:59.818198+02:00</td>\n",
" <td>2021-01-25 19:16:05.187114+01:00</td>\n",
" <td>NaN</td>\n",
" <td>ba33b7b6d225a75d713a356b49c4d915</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>117</td>\n",
" <td>spectacle tarif e famille tr</td>\n",
" <td>2020-09-03 13:21:21.400249+02:00</td>\n",
" <td>2023-03-13 11:30:29.525335+01:00</td>\n",
" <td>NaN</td>\n",
" <td>a00b61ad933518856f86e63ca91a5750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1496</td>\n",
" <td>billet nb famille mecene 1a</td>\n",
" <td>2020-09-03 14:29:33.320952+02:00</td>\n",
" <td>2021-01-25 19:23:06.816402+01:00</td>\n",
" <td>NaN</td>\n",
" <td>7f6013803c242253a5ccde80f780984f</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>551</th>\n",
" <td>529</td>\n",
" <td>billet nb expo gr</td>\n",
" <td>2020-09-03 13:43:59.835944+02:00</td>\n",
" <td>2022-02-18 15:57:55.792581+01:00</td>\n",
" <td>NaN</td>\n",
" <td>7d888e42abe101fc8b21dc88948c8b74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>552</th>\n",
" <td>3153</td>\n",
" <td>nb pers visite scolaire rep</td>\n",
" <td>2020-09-03 16:32:37.068864+02:00</td>\n",
" <td>2022-02-18 15:57:55.792581+01:00</td>\n",
" <td>NaN</td>\n",
" <td>3cf21731c25eee650d5b232ee4780563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>553</th>\n",
" <td>5847</td>\n",
" <td>visite scolaire rep1h00</td>\n",
" <td>2021-06-09 18:10:49.742531+02:00</td>\n",
" <td>2022-02-18 15:55:03.576236+01:00</td>\n",
" <td>NaN</td>\n",
" <td>a7bb5a6892d55f0d5ee4ce5786ae5fc6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>554</th>\n",
" <td>5840</td>\n",
" <td>france billet - entree ts</td>\n",
" <td>2021-06-09 18:10:49.737576+02:00</td>\n",
" <td>2022-02-18 16:16:00.199543+01:00</td>\n",
" <td>NaN</td>\n",
" <td>4c53016fc65847646f600eff853593e5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>555</th>\n",
" <td>5863</td>\n",
" <td>france billet - entree tp</td>\n",
" <td>2021-06-09 18:12:49.269924+02:00</td>\n",
" <td>2022-02-18 16:16:00.199543+01:00</td>\n",
" <td>NaN</td>\n",
" <td>90e642c0e1ef6bc9f2bc43089798de00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>556 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" id name created_at \\\n",
"0 41909 visite mécènes 1h30 2022-07-08 07:08:26.802266+02:00 \n",
"1 502 entree mucem tp( expo picasso) 2020-09-03 13:43:59.816765+02:00 \n",
"2 504 nombre de personnes cinema 2020-09-03 13:43:59.818198+02:00 \n",
"3 117 spectacle tarif e famille tr 2020-09-03 13:21:21.400249+02:00 \n",
"4 1496 billet nb famille mecene 1a 2020-09-03 14:29:33.320952+02:00 \n",
".. ... ... ... \n",
"551 529 billet nb expo gr 2020-09-03 13:43:59.835944+02:00 \n",
"552 3153 nb pers visite scolaire rep 2020-09-03 16:32:37.068864+02:00 \n",
"553 5847 visite scolaire rep1h00 2021-06-09 18:10:49.742531+02:00 \n",
"554 5840 france billet - entree ts 2021-06-09 18:10:49.737576+02:00 \n",
"555 5863 france billet - entree tp 2021-06-09 18:12:49.269924+02:00 \n",
"\n",
" updated_at extra_field \\\n",
"0 2022-07-08 07:08:26.802266+02:00 NaN \n",
"1 2022-02-18 15:57:55.792581+01:00 NaN \n",
"2 2021-01-25 19:16:05.187114+01:00 NaN \n",
"3 2023-03-13 11:30:29.525335+01:00 NaN \n",
"4 2021-01-25 19:23:06.816402+01:00 NaN \n",
".. ... ... \n",
"551 2022-02-18 15:57:55.792581+01:00 NaN \n",
"552 2022-02-18 15:57:55.792581+01:00 NaN \n",
"553 2022-02-18 15:55:03.576236+01:00 NaN \n",
"554 2022-02-18 16:16:00.199543+01:00 NaN \n",
"555 2022-02-18 16:16:00.199543+01:00 NaN \n",
"\n",
" identifier \n",
"0 21d4b0043c12b21952b0797d140991a1 \n",
"1 223b09e6c3f1f75dbf8df019af97a555 \n",
"2 ba33b7b6d225a75d713a356b49c4d915 \n",
"3 a00b61ad933518856f86e63ca91a5750 \n",
"4 7f6013803c242253a5ccde80f780984f \n",
".. ... \n",
"551 7d888e42abe101fc8b21dc88948c8b74 \n",
"552 3cf21731c25eee650d5b232ee4780563 \n",
"553 a7bb5a6892d55f0d5ee4ce5786ae5fc6 \n",
"554 4c53016fc65847646f600eff853593e5 \n",
"555 90e642c0e1ef6bc9f2bc43089798de00 \n",
"\n",
"[556 rows x 6 columns]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pricing_formulas"
]
},
2024-01-15 01:45:40 +01:00
{
"cell_type": "markdown",
"id": "46aad10f-8530-410e-872b-bb253c553a46",
"metadata": {},
"source": [
"# jointure entre les bases"
2024-01-12 15:38:45 +01:00
]
},
{
"cell_type": "code",
"execution_count": null,
2024-01-15 19:58:25 +01:00
"id": "a4c3edd1-6d58-4c57-b3e4-0ef3529f6b8c",
"metadata": {},
"outputs": [],
"source": [
"#dic_prod_princing=['type_of_pricing_formulas','products_groups','pricing_formulas','product_packs','products']"
]
},
{
"cell_type": "code",
2024-01-15 20:46:45 +01:00
"execution_count": 74,
2024-01-15 19:58:25 +01:00
"id": "eac537e1-bbad-45bc-a85c-12b675da1088",
"metadata": {},
"outputs": [],
"source": [
"#Merge1 entre products et pricing_formulas\n",
2024-01-15 20:46:45 +01:00
"base1=products.merge(pricing_formulas, how='left', left_on= 'pricing_formula_id', right_on= 'id', suffixes = (\"_products\", \"_pricing_formula\"))"
2024-01-15 19:58:25 +01:00
]
},
2024-01-15 20:46:45 +01:00
{
"cell_type": "code",
"execution_count": 78,
"id": "75be3a30-3114-432d-87d6-697533c3c871",
"metadata": {},
"outputs": [],
"source": [
"#Merge2 entre base1 et products_groups\n",
"base2=base1.merge(products_groups, how='left', left_on= 'id_pricing_formula', right_on= 'id', suffixes = (\"_merge2\", \"_product_group\"))"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "34a169c6-07a8-4ac3-a9e1-d7e7461f7310",
"metadata": {},
"outputs": [],
"source": [
"#Merge3 entre base2 et type_of_pricing_formulas\n",
"base3=base2.merge(type_of_pricing_formulas, how='left', left_on= 'id_pricing_formula', right_on= 'pricing_formula_id', suffixes = (\"_merge3\", \"_type_of_pricing_f\"))"
]
},
2024-01-15 19:58:25 +01:00
{
"cell_type": "code",
2024-01-15 21:10:39 +01:00
"execution_count": 89,
2024-01-15 20:46:45 +01:00
"id": "f44f40d2-5304-4931-b7e6-fcc06b2657b6",
"metadata": {},
"outputs": [],
"source": [
"#Merge4 entre base3 et type_of_pricing_formulas\n",
2024-01-15 21:10:39 +01:00
"df_product_pricing=base3.merge(product_packs, how='left', left_on= 'product_pack_id', right_on= 'id', suffixes = (\"_merge4\", \"_product_pack\"))"
2024-01-15 20:46:45 +01:00
]
},
{
"cell_type": "code",
2024-01-15 21:10:39 +01:00
"execution_count": 90,
2024-01-15 20:46:45 +01:00
"id": "a28772c3-7bc1-46b4-acc8-1388dc60ec98",
2024-01-15 19:58:25 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id_products</th>\n",
" <th>amount</th>\n",
" <th>is_full_price</th>\n",
2024-01-15 20:46:45 +01:00
" <th>representation_id_merge2</th>\n",
" <th>pricing_formula_id_merge2</th>\n",
2024-01-15 19:58:25 +01:00
" <th>created_at_products</th>\n",
" <th>updated_at_products</th>\n",
2024-01-15 20:46:45 +01:00
" <th>category_id_merge2</th>\n",
2024-01-15 19:58:25 +01:00
" <th>apply_price</th>\n",
" <th>products_group_id</th>\n",
2024-01-15 20:46:45 +01:00
" <th>...</th>\n",
" <th>pricing_formula_id</th>\n",
" <th>created_at_type_of_pricing_f</th>\n",
" <th>updated_at_type_of_pricing_f</th>\n",
" <th>identifier_merge4</th>\n",
" <th>id</th>\n",
" <th>name_product_pack</th>\n",
" <th>type_of</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>identifier_product_pack</th>\n",
2024-01-15 19:58:25 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10682</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>914</td>\n",
" <td>114</td>\n",
" <td>2020-09-03 14:09:43.119798+02:00</td>\n",
" <td>2020-09-03 14:09:43.119798+02:00</td>\n",
" <td>41</td>\n",
" <td>0.0</td>\n",
" <td>10655</td>\n",
2024-01-15 20:46:45 +01:00
" <td>...</td>\n",
" <td>114.0</td>\n",
" <td>2021-02-15 17:02:27.395376+01:00</td>\n",
" <td>2021-02-15 17:02:27.395376+01:00</td>\n",
" <td>3706121eb9f43b635bef1433c06f679c</td>\n",
2024-01-15 19:58:25 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
2024-01-15 20:46:45 +01:00
" <td>0</td>\n",
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
" <td>a764b4bf13a360c7ac2a35ec4ca96c95</td>\n",
2024-01-15 19:58:25 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>478</td>\n",
" <td>9.5</td>\n",
" <td>False</td>\n",
" <td>273</td>\n",
" <td>131</td>\n",
" <td>2020-09-03 13:21:22.711773+02:00</td>\n",
" <td>2020-09-03 13:21:22.711773+02:00</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>471</td>\n",
2024-01-15 20:46:45 +01:00
" <td>...</td>\n",
" <td>131.0</td>\n",
" <td>2021-02-05 11:52:05.923905+01:00</td>\n",
" <td>2021-02-05 11:52:05.923905+01:00</td>\n",
" <td>0aceb248607671792298436004b95275</td>\n",
2024-01-15 19:58:25 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
2024-01-15 20:46:45 +01:00
" <td>0</td>\n",
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
" <td>a764b4bf13a360c7ac2a35ec4ca96c95</td>\n",
2024-01-15 19:58:25 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>20873</td>\n",
" <td>11.5</td>\n",
" <td>False</td>\n",
" <td>275</td>\n",
" <td>137</td>\n",
" <td>2020-09-03 14:46:33.589030+02:00</td>\n",
" <td>2020-09-03 14:46:33.589030+02:00</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>20825</td>\n",
2024-01-15 20:46:45 +01:00
" <td>...</td>\n",
" <td>137.0</td>\n",
" <td>2021-02-05 11:52:05.939898+01:00</td>\n",
" <td>2021-02-05 11:52:05.939898+01:00</td>\n",
" <td>93002d4637331edd81ffc28b6e8e89c0</td>\n",
2024-01-15 19:58:25 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
2024-01-15 20:46:45 +01:00
" <td>0</td>\n",
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
" <td>a764b4bf13a360c7ac2a35ec4ca96c95</td>\n",
2024-01-15 19:58:25 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>157142</td>\n",
" <td>8.0</td>\n",
" <td>False</td>\n",
" <td>82519</td>\n",
" <td>9</td>\n",
" <td>2022-01-28 19:29:23.525722+01:00</td>\n",
" <td>2022-01-28 19:29:23.525722+01:00</td>\n",
" <td>5</td>\n",
" <td>0.0</td>\n",
" <td>156773</td>\n",
2024-01-15 20:46:45 +01:00
" <td>...</td>\n",
" <td>9.0</td>\n",
" <td>2021-02-05 11:52:06.107939+01:00</td>\n",
" <td>2021-02-05 11:52:06.107939+01:00</td>\n",
" <td>7d0b25bdfff9f366da8be820608c8191</td>\n",
2024-01-15 19:58:25 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
2024-01-15 20:46:45 +01:00
" <td>0</td>\n",
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
" <td>a764b4bf13a360c7ac2a35ec4ca96c95</td>\n",
2024-01-15 19:58:25 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1341</td>\n",
" <td>8.5</td>\n",
" <td>False</td>\n",
" <td>9</td>\n",
" <td>93</td>\n",
" <td>2020-09-03 13:29:30.773089+02:00</td>\n",
" <td>2020-09-03 13:29:30.773089+02:00</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>1175</td>\n",
" <td>...</td>\n",
2024-01-15 20:46:45 +01:00
" <td>93.0</td>\n",
" <td>2021-02-05 11:52:06.004162+01:00</td>\n",
" <td>2021-02-05 11:52:06.004162+01:00</td>\n",
" <td>1dbb0795e8f47cb75ba7cdb08c06be5f</td>\n",
2024-01-15 19:58:25 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
2024-01-15 20:46:45 +01:00
" <td>0</td>\n",
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
" <td>2020-09-03 13:11:24.501197+02:00</td>\n",
" <td>a764b4bf13a360c7ac2a35ec4ca96c95</td>\n",
2024-01-15 19:58:25 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2024-01-15 20:46:45 +01:00
"<p>5 rows × 41 columns</p>\n",
2024-01-15 19:58:25 +01:00
"</div>"
],
"text/plain": [
2024-01-15 20:46:45 +01:00
" id_products amount is_full_price representation_id_merge2 \\\n",
"0 10682 9.0 False 914 \n",
"1 478 9.5 False 273 \n",
"2 20873 11.5 False 275 \n",
"3 157142 8.0 False 82519 \n",
"4 1341 8.5 False 9 \n",
2024-01-15 19:58:25 +01:00
"\n",
2024-01-15 20:46:45 +01:00
" pricing_formula_id_merge2 created_at_products \\\n",
"0 114 2020-09-03 14:09:43.119798+02:00 \n",
"1 131 2020-09-03 13:21:22.711773+02:00 \n",
"2 137 2020-09-03 14:46:33.589030+02:00 \n",
"3 9 2022-01-28 19:29:23.525722+01:00 \n",
"4 93 2020-09-03 13:29:30.773089+02:00 \n",
2024-01-15 19:58:25 +01:00
"\n",
2024-01-15 20:46:45 +01:00
" updated_at_products category_id_merge2 apply_price \\\n",
"0 2020-09-03 14:09:43.119798+02:00 41 0.0 \n",
"1 2020-09-03 13:21:22.711773+02:00 1 0.0 \n",
"2 2020-09-03 14:46:33.589030+02:00 1 0.0 \n",
"3 2022-01-28 19:29:23.525722+01:00 5 0.0 \n",
"4 2020-09-03 13:29:30.773089+02:00 1 0.0 \n",
2024-01-15 19:58:25 +01:00
"\n",
2024-01-15 20:46:45 +01:00
" products_group_id ... pricing_formula_id \\\n",
"0 10655 ... 114.0 \n",
"1 471 ... 131.0 \n",
"2 20825 ... 137.0 \n",
"3 156773 ... 9.0 \n",
"4 1175 ... 93.0 \n",
2024-01-15 19:58:25 +01:00
"\n",
2024-01-15 20:46:45 +01:00
" created_at_type_of_pricing_f updated_at_type_of_pricing_f \\\n",
"0 2021-02-15 17:02:27.395376+01:00 2021-02-15 17:02:27.395376+01:00 \n",
"1 2021-02-05 11:52:05.923905+01:00 2021-02-05 11:52:05.923905+01:00 \n",
"2 2021-02-05 11:52:05.939898+01:00 2021-02-05 11:52:05.939898+01:00 \n",
"3 2021-02-05 11:52:06.107939+01:00 2021-02-05 11:52:06.107939+01:00 \n",
"4 2021-02-05 11:52:06.004162+01:00 2021-02-05 11:52:06.004162+01:00 \n",
2024-01-15 19:58:25 +01:00
"\n",
2024-01-15 20:46:45 +01:00
" identifier_merge4 id name_product_pack type_of \\\n",
"0 3706121eb9f43b635bef1433c06f679c 1 NaN 0 \n",
"1 0aceb248607671792298436004b95275 1 NaN 0 \n",
"2 93002d4637331edd81ffc28b6e8e89c0 1 NaN 0 \n",
"3 7d0b25bdfff9f366da8be820608c8191 1 NaN 0 \n",
"4 1dbb0795e8f47cb75ba7cdb08c06be5f 1 NaN 0 \n",
2024-01-15 19:58:25 +01:00
"\n",
2024-01-15 20:46:45 +01:00
" created_at updated_at \\\n",
"0 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n",
"1 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n",
"2 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n",
"3 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n",
"4 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n",
2024-01-15 19:58:25 +01:00
"\n",
2024-01-15 20:46:45 +01:00
" identifier_product_pack \n",
"0 a764b4bf13a360c7ac2a35ec4ca96c95 \n",
"1 a764b4bf13a360c7ac2a35ec4ca96c95 \n",
"2 a764b4bf13a360c7ac2a35ec4ca96c95 \n",
"3 a764b4bf13a360c7ac2a35ec4ca96c95 \n",
"4 a764b4bf13a360c7ac2a35ec4ca96c95 \n",
2024-01-15 19:58:25 +01:00
"\n",
2024-01-15 20:46:45 +01:00
"[5 rows x 41 columns]"
2024-01-15 19:58:25 +01:00
]
},
2024-01-15 21:10:39 +01:00
"execution_count": 90,
2024-01-15 19:58:25 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-01-15 21:10:39 +01:00
"df_product_pricing.head(5)"
2024-01-15 19:58:25 +01:00
]
},
{
"cell_type": "code",
"execution_count": null,
2024-01-15 20:46:45 +01:00
"id": "03442997-806f-4285-a139-3bad46bb4522",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "d22a0d75-53c5-4b54-9060-c9e7c307fb13",
2024-01-12 15:38:45 +01:00
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}