{
"cells": [
{
"cell_type": "markdown",
"id": "c4205b5d-e052-4863-a46b-20e4757052a7",
"metadata": {},
"source": [
"# Business Data Challenge - Team 1"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "ae3af8e6-ced8-4994-8877-fa98d4297cc0",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"id": "dd3184e7-54a1-4463-af42-5850d9517a41",
"metadata": {},
"source": [
"Configuration de l'accès aux données"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b6035982-9ff4-4013-9792-2d50e10db3d1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['bdc2324-data/1/1campaign_stats.csv',\n",
" 'bdc2324-data/1/1campaigns.csv',\n",
" 'bdc2324-data/1/1categories.csv',\n",
" 'bdc2324-data/1/1countries.csv',\n",
" 'bdc2324-data/1/1currencies.csv',\n",
" 'bdc2324-data/1/1customer_target_mappings.csv',\n",
" 'bdc2324-data/1/1customersplus.csv',\n",
" 'bdc2324-data/1/1event_types.csv',\n",
" 'bdc2324-data/1/1events.csv',\n",
" 'bdc2324-data/1/1facilities.csv',\n",
" 'bdc2324-data/1/1link_stats.csv',\n",
" 'bdc2324-data/1/1pricing_formulas.csv',\n",
" 'bdc2324-data/1/1product_packs.csv',\n",
" 'bdc2324-data/1/1products.csv',\n",
" 'bdc2324-data/1/1products_groups.csv',\n",
" 'bdc2324-data/1/1purchases.csv',\n",
" 'bdc2324-data/1/1representation_category_capacities.csv',\n",
" 'bdc2324-data/1/1representations.csv',\n",
" 'bdc2324-data/1/1seasons.csv',\n",
" 'bdc2324-data/1/1structure_tag_mappings.csv',\n",
" 'bdc2324-data/1/1suppliers.csv',\n",
" 'bdc2324-data/1/1tags.csv',\n",
" 'bdc2324-data/1/1target_types.csv',\n",
" 'bdc2324-data/1/1targets.csv',\n",
" 'bdc2324-data/1/1tickets.csv',\n",
" 'bdc2324-data/1/1type_of_categories.csv',\n",
" 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n",
" 'bdc2324-data/1/1type_ofs.csv']"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"import s3fs\n",
"# Build the endpoint URL from the host name exported in the environment\n",
"# (a KeyError is raised when AWS_S3_ENDPOINT is not set).\n",
"S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n",
"\n",
"# Filesystem handle bound to that endpoint.\n",
"fs = s3fs.S3FileSystem(\n",
"    client_kwargs={'endpoint_url': S3_ENDPOINT_URL},\n",
")\n",
"\n",
"# Bucket prefix to inspect.\n",
"BUCKET = \"bdc2324-data/1\"\n",
"\n",
"# Last expression of the cell: list the objects stored under the prefix.\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b86c935d-124f-453f-80dd-83ea6770d09c",
"metadata": {},
"outputs": [],
"source": [
"# Table names, one per line; the loading loop builds one CSV path per name.\n",
"dic_base = [\n",
"    'campaign_stats',\n",
"    'campaigns',\n",
"    'categories',\n",
"    'countries',\n",
"    'currencies',\n",
"    'customer_target_mappings',\n",
"    'customersplus',\n",
"    'event_types',\n",
"    'events',\n",
"    'facilities',\n",
"    'link_stats',\n",
"    'pricing_formulas',\n",
"    'product_packs',\n",
"    'products',\n",
"    'products_groups',\n",
"    'purchases',\n",
"    'representation_category_capacities',\n",
"    'representations',\n",
"    'seasons',\n",
"    'structure_tag_mappings',\n",
"    'suppliers',\n",
"    'tags',\n",
"    'target_types',\n",
"    'targets',\n",
"    'tickets',\n",
"    'type_of_categories',\n",
"    'type_of_pricing_formulas',\n",
"    'type_ofs',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f6d0b27c-0ecd-406b-b042-6c3802dd68fd",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_447/1008972637.py:5: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")\n"
]
}
],
"source": [
"# Load every table listed in `dic_base` (defined in the previous cell).\n",
"for nom_base in dic_base:\n",
"    # Derive the object key from BUCKET rather than repeating the prefix literal;\n",
"    # the objects are named '1<table>.csv' under that prefix.\n",
"    FILE_PATH_S3_fanta = f\"{BUCKET}/1{nom_base}.csv\"\n",
"    with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
"        # low_memory=False makes pandas infer each column dtype from the whole\n",
"        # file instead of chunk by chunk, which removes the mixed-type DtypeWarning.\n",
"        # The frame is bound to a global named after the table because later cells\n",
"        # refer to the tables by bare name (e.g. `products`, `pricing_formulas`).\n",
"        globals()[nom_base] = pd.read_csv(file_in, sep=\",\", low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2a6b5e22-3370-457f-83b7-dd1e13663229",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'bdc2324-data/1/1type_ofs.csv'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: path of the last file opened by the loading loop.\n",
"FILE_PATH_S3_fanta"
]
},
{
"cell_type": "markdown",
"id": "79012186-ea51-4252-843e-36a9bbe3847e",
"metadata": {},
"source": [
"# Analyse exploratoire "
]
},
{
"cell_type": "markdown",
"id": "1a365f29-4766-47d8-9796-24a5271867b2",
"metadata": {},
"source": [
"## I. Base type_of_pricing_formulas"
]
},
{
"cell_type": "markdown",
"id": "bcc14f93-2289-44eb-816b-a51049b258df",
"metadata": {},
"source": [
"## Détection des valeurs manquantes"
]
},
{
"cell_type": "raw",
"id": "ab2ec4c4-9d38-4aeb-8202-9116df3cdd66",
"metadata": {},
"source": [
"dic_prod_princing=['type_of_pricing_formulas','products_groups','pricing_formulas','product_packs','products']"
]
},
{
"cell_type": "markdown",
"id": "88759b4a-2633-478d-abce-29abeac376d1",
"metadata": {},
"source": [
"def verifier_donnees_manquantes(base):\n",
"    \"\"\"Print the number of missing values found in each column of ``base``.\n",
"\n",
"    ``base`` must provide ``isna()`` (for example a pandas DataFrame).\n",
"    Nothing is returned; the counts are only printed.\n",
"    \"\"\"\n",
"    # Count first, so an unsuitable ``base`` fails before anything is printed.\n",
"    nb_manquants_par_colonne = base.isna().sum()\n",
"    print(\"Données manquantes pour la base :\")\n",
"    print(nb_manquants_par_colonne)"
]
},
{
"cell_type": "markdown",
"id": "df3075b4-1490-4cf2-a3fe-c6d4e2144ae3",
"metadata": {},
"source": [
"for nom_base in dic_prod_princing:\n",
"    # dic_prod_princing holds table *names* (strings). Passing the name itself\n",
"    # fails because str has no isna(); resolve it to the DataFrame that the\n",
"    # loading loop stored under that name in the notebook globals.\n",
"    verifier_donnees_manquantes(globals()[nom_base])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e0c67c01-e837-4772-b070-d1be0d895a36",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id 0\n",
"type_of_id 0\n",
"pricing_formula_id 0\n",
"created_at 0\n",
"updated_at 0\n",
"identifier 0\n",
"dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of missing values in each column.\n",
"type_of_pricing_formulas.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "3eaffaa6-1164-4ee9-a671-8b5eb3df797d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" type_of_id | \n",
" pricing_formula_id | \n",
" created_at | \n",
" updated_at | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 127 | \n",
" 2021-01-05 11:55:51.226960+01:00 | \n",
" 2021-01-05 11:55:51.226960+01:00 | \n",
" cf2918b25e6dcf8c30798ca05c8ec8ed | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 2425 | \n",
" 2021-01-05 11:55:51.235606+01:00 | \n",
" 2021-01-05 11:55:51.235606+01:00 | \n",
" 2c8ee3f7c1487d792b6c946314e681f2 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 1 | \n",
" 2937 | \n",
" 2021-01-05 11:55:51.240114+01:00 | \n",
" 2021-01-05 11:55:51.240114+01:00 | \n",
" 44e55c85e4eb59b3c3c01c137a6b25fc | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 1 | \n",
" 48 | \n",
" 2021-01-05 11:55:51.244638+01:00 | \n",
" 2021-01-05 11:55:51.244638+01:00 | \n",
" ee3bb93b7e2217cd86a49d547fedf6c6 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 1 | \n",
" 7 | \n",
" 2021-01-05 11:55:51.249409+01:00 | \n",
" 2021-01-05 11:55:51.249409+01:00 | \n",
" ae701668574f1a653d2b21ddfd250620 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 563 | \n",
" 564 | \n",
" 4 | \n",
" 6656 | \n",
" 2022-02-18 16:15:58.872249+01:00 | \n",
" 2022-02-18 16:15:58.872249+01:00 | \n",
" f669824cdca9de9697f07ff3ba365a8d | \n",
"
\n",
" \n",
" 564 | \n",
" 565 | \n",
" 4 | \n",
" 6607 | \n",
" 2022-02-18 16:15:59.231018+01:00 | \n",
" 2022-02-18 16:15:59.231018+01:00 | \n",
" 6421c8146a598758139153b0e7b921ea | \n",
"
\n",
" \n",
" 565 | \n",
" 566 | \n",
" 4 | \n",
" 6700 | \n",
" 2022-02-18 16:15:59.724812+01:00 | \n",
" 2022-02-18 16:15:59.724812+01:00 | \n",
" 6823f6d4d80b322fbfb8b83545a9f96d | \n",
"
\n",
" \n",
" 566 | \n",
" 567 | \n",
" 4 | \n",
" 8118 | \n",
" 2022-02-18 16:16:00.163381+01:00 | \n",
" 2022-02-18 16:16:00.163381+01:00 | \n",
" 35cfc12584b4d1b94795d97fd0aa56e8 | \n",
"
\n",
" \n",
" 567 | \n",
" 569 | \n",
" 7 | \n",
" 48157 | \n",
" 2023-03-13 11:30:29.480161+01:00 | \n",
" 2023-03-13 11:30:29.480161+01:00 | \n",
" 55863541f33fd229ac9b54d9ec1f4874 | \n",
"
\n",
" \n",
"
\n",
"
568 rows × 6 columns
\n",
"
"
],
"text/plain": [
" id type_of_id pricing_formula_id created_at \\\n",
"0 1 1 127 2021-01-05 11:55:51.226960+01:00 \n",
"1 2 1 2425 2021-01-05 11:55:51.235606+01:00 \n",
"2 3 1 2937 2021-01-05 11:55:51.240114+01:00 \n",
"3 4 1 48 2021-01-05 11:55:51.244638+01:00 \n",
"4 5 1 7 2021-01-05 11:55:51.249409+01:00 \n",
".. ... ... ... ... \n",
"563 564 4 6656 2022-02-18 16:15:58.872249+01:00 \n",
"564 565 4 6607 2022-02-18 16:15:59.231018+01:00 \n",
"565 566 4 6700 2022-02-18 16:15:59.724812+01:00 \n",
"566 567 4 8118 2022-02-18 16:16:00.163381+01:00 \n",
"567 569 7 48157 2023-03-13 11:30:29.480161+01:00 \n",
"\n",
" updated_at identifier \n",
"0 2021-01-05 11:55:51.226960+01:00 cf2918b25e6dcf8c30798ca05c8ec8ed \n",
"1 2021-01-05 11:55:51.235606+01:00 2c8ee3f7c1487d792b6c946314e681f2 \n",
"2 2021-01-05 11:55:51.240114+01:00 44e55c85e4eb59b3c3c01c137a6b25fc \n",
"3 2021-01-05 11:55:51.244638+01:00 ee3bb93b7e2217cd86a49d547fedf6c6 \n",
"4 2021-01-05 11:55:51.249409+01:00 ae701668574f1a653d2b21ddfd250620 \n",
".. ... ... \n",
"563 2022-02-18 16:15:58.872249+01:00 f669824cdca9de9697f07ff3ba365a8d \n",
"564 2022-02-18 16:15:59.231018+01:00 6421c8146a598758139153b0e7b921ea \n",
"565 2022-02-18 16:15:59.724812+01:00 6823f6d4d80b322fbfb8b83545a9f96d \n",
"566 2022-02-18 16:16:00.163381+01:00 35cfc12584b4d1b94795d97fd0aa56e8 \n",
"567 2023-03-13 11:30:29.480161+01:00 55863541f33fd229ac9b54d9ec1f4874 \n",
"\n",
"[568 rows x 6 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the table (pandas truncates the rendering to the first and last rows).\n",
"type_of_pricing_formulas"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "57298669-8d55-40d5-a5aa-4c5df984eec7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id int64\n",
"type_of_id int64\n",
"pricing_formula_id int64\n",
"created_at object\n",
"updated_at object\n",
"identifier object\n",
"dtype: object"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Data type inferred for each column.\n",
"type_of_pricing_formulas.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c11850cb-8833-44c0-a11d-9695d620a42b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" type_of_id | \n",
" pricing_formula_id | \n",
" created_at | \n",
" updated_at | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [id, type_of_id, pricing_formula_id, created_at, updated_at, identifier]\n",
"Index: []"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows whose 'id' occurs more than once (keep=False flags every occurrence).\n",
"type_of_pricing_formulas[\n",
"    type_of_pricing_formulas.duplicated(subset='id', keep=False)\n",
"]"
]
},
{
"cell_type": "markdown",
"id": "7a40de03-5e18-4d3d-a0f8-da960c29fad8",
"metadata": {},
"source": [
"## II. products_groups"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "89909175-6734-4e8e-8632-d6f8ca812388",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id 0\n",
"percent_price 0\n",
"max_price 0\n",
"min_price 0\n",
"category_id 0\n",
"pricing_formula_id 0\n",
"representation_id 0\n",
"created_at 0\n",
"updated_at 0\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of missing values in each column.\n",
"products_groups.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "6a187170-96c4-48d2-9568-b270f67e2c27",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id int64\n",
"percent_price float64\n",
"max_price float64\n",
"min_price float64\n",
"category_id int64\n",
"pricing_formula_id int64\n",
"representation_id int64\n",
"created_at object\n",
"updated_at object\n",
"dtype: object"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Data type inferred for each column.\n",
"products_groups.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "2fba2cb0-a6a4-43b2-a854-3be07939c28b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" percent_price | \n",
" max_price | \n",
" min_price | \n",
" category_id | \n",
" pricing_formula_id | \n",
" representation_id | \n",
" created_at | \n",
" updated_at | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [id, percent_price, max_price, min_price, category_id, pricing_formula_id, representation_id, created_at, updated_at]\n",
"Index: []"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows sharing the same (id, pricing_formula_id, representation_id) triple.\n",
"# NOTE(review): 'id' is part of the key, so if 'id' is unique on its own this\n",
"# check can never report anything -- confirm the intended key.\n",
"cles_products_groups = ['id', 'pricing_formula_id', 'representation_id']\n",
"products_groups[\n",
"    products_groups.duplicated(subset=cles_products_groups, keep=False)\n",
"]"
]
},
{
"cell_type": "markdown",
"id": "5312ac13-8fbd-4c3f-a98a-8c28f079a599",
"metadata": {},
"source": [
"## III. pricing_formulas"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "3383a773-0817-4b23-84e7-8d5d0c74b179",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" name | \n",
" created_at | \n",
" updated_at | \n",
" extra_field | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 41909 | \n",
" visite mécènes 1h30 | \n",
" 2022-07-08 07:08:26.802266+02:00 | \n",
" 2022-07-08 07:08:26.802266+02:00 | \n",
" NaN | \n",
" 21d4b0043c12b21952b0797d140991a1 | \n",
"
\n",
" \n",
" 1 | \n",
" 502 | \n",
" entree mucem tp( expo picasso) | \n",
" 2020-09-03 13:43:59.816765+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" 223b09e6c3f1f75dbf8df019af97a555 | \n",
"
\n",
" \n",
" 2 | \n",
" 504 | \n",
" nombre de personnes cinema | \n",
" 2020-09-03 13:43:59.818198+02:00 | \n",
" 2021-01-25 19:16:05.187114+01:00 | \n",
" NaN | \n",
" ba33b7b6d225a75d713a356b49c4d915 | \n",
"
\n",
" \n",
" 3 | \n",
" 117 | \n",
" spectacle tarif e famille tr | \n",
" 2020-09-03 13:21:21.400249+02:00 | \n",
" 2023-03-13 11:30:29.525335+01:00 | \n",
" NaN | \n",
" a00b61ad933518856f86e63ca91a5750 | \n",
"
\n",
" \n",
" 4 | \n",
" 1496 | \n",
" billet nb famille mecene 1a | \n",
" 2020-09-03 14:29:33.320952+02:00 | \n",
" 2021-01-25 19:23:06.816402+01:00 | \n",
" NaN | \n",
" 7f6013803c242253a5ccde80f780984f | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 551 | \n",
" 529 | \n",
" billet nb expo gr | \n",
" 2020-09-03 13:43:59.835944+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" 7d888e42abe101fc8b21dc88948c8b74 | \n",
"
\n",
" \n",
" 552 | \n",
" 3153 | \n",
" nb pers visite scolaire rep | \n",
" 2020-09-03 16:32:37.068864+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" 3cf21731c25eee650d5b232ee4780563 | \n",
"
\n",
" \n",
" 553 | \n",
" 5847 | \n",
" visite scolaire rep1h00 | \n",
" 2021-06-09 18:10:49.742531+02:00 | \n",
" 2022-02-18 15:55:03.576236+01:00 | \n",
" NaN | \n",
" a7bb5a6892d55f0d5ee4ce5786ae5fc6 | \n",
"
\n",
" \n",
" 554 | \n",
" 5840 | \n",
" france billet - entree ts | \n",
" 2021-06-09 18:10:49.737576+02:00 | \n",
" 2022-02-18 16:16:00.199543+01:00 | \n",
" NaN | \n",
" 4c53016fc65847646f600eff853593e5 | \n",
"
\n",
" \n",
" 555 | \n",
" 5863 | \n",
" france billet - entree tp | \n",
" 2021-06-09 18:12:49.269924+02:00 | \n",
" 2022-02-18 16:16:00.199543+01:00 | \n",
" NaN | \n",
" 90e642c0e1ef6bc9f2bc43089798de00 | \n",
"
\n",
" \n",
"
\n",
"
556 rows × 6 columns
\n",
"
"
],
"text/plain": [
" id name created_at \\\n",
"0 41909 visite mécènes 1h30 2022-07-08 07:08:26.802266+02:00 \n",
"1 502 entree mucem tp( expo picasso) 2020-09-03 13:43:59.816765+02:00 \n",
"2 504 nombre de personnes cinema 2020-09-03 13:43:59.818198+02:00 \n",
"3 117 spectacle tarif e famille tr 2020-09-03 13:21:21.400249+02:00 \n",
"4 1496 billet nb famille mecene 1a 2020-09-03 14:29:33.320952+02:00 \n",
".. ... ... ... \n",
"551 529 billet nb expo gr 2020-09-03 13:43:59.835944+02:00 \n",
"552 3153 nb pers visite scolaire rep 2020-09-03 16:32:37.068864+02:00 \n",
"553 5847 visite scolaire rep1h00 2021-06-09 18:10:49.742531+02:00 \n",
"554 5840 france billet - entree ts 2021-06-09 18:10:49.737576+02:00 \n",
"555 5863 france billet - entree tp 2021-06-09 18:12:49.269924+02:00 \n",
"\n",
" updated_at extra_field \\\n",
"0 2022-07-08 07:08:26.802266+02:00 NaN \n",
"1 2022-02-18 15:57:55.792581+01:00 NaN \n",
"2 2021-01-25 19:16:05.187114+01:00 NaN \n",
"3 2023-03-13 11:30:29.525335+01:00 NaN \n",
"4 2021-01-25 19:23:06.816402+01:00 NaN \n",
".. ... ... \n",
"551 2022-02-18 15:57:55.792581+01:00 NaN \n",
"552 2022-02-18 15:57:55.792581+01:00 NaN \n",
"553 2022-02-18 15:55:03.576236+01:00 NaN \n",
"554 2022-02-18 16:16:00.199543+01:00 NaN \n",
"555 2022-02-18 16:16:00.199543+01:00 NaN \n",
"\n",
" identifier \n",
"0 21d4b0043c12b21952b0797d140991a1 \n",
"1 223b09e6c3f1f75dbf8df019af97a555 \n",
"2 ba33b7b6d225a75d713a356b49c4d915 \n",
"3 a00b61ad933518856f86e63ca91a5750 \n",
"4 7f6013803c242253a5ccde80f780984f \n",
".. ... \n",
"551 7d888e42abe101fc8b21dc88948c8b74 \n",
"552 3cf21731c25eee650d5b232ee4780563 \n",
"553 a7bb5a6892d55f0d5ee4ce5786ae5fc6 \n",
"554 4c53016fc65847646f600eff853593e5 \n",
"555 90e642c0e1ef6bc9f2bc43089798de00 \n",
"\n",
"[556 rows x 6 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the table (pandas truncates the rendering to the first and last rows).\n",
"pricing_formulas"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "d8130c73-6c5f-45b1-93ae-db7679c8ca56",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id 0.0\n",
"name 0.0\n",
"created_at 0.0\n",
"updated_at 0.0\n",
"extra_field 1.0\n",
"identifier 0.0\n",
"dtype: float64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share of missing values in each column (0.0 = none missing, 1.0 = all missing).\n",
"pricing_formulas.isna().mean()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "44f1dbfd-c3cf-464b-9877-f37fcc61da92",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id int64\n",
"name object\n",
"created_at object\n",
"updated_at object\n",
"extra_field float64\n",
"identifier object\n",
"dtype: object"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Data type inferred for each column.\n",
"pricing_formulas.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "6784b41b-da74-4fae-832e-16641ae710c1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" name | \n",
" created_at | \n",
" updated_at | \n",
" extra_field | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [id, name, created_at, updated_at, extra_field, identifier]\n",
"Index: []"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows whose 'id' occurs more than once (keep=False flags every occurrence).\n",
"pricing_formulas[\n",
"    pricing_formulas.duplicated(subset=['id'], keep=False)\n",
"]"
]
},
{
"cell_type": "markdown",
"id": "2145b0a4-b73d-4530-8c12-a78b1cf86eae",
"metadata": {},
"source": [
"## IV. product_packs"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "e36b07a7-4f0b-4711-86a0-12a1d8158eef",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id 0.0\n",
"name 1.0\n",
"type_of 0.0\n",
"created_at 0.0\n",
"updated_at 0.0\n",
"identifier 0.0\n",
"dtype: float64"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share of missing values in each column (0.0 = none missing, 1.0 = all missing).\n",
"product_packs.isna().mean()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "8707396a-f86b-476d-a9f9-c39f8de1d02e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id int64\n",
"name float64\n",
"type_of int64\n",
"created_at object\n",
"updated_at object\n",
"identifier object\n",
"dtype: object"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Data type inferred for each column.\n",
"product_packs.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "4b102bd3-924b-43da-8915-be7664c23f97",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" name | \n",
" type_of | \n",
" created_at | \n",
" updated_at | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [id, name, type_of, created_at, updated_at, identifier]\n",
"Index: []"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows whose 'id' occurs more than once (keep=False flags every occurrence).\n",
"product_packs[\n",
"    product_packs.duplicated(subset=['id'], keep=False)\n",
"]"
]
},
{
"cell_type": "markdown",
"id": "cfe0c525-896b-4731-b38e-306ff6ea0c65",
"metadata": {},
"source": [
"## V. products"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "968beb24-f70c-4eb6-8b1e-4b04bc7fe9c9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id 0.0\n",
"amount 0.0\n",
"is_full_price 0.0\n",
"representation_id 0.0\n",
"pricing_formula_id 0.0\n",
"created_at 0.0\n",
"updated_at 0.0\n",
"category_id 0.0\n",
"apply_price 0.0\n",
"products_group_id 0.0\n",
"product_pack_id 0.0\n",
"extra_field 1.0\n",
"amount_consumption 1.0\n",
"identifier 0.0\n",
"dtype: float64"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share of missing values in each column (0.0 = none missing, 1.0 = all missing).\n",
"products.isna().mean()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "15bc6ac6-67e8-4e2c-9641-7ee8bb2581a3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id int64\n",
"amount float64\n",
"is_full_price bool\n",
"representation_id int64\n",
"pricing_formula_id int64\n",
"created_at object\n",
"updated_at object\n",
"category_id int64\n",
"apply_price float64\n",
"products_group_id int64\n",
"product_pack_id int64\n",
"extra_field float64\n",
"amount_consumption float64\n",
"identifier object\n",
"dtype: object"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Data type inferred for each column.\n",
"products.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "7daa4f1a-e429-4daf-a2e1-1e311b487e09",
"metadata": {},
"outputs": [],
"source": [
"#dic_prod_princing=['type_of_pricing_formulas','products_groups','pricing_formulas','product_packs','products']"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "dc12b746-6708-4708-826a-acb5a8e665a1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" name | \n",
" created_at | \n",
" updated_at | \n",
" extra_field | \n",
" identifier | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 41909 | \n",
" visite mécènes 1h30 | \n",
" 2022-07-08 07:08:26.802266+02:00 | \n",
" 2022-07-08 07:08:26.802266+02:00 | \n",
" NaN | \n",
" 21d4b0043c12b21952b0797d140991a1 | \n",
"
\n",
" \n",
" 1 | \n",
" 502 | \n",
" entree mucem tp( expo picasso) | \n",
" 2020-09-03 13:43:59.816765+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" 223b09e6c3f1f75dbf8df019af97a555 | \n",
"
\n",
" \n",
" 2 | \n",
" 504 | \n",
" nombre de personnes cinema | \n",
" 2020-09-03 13:43:59.818198+02:00 | \n",
" 2021-01-25 19:16:05.187114+01:00 | \n",
" NaN | \n",
" ba33b7b6d225a75d713a356b49c4d915 | \n",
"
\n",
" \n",
" 3 | \n",
" 117 | \n",
" spectacle tarif e famille tr | \n",
" 2020-09-03 13:21:21.400249+02:00 | \n",
" 2023-03-13 11:30:29.525335+01:00 | \n",
" NaN | \n",
" a00b61ad933518856f86e63ca91a5750 | \n",
"
\n",
" \n",
" 4 | \n",
" 1496 | \n",
" billet nb famille mecene 1a | \n",
" 2020-09-03 14:29:33.320952+02:00 | \n",
" 2021-01-25 19:23:06.816402+01:00 | \n",
" NaN | \n",
" 7f6013803c242253a5ccde80f780984f | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 551 | \n",
" 529 | \n",
" billet nb expo gr | \n",
" 2020-09-03 13:43:59.835944+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" 7d888e42abe101fc8b21dc88948c8b74 | \n",
"
\n",
" \n",
" 552 | \n",
" 3153 | \n",
" nb pers visite scolaire rep | \n",
" 2020-09-03 16:32:37.068864+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" 3cf21731c25eee650d5b232ee4780563 | \n",
"
\n",
" \n",
" 553 | \n",
" 5847 | \n",
" visite scolaire rep1h00 | \n",
" 2021-06-09 18:10:49.742531+02:00 | \n",
" 2022-02-18 15:55:03.576236+01:00 | \n",
" NaN | \n",
" a7bb5a6892d55f0d5ee4ce5786ae5fc6 | \n",
"
\n",
" \n",
" 554 | \n",
" 5840 | \n",
" france billet - entree ts | \n",
" 2021-06-09 18:10:49.737576+02:00 | \n",
" 2022-02-18 16:16:00.199543+01:00 | \n",
" NaN | \n",
" 4c53016fc65847646f600eff853593e5 | \n",
"
\n",
" \n",
" 555 | \n",
" 5863 | \n",
" france billet - entree tp | \n",
" 2021-06-09 18:12:49.269924+02:00 | \n",
" 2022-02-18 16:16:00.199543+01:00 | \n",
" NaN | \n",
" 90e642c0e1ef6bc9f2bc43089798de00 | \n",
"
\n",
" \n",
"
\n",
"
556 rows × 6 columns
\n",
"
"
],
"text/plain": [
" id name created_at \\\n",
"0 41909 visite mécènes 1h30 2022-07-08 07:08:26.802266+02:00 \n",
"1 502 entree mucem tp( expo picasso) 2020-09-03 13:43:59.816765+02:00 \n",
"2 504 nombre de personnes cinema 2020-09-03 13:43:59.818198+02:00 \n",
"3 117 spectacle tarif e famille tr 2020-09-03 13:21:21.400249+02:00 \n",
"4 1496 billet nb famille mecene 1a 2020-09-03 14:29:33.320952+02:00 \n",
".. ... ... ... \n",
"551 529 billet nb expo gr 2020-09-03 13:43:59.835944+02:00 \n",
"552 3153 nb pers visite scolaire rep 2020-09-03 16:32:37.068864+02:00 \n",
"553 5847 visite scolaire rep1h00 2021-06-09 18:10:49.742531+02:00 \n",
"554 5840 france billet - entree ts 2021-06-09 18:10:49.737576+02:00 \n",
"555 5863 france billet - entree tp 2021-06-09 18:12:49.269924+02:00 \n",
"\n",
" updated_at extra_field \\\n",
"0 2022-07-08 07:08:26.802266+02:00 NaN \n",
"1 2022-02-18 15:57:55.792581+01:00 NaN \n",
"2 2021-01-25 19:16:05.187114+01:00 NaN \n",
"3 2023-03-13 11:30:29.525335+01:00 NaN \n",
"4 2021-01-25 19:23:06.816402+01:00 NaN \n",
".. ... ... \n",
"551 2022-02-18 15:57:55.792581+01:00 NaN \n",
"552 2022-02-18 15:57:55.792581+01:00 NaN \n",
"553 2022-02-18 15:55:03.576236+01:00 NaN \n",
"554 2022-02-18 16:16:00.199543+01:00 NaN \n",
"555 2022-02-18 16:16:00.199543+01:00 NaN \n",
"\n",
" identifier \n",
"0 21d4b0043c12b21952b0797d140991a1 \n",
"1 223b09e6c3f1f75dbf8df019af97a555 \n",
"2 ba33b7b6d225a75d713a356b49c4d915 \n",
"3 a00b61ad933518856f86e63ca91a5750 \n",
"4 7f6013803c242253a5ccde80f780984f \n",
".. ... \n",
"551 7d888e42abe101fc8b21dc88948c8b74 \n",
"552 3cf21731c25eee650d5b232ee4780563 \n",
"553 a7bb5a6892d55f0d5ee4ce5786ae5fc6 \n",
"554 4c53016fc65847646f600eff853593e5 \n",
"555 90e642c0e1ef6bc9f2bc43089798de00 \n",
"\n",
"[556 rows x 6 columns]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show pricing_formulas once more before the joins that follow.\n",
"pricing_formulas"
]
},
{
"cell_type": "markdown",
"id": "46aad10f-8530-410e-872b-bb253c553a46",
"metadata": {},
"source": [
"# Jointure entre les bases"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4c3edd1-6d58-4c57-b3e4-0ef3529f6b8c",
"metadata": {},
"outputs": [],
"source": [
"#dic_prod_princing=['type_of_pricing_formulas','products_groups','pricing_formulas','product_packs','products']"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "eac537e1-bbad-45bc-a85c-12b675da1088",
"metadata": {},
"outputs": [],
"source": [
"# Merge 1: full outer join of products with pricing_formulas.\n",
"# products.pricing_formula_id is matched against pricing_formulas.id; columns\n",
"# present in both tables receive the suffixes below.\n",
"base1 = products.merge(\n",
"    pricing_formulas,\n",
"    how='outer',\n",
"    left_on='pricing_formula_id',\n",
"    right_on='id',\n",
"    suffixes=(\"_products\", \"_pricing_formula\"),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7dd0bdae-c1c5-44ba-a446-42b28def2343",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 36,
"id": "75be3a30-3114-432d-87d6-697533c3c871",
"metadata": {},
"outputs": [],
"source": [
"# Merge 2: full outer join of base1 (products + pricing_formulas) with products_groups.\n",
"# The join key is products_group_id -> products_groups.id. The earlier version\n",
"# matched pricing_formula_id against products_groups.id, which pairs rows that\n",
"# merely share a number (e.g. a product with products_group_id 10655 was\n",
"# attached to group 114).\n",
"# NOTE(review): the foreign-key reading comes from the column names -- confirm\n",
"# against the data dictionary.\n",
"base2 = base1.merge(\n",
"    products_groups,\n",
"    how='outer',\n",
"    left_on='products_group_id',\n",
"    right_on='id',\n",
"    suffixes=(\"_merge2\", \"_product_group\"),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "13f4e606-3678-476c-8b8f-9d3f1123e563",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id_products | \n",
" amount | \n",
" is_full_price | \n",
" representation_id_merge2 | \n",
" pricing_formula_id_merge2 | \n",
" created_at_products | \n",
" updated_at_products | \n",
" category_id_merge2 | \n",
" apply_price | \n",
" products_group_id | \n",
" ... | \n",
" identifier_pricing_formula | \n",
" id | \n",
" percent_price | \n",
" max_price | \n",
" min_price | \n",
" category_id_product_group | \n",
" pricing_formula_id_product_group | \n",
" representation_id_product_group | \n",
" created_at | \n",
" updated_at | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 10682.0 | \n",
" 9.0 | \n",
" False | \n",
" 914.0 | \n",
" 114.0 | \n",
" 2020-09-03 14:09:43.119798+02:00 | \n",
" 2020-09-03 14:09:43.119798+02:00 | \n",
" 41.0 | \n",
" 0.0 | \n",
" 10655.0 | \n",
" ... | \n",
" 039003ae0b0cd2df183786804b713470 | \n",
" 114.0 | \n",
" 100.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 16.0 | \n",
" 79.0 | \n",
" 38.0 | \n",
" 2020-09-03 13:21:21.456941+02:00 | \n",
" 2020-09-03 13:21:21.456941+02:00 | \n",
"
\n",
" \n",
" 1 | \n",
" 23519.0 | \n",
" 9.0 | \n",
" False | \n",
" 866.0 | \n",
" 114.0 | \n",
" 2020-09-03 15:01:05.950869+02:00 | \n",
" 2020-09-03 15:01:05.950869+02:00 | \n",
" 41.0 | \n",
" 0.0 | \n",
" 23460.0 | \n",
" ... | \n",
" 039003ae0b0cd2df183786804b713470 | \n",
" 114.0 | \n",
" 100.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 16.0 | \n",
" 79.0 | \n",
" 38.0 | \n",
" 2020-09-03 13:21:21.456941+02:00 | \n",
" 2020-09-03 13:21:21.456941+02:00 | \n",
"
\n",
" \n",
" 2 | \n",
" 3703.0 | \n",
" 9.0 | \n",
" False | \n",
" 1949.0 | \n",
" 114.0 | \n",
" 2020-09-03 13:44:01.666354+02:00 | \n",
" 2020-09-03 13:44:01.666354+02:00 | \n",
" 41.0 | \n",
" 0.0 | \n",
" 3690.0 | \n",
" ... | \n",
" 039003ae0b0cd2df183786804b713470 | \n",
" 114.0 | \n",
" 100.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 16.0 | \n",
" 79.0 | \n",
" 38.0 | \n",
" 2020-09-03 13:21:21.456941+02:00 | \n",
" 2020-09-03 13:21:21.456941+02:00 | \n",
"
\n",
" \n",
" 3 | \n",
" 23527.0 | \n",
" 9.0 | \n",
" False | \n",
" 5573.0 | \n",
" 114.0 | \n",
" 2020-09-03 15:01:05.958931+02:00 | \n",
" 2020-09-03 15:01:05.958931+02:00 | \n",
" 41.0 | \n",
" 0.0 | \n",
" 23468.0 | \n",
" ... | \n",
" 039003ae0b0cd2df183786804b713470 | \n",
" 114.0 | \n",
" 100.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 16.0 | \n",
" 79.0 | \n",
" 38.0 | \n",
" 2020-09-03 13:21:21.456941+02:00 | \n",
" 2020-09-03 13:21:21.456941+02:00 | \n",
"
\n",
" \n",
" 4 | \n",
" 3712.0 | \n",
" 9.0 | \n",
" False | \n",
" 509.0 | \n",
" 114.0 | \n",
" 2020-09-03 13:44:01.675588+02:00 | \n",
" 2020-09-03 13:44:01.675588+02:00 | \n",
" 41.0 | \n",
" 0.0 | \n",
" 3699.0 | \n",
" ... | \n",
" 039003ae0b0cd2df183786804b713470 | \n",
" 114.0 | \n",
" 100.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 16.0 | \n",
" 79.0 | \n",
" 38.0 | \n",
" 2020-09-03 13:21:21.456941+02:00 | \n",
" 2020-09-03 13:21:21.456941+02:00 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 187391 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" ... | \n",
" NaN | \n",
" 404833.0 | \n",
" 100.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2.0 | \n",
" 4.0 | \n",
" 188494.0 | \n",
" 2023-10-31 06:06:49.074426+01:00 | \n",
" 2023-10-31 06:06:49.074426+01:00 | \n",
"
\n",
" \n",
" 187392 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" ... | \n",
" NaN | \n",
" 404834.0 | \n",
" 100.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 5.0 | \n",
" 2799.0 | \n",
" 188494.0 | \n",
" 2023-10-31 06:06:49.075541+01:00 | \n",
" 2023-10-31 06:06:49.075541+01:00 | \n",
"
\n",
" \n",
" 187393 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" ... | \n",
" NaN | \n",
" 404835.0 | \n",
" 100.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 5.0 | \n",
" 5676.0 | \n",
" 188494.0 | \n",
" 2023-10-31 06:06:49.077170+01:00 | \n",
" 2023-10-31 06:06:49.077170+01:00 | \n",
"
\n",
" \n",
" 187394 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" ... | \n",
" NaN | \n",
" 404836.0 | \n",
" 100.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 5.0 | \n",
" 32.0 | \n",
" 189434.0 | \n",
" 2023-10-31 06:06:49.079215+01:00 | \n",
" 2023-10-31 06:06:49.079215+01:00 | \n",
"
\n",
" \n",
" 187395 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" ... | \n",
" NaN | \n",
" 404837.0 | \n",
" 100.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 6656.0 | \n",
" 189434.0 | \n",
" 2023-10-31 06:06:49.081039+01:00 | \n",
" 2023-10-31 06:06:49.081039+01:00 | \n",
"
\n",
" \n",
"
\n",
"
187396 rows × 29 columns
\n",
"
"
],
"text/plain": [
" id_products amount is_full_price representation_id_merge2 \\\n",
"0 10682.0 9.0 False 914.0 \n",
"1 23519.0 9.0 False 866.0 \n",
"2 3703.0 9.0 False 1949.0 \n",
"3 23527.0 9.0 False 5573.0 \n",
"4 3712.0 9.0 False 509.0 \n",
"... ... ... ... ... \n",
"187391 NaN NaN NaN NaN \n",
"187392 NaN NaN NaN NaN \n",
"187393 NaN NaN NaN NaN \n",
"187394 NaN NaN NaN NaN \n",
"187395 NaN NaN NaN NaN \n",
"\n",
" pricing_formula_id_merge2 created_at_products \\\n",
"0 114.0 2020-09-03 14:09:43.119798+02:00 \n",
"1 114.0 2020-09-03 15:01:05.950869+02:00 \n",
"2 114.0 2020-09-03 13:44:01.666354+02:00 \n",
"3 114.0 2020-09-03 15:01:05.958931+02:00 \n",
"4 114.0 2020-09-03 13:44:01.675588+02:00 \n",
"... ... ... \n",
"187391 NaN NaN \n",
"187392 NaN NaN \n",
"187393 NaN NaN \n",
"187394 NaN NaN \n",
"187395 NaN NaN \n",
"\n",
" updated_at_products category_id_merge2 apply_price \\\n",
"0 2020-09-03 14:09:43.119798+02:00 41.0 0.0 \n",
"1 2020-09-03 15:01:05.950869+02:00 41.0 0.0 \n",
"2 2020-09-03 13:44:01.666354+02:00 41.0 0.0 \n",
"3 2020-09-03 15:01:05.958931+02:00 41.0 0.0 \n",
"4 2020-09-03 13:44:01.675588+02:00 41.0 0.0 \n",
"... ... ... ... \n",
"187391 NaN NaN NaN \n",
"187392 NaN NaN NaN \n",
"187393 NaN NaN NaN \n",
"187394 NaN NaN NaN \n",
"187395 NaN NaN NaN \n",
"\n",
" products_group_id ... identifier_pricing_formula id \\\n",
"0 10655.0 ... 039003ae0b0cd2df183786804b713470 114.0 \n",
"1 23460.0 ... 039003ae0b0cd2df183786804b713470 114.0 \n",
"2 3690.0 ... 039003ae0b0cd2df183786804b713470 114.0 \n",
"3 23468.0 ... 039003ae0b0cd2df183786804b713470 114.0 \n",
"4 3699.0 ... 039003ae0b0cd2df183786804b713470 114.0 \n",
"... ... ... ... ... \n",
"187391 NaN ... NaN 404833.0 \n",
"187392 NaN ... NaN 404834.0 \n",
"187393 NaN ... NaN 404835.0 \n",
"187394 NaN ... NaN 404836.0 \n",
"187395 NaN ... NaN 404837.0 \n",
"\n",
" percent_price max_price min_price category_id_product_group \\\n",
"0 100.0 0.0 0.0 16.0 \n",
"1 100.0 0.0 0.0 16.0 \n",
"2 100.0 0.0 0.0 16.0 \n",
"3 100.0 0.0 0.0 16.0 \n",
"4 100.0 0.0 0.0 16.0 \n",
"... ... ... ... ... \n",
"187391 100.0 0.0 0.0 2.0 \n",
"187392 100.0 0.0 0.0 5.0 \n",
"187393 100.0 0.0 0.0 5.0 \n",
"187394 100.0 0.0 0.0 5.0 \n",
"187395 100.0 0.0 0.0 1.0 \n",
"\n",
" pricing_formula_id_product_group representation_id_product_group \\\n",
"0 79.0 38.0 \n",
"1 79.0 38.0 \n",
"2 79.0 38.0 \n",
"3 79.0 38.0 \n",
"4 79.0 38.0 \n",
"... ... ... \n",
"187391 4.0 188494.0 \n",
"187392 2799.0 188494.0 \n",
"187393 5676.0 188494.0 \n",
"187394 32.0 189434.0 \n",
"187395 6656.0 189434.0 \n",
"\n",
" created_at updated_at \n",
"0 2020-09-03 13:21:21.456941+02:00 2020-09-03 13:21:21.456941+02:00 \n",
"1 2020-09-03 13:21:21.456941+02:00 2020-09-03 13:21:21.456941+02:00 \n",
"2 2020-09-03 13:21:21.456941+02:00 2020-09-03 13:21:21.456941+02:00 \n",
"3 2020-09-03 13:21:21.456941+02:00 2020-09-03 13:21:21.456941+02:00 \n",
"4 2020-09-03 13:21:21.456941+02:00 2020-09-03 13:21:21.456941+02:00 \n",
"... ... ... \n",
"187391 2023-10-31 06:06:49.074426+01:00 2023-10-31 06:06:49.074426+01:00 \n",
"187392 2023-10-31 06:06:49.075541+01:00 2023-10-31 06:06:49.075541+01:00 \n",
"187393 2023-10-31 06:06:49.077170+01:00 2023-10-31 06:06:49.077170+01:00 \n",
"187394 2023-10-31 06:06:49.079215+01:00 2023-10-31 06:06:49.079215+01:00 \n",
"187395 2023-10-31 06:06:49.081039+01:00 2023-10-31 06:06:49.081039+01:00 \n",
"\n",
"[187396 rows x 29 columns]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"base2"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "053a4829-b0e7-4c8f-80b2-e57e99cdcedc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id_products | \n",
" amount | \n",
" is_full_price | \n",
" representation_id | \n",
" pricing_formula_id | \n",
" created_at_products | \n",
" updated_at_products | \n",
" category_id | \n",
" apply_price | \n",
" products_group_id | \n",
" product_pack_id | \n",
" extra_field_products | \n",
" amount_consumption | \n",
" identifier_products | \n",
" id_pricing_formula | \n",
" name | \n",
" created_at_pricing_formula | \n",
" updated_at_pricing_formula | \n",
" extra_field_pricing_formula | \n",
" identifier_pricing_formula | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 10682 | \n",
" 9.0 | \n",
" False | \n",
" 914 | \n",
" 114 | \n",
" 2020-09-03 14:09:43.119798+02:00 | \n",
" 2020-09-03 14:09:43.119798+02:00 | \n",
" 41 | \n",
" 0.0 | \n",
" 10655 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" 35c88f2db8a63d7474e46eb8ca9260e7 | \n",
" 114 | \n",
" visite guidée indiv tr | \n",
" 2020-09-03 13:21:21.397923+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" 039003ae0b0cd2df183786804b713470 | \n",
"
\n",
" \n",
" 1 | \n",
" 478 | \n",
" 9.5 | \n",
" False | \n",
" 273 | \n",
" 131 | \n",
" 2020-09-03 13:21:22.711773+02:00 | \n",
" 2020-09-03 13:21:22.711773+02:00 | \n",
" 1 | \n",
" 0.0 | \n",
" 471 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" 8a179671ab198e570e6a104c4451379f | \n",
" 131 | \n",
" billet mucem tp - picasso | \n",
" 2020-09-03 13:21:21.410521+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" 916c92362b8e7767715bc966de43e32b | \n",
"
\n",
" \n",
" 2 | \n",
" 20873 | \n",
" 11.5 | \n",
" False | \n",
" 275 | \n",
" 137 | \n",
" 2020-09-03 14:46:33.589030+02:00 | \n",
" 2020-09-03 14:46:33.589030+02:00 | \n",
" 1 | \n",
" 0.0 | \n",
" 20825 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" ee83779ce29e67ad251e40234b426d6a | \n",
" 137 | \n",
" billet mucem tp - pic. + guide | \n",
" 2020-09-03 13:21:21.414922+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" 91dd2ee46f5f2d78af989019ebd60a60 | \n",
"
\n",
" \n",
" 3 | \n",
" 157142 | \n",
" 8.0 | \n",
" False | \n",
" 82519 | \n",
" 9 | \n",
" 2022-01-28 19:29:23.525722+01:00 | \n",
" 2022-01-28 19:29:23.525722+01:00 | \n",
" 5 | \n",
" 0.0 | \n",
" 156773 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" d865383579314b791aa4bcf3fb418f17 | \n",
" 9 | \n",
" billet mucem ts | \n",
" 2020-09-03 13:11:23.997074+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" 37532348a1f1ef1cb916c55ebaf8345c | \n",
"
\n",
" \n",
" 4 | \n",
" 1341 | \n",
" 8.5 | \n",
" False | \n",
" 9 | \n",
" 93 | \n",
" 2020-09-03 13:29:30.773089+02:00 | \n",
" 2020-09-03 13:29:30.773089+02:00 | \n",
" 1 | \n",
" 0.0 | \n",
" 1175 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" f1c4689bc47dee6f60b56d74b593dd46 | \n",
" 93 | \n",
" billet mucem tr + guide | \n",
" 2020-09-03 13:21:21.382586+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" 722a8610e4391c27f2ec6f1224b42b3d | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 94798 | \n",
" 388554 | \n",
" 0.0 | \n",
" False | \n",
" 181331 | \n",
" 3153 | \n",
" 2023-04-05 03:34:01.850415+02:00 | \n",
" 2023-04-05 03:34:01.850415+02:00 | \n",
" 9 | \n",
" 0.0 | \n",
" 387862 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" 64147cc47279f4b1b76814b9f7f20619 | \n",
" 3153 | \n",
" nb pers visite scolaire rep | \n",
" 2020-09-03 16:32:37.068864+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" 3cf21731c25eee650d5b232ee4780563 | \n",
"
\n",
" \n",
" 94799 | \n",
" 388555 | \n",
" 0.0 | \n",
" False | \n",
" 181331 | \n",
" 20441 | \n",
" 2023-04-05 03:34:01.853121+02:00 | \n",
" 2023-04-05 03:34:01.853121+02:00 | \n",
" 17 | \n",
" 0.0 | \n",
" 387863 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" 93ec10028cfa7557a8788dd2fb4f6a17 | \n",
" 20441 | \n",
" visite scolaire rep1h30 | \n",
" 2021-12-17 05:15:30.142593+01:00 | \n",
" 2022-02-18 15:55:03.576236+01:00 | \n",
" NaN | \n",
" 6844d69dcb1178aa9e04567bce71df1f | \n",
"
\n",
" \n",
" 94800 | \n",
" 388556 | \n",
" 70.0 | \n",
" False | \n",
" 181332 | \n",
" 17798 | \n",
" 2023-04-05 03:34:01.855466+02:00 | \n",
" 2023-04-05 03:34:01.855466+02:00 | \n",
" 17 | \n",
" 0.0 | \n",
" 387864 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" a0cc8e58412502e24e38dfeded032485 | \n",
" 17798 | \n",
" visite scolaire 1h30 | \n",
" 2021-09-30 08:15:49.172362+02:00 | \n",
" 2022-02-18 15:55:03.576236+01:00 | \n",
" NaN | \n",
" cfa6ba42e14f2a170a0fc566a183fe94 | \n",
"
\n",
" \n",
" 94801 | \n",
" 388557 | \n",
" 0.0 | \n",
" False | \n",
" 181332 | \n",
" 3126 | \n",
" 2023-04-05 03:34:01.857441+02:00 | \n",
" 2023-04-05 03:34:01.857441+02:00 | \n",
" 9 | \n",
" 0.0 | \n",
" 387865 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" 94532a14d98637ac9ee2b7d9aedad188 | \n",
" 3126 | \n",
" nb pers visite scolaire | \n",
" 2020-09-03 16:31:39.822413+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" ab5dbcde8e5e1fce29737070bce06799 | \n",
"
\n",
" \n",
" 94802 | \n",
" 388526 | \n",
" 5.0 | \n",
" False | \n",
" 181318 | \n",
" 96 | \n",
" 2023-04-04 06:06:31.784073+02:00 | \n",
" 2023-04-04 06:06:31.784073+02:00 | \n",
" 39 | \n",
" 0.0 | \n",
" 387834 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" 4c1ee03106e34585d57d96da5e7c5f50 | \n",
" 96 | \n",
" visite guidée indiv tu | \n",
" 2020-09-03 13:21:21.384812+02:00 | \n",
" 2022-02-18 15:57:55.792581+01:00 | \n",
" NaN | \n",
" 62fd9a5b40c116e45eda51222d135dfd | \n",
"
\n",
" \n",
"
\n",
"
94803 rows × 20 columns
\n",
"
"
],
"text/plain": [
" id_products amount is_full_price representation_id \\\n",
"0 10682 9.0 False 914 \n",
"1 478 9.5 False 273 \n",
"2 20873 11.5 False 275 \n",
"3 157142 8.0 False 82519 \n",
"4 1341 8.5 False 9 \n",
"... ... ... ... ... \n",
"94798 388554 0.0 False 181331 \n",
"94799 388555 0.0 False 181331 \n",
"94800 388556 70.0 False 181332 \n",
"94801 388557 0.0 False 181332 \n",
"94802 388526 5.0 False 181318 \n",
"\n",
" pricing_formula_id created_at_products \\\n",
"0 114 2020-09-03 14:09:43.119798+02:00 \n",
"1 131 2020-09-03 13:21:22.711773+02:00 \n",
"2 137 2020-09-03 14:46:33.589030+02:00 \n",
"3 9 2022-01-28 19:29:23.525722+01:00 \n",
"4 93 2020-09-03 13:29:30.773089+02:00 \n",
"... ... ... \n",
"94798 3153 2023-04-05 03:34:01.850415+02:00 \n",
"94799 20441 2023-04-05 03:34:01.853121+02:00 \n",
"94800 17798 2023-04-05 03:34:01.855466+02:00 \n",
"94801 3126 2023-04-05 03:34:01.857441+02:00 \n",
"94802 96 2023-04-04 06:06:31.784073+02:00 \n",
"\n",
" updated_at_products category_id apply_price \\\n",
"0 2020-09-03 14:09:43.119798+02:00 41 0.0 \n",
"1 2020-09-03 13:21:22.711773+02:00 1 0.0 \n",
"2 2020-09-03 14:46:33.589030+02:00 1 0.0 \n",
"3 2022-01-28 19:29:23.525722+01:00 5 0.0 \n",
"4 2020-09-03 13:29:30.773089+02:00 1 0.0 \n",
"... ... ... ... \n",
"94798 2023-04-05 03:34:01.850415+02:00 9 0.0 \n",
"94799 2023-04-05 03:34:01.853121+02:00 17 0.0 \n",
"94800 2023-04-05 03:34:01.855466+02:00 17 0.0 \n",
"94801 2023-04-05 03:34:01.857441+02:00 9 0.0 \n",
"94802 2023-04-04 06:06:31.784073+02:00 39 0.0 \n",
"\n",
" products_group_id product_pack_id extra_field_products \\\n",
"0 10655 1 NaN \n",
"1 471 1 NaN \n",
"2 20825 1 NaN \n",
"3 156773 1 NaN \n",
"4 1175 1 NaN \n",
"... ... ... ... \n",
"94798 387862 1 NaN \n",
"94799 387863 1 NaN \n",
"94800 387864 1 NaN \n",
"94801 387865 1 NaN \n",
"94802 387834 1 NaN \n",
"\n",
" amount_consumption identifier_products \\\n",
"0 NaN 35c88f2db8a63d7474e46eb8ca9260e7 \n",
"1 NaN 8a179671ab198e570e6a104c4451379f \n",
"2 NaN ee83779ce29e67ad251e40234b426d6a \n",
"3 NaN d865383579314b791aa4bcf3fb418f17 \n",
"4 NaN f1c4689bc47dee6f60b56d74b593dd46 \n",
"... ... ... \n",
"94798 NaN 64147cc47279f4b1b76814b9f7f20619 \n",
"94799 NaN 93ec10028cfa7557a8788dd2fb4f6a17 \n",
"94800 NaN a0cc8e58412502e24e38dfeded032485 \n",
"94801 NaN 94532a14d98637ac9ee2b7d9aedad188 \n",
"94802 NaN 4c1ee03106e34585d57d96da5e7c5f50 \n",
"\n",
" id_pricing_formula name \\\n",
"0 114 visite guidée indiv tr \n",
"1 131 billet mucem tp - picasso \n",
"2 137 billet mucem tp - pic. + guide \n",
"3 9 billet mucem ts \n",
"4 93 billet mucem tr + guide \n",
"... ... ... \n",
"94798 3153 nb pers visite scolaire rep \n",
"94799 20441 visite scolaire rep1h30 \n",
"94800 17798 visite scolaire 1h30 \n",
"94801 3126 nb pers visite scolaire \n",
"94802 96 visite guidée indiv tu \n",
"\n",
" created_at_pricing_formula updated_at_pricing_formula \\\n",
"0 2020-09-03 13:21:21.397923+02:00 2022-02-18 15:57:55.792581+01:00 \n",
"1 2020-09-03 13:21:21.410521+02:00 2022-02-18 15:57:55.792581+01:00 \n",
"2 2020-09-03 13:21:21.414922+02:00 2022-02-18 15:57:55.792581+01:00 \n",
"3 2020-09-03 13:11:23.997074+02:00 2022-02-18 15:57:55.792581+01:00 \n",
"4 2020-09-03 13:21:21.382586+02:00 2022-02-18 15:57:55.792581+01:00 \n",
"... ... ... \n",
"94798 2020-09-03 16:32:37.068864+02:00 2022-02-18 15:57:55.792581+01:00 \n",
"94799 2021-12-17 05:15:30.142593+01:00 2022-02-18 15:55:03.576236+01:00 \n",
"94800 2021-09-30 08:15:49.172362+02:00 2022-02-18 15:55:03.576236+01:00 \n",
"94801 2020-09-03 16:31:39.822413+02:00 2022-02-18 15:57:55.792581+01:00 \n",
"94802 2020-09-03 13:21:21.384812+02:00 2022-02-18 15:57:55.792581+01:00 \n",
"\n",
" extra_field_pricing_formula identifier_pricing_formula \n",
"0 NaN 039003ae0b0cd2df183786804b713470 \n",
"1 NaN 916c92362b8e7767715bc966de43e32b \n",
"2 NaN 91dd2ee46f5f2d78af989019ebd60a60 \n",
"3 NaN 37532348a1f1ef1cb916c55ebaf8345c \n",
"4 NaN 722a8610e4391c27f2ec6f1224b42b3d \n",
"... ... ... \n",
"94798 NaN 3cf21731c25eee650d5b232ee4780563 \n",
"94799 NaN 6844d69dcb1178aa9e04567bce71df1f \n",
"94800 NaN cfa6ba42e14f2a170a0fc566a183fe94 \n",
"94801 NaN ab5dbcde8e5e1fce29737070bce06799 \n",
"94802 NaN 62fd9a5b40c116e45eda51222d135dfd \n",
"\n",
"[94803 rows x 20 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Merge2 entre products et pricing_formulas\n",
"base2=base1.merge(pricing_formulas, how='left', left_on= 'id_pricing_formula', right_on= 'id', suffixes = (\"\", \"\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bad94144-2474-49dd-9133-ab6082565ff1",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}