{ "cells": [ { "cell_type": "markdown", "id": "c4205b5d-e052-4863-a46b-20e4757052a7", "metadata": {}, "source": [ "# Business Data Challenge - Team 1" ] }, { "cell_type": "code", "execution_count": 1, "id": "ae3af8e6-ced8-4994-8877-fa98d4297cc0", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "markdown", "id": "dd3184e7-54a1-4463-af42-5850d9517a41", "metadata": {}, "source": [ "Configuration de l'accès aux données" ] }, { "cell_type": "code", "execution_count": 2, "id": "b6035982-9ff4-4013-9792-2d50e10db3d1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['bdc2324-data/1/1campaign_stats.csv',\n", " 'bdc2324-data/1/1campaigns.csv',\n", " 'bdc2324-data/1/1categories.csv',\n", " 'bdc2324-data/1/1countries.csv',\n", " 'bdc2324-data/1/1currencies.csv',\n", " 'bdc2324-data/1/1customer_target_mappings.csv',\n", " 'bdc2324-data/1/1customersplus.csv',\n", " 'bdc2324-data/1/1event_types.csv',\n", " 'bdc2324-data/1/1events.csv',\n", " 'bdc2324-data/1/1facilities.csv',\n", " 'bdc2324-data/1/1link_stats.csv',\n", " 'bdc2324-data/1/1pricing_formulas.csv',\n", " 'bdc2324-data/1/1product_packs.csv',\n", " 'bdc2324-data/1/1products.csv',\n", " 'bdc2324-data/1/1products_groups.csv',\n", " 'bdc2324-data/1/1purchases.csv',\n", " 'bdc2324-data/1/1representation_category_capacities.csv',\n", " 'bdc2324-data/1/1representations.csv',\n", " 'bdc2324-data/1/1seasons.csv',\n", " 'bdc2324-data/1/1structure_tag_mappings.csv',\n", " 'bdc2324-data/1/1suppliers.csv',\n", " 'bdc2324-data/1/1tags.csv',\n", " 'bdc2324-data/1/1target_types.csv',\n", " 'bdc2324-data/1/1targets.csv',\n", " 'bdc2324-data/1/1tickets.csv',\n", " 'bdc2324-data/1/1type_of_categories.csv',\n", " 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n", " 'bdc2324-data/1/1type_ofs.csv']" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "import s3fs\n", "# Create filesystem object\n", "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n", "\n", "BUCKET = \"bdc2324-data/1\"\n", "fs.ls(BUCKET)" ] }, { "cell_type": "code", "execution_count": 3, "id": "b86c935d-124f-453f-80dd-83ea6770d09c", "metadata": {}, "outputs": [], "source": [ "dic_base=['campaign_stats','campaigns','categories','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets','type_of_categories','type_of_pricing_formulas','type_ofs']" ] }, { "cell_type": "code", "execution_count": 4, "id": "f6d0b27c-0ecd-406b-b042-6c3802dd68fd", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_447/1008972637.py:5: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", " globals()[nom_base] = pd.read_csv(file_in, sep=\",\")\n" ] } ], "source": [ "dic_base=['campaign_stats','campaigns','categories','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets','type_of_categories','type_of_pricing_formulas','type_ofs']\n", "for nom_base in dic_base:\n", " FILE_PATH_S3_fanta = 'bdc2324-data/1/1' + nom_base + '.csv'\n", " with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n", " globals()[nom_base] = pd.read_csv(file_in, sep=\",\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "2a6b5e22-3370-457f-83b7-dd1e13663229", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'bdc2324-data/1/1type_ofs.csv'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "FILE_PATH_S3_fanta" ] }, { "cell_type": "markdown", "id": "79012186-ea51-4252-843e-36a9bbe3847e", "metadata": {}, "source": [ "# Analyse exploratoire " ] }, { "cell_type": "markdown", "id": "1a365f29-4766-47d8-9796-24a5271867b2", "metadata": {}, "source": [ "## I. Base type_of_pricing_formulas" ] }, { "cell_type": "markdown", "id": "bcc14f93-2289-44eb-816b-a51049b258df", "metadata": {}, "source": [ "## Detection des valeur manquantes" ] }, { "cell_type": "raw", "id": "ab2ec4c4-9d38-4aeb-8202-9116df3cdd66", "metadata": {}, "source": [ "dic_prod_princing=['type_of_pricing_formulas','products_groups','pricing_formulas','product_packs','products']" ] }, { "cell_type": "markdown", "id": "88759b4a-2633-478d-abce-29abeac376d1", "metadata": {}, "source": [ "def verifier_donnees_manquantes(base):\n", " donnees_manquantes = base.isna().sum()\n", " print(\"Données manquantes pour la base :\")\n", " print(donnees_manquantes)" ] }, { "cell_type": "markdown", "id": "df3075b4-1490-4cf2-a3fe-c6d4e2144ae3", "metadata": {}, "source": [ "for nom_base in dic_prod_princing:\n", " verifier_donnees_manquantes(nom_base)" ] }, { "cell_type": "code", "execution_count": 6, "id": "e0c67c01-e837-4772-b070-d1be0d895a36", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id 0\n", "type_of_id 0\n", "pricing_formula_id 0\n", "created_at 0\n", "updated_at 0\n", "identifier 0\n", "dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#detection des Nan d\n", "\n", "type_of_pricing_formulas.isna().sum()" ] }, { "cell_type": "code", "execution_count": 7, "id": "3eaffaa6-1164-4ee9-a671-8b5eb3df797d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtype_of_idpricing_formula_idcreated_atupdated_atidentifier
0111272021-01-05 11:55:51.226960+01:002021-01-05 11:55:51.226960+01:00cf2918b25e6dcf8c30798ca05c8ec8ed
12124252021-01-05 11:55:51.235606+01:002021-01-05 11:55:51.235606+01:002c8ee3f7c1487d792b6c946314e681f2
23129372021-01-05 11:55:51.240114+01:002021-01-05 11:55:51.240114+01:0044e55c85e4eb59b3c3c01c137a6b25fc
341482021-01-05 11:55:51.244638+01:002021-01-05 11:55:51.244638+01:00ee3bb93b7e2217cd86a49d547fedf6c6
45172021-01-05 11:55:51.249409+01:002021-01-05 11:55:51.249409+01:00ae701668574f1a653d2b21ddfd250620
.....................
563564466562022-02-18 16:15:58.872249+01:002022-02-18 16:15:58.872249+01:00f669824cdca9de9697f07ff3ba365a8d
564565466072022-02-18 16:15:59.231018+01:002022-02-18 16:15:59.231018+01:006421c8146a598758139153b0e7b921ea
565566467002022-02-18 16:15:59.724812+01:002022-02-18 16:15:59.724812+01:006823f6d4d80b322fbfb8b83545a9f96d
566567481182022-02-18 16:16:00.163381+01:002022-02-18 16:16:00.163381+01:0035cfc12584b4d1b94795d97fd0aa56e8
5675697481572023-03-13 11:30:29.480161+01:002023-03-13 11:30:29.480161+01:0055863541f33fd229ac9b54d9ec1f4874
\n", "

568 rows × 6 columns

\n", "
" ], "text/plain": [ " id type_of_id pricing_formula_id created_at \\\n", "0 1 1 127 2021-01-05 11:55:51.226960+01:00 \n", "1 2 1 2425 2021-01-05 11:55:51.235606+01:00 \n", "2 3 1 2937 2021-01-05 11:55:51.240114+01:00 \n", "3 4 1 48 2021-01-05 11:55:51.244638+01:00 \n", "4 5 1 7 2021-01-05 11:55:51.249409+01:00 \n", ".. ... ... ... ... \n", "563 564 4 6656 2022-02-18 16:15:58.872249+01:00 \n", "564 565 4 6607 2022-02-18 16:15:59.231018+01:00 \n", "565 566 4 6700 2022-02-18 16:15:59.724812+01:00 \n", "566 567 4 8118 2022-02-18 16:16:00.163381+01:00 \n", "567 569 7 48157 2023-03-13 11:30:29.480161+01:00 \n", "\n", " updated_at identifier \n", "0 2021-01-05 11:55:51.226960+01:00 cf2918b25e6dcf8c30798ca05c8ec8ed \n", "1 2021-01-05 11:55:51.235606+01:00 2c8ee3f7c1487d792b6c946314e681f2 \n", "2 2021-01-05 11:55:51.240114+01:00 44e55c85e4eb59b3c3c01c137a6b25fc \n", "3 2021-01-05 11:55:51.244638+01:00 ee3bb93b7e2217cd86a49d547fedf6c6 \n", "4 2021-01-05 11:55:51.249409+01:00 ae701668574f1a653d2b21ddfd250620 \n", ".. ... ... \n", "563 2022-02-18 16:15:58.872249+01:00 f669824cdca9de9697f07ff3ba365a8d \n", "564 2022-02-18 16:15:59.231018+01:00 6421c8146a598758139153b0e7b921ea \n", "565 2022-02-18 16:15:59.724812+01:00 6823f6d4d80b322fbfb8b83545a9f96d \n", "566 2022-02-18 16:16:00.163381+01:00 35cfc12584b4d1b94795d97fd0aa56e8 \n", "567 2023-03-13 11:30:29.480161+01:00 55863541f33fd229ac9b54d9ec1f4874 \n", "\n", "[568 rows x 6 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type_of_pricing_formulas" ] }, { "cell_type": "code", "execution_count": 8, "id": "57298669-8d55-40d5-a5aa-4c5df984eec7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id int64\n", "type_of_id int64\n", "pricing_formula_id int64\n", "created_at object\n", "updated_at object\n", "identifier object\n", "dtype: object" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#type des variables\n", "\n", "type_of_pricing_formulas.dtypes" ] }, { "cell_type": "code", "execution_count": 9, "id": "c11850cb-8833-44c0-a11d-9695d620a42b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtype_of_idpricing_formula_idcreated_atupdated_atidentifier
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [id, type_of_id, pricing_formula_id, created_at, updated_at, identifier]\n", "Index: []" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Identification des doublons\n", "type_of_pricing_formulas.loc[type_of_pricing_formulas['id'].duplicated(keep=False),:]" ] }, { "cell_type": "markdown", "id": "7a40de03-5e18-4d3d-a0f8-da960c29fad8", "metadata": {}, "source": [ "## II.products_groups" ] }, { "cell_type": "code", "execution_count": 10, "id": "89909175-6734-4e8e-8632-d6f8ca812388", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id 0\n", "percent_price 0\n", "max_price 0\n", "min_price 0\n", "category_id 0\n", "pricing_formula_id 0\n", "representation_id 0\n", "created_at 0\n", "updated_at 0\n", "dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#detection des Nan \n", "\n", "products_groups.isna().sum()" ] }, { "cell_type": "code", "execution_count": 11, "id": "6a187170-96c4-48d2-9568-b270f67e2c27", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id int64\n", "percent_price float64\n", "max_price float64\n", "min_price float64\n", "category_id int64\n", "pricing_formula_id int64\n", "representation_id int64\n", "created_at object\n", "updated_at object\n", "dtype: object" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#type des variables\n", "\n", "products_groups.dtypes" ] }, { "cell_type": "code", "execution_count": 12, "id": "2fba2cb0-a6a4-43b2-a854-3be07939c28b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idpercent_pricemax_pricemin_pricecategory_idpricing_formula_idrepresentation_idcreated_atupdated_at
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [id, percent_price, max_price, min_price, category_id, pricing_formula_id, representation_id, created_at, updated_at]\n", "Index: []" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Identification des doublons\n", "products_groups.loc[products_groups[['id','pricing_formula_id','representation_id']].duplicated(keep=False),:]" ] }, { "cell_type": "markdown", "id": "5312ac13-8fbd-4c3f-a98a-8c28f079a599", "metadata": {}, "source": [ "## III.pricing_formulas" ] }, { "cell_type": "code", "execution_count": 13, "id": "3383a773-0817-4b23-84e7-8d5d0c74b179", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnamecreated_atupdated_atextra_fieldidentifier
041909visite mécènes 1h302022-07-08 07:08:26.802266+02:002022-07-08 07:08:26.802266+02:00NaN21d4b0043c12b21952b0797d140991a1
1502entree mucem tp( expo picasso)2020-09-03 13:43:59.816765+02:002022-02-18 15:57:55.792581+01:00NaN223b09e6c3f1f75dbf8df019af97a555
2504nombre de personnes cinema2020-09-03 13:43:59.818198+02:002021-01-25 19:16:05.187114+01:00NaNba33b7b6d225a75d713a356b49c4d915
3117spectacle tarif e famille tr2020-09-03 13:21:21.400249+02:002023-03-13 11:30:29.525335+01:00NaNa00b61ad933518856f86e63ca91a5750
41496billet nb famille mecene 1a2020-09-03 14:29:33.320952+02:002021-01-25 19:23:06.816402+01:00NaN7f6013803c242253a5ccde80f780984f
.....................
551529billet nb expo gr2020-09-03 13:43:59.835944+02:002022-02-18 15:57:55.792581+01:00NaN7d888e42abe101fc8b21dc88948c8b74
5523153nb pers visite scolaire rep2020-09-03 16:32:37.068864+02:002022-02-18 15:57:55.792581+01:00NaN3cf21731c25eee650d5b232ee4780563
5535847visite scolaire rep1h002021-06-09 18:10:49.742531+02:002022-02-18 15:55:03.576236+01:00NaNa7bb5a6892d55f0d5ee4ce5786ae5fc6
5545840france billet - entree ts2021-06-09 18:10:49.737576+02:002022-02-18 16:16:00.199543+01:00NaN4c53016fc65847646f600eff853593e5
5555863france billet - entree tp2021-06-09 18:12:49.269924+02:002022-02-18 16:16:00.199543+01:00NaN90e642c0e1ef6bc9f2bc43089798de00
\n", "

556 rows × 6 columns

\n", "
" ], "text/plain": [ " id name created_at \\\n", "0 41909 visite mécènes 1h30 2022-07-08 07:08:26.802266+02:00 \n", "1 502 entree mucem tp( expo picasso) 2020-09-03 13:43:59.816765+02:00 \n", "2 504 nombre de personnes cinema 2020-09-03 13:43:59.818198+02:00 \n", "3 117 spectacle tarif e famille tr 2020-09-03 13:21:21.400249+02:00 \n", "4 1496 billet nb famille mecene 1a 2020-09-03 14:29:33.320952+02:00 \n", ".. ... ... ... \n", "551 529 billet nb expo gr 2020-09-03 13:43:59.835944+02:00 \n", "552 3153 nb pers visite scolaire rep 2020-09-03 16:32:37.068864+02:00 \n", "553 5847 visite scolaire rep1h00 2021-06-09 18:10:49.742531+02:00 \n", "554 5840 france billet - entree ts 2021-06-09 18:10:49.737576+02:00 \n", "555 5863 france billet - entree tp 2021-06-09 18:12:49.269924+02:00 \n", "\n", " updated_at extra_field \\\n", "0 2022-07-08 07:08:26.802266+02:00 NaN \n", "1 2022-02-18 15:57:55.792581+01:00 NaN \n", "2 2021-01-25 19:16:05.187114+01:00 NaN \n", "3 2023-03-13 11:30:29.525335+01:00 NaN \n", "4 2021-01-25 19:23:06.816402+01:00 NaN \n", ".. ... ... \n", "551 2022-02-18 15:57:55.792581+01:00 NaN \n", "552 2022-02-18 15:57:55.792581+01:00 NaN \n", "553 2022-02-18 15:55:03.576236+01:00 NaN \n", "554 2022-02-18 16:16:00.199543+01:00 NaN \n", "555 2022-02-18 16:16:00.199543+01:00 NaN \n", "\n", " identifier \n", "0 21d4b0043c12b21952b0797d140991a1 \n", "1 223b09e6c3f1f75dbf8df019af97a555 \n", "2 ba33b7b6d225a75d713a356b49c4d915 \n", "3 a00b61ad933518856f86e63ca91a5750 \n", "4 7f6013803c242253a5ccde80f780984f \n", ".. ... \n", "551 7d888e42abe101fc8b21dc88948c8b74 \n", "552 3cf21731c25eee650d5b232ee4780563 \n", "553 a7bb5a6892d55f0d5ee4ce5786ae5fc6 \n", "554 4c53016fc65847646f600eff853593e5 \n", "555 90e642c0e1ef6bc9f2bc43089798de00 \n", "\n", "[556 rows x 6 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pricing_formulas" ] }, { "cell_type": "code", "execution_count": 14, "id": "d8130c73-6c5f-45b1-93ae-db7679c8ca56", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id 0.0\n", "name 0.0\n", "created_at 0.0\n", "updated_at 0.0\n", "extra_field 1.0\n", "identifier 0.0\n", "dtype: float64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#detection des Nan \n", "\n", "pricing_formulas.isna().sum()/pricing_formulas.shape[0]" ] }, { "cell_type": "code", "execution_count": 15, "id": "44f1dbfd-c3cf-464b-9877-f37fcc61da92", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id int64\n", "name object\n", "created_at object\n", "updated_at object\n", "extra_field float64\n", "identifier object\n", "dtype: object" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#type des variables\n", "\n", "pricing_formulas.dtypes" ] }, { "cell_type": "code", "execution_count": 16, "id": "6784b41b-da74-4fae-832e-16641ae710c1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnamecreated_atupdated_atextra_fieldidentifier
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [id, name, created_at, updated_at, extra_field, identifier]\n", "Index: []" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Identification des doublons\n", "pricing_formulas.loc[pricing_formulas[['id']].duplicated(keep=False),:]" ] }, { "cell_type": "markdown", "id": "2145b0a4-b73d-4530-8c12-a78b1cf86eae", "metadata": {}, "source": [ "## IV. product_packs" ] }, { "cell_type": "code", "execution_count": 17, "id": "e36b07a7-4f0b-4711-86a0-12a1d8158eef", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id 0.0\n", "name 1.0\n", "type_of 0.0\n", "created_at 0.0\n", "updated_at 0.0\n", "identifier 0.0\n", "dtype: float64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#detection des Nan \n", "\n", "product_packs.isna().sum()/product_packs.shape[0]" ] }, { "cell_type": "code", "execution_count": 18, "id": "8707396a-f86b-476d-a9f9-c39f8de1d02e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id int64\n", "name float64\n", "type_of int64\n", "created_at object\n", "updated_at object\n", "identifier object\n", "dtype: object" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#type des variables\n", "\n", "product_packs.dtypes" ] }, { "cell_type": "code", "execution_count": 19, "id": "4b102bd3-924b-43da-8915-be7664c23f97", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnametype_ofcreated_atupdated_atidentifier
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [id, name, type_of, created_at, updated_at, identifier]\n", "Index: []" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Identification des doublons\n", "product_packs.loc[product_packs[['id']].duplicated(keep=False),:]" ] }, { "cell_type": "markdown", "id": "cfe0c525-896b-4731-b38e-306ff6ea0c65", "metadata": {}, "source": [ "## V.products" ] }, { "cell_type": "code", "execution_count": 20, "id": "968beb24-f70c-4eb6-8b1e-4b04bc7fe9c9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id 0.0\n", "amount 0.0\n", "is_full_price 0.0\n", "representation_id 0.0\n", "pricing_formula_id 0.0\n", "created_at 0.0\n", "updated_at 0.0\n", "category_id 0.0\n", "apply_price 0.0\n", "products_group_id 0.0\n", "product_pack_id 0.0\n", "extra_field 1.0\n", "amount_consumption 1.0\n", "identifier 0.0\n", "dtype: float64" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#detection des Nan \n", "\n", "products.isna().sum()/products.shape[0]" ] }, { "cell_type": "code", "execution_count": 21, "id": "15bc6ac6-67e8-4e2c-9641-7ee8bb2581a3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id int64\n", "amount float64\n", "is_full_price bool\n", "representation_id int64\n", "pricing_formula_id int64\n", "created_at object\n", "updated_at object\n", "category_id int64\n", "apply_price float64\n", "products_group_id int64\n", "product_pack_id int64\n", "extra_field float64\n", "amount_consumption float64\n", "identifier object\n", "dtype: object" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#type des variables\n", "\n", "products.dtypes" ] }, { "cell_type": "code", "execution_count": 22, "id": "7daa4f1a-e429-4daf-a2e1-1e311b487e09", "metadata": {}, "outputs": [], "source": [ "#dic_prod_princing=['type_of_pricing_formulas','products_groups','pricing_formulas','product_packs','products']" ] }, { "cell_type": "code", "execution_count": 26, "id": "dc12b746-6708-4708-826a-acb5a8e665a1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnamecreated_atupdated_atextra_fieldidentifier
041909visite mécènes 1h302022-07-08 07:08:26.802266+02:002022-07-08 07:08:26.802266+02:00NaN21d4b0043c12b21952b0797d140991a1
1502entree mucem tp( expo picasso)2020-09-03 13:43:59.816765+02:002022-02-18 15:57:55.792581+01:00NaN223b09e6c3f1f75dbf8df019af97a555
2504nombre de personnes cinema2020-09-03 13:43:59.818198+02:002021-01-25 19:16:05.187114+01:00NaNba33b7b6d225a75d713a356b49c4d915
3117spectacle tarif e famille tr2020-09-03 13:21:21.400249+02:002023-03-13 11:30:29.525335+01:00NaNa00b61ad933518856f86e63ca91a5750
41496billet nb famille mecene 1a2020-09-03 14:29:33.320952+02:002021-01-25 19:23:06.816402+01:00NaN7f6013803c242253a5ccde80f780984f
.....................
551529billet nb expo gr2020-09-03 13:43:59.835944+02:002022-02-18 15:57:55.792581+01:00NaN7d888e42abe101fc8b21dc88948c8b74
5523153nb pers visite scolaire rep2020-09-03 16:32:37.068864+02:002022-02-18 15:57:55.792581+01:00NaN3cf21731c25eee650d5b232ee4780563
5535847visite scolaire rep1h002021-06-09 18:10:49.742531+02:002022-02-18 15:55:03.576236+01:00NaNa7bb5a6892d55f0d5ee4ce5786ae5fc6
5545840france billet - entree ts2021-06-09 18:10:49.737576+02:002022-02-18 16:16:00.199543+01:00NaN4c53016fc65847646f600eff853593e5
5555863france billet - entree tp2021-06-09 18:12:49.269924+02:002022-02-18 16:16:00.199543+01:00NaN90e642c0e1ef6bc9f2bc43089798de00
\n", "

556 rows × 6 columns

\n", "
" ], "text/plain": [ " id name created_at \\\n", "0 41909 visite mécènes 1h30 2022-07-08 07:08:26.802266+02:00 \n", "1 502 entree mucem tp( expo picasso) 2020-09-03 13:43:59.816765+02:00 \n", "2 504 nombre de personnes cinema 2020-09-03 13:43:59.818198+02:00 \n", "3 117 spectacle tarif e famille tr 2020-09-03 13:21:21.400249+02:00 \n", "4 1496 billet nb famille mecene 1a 2020-09-03 14:29:33.320952+02:00 \n", ".. ... ... ... \n", "551 529 billet nb expo gr 2020-09-03 13:43:59.835944+02:00 \n", "552 3153 nb pers visite scolaire rep 2020-09-03 16:32:37.068864+02:00 \n", "553 5847 visite scolaire rep1h00 2021-06-09 18:10:49.742531+02:00 \n", "554 5840 france billet - entree ts 2021-06-09 18:10:49.737576+02:00 \n", "555 5863 france billet - entree tp 2021-06-09 18:12:49.269924+02:00 \n", "\n", " updated_at extra_field \\\n", "0 2022-07-08 07:08:26.802266+02:00 NaN \n", "1 2022-02-18 15:57:55.792581+01:00 NaN \n", "2 2021-01-25 19:16:05.187114+01:00 NaN \n", "3 2023-03-13 11:30:29.525335+01:00 NaN \n", "4 2021-01-25 19:23:06.816402+01:00 NaN \n", ".. ... ... \n", "551 2022-02-18 15:57:55.792581+01:00 NaN \n", "552 2022-02-18 15:57:55.792581+01:00 NaN \n", "553 2022-02-18 15:55:03.576236+01:00 NaN \n", "554 2022-02-18 16:16:00.199543+01:00 NaN \n", "555 2022-02-18 16:16:00.199543+01:00 NaN \n", "\n", " identifier \n", "0 21d4b0043c12b21952b0797d140991a1 \n", "1 223b09e6c3f1f75dbf8df019af97a555 \n", "2 ba33b7b6d225a75d713a356b49c4d915 \n", "3 a00b61ad933518856f86e63ca91a5750 \n", "4 7f6013803c242253a5ccde80f780984f \n", ".. ... \n", "551 7d888e42abe101fc8b21dc88948c8b74 \n", "552 3cf21731c25eee650d5b232ee4780563 \n", "553 a7bb5a6892d55f0d5ee4ce5786ae5fc6 \n", "554 4c53016fc65847646f600eff853593e5 \n", "555 90e642c0e1ef6bc9f2bc43089798de00 \n", "\n", "[556 rows x 6 columns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pricing_formulas" ] }, { "cell_type": "markdown", "id": "46aad10f-8530-410e-872b-bb253c553a46", "metadata": {}, "source": [ "# jointure entre les bases" ] }, { "cell_type": "code", "execution_count": null, "id": "a4c3edd1-6d58-4c57-b3e4-0ef3529f6b8c", "metadata": {}, "outputs": [], "source": [ "#dic_prod_princing=['type_of_pricing_formulas','products_groups','pricing_formulas','product_packs','products']" ] }, { "cell_type": "code", "execution_count": 74, "id": "eac537e1-bbad-45bc-a85c-12b675da1088", "metadata": {}, "outputs": [], "source": [ "#Merge1 entre products et pricing_formulas\n", "base1=products.merge(pricing_formulas, how='left', left_on= 'pricing_formula_id', right_on= 'id', suffixes = (\"_products\", \"_pricing_formula\"))" ] }, { "cell_type": "code", "execution_count": 75, "id": "7dd0bdae-c1c5-44ba-a446-42b28def2343", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
id_productsamountis_full_pricerepresentation_idpricing_formula_idcreated_at_productsupdated_at_productscategory_idapply_priceproducts_group_idproduct_pack_idextra_field_productsamount_consumptionidentifier_productsid_pricing_formulanamecreated_at_pricing_formulaupdated_at_pricing_formulaextra_field_pricing_formulaidentifier_pricing_formula
0106829.0False9141142020-09-03 14:09:43.119798+02:002020-09-03 14:09:43.119798+02:00410.0106551NaNNaN35c88f2db8a63d7474e46eb8ca9260e7114visite guidée indiv tr2020-09-03 13:21:21.397923+02:002022-02-18 15:57:55.792581+01:00NaN039003ae0b0cd2df183786804b713470
14789.5False2731312020-09-03 13:21:22.711773+02:002020-09-03 13:21:22.711773+02:0010.04711NaNNaN8a179671ab198e570e6a104c4451379f131billet mucem tp - picasso2020-09-03 13:21:21.410521+02:002022-02-18 15:57:55.792581+01:00NaN916c92362b8e7767715bc966de43e32b
22087311.5False2751372020-09-03 14:46:33.589030+02:002020-09-03 14:46:33.589030+02:0010.0208251NaNNaNee83779ce29e67ad251e40234b426d6a137billet mucem tp - pic. + guide2020-09-03 13:21:21.414922+02:002022-02-18 15:57:55.792581+01:00NaN91dd2ee46f5f2d78af989019ebd60a60
31571428.0False8251992022-01-28 19:29:23.525722+01:002022-01-28 19:29:23.525722+01:0050.01567731NaNNaNd865383579314b791aa4bcf3fb418f179billet mucem ts2020-09-03 13:11:23.997074+02:002022-02-18 15:57:55.792581+01:00NaN37532348a1f1ef1cb916c55ebaf8345c
413418.5False9932020-09-03 13:29:30.773089+02:002020-09-03 13:29:30.773089+02:0010.011751NaNNaNf1c4689bc47dee6f60b56d74b593dd4693billet mucem tr + guide2020-09-03 13:21:21.382586+02:002022-02-18 15:57:55.792581+01:00NaN722a8610e4391c27f2ec6f1224b42b3d
\n", "
" ], "text/plain": [ " id_products amount is_full_price representation_id pricing_formula_id \\\n", "0 10682 9.0 False 914 114 \n", "1 478 9.5 False 273 131 \n", "2 20873 11.5 False 275 137 \n", "3 157142 8.0 False 82519 9 \n", "4 1341 8.5 False 9 93 \n", "\n", " created_at_products updated_at_products \\\n", "0 2020-09-03 14:09:43.119798+02:00 2020-09-03 14:09:43.119798+02:00 \n", "1 2020-09-03 13:21:22.711773+02:00 2020-09-03 13:21:22.711773+02:00 \n", "2 2020-09-03 14:46:33.589030+02:00 2020-09-03 14:46:33.589030+02:00 \n", "3 2022-01-28 19:29:23.525722+01:00 2022-01-28 19:29:23.525722+01:00 \n", "4 2020-09-03 13:29:30.773089+02:00 2020-09-03 13:29:30.773089+02:00 \n", "\n", " category_id apply_price products_group_id product_pack_id \\\n", "0 41 0.0 10655 1 \n", "1 1 0.0 471 1 \n", "2 1 0.0 20825 1 \n", "3 5 0.0 156773 1 \n", "4 1 0.0 1175 1 \n", "\n", " extra_field_products amount_consumption identifier_products \\\n", "0 NaN NaN 35c88f2db8a63d7474e46eb8ca9260e7 \n", "1 NaN NaN 8a179671ab198e570e6a104c4451379f \n", "2 NaN NaN ee83779ce29e67ad251e40234b426d6a \n", "3 NaN NaN d865383579314b791aa4bcf3fb418f17 \n", "4 NaN NaN f1c4689bc47dee6f60b56d74b593dd46 \n", "\n", " id_pricing_formula name \\\n", "0 114 visite guidée indiv tr \n", "1 131 billet mucem tp - picasso \n", "2 137 billet mucem tp - pic. + guide \n", "3 9 billet mucem ts \n", "4 93 billet mucem tr + guide \n", "\n", " created_at_pricing_formula updated_at_pricing_formula \\\n", "0 2020-09-03 13:21:21.397923+02:00 2022-02-18 15:57:55.792581+01:00 \n", "1 2020-09-03 13:21:21.410521+02:00 2022-02-18 15:57:55.792581+01:00 \n", "2 2020-09-03 13:21:21.414922+02:00 2022-02-18 15:57:55.792581+01:00 \n", "3 2020-09-03 13:11:23.997074+02:00 2022-02-18 15:57:55.792581+01:00 \n", "4 2020-09-03 13:21:21.382586+02:00 2022-02-18 15:57:55.792581+01:00 \n", "\n", " extra_field_pricing_formula identifier_pricing_formula \n", "0 NaN 039003ae0b0cd2df183786804b713470 \n", "1 NaN 916c92362b8e7767715bc966de43e32b \n", "2 NaN 91dd2ee46f5f2d78af989019ebd60a60 \n", "3 NaN 37532348a1f1ef1cb916c55ebaf8345c \n", "4 NaN 722a8610e4391c27f2ec6f1224b42b3d " ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "base1.head(5)" ] }, { "cell_type": "code", "execution_count": 77, "id": "d8ef07cf-29a3-40fb-a3d7-bf32a729c053", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id 0\n", "percent_price 0\n", "max_price 0\n", "min_price 0\n", "category_id 0\n", "pricing_formula_id 0\n", "representation_id 0\n", "created_at 0\n", "updated_at 0\n", "dtype: int64" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "products_groups.isna().sum()" ] }, { "cell_type": "code", "execution_count": 78, "id": "75be3a30-3114-432d-87d6-697533c3c871", "metadata": {}, "outputs": [], "source": [ "#Merge2 entre base1 et products_groups\n", "base2=base1.merge(products_groups, how='left', left_on= 'id_pricing_formula', right_on= 'id', suffixes = (\"_merge2\", \"_product_group\"))" ] }, { "cell_type": "code", "execution_count": 81, "id": "34a169c6-07a8-4ac3-a9e1-d7e7461f7310", "metadata": {}, "outputs": [], "source": [ "#Merge3 entre base2 et type_of_pricing_formulas\n", "base3=base2.merge(type_of_pricing_formulas, how='left', left_on= 'id_pricing_formula', right_on= 'pricing_formula_id', suffixes = (\"_merge3\", \"_type_of_pricing_f\"))" ] }, { "cell_type": "code", "execution_count": 82, "id": "508a8082-da34-4d2f-bfa9-9ca7703c3fda", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id 0\n", "type_of_id 0\n", "pricing_formula_id 0\n", "created_at 0\n", "updated_at 0\n", "identifier 0\n", "dtype: int64" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type_of_pricing_formulas.isna().sum()" ] }, { "cell_type": "code", "execution_count": 83, "id": "0e255d11-57eb-47ca-afd3-31f59b8d8822", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id 0\n", "amount 0\n", "is_full_price 0\n", "representation_id 0\n", "pricing_formula_id 0\n", "created_at 0\n", "updated_at 0\n", "category_id 0\n", "apply_price 0\n", "products_group_id 0\n", "product_pack_id 0\n", "extra_field 94803\n", "amount_consumption 94803\n", "identifier 0\n", "dtype: int64" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "products.isna().sum()" ] }, { "cell_type": "code", "execution_count": 84, "id": "3f034e65-1b33-4908-b7e6-60660ef251c6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id 0\n", "name 1\n", "type_of 0\n", "created_at 0\n", "updated_at 0\n", "identifier 0\n", "dtype: int64" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "product_packs.isna().sum()" ] }, { "cell_type": "code", "execution_count": 85, "id": "f44f40d2-5304-4931-b7e6-fcc06b2657b6", "metadata": {}, "outputs": [], "source": [ "#Merge4 entre base3 et type_of_pricing_formulas\n", "base4=base3.merge(product_packs, how='left', left_on= 'product_pack_id', right_on= 'id', suffixes = (\"_merge4\", \"_product_pack\"))" ] }, { "cell_type": "code", "execution_count": 87, "id": "a28772c3-7bc1-46b4-acc8-1388dc60ec98", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
id_productsamountis_full_pricerepresentation_id_merge2pricing_formula_id_merge2created_at_productsupdated_at_productscategory_id_merge2apply_priceproducts_group_id...pricing_formula_idcreated_at_type_of_pricing_fupdated_at_type_of_pricing_fidentifier_merge4idname_product_packtype_ofcreated_atupdated_atidentifier_product_pack
0106829.0False9141142020-09-03 14:09:43.119798+02:002020-09-03 14:09:43.119798+02:00410.010655...114.02021-02-15 17:02:27.395376+01:002021-02-15 17:02:27.395376+01:003706121eb9f43b635bef1433c06f679c1NaN02020-09-03 13:11:24.501197+02:002020-09-03 13:11:24.501197+02:00a764b4bf13a360c7ac2a35ec4ca96c95
14789.5False2731312020-09-03 13:21:22.711773+02:002020-09-03 13:21:22.711773+02:0010.0471...131.02021-02-05 11:52:05.923905+01:002021-02-05 11:52:05.923905+01:000aceb248607671792298436004b952751NaN02020-09-03 13:11:24.501197+02:002020-09-03 13:11:24.501197+02:00a764b4bf13a360c7ac2a35ec4ca96c95
22087311.5False2751372020-09-03 14:46:33.589030+02:002020-09-03 14:46:33.589030+02:0010.020825...137.02021-02-05 11:52:05.939898+01:002021-02-05 11:52:05.939898+01:0093002d4637331edd81ffc28b6e8e89c01NaN02020-09-03 13:11:24.501197+02:002020-09-03 13:11:24.501197+02:00a764b4bf13a360c7ac2a35ec4ca96c95
31571428.0False8251992022-01-28 19:29:23.525722+01:002022-01-28 19:29:23.525722+01:0050.0156773...9.02021-02-05 11:52:06.107939+01:002021-02-05 11:52:06.107939+01:007d0b25bdfff9f366da8be820608c81911NaN02020-09-03 13:11:24.501197+02:002020-09-03 13:11:24.501197+02:00a764b4bf13a360c7ac2a35ec4ca96c95
413418.5False9932020-09-03 13:29:30.773089+02:002020-09-03 13:29:30.773089+02:0010.01175...93.02021-02-05 11:52:06.004162+01:002021-02-05 11:52:06.004162+01:001dbb0795e8f47cb75ba7cdb08c06be5f1NaN02020-09-03 13:11:24.501197+02:002020-09-03 13:11:24.501197+02:00a764b4bf13a360c7ac2a35ec4ca96c95
\n", "

5 rows × 41 columns

\n", "
" ], "text/plain": [ " id_products amount is_full_price representation_id_merge2 \\\n", "0 10682 9.0 False 914 \n", "1 478 9.5 False 273 \n", "2 20873 11.5 False 275 \n", "3 157142 8.0 False 82519 \n", "4 1341 8.5 False 9 \n", "\n", " pricing_formula_id_merge2 created_at_products \\\n", "0 114 2020-09-03 14:09:43.119798+02:00 \n", "1 131 2020-09-03 13:21:22.711773+02:00 \n", "2 137 2020-09-03 14:46:33.589030+02:00 \n", "3 9 2022-01-28 19:29:23.525722+01:00 \n", "4 93 2020-09-03 13:29:30.773089+02:00 \n", "\n", " updated_at_products category_id_merge2 apply_price \\\n", "0 2020-09-03 14:09:43.119798+02:00 41 0.0 \n", "1 2020-09-03 13:21:22.711773+02:00 1 0.0 \n", "2 2020-09-03 14:46:33.589030+02:00 1 0.0 \n", "3 2022-01-28 19:29:23.525722+01:00 5 0.0 \n", "4 2020-09-03 13:29:30.773089+02:00 1 0.0 \n", "\n", " products_group_id ... pricing_formula_id \\\n", "0 10655 ... 114.0 \n", "1 471 ... 131.0 \n", "2 20825 ... 137.0 \n", "3 156773 ... 9.0 \n", "4 1175 ... 93.0 \n", "\n", " created_at_type_of_pricing_f updated_at_type_of_pricing_f \\\n", "0 2021-02-15 17:02:27.395376+01:00 2021-02-15 17:02:27.395376+01:00 \n", "1 2021-02-05 11:52:05.923905+01:00 2021-02-05 11:52:05.923905+01:00 \n", "2 2021-02-05 11:52:05.939898+01:00 2021-02-05 11:52:05.939898+01:00 \n", "3 2021-02-05 11:52:06.107939+01:00 2021-02-05 11:52:06.107939+01:00 \n", "4 2021-02-05 11:52:06.004162+01:00 2021-02-05 11:52:06.004162+01:00 \n", "\n", " identifier_merge4 id name_product_pack type_of \\\n", "0 3706121eb9f43b635bef1433c06f679c 1 NaN 0 \n", "1 0aceb248607671792298436004b95275 1 NaN 0 \n", "2 93002d4637331edd81ffc28b6e8e89c0 1 NaN 0 \n", "3 7d0b25bdfff9f366da8be820608c8191 1 NaN 0 \n", "4 1dbb0795e8f47cb75ba7cdb08c06be5f 1 NaN 0 \n", "\n", " created_at updated_at \\\n", "0 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n", "1 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n", "2 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n", "3 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n", "4 2020-09-03 13:11:24.501197+02:00 2020-09-03 13:11:24.501197+02:00 \n", "\n", " identifier_product_pack \n", "0 a764b4bf13a360c7ac2a35ec4ca96c95 \n", "1 a764b4bf13a360c7ac2a35ec4ca96c95 \n", "2 a764b4bf13a360c7ac2a35ec4ca96c95 \n", "3 a764b4bf13a360c7ac2a35ec4ca96c95 \n", "4 a764b4bf13a360c7ac2a35ec4ca96c95 \n", "\n", "[5 rows x 41 columns]" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "base4.head(5)" ] }, { "cell_type": "code", "execution_count": null, "id": "03442997-806f-4285-a139-3bad46bb4522", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "d22a0d75-53c5-4b54-9060-c9e7c307fb13", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 5 }