diff --git a/Traitement_Fanta.ipynb b/Traitement_Fanta.ipynb
index a456ad0..c373bd7 100644
--- a/Traitement_Fanta.ipynb
+++ b/Traitement_Fanta.ipynb
@@ -10,7 +10,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 3,
"id": "ae3af8e6-ced8-4994-8877-fa98d4297cc0",
"metadata": {},
"outputs": [],
@@ -29,7 +29,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
"id": "b6035982-9ff4-4013-9792-2d50e10db3d1",
"metadata": {},
"outputs": [
@@ -66,7 +66,7 @@
" 'bdc2324-data/1/1type_ofs.csv']"
]
},
- "execution_count": 3,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -84,7 +84,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"id": "b86c935d-124f-453f-80dd-83ea6770d09c",
"metadata": {},
"outputs": [],
@@ -94,7 +94,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 6,
"id": "f6d0b27c-0ecd-406b-b042-6c3802dd68fd",
"metadata": {},
"outputs": [
@@ -102,7 +102,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1054/1008972637.py:5: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ "/tmp/ipykernel_432/1008972637.py:5: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")\n"
]
}
@@ -117,7 +117,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 7,
"id": "2a6b5e22-3370-457f-83b7-dd1e13663229",
"metadata": {},
"outputs": [
@@ -127,7 +127,7 @@
"'bdc2324-data/1/1type_ofs.csv'"
]
},
- "execution_count": 11,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -136,6 +136,22 @@
"FILE_PATH_S3_fanta"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "79012186-ea51-4252-843e-36a9bbe3847e",
+ "metadata": {},
+ "source": [
+ "# Analyse exploratoire"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1a365f29-4766-47d8-9796-24a5271867b2",
+ "metadata": {},
+ "source": [
+ "## I. Base type_of_pricing_formulas"
+ ]
+ },
{
"cell_type": "markdown",
"id": "bcc14f93-2289-44eb-816b-a51049b258df",
@@ -145,21 +161,17 @@
]
},
{
- "cell_type": "code",
- "execution_count": 12,
- "id": "7f8083ec-3d08-4c4e-8d26-a5a4948c1c02",
+ "cell_type": "raw",
+ "id": "ab2ec4c4-9d38-4aeb-8202-9116df3cdd66",
"metadata": {},
- "outputs": [],
"source": [
"dic_prod_princing=['type_of_pricing_formulas','products_groups','pricing_formulas','product_packs','products']"
]
},
{
- "cell_type": "code",
- "execution_count": 16,
- "id": "a6de36fa-3d35-4b20-97f2-3e24d54c7f99",
+ "cell_type": "markdown",
+ "id": "88759b4a-2633-478d-abce-29abeac376d1",
"metadata": {},
- "outputs": [],
"source": [
"def verifier_donnees_manquantes(base):\n",
" donnees_manquantes = base.isna().sum()\n",
@@ -168,24 +180,9 @@
]
},
{
- "cell_type": "code",
- "execution_count": 17,
- "id": "1c261736-11fb-44f4-a4b1-830cae755a65",
+ "cell_type": "markdown",
+ "id": "df3075b4-1490-4cf2-a3fe-c6d4e2144ae3",
"metadata": {},
- "outputs": [
- {
- "ename": "AttributeError",
- "evalue": "'str' object has no attribute 'isna'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[17], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m nom_base \u001b[38;5;129;01min\u001b[39;00m dic_prod_princing:\n\u001b[0;32m----> 2\u001b[0m \u001b[43mverifier_donnees_manquantes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnom_base\u001b[49m\u001b[43m)\u001b[49m\n",
- "Cell \u001b[0;32mIn[16], line 2\u001b[0m, in \u001b[0;36mverifier_donnees_manquantes\u001b[0;34m(base)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mverifier_donnees_manquantes\u001b[39m(base):\n\u001b[0;32m----> 2\u001b[0m donnees_manquantes \u001b[38;5;241m=\u001b[39m \u001b[43mbase\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43misna\u001b[49m()\u001b[38;5;241m.\u001b[39msum()\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDonnées manquantes pour la base :\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(donnees_manquantes)\n",
- "\u001b[0;31mAttributeError\u001b[0m: 'str' object has no attribute 'isna'"
- ]
- }
- ],
"source": [
"for nom_base in dic_prod_princing:\n",
" verifier_donnees_manquantes(nom_base)"
@@ -193,7 +190,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 8,
"id": "e0c67c01-e837-4772-b070-d1be0d895a36",
"metadata": {},
"outputs": [
@@ -209,19 +206,942 @@
"dtype: int64"
]
},
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# detection des NaN (nombre par colonne)\n",
+ "\n",
+ "type_of_pricing_formulas.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "3eaffaa6-1164-4ee9-a671-8b5eb3df797d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " type_of_id | \n",
+ " pricing_formula_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 127 | \n",
+ " 2021-01-05 11:55:51.226960+01:00 | \n",
+ " 2021-01-05 11:55:51.226960+01:00 | \n",
+ " cf2918b25e6dcf8c30798ca05c8ec8ed | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2425 | \n",
+ " 2021-01-05 11:55:51.235606+01:00 | \n",
+ " 2021-01-05 11:55:51.235606+01:00 | \n",
+ " 2c8ee3f7c1487d792b6c946314e681f2 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2937 | \n",
+ " 2021-01-05 11:55:51.240114+01:00 | \n",
+ " 2021-01-05 11:55:51.240114+01:00 | \n",
+ " 44e55c85e4eb59b3c3c01c137a6b25fc | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 48 | \n",
+ " 2021-01-05 11:55:51.244638+01:00 | \n",
+ " 2021-01-05 11:55:51.244638+01:00 | \n",
+ " ee3bb93b7e2217cd86a49d547fedf6c6 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 7 | \n",
+ " 2021-01-05 11:55:51.249409+01:00 | \n",
+ " 2021-01-05 11:55:51.249409+01:00 | \n",
+ " ae701668574f1a653d2b21ddfd250620 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 563 | \n",
+ " 564 | \n",
+ " 4 | \n",
+ " 6656 | \n",
+ " 2022-02-18 16:15:58.872249+01:00 | \n",
+ " 2022-02-18 16:15:58.872249+01:00 | \n",
+ " f669824cdca9de9697f07ff3ba365a8d | \n",
+ "
\n",
+ " \n",
+ " 564 | \n",
+ " 565 | \n",
+ " 4 | \n",
+ " 6607 | \n",
+ " 2022-02-18 16:15:59.231018+01:00 | \n",
+ " 2022-02-18 16:15:59.231018+01:00 | \n",
+ " 6421c8146a598758139153b0e7b921ea | \n",
+ "
\n",
+ " \n",
+ " 565 | \n",
+ " 566 | \n",
+ " 4 | \n",
+ " 6700 | \n",
+ " 2022-02-18 16:15:59.724812+01:00 | \n",
+ " 2022-02-18 16:15:59.724812+01:00 | \n",
+ " 6823f6d4d80b322fbfb8b83545a9f96d | \n",
+ "
\n",
+ " \n",
+ " 566 | \n",
+ " 567 | \n",
+ " 4 | \n",
+ " 8118 | \n",
+ " 2022-02-18 16:16:00.163381+01:00 | \n",
+ " 2022-02-18 16:16:00.163381+01:00 | \n",
+ " 35cfc12584b4d1b94795d97fd0aa56e8 | \n",
+ "
\n",
+ " \n",
+ " 567 | \n",
+ " 569 | \n",
+ " 7 | \n",
+ " 48157 | \n",
+ " 2023-03-13 11:30:29.480161+01:00 | \n",
+ " 2023-03-13 11:30:29.480161+01:00 | \n",
+ " 55863541f33fd229ac9b54d9ec1f4874 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
568 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id type_of_id pricing_formula_id created_at \\\n",
+ "0 1 1 127 2021-01-05 11:55:51.226960+01:00 \n",
+ "1 2 1 2425 2021-01-05 11:55:51.235606+01:00 \n",
+ "2 3 1 2937 2021-01-05 11:55:51.240114+01:00 \n",
+ "3 4 1 48 2021-01-05 11:55:51.244638+01:00 \n",
+ "4 5 1 7 2021-01-05 11:55:51.249409+01:00 \n",
+ ".. ... ... ... ... \n",
+ "563 564 4 6656 2022-02-18 16:15:58.872249+01:00 \n",
+ "564 565 4 6607 2022-02-18 16:15:59.231018+01:00 \n",
+ "565 566 4 6700 2022-02-18 16:15:59.724812+01:00 \n",
+ "566 567 4 8118 2022-02-18 16:16:00.163381+01:00 \n",
+ "567 569 7 48157 2023-03-13 11:30:29.480161+01:00 \n",
+ "\n",
+ " updated_at identifier \n",
+ "0 2021-01-05 11:55:51.226960+01:00 cf2918b25e6dcf8c30798ca05c8ec8ed \n",
+ "1 2021-01-05 11:55:51.235606+01:00 2c8ee3f7c1487d792b6c946314e681f2 \n",
+ "2 2021-01-05 11:55:51.240114+01:00 44e55c85e4eb59b3c3c01c137a6b25fc \n",
+ "3 2021-01-05 11:55:51.244638+01:00 ee3bb93b7e2217cd86a49d547fedf6c6 \n",
+ "4 2021-01-05 11:55:51.249409+01:00 ae701668574f1a653d2b21ddfd250620 \n",
+ ".. ... ... \n",
+ "563 2022-02-18 16:15:58.872249+01:00 f669824cdca9de9697f07ff3ba365a8d \n",
+ "564 2022-02-18 16:15:59.231018+01:00 6421c8146a598758139153b0e7b921ea \n",
+ "565 2022-02-18 16:15:59.724812+01:00 6823f6d4d80b322fbfb8b83545a9f96d \n",
+ "566 2022-02-18 16:16:00.163381+01:00 35cfc12584b4d1b94795d97fd0aa56e8 \n",
+ "567 2023-03-13 11:30:29.480161+01:00 55863541f33fd229ac9b54d9ec1f4874 \n",
+ "\n",
+ "[568 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type_of_pricing_formulas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "57298669-8d55-40d5-a5aa-4c5df984eec7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id int64\n",
+ "type_of_id int64\n",
+ "pricing_formula_id int64\n",
+ "created_at object\n",
+ "updated_at object\n",
+ "identifier object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#type des variables\n",
+ "\n",
+ "type_of_pricing_formulas.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "c11850cb-8833-44c0-a11d-9695d620a42b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " type_of_id | \n",
+ " pricing_formula_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [id, type_of_id, pricing_formula_id, created_at, updated_at, identifier]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Identification des doublons\n",
+ "type_of_pricing_formulas.loc[type_of_pricing_formulas['id'].duplicated(keep=False),:]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7a40de03-5e18-4d3d-a0f8-da960c29fad8",
+ "metadata": {},
+ "source": [
+ "## II. products_groups"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "89909175-6734-4e8e-8632-d6f8ca812388",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 0\n",
+ "percent_price 0\n",
+ "max_price 0\n",
+ "min_price 0\n",
+ "category_id 0\n",
+ "pricing_formula_id 0\n",
+ "representation_id 0\n",
+ "created_at 0\n",
+ "updated_at 0\n",
+ "dtype: int64"
+ ]
+ },
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "type_of_pricing_formulas.isna().sum()"
+ "# detection des NaN (nombre par colonne)\n",
+ "\n",
+ "products_groups.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "6a187170-96c4-48d2-9568-b270f67e2c27",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id int64\n",
+ "percent_price float64\n",
+ "max_price float64\n",
+ "min_price float64\n",
+ "category_id int64\n",
+ "pricing_formula_id int64\n",
+ "representation_id int64\n",
+ "created_at object\n",
+ "updated_at object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#type des variables\n",
+ "\n",
+ "products_groups.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "2fba2cb0-a6a4-43b2-a854-3be07939c28b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " percent_price | \n",
+ " max_price | \n",
+ " min_price | \n",
+ " category_id | \n",
+ " pricing_formula_id | \n",
+ " representation_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [id, percent_price, max_price, min_price, category_id, pricing_formula_id, representation_id, created_at, updated_at]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Identification des doublons\n",
+ "products_groups.loc[products_groups[['id','pricing_formula_id','representation_id']].duplicated(keep=False),:]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5312ac13-8fbd-4c3f-a98a-8c28f079a599",
+ "metadata": {},
+ "source": [
+ "## III. pricing_formulas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "3383a773-0817-4b23-84e7-8d5d0c74b179",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " extra_field | \n",
+ " identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 41909 | \n",
+ " visite mécènes 1h30 | \n",
+ " 2022-07-08 07:08:26.802266+02:00 | \n",
+ " 2022-07-08 07:08:26.802266+02:00 | \n",
+ " NaN | \n",
+ " 21d4b0043c12b21952b0797d140991a1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 502 | \n",
+ " entree mucem tp( expo picasso) | \n",
+ " 2020-09-03 13:43:59.816765+02:00 | \n",
+ " 2022-02-18 15:57:55.792581+01:00 | \n",
+ " NaN | \n",
+ " 223b09e6c3f1f75dbf8df019af97a555 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 504 | \n",
+ " nombre de personnes cinema | \n",
+ " 2020-09-03 13:43:59.818198+02:00 | \n",
+ " 2021-01-25 19:16:05.187114+01:00 | \n",
+ " NaN | \n",
+ " ba33b7b6d225a75d713a356b49c4d915 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 117 | \n",
+ " spectacle tarif e famille tr | \n",
+ " 2020-09-03 13:21:21.400249+02:00 | \n",
+ " 2023-03-13 11:30:29.525335+01:00 | \n",
+ " NaN | \n",
+ " a00b61ad933518856f86e63ca91a5750 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1496 | \n",
+ " billet nb famille mecene 1a | \n",
+ " 2020-09-03 14:29:33.320952+02:00 | \n",
+ " 2021-01-25 19:23:06.816402+01:00 | \n",
+ " NaN | \n",
+ " 7f6013803c242253a5ccde80f780984f | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 551 | \n",
+ " 529 | \n",
+ " billet nb expo gr | \n",
+ " 2020-09-03 13:43:59.835944+02:00 | \n",
+ " 2022-02-18 15:57:55.792581+01:00 | \n",
+ " NaN | \n",
+ " 7d888e42abe101fc8b21dc88948c8b74 | \n",
+ "
\n",
+ " \n",
+ " 552 | \n",
+ " 3153 | \n",
+ " nb pers visite scolaire rep | \n",
+ " 2020-09-03 16:32:37.068864+02:00 | \n",
+ " 2022-02-18 15:57:55.792581+01:00 | \n",
+ " NaN | \n",
+ " 3cf21731c25eee650d5b232ee4780563 | \n",
+ "
\n",
+ " \n",
+ " 553 | \n",
+ " 5847 | \n",
+ " visite scolaire rep1h00 | \n",
+ " 2021-06-09 18:10:49.742531+02:00 | \n",
+ " 2022-02-18 15:55:03.576236+01:00 | \n",
+ " NaN | \n",
+ " a7bb5a6892d55f0d5ee4ce5786ae5fc6 | \n",
+ "
\n",
+ " \n",
+ " 554 | \n",
+ " 5840 | \n",
+ " france billet - entree ts | \n",
+ " 2021-06-09 18:10:49.737576+02:00 | \n",
+ " 2022-02-18 16:16:00.199543+01:00 | \n",
+ " NaN | \n",
+ " 4c53016fc65847646f600eff853593e5 | \n",
+ "
\n",
+ " \n",
+ " 555 | \n",
+ " 5863 | \n",
+ " france billet - entree tp | \n",
+ " 2021-06-09 18:12:49.269924+02:00 | \n",
+ " 2022-02-18 16:16:00.199543+01:00 | \n",
+ " NaN | \n",
+ " 90e642c0e1ef6bc9f2bc43089798de00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
556 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name created_at \\\n",
+ "0 41909 visite mécènes 1h30 2022-07-08 07:08:26.802266+02:00 \n",
+ "1 502 entree mucem tp( expo picasso) 2020-09-03 13:43:59.816765+02:00 \n",
+ "2 504 nombre de personnes cinema 2020-09-03 13:43:59.818198+02:00 \n",
+ "3 117 spectacle tarif e famille tr 2020-09-03 13:21:21.400249+02:00 \n",
+ "4 1496 billet nb famille mecene 1a 2020-09-03 14:29:33.320952+02:00 \n",
+ ".. ... ... ... \n",
+ "551 529 billet nb expo gr 2020-09-03 13:43:59.835944+02:00 \n",
+ "552 3153 nb pers visite scolaire rep 2020-09-03 16:32:37.068864+02:00 \n",
+ "553 5847 visite scolaire rep1h00 2021-06-09 18:10:49.742531+02:00 \n",
+ "554 5840 france billet - entree ts 2021-06-09 18:10:49.737576+02:00 \n",
+ "555 5863 france billet - entree tp 2021-06-09 18:12:49.269924+02:00 \n",
+ "\n",
+ " updated_at extra_field \\\n",
+ "0 2022-07-08 07:08:26.802266+02:00 NaN \n",
+ "1 2022-02-18 15:57:55.792581+01:00 NaN \n",
+ "2 2021-01-25 19:16:05.187114+01:00 NaN \n",
+ "3 2023-03-13 11:30:29.525335+01:00 NaN \n",
+ "4 2021-01-25 19:23:06.816402+01:00 NaN \n",
+ ".. ... ... \n",
+ "551 2022-02-18 15:57:55.792581+01:00 NaN \n",
+ "552 2022-02-18 15:57:55.792581+01:00 NaN \n",
+ "553 2022-02-18 15:55:03.576236+01:00 NaN \n",
+ "554 2022-02-18 16:16:00.199543+01:00 NaN \n",
+ "555 2022-02-18 16:16:00.199543+01:00 NaN \n",
+ "\n",
+ " identifier \n",
+ "0 21d4b0043c12b21952b0797d140991a1 \n",
+ "1 223b09e6c3f1f75dbf8df019af97a555 \n",
+ "2 ba33b7b6d225a75d713a356b49c4d915 \n",
+ "3 a00b61ad933518856f86e63ca91a5750 \n",
+ "4 7f6013803c242253a5ccde80f780984f \n",
+ ".. ... \n",
+ "551 7d888e42abe101fc8b21dc88948c8b74 \n",
+ "552 3cf21731c25eee650d5b232ee4780563 \n",
+ "553 a7bb5a6892d55f0d5ee4ce5786ae5fc6 \n",
+ "554 4c53016fc65847646f600eff853593e5 \n",
+ "555 90e642c0e1ef6bc9f2bc43089798de00 \n",
+ "\n",
+ "[556 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pricing_formulas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "d8130c73-6c5f-45b1-93ae-db7679c8ca56",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 0.0\n",
+ "name 0.0\n",
+ "created_at 0.0\n",
+ "updated_at 0.0\n",
+ "extra_field 1.0\n",
+ "identifier 0.0\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# proportion de NaN par colonne\n",
+ "\n",
+ "pricing_formulas.isna().sum()/pricing_formulas.shape[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "44f1dbfd-c3cf-464b-9877-f37fcc61da92",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id int64\n",
+ "name object\n",
+ "created_at object\n",
+ "updated_at object\n",
+ "extra_field float64\n",
+ "identifier object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#type des variables\n",
+ "\n",
+ "pricing_formulas.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "6784b41b-da74-4fae-832e-16641ae710c1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " extra_field | \n",
+ " identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [id, name, created_at, updated_at, extra_field, identifier]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Identification des doublons\n",
+ "pricing_formulas.loc[pricing_formulas[['id']].duplicated(keep=False),:]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2145b0a4-b73d-4530-8c12-a78b1cf86eae",
+ "metadata": {},
+ "source": [
+ "## IV. product_packs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "e36b07a7-4f0b-4711-86a0-12a1d8158eef",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 0.0\n",
+ "name 1.0\n",
+ "type_of 0.0\n",
+ "created_at 0.0\n",
+ "updated_at 0.0\n",
+ "identifier 0.0\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# proportion de NaN par colonne\n",
+ "\n",
+ "product_packs.isna().sum()/product_packs.shape[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "8707396a-f86b-476d-a9f9-c39f8de1d02e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id int64\n",
+ "name float64\n",
+ "type_of int64\n",
+ "created_at object\n",
+ "updated_at object\n",
+ "identifier object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#type des variables\n",
+ "\n",
+ "product_packs.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "4b102bd3-924b-43da-8915-be7664c23f97",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " type_of | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [id, name, type_of, created_at, updated_at, identifier]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Identification des doublons\n",
+ "product_packs.loc[product_packs[['id']].duplicated(keep=False),:]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cfe0c525-896b-4731-b38e-306ff6ea0c65",
+ "metadata": {},
+ "source": [
+ "## V. products"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "968beb24-f70c-4eb6-8b1e-4b04bc7fe9c9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 0.0\n",
+ "amount 0.0\n",
+ "is_full_price 0.0\n",
+ "representation_id 0.0\n",
+ "pricing_formula_id 0.0\n",
+ "created_at 0.0\n",
+ "updated_at 0.0\n",
+ "category_id 0.0\n",
+ "apply_price 0.0\n",
+ "products_group_id 0.0\n",
+ "product_pack_id 0.0\n",
+ "extra_field 1.0\n",
+ "amount_consumption 1.0\n",
+ "identifier 0.0\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# proportion de NaN par colonne\n",
+ "\n",
+ "products.isna().sum()/products.shape[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "15bc6ac6-67e8-4e2c-9641-7ee8bb2581a3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id int64\n",
+ "amount float64\n",
+ "is_full_price bool\n",
+ "representation_id int64\n",
+ "pricing_formula_id int64\n",
+ "created_at object\n",
+ "updated_at object\n",
+ "category_id int64\n",
+ "apply_price float64\n",
+ "products_group_id int64\n",
+ "product_pack_id int64\n",
+ "extra_field float64\n",
+ "amount_consumption float64\n",
+ "identifier object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#type des variables\n",
+ "\n",
+ "products.dtypes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "46aad10f-8530-410e-872b-bb253c553a46",
+ "metadata": {},
+ "source": [
+ "# Jointure entre les bases"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "57298669-8d55-40d5-a5aa-4c5df984eec7",
+ "id": "eac537e1-bbad-45bc-a85c-12b675da1088",
"metadata": {},
"outputs": [],
"source": []