diff --git a/Brouillon_AJ.ipynb b/Brouillon_AJ.ipynb
index 815f332..8f5529a 100644
--- a/Brouillon_AJ.ipynb
+++ b/Brouillon_AJ.ipynb
@@ -10,7 +10,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "88af2795-8bf9-4df0-a059-be7c28fb4289",
"metadata": {},
"outputs": [],
@@ -29,35 +29,10 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "3ba1f385-2a2f-4b0c-be79-66f618469a9f",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['bdc2324-data/1',\n",
- " 'bdc2324-data/10',\n",
- " 'bdc2324-data/101',\n",
- " 'bdc2324-data/11',\n",
- " 'bdc2324-data/12',\n",
- " 'bdc2324-data/13',\n",
- " 'bdc2324-data/14',\n",
- " 'bdc2324-data/2',\n",
- " 'bdc2324-data/3',\n",
- " 'bdc2324-data/4',\n",
- " 'bdc2324-data/5',\n",
- " 'bdc2324-data/6',\n",
- " 'bdc2324-data/7',\n",
- " 'bdc2324-data/8',\n",
- " 'bdc2324-data/9']"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"import os\n",
"import s3fs\n",
@@ -71,7 +46,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "ba9d04ad-6cc1-4bac-b1a0-44bedfb09763",
"metadata": {},
"outputs": [],
@@ -95,7 +70,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "cacaecc1-4d8a-4e20-8cd3-b452cf17db56",
"metadata": {},
"outputs": [],
@@ -108,23 +83,10 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "2ec4b583-dc64-43e9-b3ae-6bbaee0bc135",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2023-11-09 18:10:45+00:00\n",
- "2020-06-02 08:24:08+00:00\n",
- "2023-10-12 01:39:48+00:00\n",
- "2023-10-10 17:06:29+00:00\n",
- "2023-11-01 09:20:48+00:00\n",
- "2021-03-31 14:59:02+00:00\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Chaque unites correspond à une période ? --> Non, les dossiers ont juste pour but de réduire la taille des fichiers\n",
"print(campaign_stats_1['sent_at'].max())\n",
@@ -139,32 +101,10 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "77894273-b3e5-4f29-bd63-9f4df8082b9b",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0 2021-03-28 16:01:09+00:00\n",
- "1 2021-03-28 16:01:09+00:00\n",
- "2 2021-03-28 16:00:59+00:00\n",
- "3 2021-03-28 16:00:59+00:00\n",
- "4 2021-03-28 16:01:06+00:00\n",
- " ... \n",
- "6214803 2023-10-23 09:32:33+00:00\n",
- "6214804 2023-10-23 09:32:49+00:00\n",
- "6214805 2023-10-23 09:33:28+00:00\n",
- "6214806 2023-10-23 09:31:53+00:00\n",
- "6214807 2023-10-23 09:33:54+00:00\n",
- "Name: sent_at, Length: 6214808, dtype: datetime64[ns, UTC]"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"campaign_stats_1['sent_at']"
]
@@ -179,19 +119,10 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"id": "4223c873-cbd3-46d1-ac96-c9a3b9e97092",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_1362/4118060109.py:9: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
- " customers_plus_2 = pd.read_csv(file_in, sep=\",\")\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"FILE_PATH_S3 = 'bdc2324-data/1/1customersplus.csv'\n",
"\n",
@@ -206,32 +137,10 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"id": "460f853a-68c0-42a7-9877-b83d3aaec813",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['id', 'lastname', 'firstname', 'birthdate', 'email', 'street_id',\n",
- " 'created_at', 'updated_at', 'civility', 'is_partner', 'extra',\n",
- " 'deleted_at', 'reference', 'gender', 'is_email_true', 'extra_field',\n",
- " 'identifier', 'opt_in', 'structure_id', 'note', 'profession',\n",
- " 'language', 'mcp_contact_id', 'need_reload', 'last_buying_date',\n",
- " 'max_price', 'ticket_sum', 'average_price', 'fidelity',\n",
- " 'average_purchase_delay', 'average_price_basket',\n",
- " 'average_ticket_basket', 'total_price', 'preferred_category',\n",
- " 'preferred_supplier', 'preferred_formula', 'purchase_count',\n",
- " 'first_buying_date', 'last_visiting_date', 'zipcode', 'country', 'age',\n",
- " 'tenant_id'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"customers_plus_1.columns"
]
@@ -278,21 +187,10 @@
},
{
"cell_type": "code",
- "execution_count": 61,
+ "execution_count": null,
"id": "32fa2215-3c79-40b5-8643-755865959fc7",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "1"
- ]
- },
- "execution_count": 61,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"common_id = set(customers_plus_2['id']).intersection(customers_plus_1['id'])\n",
"# Exemple id commun = caractéristiques communes\n",
@@ -303,70 +201,19 @@
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": null,
"id": "0eb345e4-69f5-4e16-ac57-e33674c6c43d",
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "id 0.000000\n",
- "lastname 43.461341\n",
- "firstname 44.995588\n",
- "birthdate 96.419870\n",
- "email 8.622075\n",
- "street_id 0.000000\n",
- "created_at 0.000000\n",
- "updated_at 0.000000\n",
- "civility 100.000000\n",
- "is_partner 0.000000\n",
- "extra 100.000000\n",
- "deleted_at 100.000000\n",
- "reference 100.000000\n",
- "gender 0.000000\n",
- "is_email_true 0.000000\n",
- "extra_field 100.000000\n",
- "identifier 0.000000\n",
- "opt_in 0.000000\n",
- "structure_id 88.072380\n",
- "note 99.403421\n",
- "profession 95.913503\n",
- "language 99.280945\n",
- "mcp_contact_id 34.876141\n",
- "need_reload 0.000000\n",
- "last_buying_date 51.653431\n",
- "max_price 51.653431\n",
- "ticket_sum 0.000000\n",
- "average_price 8.639195\n",
- "fidelity 0.000000\n",
- "average_purchase_delay 51.653431\n",
- "average_price_basket 51.653431\n",
- "average_ticket_basket 51.653431\n",
- "total_price 43.014236\n",
- "preferred_category 100.000000\n",
- "preferred_supplier 100.000000\n",
- "preferred_formula 100.000000\n",
- "purchase_count 0.000000\n",
- "first_buying_date 51.653431\n",
- "last_visiting_date 100.000000\n",
- "zipcode 71.176564\n",
- "country 5.459418\n",
- "age 96.419870\n",
- "tenant_id 0.000000\n",
- "dtype: float64\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "pd.DataFrame(customers_plus_1.isna().mean()*100)"
+ "customers_plus_1.isna().mean()*100"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "6f6ce60d-0912-497d-9108-330acccef394",
"metadata": {},
"outputs": [],
@@ -382,413 +229,12 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"id": "fa8ee17d-5092-40ac-8a0a-3790b016dd4e",
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " lastname | \n",
- " firstname | \n",
- " birthdate | \n",
- " email | \n",
- " street_id | \n",
- " created_at | \n",
- " updated_at | \n",
- " civility | \n",
- " is_partner | \n",
- " ... | \n",
- " tenant_id | \n",
- " id_x | \n",
- " customer_id | \n",
- " purchase_date | \n",
- " type_of | \n",
- " is_from_subscription | \n",
- " amount | \n",
- " is_full_price | \n",
- " start_date_time | \n",
- " event_name | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 405082 | \n",
- " lastname405082 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 6 | \n",
- " 2023-01-12 06:30:31.197484+01:00 | \n",
- " 2023-01-12 06:30:31.197484+01:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " 1556 | \n",
- " 992423 | \n",
- " 405082 | \n",
- " 2023-01-11 17:08:41+01:00 | \n",
- " 3 | \n",
- " False | \n",
- " 13.0 | \n",
- " False | \n",
- " 2023-02-06 20:00:00+01:00 | \n",
- " zaide | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 405082 | \n",
- " lastname405082 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 6 | \n",
- " 2023-01-12 06:30:31.197484+01:00 | \n",
- " 2023-01-12 06:30:31.197484+01:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " 1556 | \n",
- " 992423 | \n",
- " 405082 | \n",
- " 2023-01-11 17:08:41+01:00 | \n",
- " 3 | \n",
- " False | \n",
- " 13.0 | \n",
- " False | \n",
- " 2023-02-06 20:00:00+01:00 | \n",
- " zaide | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 411168 | \n",
- " lastname411168 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 6 | \n",
- " 2023-03-17 06:30:35.431967+01:00 | \n",
- " 2023-03-17 06:30:35.431967+01:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " 1556 | \n",
- " 1053934 | \n",
- " 411168 | \n",
- " 2023-03-16 16:23:10+01:00 | \n",
- " 3 | \n",
- " False | \n",
- " 62.0 | \n",
- " False | \n",
- " 2023-03-19 16:00:00+01:00 | \n",
- " luisa miller | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 411168 | \n",
- " lastname411168 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 6 | \n",
- " 2023-03-17 06:30:35.431967+01:00 | \n",
- " 2023-03-17 06:30:35.431967+01:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " 1556 | \n",
- " 1053934 | \n",
- " 411168 | \n",
- " 2023-03-16 16:23:10+01:00 | \n",
- " 3 | \n",
- " False | \n",
- " 62.0 | \n",
- " False | \n",
- " 2023-03-19 16:00:00+01:00 | \n",
- " luisa miller | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 4380 | \n",
- " lastname4380 | \n",
- " firstname4380 | \n",
- " NaN | \n",
- " NaN | \n",
- " 1 | \n",
- " 2021-04-22 14:51:55.432952+02:00 | \n",
- " 2022-04-14 11:41:33.738500+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " 1556 | \n",
- " 1189141 | \n",
- " 4380 | \n",
- " 2020-11-26 13:12:53+01:00 | \n",
- " 3 | \n",
- " False | \n",
- " 51.3 | \n",
- " False | \n",
- " 2020-12-01 20:00:00+01:00 | \n",
- " iphigenie en tauride | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 318964 | \n",
- " 19095 | \n",
- " lastname19095 | \n",
- " firstname19095 | \n",
- " 1979-07-16 | \n",
- " email19095 | \n",
- " 6 | \n",
- " 2021-04-22 15:06:30.120537+02:00 | \n",
- " 2023-09-12 18:27:36.904104+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " 1556 | \n",
- " 1090839 | \n",
- " 19095 | \n",
- " 2019-05-19 21:18:36+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 4.5 | \n",
- " False | \n",
- " 2019-05-27 20:00:00+02:00 | \n",
- " entre femmes | \n",
- "
\n",
- " \n",
- " 318965 | \n",
- " 19095 | \n",
- " lastname19095 | \n",
- " firstname19095 | \n",
- " 1979-07-16 | \n",
- " email19095 | \n",
- " 6 | \n",
- " 2021-04-22 15:06:30.120537+02:00 | \n",
- " 2023-09-12 18:27:36.904104+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " 1556 | \n",
- " 1090839 | \n",
- " 19095 | \n",
- " 2019-05-19 21:18:36+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 4.5 | \n",
- " False | \n",
- " 2019-05-27 20:00:00+02:00 | \n",
- " entre femmes | \n",
- "
\n",
- " \n",
- " 318966 | \n",
- " 19095 | \n",
- " lastname19095 | \n",
- " firstname19095 | \n",
- " 1979-07-16 | \n",
- " email19095 | \n",
- " 6 | \n",
- " 2021-04-22 15:06:30.120537+02:00 | \n",
- " 2023-09-12 18:27:36.904104+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " 1556 | \n",
- " 1090839 | \n",
- " 19095 | \n",
- " 2019-05-19 21:18:36+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 4.5 | \n",
- " False | \n",
- " 2019-05-27 20:00:00+02:00 | \n",
- " entre femmes | \n",
- "
\n",
- " \n",
- " 318967 | \n",
- " 19095 | \n",
- " lastname19095 | \n",
- " firstname19095 | \n",
- " 1979-07-16 | \n",
- " email19095 | \n",
- " 6 | \n",
- " 2021-04-22 15:06:30.120537+02:00 | \n",
- " 2023-09-12 18:27:36.904104+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " 1556 | \n",
- " 1244277 | \n",
- " 19095 | \n",
- " 2019-12-31 11:04:07+01:00 | \n",
- " 1 | \n",
- " False | \n",
- " 5.5 | \n",
- " False | \n",
- " 2020-02-03 20:00:00+01:00 | \n",
- " a boire et a manger | \n",
- "
\n",
- " \n",
- " 318968 | \n",
- " 19095 | \n",
- " lastname19095 | \n",
- " firstname19095 | \n",
- " 1979-07-16 | \n",
- " email19095 | \n",
- " 6 | \n",
- " 2021-04-22 15:06:30.120537+02:00 | \n",
- " 2023-09-12 18:27:36.904104+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " 1556 | \n",
- " 1244277 | \n",
- " 19095 | \n",
- " 2019-12-31 11:04:07+01:00 | \n",
- " 1 | \n",
- " False | \n",
- " 5.5 | \n",
- " False | \n",
- " 2020-02-03 20:00:00+01:00 | \n",
- " a boire et a manger | \n",
- "
\n",
- " \n",
- "
\n",
- "
318969 rows × 52 columns
\n",
- "
"
- ],
- "text/plain": [
- " id lastname firstname birthdate email \\\n",
- "0 405082 lastname405082 NaN NaN NaN \n",
- "1 405082 lastname405082 NaN NaN NaN \n",
- "2 411168 lastname411168 NaN NaN NaN \n",
- "3 411168 lastname411168 NaN NaN NaN \n",
- "4 4380 lastname4380 firstname4380 NaN NaN \n",
- "... ... ... ... ... ... \n",
- "318964 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
- "318965 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
- "318966 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
- "318967 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
- "318968 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
- "\n",
- " street_id created_at \\\n",
- "0 6 2023-01-12 06:30:31.197484+01:00 \n",
- "1 6 2023-01-12 06:30:31.197484+01:00 \n",
- "2 6 2023-03-17 06:30:35.431967+01:00 \n",
- "3 6 2023-03-17 06:30:35.431967+01:00 \n",
- "4 1 2021-04-22 14:51:55.432952+02:00 \n",
- "... ... ... \n",
- "318964 6 2021-04-22 15:06:30.120537+02:00 \n",
- "318965 6 2021-04-22 15:06:30.120537+02:00 \n",
- "318966 6 2021-04-22 15:06:30.120537+02:00 \n",
- "318967 6 2021-04-22 15:06:30.120537+02:00 \n",
- "318968 6 2021-04-22 15:06:30.120537+02:00 \n",
- "\n",
- " updated_at civility is_partner ... \\\n",
- "0 2023-01-12 06:30:31.197484+01:00 NaN False ... \n",
- "1 2023-01-12 06:30:31.197484+01:00 NaN False ... \n",
- "2 2023-03-17 06:30:35.431967+01:00 NaN False ... \n",
- "3 2023-03-17 06:30:35.431967+01:00 NaN False ... \n",
- "4 2022-04-14 11:41:33.738500+02:00 NaN False ... \n",
- "... ... ... ... ... \n",
- "318964 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
- "318965 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
- "318966 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
- "318967 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
- "318968 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
- "\n",
- " tenant_id id_x customer_id purchase_date type_of \\\n",
- "0 1556 992423 405082 2023-01-11 17:08:41+01:00 3 \n",
- "1 1556 992423 405082 2023-01-11 17:08:41+01:00 3 \n",
- "2 1556 1053934 411168 2023-03-16 16:23:10+01:00 3 \n",
- "3 1556 1053934 411168 2023-03-16 16:23:10+01:00 3 \n",
- "4 1556 1189141 4380 2020-11-26 13:12:53+01:00 3 \n",
- "... ... ... ... ... ... \n",
- "318964 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n",
- "318965 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n",
- "318966 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n",
- "318967 1556 1244277 19095 2019-12-31 11:04:07+01:00 1 \n",
- "318968 1556 1244277 19095 2019-12-31 11:04:07+01:00 1 \n",
- "\n",
- " is_from_subscription amount is_full_price start_date_time \\\n",
- "0 False 13.0 False 2023-02-06 20:00:00+01:00 \n",
- "1 False 13.0 False 2023-02-06 20:00:00+01:00 \n",
- "2 False 62.0 False 2023-03-19 16:00:00+01:00 \n",
- "3 False 62.0 False 2023-03-19 16:00:00+01:00 \n",
- "4 False 51.3 False 2020-12-01 20:00:00+01:00 \n",
- "... ... ... ... ... \n",
- "318964 False 4.5 False 2019-05-27 20:00:00+02:00 \n",
- "318965 False 4.5 False 2019-05-27 20:00:00+02:00 \n",
- "318966 False 4.5 False 2019-05-27 20:00:00+02:00 \n",
- "318967 False 5.5 False 2020-02-03 20:00:00+01:00 \n",
- "318968 False 5.5 False 2020-02-03 20:00:00+01:00 \n",
- "\n",
- " event_name \n",
- "0 zaide \n",
- "1 zaide \n",
- "2 luisa miller \n",
- "3 luisa miller \n",
- "4 iphigenie en tauride \n",
- "... ... \n",
- "318964 entre femmes \n",
- "318965 entre femmes \n",
- "318966 entre femmes \n",
- "318967 a boire et a manger \n",
- "318968 a boire et a manger \n",
- "\n",
- "[318969 rows x 52 columns]"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# Jointure\n",
"merge_1 = pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')[['id_x', 'customer_id','product_id', 'purchase_date', 'type_of', 'is_from_subscription']]\n",
@@ -810,203 +256,10 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": null,
"id": "22bfad2b-d52a-4077-9b39-bee35004e01c",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id_x | \n",
- " purchase_date | \n",
- " type_of | \n",
- " is_from_subscription | \n",
- " amount | \n",
- " is_full_price | \n",
- " start_date_time | \n",
- " event_name | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 992423 | \n",
- " 2023-01-11 17:08:41+01:00 | \n",
- " 3 | \n",
- " False | \n",
- " 13.0 | \n",
- " False | \n",
- " 2023-02-06 20:00:00+01:00 | \n",
- " zaide | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 992423 | \n",
- " 2023-01-11 17:08:41+01:00 | \n",
- " 3 | \n",
- " False | \n",
- " 13.0 | \n",
- " False | \n",
- " 2023-02-06 20:00:00+01:00 | \n",
- " zaide | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 1053934 | \n",
- " 2023-03-16 16:23:10+01:00 | \n",
- " 3 | \n",
- " False | \n",
- " 62.0 | \n",
- " False | \n",
- " 2023-03-19 16:00:00+01:00 | \n",
- " luisa miller | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 1053934 | \n",
- " 2023-03-16 16:23:10+01:00 | \n",
- " 3 | \n",
- " False | \n",
- " 62.0 | \n",
- " False | \n",
- " 2023-03-19 16:00:00+01:00 | \n",
- " luisa miller | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 1189141 | \n",
- " 2020-11-26 13:12:53+01:00 | \n",
- " 3 | \n",
- " False | \n",
- " 51.3 | \n",
- " False | \n",
- " 2020-12-01 20:00:00+01:00 | \n",
- " iphigenie en tauride | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 318964 | \n",
- " 1090839 | \n",
- " 2019-05-19 21:18:36+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 4.5 | \n",
- " False | \n",
- " 2019-05-27 20:00:00+02:00 | \n",
- " entre femmes | \n",
- "
\n",
- " \n",
- " 318965 | \n",
- " 1090839 | \n",
- " 2019-05-19 21:18:36+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 4.5 | \n",
- " False | \n",
- " 2019-05-27 20:00:00+02:00 | \n",
- " entre femmes | \n",
- "
\n",
- " \n",
- " 318966 | \n",
- " 1090839 | \n",
- " 2019-05-19 21:18:36+02:00 | \n",
- " 1 | \n",
- " False | \n",
- " 4.5 | \n",
- " False | \n",
- " 2019-05-27 20:00:00+02:00 | \n",
- " entre femmes | \n",
- "
\n",
- " \n",
- " 318967 | \n",
- " 1244277 | \n",
- " 2019-12-31 11:04:07+01:00 | \n",
- " 1 | \n",
- " False | \n",
- " 5.5 | \n",
- " False | \n",
- " 2020-02-03 20:00:00+01:00 | \n",
- " a boire et a manger | \n",
- "
\n",
- " \n",
- " 318968 | \n",
- " 1244277 | \n",
- " 2019-12-31 11:04:07+01:00 | \n",
- " 1 | \n",
- " False | \n",
- " 5.5 | \n",
- " False | \n",
- " 2020-02-03 20:00:00+01:00 | \n",
- " a boire et a manger | \n",
- "
\n",
- " \n",
- "
\n",
- "
318969 rows × 8 columns
\n",
- "
"
- ],
- "text/plain": [
- " id_x purchase_date type_of is_from_subscription \\\n",
- "0 992423 2023-01-11 17:08:41+01:00 3 False \n",
- "1 992423 2023-01-11 17:08:41+01:00 3 False \n",
- "2 1053934 2023-03-16 16:23:10+01:00 3 False \n",
- "3 1053934 2023-03-16 16:23:10+01:00 3 False \n",
- "4 1189141 2020-11-26 13:12:53+01:00 3 False \n",
- "... ... ... ... ... \n",
- "318964 1090839 2019-05-19 21:18:36+02:00 1 False \n",
- "318965 1090839 2019-05-19 21:18:36+02:00 1 False \n",
- "318966 1090839 2019-05-19 21:18:36+02:00 1 False \n",
- "318967 1244277 2019-12-31 11:04:07+01:00 1 False \n",
- "318968 1244277 2019-12-31 11:04:07+01:00 1 False \n",
- "\n",
- " amount is_full_price start_date_time event_name \n",
- "0 13.0 False 2023-02-06 20:00:00+01:00 zaide \n",
- "1 13.0 False 2023-02-06 20:00:00+01:00 zaide \n",
- "2 62.0 False 2023-03-19 16:00:00+01:00 luisa miller \n",
- "3 62.0 False 2023-03-19 16:00:00+01:00 luisa miller \n",
- "4 51.3 False 2020-12-01 20:00:00+01:00 iphigenie en tauride \n",
- "... ... ... ... ... \n",
- "318964 4.5 False 2019-05-27 20:00:00+02:00 entre femmes \n",
- "318965 4.5 False 2019-05-27 20:00:00+02:00 entre femmes \n",
- "318966 4.5 False 2019-05-27 20:00:00+02:00 entre femmes \n",
- "318967 5.5 False 2020-02-03 20:00:00+01:00 a boire et a manger \n",
- "318968 5.5 False 2020-02-03 20:00:00+01:00 a boire et a manger \n",
- "\n",
- "[318969 rows x 8 columns]"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# Jointure\n",
"var_choosed = ['id_x', 'customer_id','product_id', 'purchase_date', 'type_of', 'is_from_subscription']\n",
@@ -1043,35 +296,10 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "f47ba14a-8601-4b91-9712-223a5ed8a1d1",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['id', 'customer_id', 'target_id', 'created_at', 'updated_at', 'name',\n",
- " 'extra_field'],\n",
- " dtype='object')\n",
- "(124302, 7)\n",
- "\n",
- "RangeIndex: 124302 entries, 0 to 124301\n",
- "Data columns (total 7 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 124302 non-null int64 \n",
- " 1 customer_id 124302 non-null int64 \n",
- " 2 target_id 124302 non-null int64 \n",
- " 3 created_at 124296 non-null object \n",
- " 4 updated_at 124296 non-null object \n",
- " 5 name 0 non-null float64\n",
- " 6 extra_field 0 non-null float64\n",
- "dtypes: float64(2), int64(3), object(2)\n",
- "memory usage: 6.6+ MB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Client\n",
"print(customer_target_mappings.columns)\n",
@@ -1081,74 +309,30 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": null,
"id": "f11f829e-66b1-4fd0-a46f-5ae7cb78073f",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([nan])"
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"customer_target_mappings['extra_field'].unique()"
]
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": null,
"id": "c240ab80-c746-4a64-ac6a-be8382c4f0ec",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([nan])"
- ]
- },
- "execution_count": 27,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"customer_target_mappings['name'].unique()"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "c03c0597-3f21-4673-8a0f-24d7d9bc5ce4",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['id', 'is_import', 'name', 'created_at', 'updated_at', 'identifier'], dtype='object')\n",
- "(4, 6)\n",
- "\n",
- "RangeIndex: 4 entries, 0 to 3\n",
- "Data columns (total 6 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 4 non-null int64 \n",
- " 1 is_import 4 non-null bool \n",
- " 2 name 4 non-null object\n",
- " 3 created_at 4 non-null object\n",
- " 4 updated_at 4 non-null object\n",
- " 5 identifier 4 non-null object\n",
- "dtypes: bool(1), int64(1), object(4)\n",
- "memory usage: 292.0+ bytes\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Segmentation existante\n",
"print(target_types.columns)\n",
@@ -1158,132 +342,22 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"id": "5adb1773-648d-4683-bc08-d1f2298c1283",
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " is_import | \n",
- " name | \n",
- " created_at | \n",
- " updated_at | \n",
- " identifier | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1 | \n",
- " False | \n",
- " manual_static_filter | \n",
- " 2021-04-29 13:42:14.111085+02:00 | \n",
- " 2021-04-29 13:42:14.111085+02:00 | \n",
- " fb27e81baa4debc6a4e1a8639c20e808 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 3 | \n",
- " True | \n",
- " manual_structure | \n",
- " 2021-05-07 15:20:00.626650+02:00 | \n",
- " 2021-05-07 15:20:00.626650+02:00 | \n",
- " 382bca214204a2d3462f5ec2728d5d1e | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 6 | \n",
- " False | \n",
- " manual_dynamic_filter | \n",
- " 2021-09-09 14:27:47.641302+02:00 | \n",
- " 2021-09-09 14:27:47.641302+02:00 | \n",
- " e0f4b8693184850fefd6d2a38f10584e | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 2 | \n",
- " True | \n",
- " manual_import | \n",
- " 2021-04-29 13:49:30.107110+02:00 | \n",
- " 2021-04-29 13:49:30.107110+02:00 | \n",
- " 12213df2ce68a624e4c0070521437bac | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " id is_import name created_at \\\n",
- "0 1 False manual_static_filter 2021-04-29 13:42:14.111085+02:00 \n",
- "1 3 True manual_structure 2021-05-07 15:20:00.626650+02:00 \n",
- "2 6 False manual_dynamic_filter 2021-09-09 14:27:47.641302+02:00 \n",
- "3 2 True manual_import 2021-04-29 13:49:30.107110+02:00 \n",
- "\n",
- " updated_at identifier \n",
- "0 2021-04-29 13:42:14.111085+02:00 fb27e81baa4debc6a4e1a8639c20e808 \n",
- "1 2021-05-07 15:20:00.626650+02:00 382bca214204a2d3462f5ec2728d5d1e \n",
- "2 2021-09-09 14:27:47.641302+02:00 e0f4b8693184850fefd6d2a38f10584e \n",
- "3 2021-04-29 13:49:30.107110+02:00 12213df2ce68a624e4c0070521437bac "
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"target_types"
]
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"id": "3d65f74e-47fc-4296-b493-a1ebefb91cde",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['id', 'name', 'created_at', 'updated_at', 'identifier'], dtype='object')\n",
- "(20, 5)\n",
- "\n",
- "RangeIndex: 20 entries, 0 to 19\n",
- "Data columns (total 5 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 20 non-null int64 \n",
- " 1 name 19 non-null object\n",
- " 2 created_at 20 non-null object\n",
- " 3 updated_at 20 non-null object\n",
- " 4 identifier 20 non-null object\n",
- "dtypes: int64(1), object(4)\n",
- "memory usage: 928.0+ bytes\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Tags = clients\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11tags.csv'\n",
@@ -1298,307 +372,20 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"id": "8a689a63-165b-4c4e-bbb0-695b661048d9",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " name | \n",
- " created_at | \n",
- " updated_at | \n",
- " identifier | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 2 | \n",
- " ens-écoles | \n",
- " 2021-05-07 15:24:19.808501+02:00 | \n",
- " 2021-05-07 15:24:19.808501+02:00 | \n",
- " b6a360c5f84595940c5774f13fd39cc3 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 1 | \n",
- " NaN | \n",
- " 2021-05-07 15:24:19.805589+02:00 | \n",
- " 2021-05-07 15:24:19.805589+02:00 | \n",
- " d41d8cd98f00b204e9800998ecf8427e | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 4 | \n",
- " ecoles primaires rennes | \n",
- " 2021-05-07 15:29:06.388415+02:00 | \n",
- " 2021-05-07 15:29:06.388415+02:00 | \n",
- " ca8649dd64c240d118f60b07d11a7053 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 5 | \n",
- " Angers Nantes Opéra | \n",
- " 2023-01-27 15:59:58.187557+01:00 | \n",
- " 2023-01-27 15:59:58.187557+01:00 | \n",
- " f8f500f937fe312542399299cdc13f7e | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 6 | \n",
- " Opéras | \n",
- " 2023-01-27 16:03:59.654938+01:00 | \n",
- " 2023-01-27 16:03:59.654938+01:00 | \n",
- " 22eb2c616983ec7b54a093f84b230505 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 7 | \n",
- " Ministère de la Culture | \n",
- " 2023-01-30 11:22:29.636813+01:00 | \n",
- " 2023-01-30 11:22:29.636813+01:00 | \n",
- " 1b8c5c08fde000d90905a3d14af7763d | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 8 | \n",
- " Orchestres | \n",
- " 2023-01-30 11:33:56.392799+01:00 | \n",
- " 2023-01-30 11:33:56.392799+01:00 | \n",
- " 7c2aee0c80642d7e325a450f2dec45e5 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 9 | \n",
- " Cooperative | \n",
- " 2023-01-31 14:44:38.471146+01:00 | \n",
- " 2023-01-31 14:44:38.471146+01:00 | \n",
- " 6c88c36ffaab88d255865aa3111d7686 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 10 | \n",
- " Théâtres | \n",
- " 2023-01-31 14:45:17.804428+01:00 | \n",
- " 2023-01-31 14:45:17.804428+01:00 | \n",
- " b2c19672df82021702b79482c8cda85a | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 11 | \n",
- " La co[opera]tive | \n",
- " 2023-02-16 17:11:35.004478+01:00 | \n",
- " 2023-02-16 17:11:35.004478+01:00 | \n",
- " 5dbaa3a1f278c0fcf981d447ad20957a | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 12 | \n",
- " Ville de Rennes | \n",
- " 2023-02-16 17:37:13.816196+01:00 | \n",
- " 2023-02-16 17:37:13.816196+01:00 | \n",
- " bc483d04d9c3a08f167a3ce64366ca72 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " 13 | \n",
- " Ensembles en résidence | \n",
- " 2023-02-16 17:55:54.877374+01:00 | \n",
- " 2023-02-16 17:55:54.877374+01:00 | \n",
- " e70635e771de13268dccf02bb2abfaf9 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " 14 | \n",
- " Ministère | \n",
- " 2023-02-17 11:17:54.429462+01:00 | \n",
- " 2023-02-17 11:17:54.429462+01:00 | \n",
- " a3f0582853fd19f5b57e3651f8a20e7a | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " 15 | \n",
- " Rennes métropole | \n",
- " 2023-02-17 11:53:24.490786+01:00 | \n",
- " 2023-02-17 11:53:24.490786+01:00 | \n",
- " e98b8db5941b96c29c353b6f2f502055 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " 16 | \n",
- " Ville de Rennes - équipements culturels | \n",
- " 2023-02-17 12:00:10.649104+01:00 | \n",
- " 2023-02-17 12:00:10.649104+01:00 | \n",
- " a44edffc7edb852982efa7f4aa6d0e25 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " 17 | \n",
- " Structures culturelles rennaises | \n",
- " 2023-02-17 12:05:55.583016+01:00 | \n",
- " 2023-02-17 12:05:55.583016+01:00 | \n",
- " 241550517e4e3b1c926e9aeab0f621cd | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " 18 | \n",
- " Université Rennes 2 | \n",
- " 2023-02-17 14:23:44.832959+01:00 | \n",
- " 2023-02-17 14:23:44.832959+01:00 | \n",
- " 4057c5cee51c4e10aa819f0cf48adc3f | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " 19 | \n",
- " Centres chorégraphiques nationaux | \n",
- " 2023-02-17 15:29:41.827321+01:00 | \n",
- " 2023-02-17 15:29:41.827321+01:00 | \n",
- " 41e75941dfb766365498d917abe0102f | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " 20 | \n",
- " Télévision | \n",
- " 2023-02-17 15:46:13.746092+01:00 | \n",
- " 2023-02-17 15:46:13.746092+01:00 | \n",
- " 36d6409c539dd79c1f3af8c5948603eb | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " 21 | \n",
- " structures culturelles nationales | \n",
- " 2023-02-17 15:56:00.555722+01:00 | \n",
- " 2023-02-17 15:56:00.555722+01:00 | \n",
- " 5311cf7e42aac53289e1c4a338d5cfa4 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " id name \\\n",
- "0 2 ens-écoles \n",
- "1 1 NaN \n",
- "2 4 ecoles primaires rennes \n",
- "3 5 Angers Nantes Opéra \n",
- "4 6 Opéras \n",
- "5 7 Ministère de la Culture \n",
- "6 8 Orchestres \n",
- "7 9 Cooperative \n",
- "8 10 Théâtres \n",
- "9 11 La co[opera]tive \n",
- "10 12 Ville de Rennes \n",
- "11 13 Ensembles en résidence \n",
- "12 14 Ministère \n",
- "13 15 Rennes métropole \n",
- "14 16 Ville de Rennes - équipements culturels \n",
- "15 17 Structures culturelles rennaises \n",
- "16 18 Université Rennes 2 \n",
- "17 19 Centres chorégraphiques nationaux \n",
- "18 20 Télévision \n",
- "19 21 structures culturelles nationales \n",
- "\n",
- " created_at updated_at \\\n",
- "0 2021-05-07 15:24:19.808501+02:00 2021-05-07 15:24:19.808501+02:00 \n",
- "1 2021-05-07 15:24:19.805589+02:00 2021-05-07 15:24:19.805589+02:00 \n",
- "2 2021-05-07 15:29:06.388415+02:00 2021-05-07 15:29:06.388415+02:00 \n",
- "3 2023-01-27 15:59:58.187557+01:00 2023-01-27 15:59:58.187557+01:00 \n",
- "4 2023-01-27 16:03:59.654938+01:00 2023-01-27 16:03:59.654938+01:00 \n",
- "5 2023-01-30 11:22:29.636813+01:00 2023-01-30 11:22:29.636813+01:00 \n",
- "6 2023-01-30 11:33:56.392799+01:00 2023-01-30 11:33:56.392799+01:00 \n",
- "7 2023-01-31 14:44:38.471146+01:00 2023-01-31 14:44:38.471146+01:00 \n",
- "8 2023-01-31 14:45:17.804428+01:00 2023-01-31 14:45:17.804428+01:00 \n",
- "9 2023-02-16 17:11:35.004478+01:00 2023-02-16 17:11:35.004478+01:00 \n",
- "10 2023-02-16 17:37:13.816196+01:00 2023-02-16 17:37:13.816196+01:00 \n",
- "11 2023-02-16 17:55:54.877374+01:00 2023-02-16 17:55:54.877374+01:00 \n",
- "12 2023-02-17 11:17:54.429462+01:00 2023-02-17 11:17:54.429462+01:00 \n",
- "13 2023-02-17 11:53:24.490786+01:00 2023-02-17 11:53:24.490786+01:00 \n",
- "14 2023-02-17 12:00:10.649104+01:00 2023-02-17 12:00:10.649104+01:00 \n",
- "15 2023-02-17 12:05:55.583016+01:00 2023-02-17 12:05:55.583016+01:00 \n",
- "16 2023-02-17 14:23:44.832959+01:00 2023-02-17 14:23:44.832959+01:00 \n",
- "17 2023-02-17 15:29:41.827321+01:00 2023-02-17 15:29:41.827321+01:00 \n",
- "18 2023-02-17 15:46:13.746092+01:00 2023-02-17 15:46:13.746092+01:00 \n",
- "19 2023-02-17 15:56:00.555722+01:00 2023-02-17 15:56:00.555722+01:00 \n",
- "\n",
- " identifier \n",
- "0 b6a360c5f84595940c5774f13fd39cc3 \n",
- "1 d41d8cd98f00b204e9800998ecf8427e \n",
- "2 ca8649dd64c240d118f60b07d11a7053 \n",
- "3 f8f500f937fe312542399299cdc13f7e \n",
- "4 22eb2c616983ec7b54a093f84b230505 \n",
- "5 1b8c5c08fde000d90905a3d14af7763d \n",
- "6 7c2aee0c80642d7e325a450f2dec45e5 \n",
- "7 6c88c36ffaab88d255865aa3111d7686 \n",
- "8 b2c19672df82021702b79482c8cda85a \n",
- "9 5dbaa3a1f278c0fcf981d447ad20957a \n",
- "10 bc483d04d9c3a08f167a3ce64366ca72 \n",
- "11 e70635e771de13268dccf02bb2abfaf9 \n",
- "12 a3f0582853fd19f5b57e3651f8a20e7a \n",
- "13 e98b8db5941b96c29c353b6f2f502055 \n",
- "14 a44edffc7edb852982efa7f4aa6d0e25 \n",
- "15 241550517e4e3b1c926e9aeab0f621cd \n",
- "16 4057c5cee51c4e10aa819f0cf48adc3f \n",
- "17 41e75941dfb766365498d917abe0102f \n",
- "18 36d6409c539dd79c1f3af8c5948603eb \n",
- "19 5311cf7e42aac53289e1c4a338d5cfa4 "
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"tags"
]
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"id": "69e38c52-0570-4531-aebb-9deb6db8c40b",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['id', 'structure_id', 'tag_id', 'created_at', 'updated_at'], dtype='object')\n",
- "(179, 5)\n",
- "\n",
- "RangeIndex: 179 entries, 0 to 178\n",
- "Data columns (total 5 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 179 non-null int64 \n",
- " 1 structure_id 179 non-null int64 \n",
- " 2 tag_id 179 non-null int64 \n",
- " 3 created_at 179 non-null object\n",
- " 4 updated_at 179 non-null object\n",
- "dtypes: int64(3), object(2)\n",
- "memory usage: 7.1+ KB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Structure = clients\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11structure_tag_mappings.csv'\n",
@@ -1613,247 +400,20 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"id": "74dc34ad-375b-48df-a900-40d92c5fff13",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " structure_id | \n",
- " tag_id | \n",
- " created_at | \n",
- " updated_at | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 123 | \n",
- " 187 | \n",
- " 6 | \n",
- " 2023-01-27 16:03:59.680222+01:00 | \n",
- " 2023-01-27 16:03:59.680222+01:00 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2 | \n",
- " 2 | \n",
- " 2 | \n",
- " 2021-05-07 15:24:19.872895+02:00 | \n",
- " 2021-05-07 15:24:19.872895+02:00 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 3 | \n",
- " 3 | \n",
- " 2 | \n",
- " 2021-05-07 15:24:19.873830+02:00 | \n",
- " 2021-05-07 15:24:19.873830+02:00 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 4 | \n",
- " 4 | \n",
- " 2 | \n",
- " 2021-05-07 15:24:19.874628+02:00 | \n",
- " 2021-05-07 15:24:19.874628+02:00 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 5 | \n",
- " 5 | \n",
- " 2 | \n",
- " 2021-05-07 15:24:19.875421+02:00 | \n",
- " 2021-05-07 15:24:19.875421+02:00 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 174 | \n",
- " 184 | \n",
- " 236 | \n",
- " 10 | \n",
- " 2023-02-17 16:35:25.041114+01:00 | \n",
- " 2023-02-17 16:35:25.041114+01:00 | \n",
- "
\n",
- " \n",
- " 175 | \n",
- " 185 | \n",
- " 237 | \n",
- " 17 | \n",
- " 2023-02-17 16:39:10.799478+01:00 | \n",
- " 2023-02-17 16:39:10.799478+01:00 | \n",
- "
\n",
- " \n",
- " 176 | \n",
- " 186 | \n",
- " 238 | \n",
- " 19 | \n",
- " 2023-02-17 16:53:21.098690+01:00 | \n",
- " 2023-02-17 16:53:21.098690+01:00 | \n",
- "
\n",
- " \n",
- " 177 | \n",
- " 187 | \n",
- " 239 | \n",
- " 10 | \n",
- " 2023-02-17 16:57:42.623481+01:00 | \n",
- " 2023-02-17 16:57:42.623481+01:00 | \n",
- "
\n",
- " \n",
- " 178 | \n",
- " 188 | \n",
- " 240 | \n",
- " 10 | \n",
- " 2023-02-17 16:59:22.067723+01:00 | \n",
- " 2023-02-17 16:59:22.067723+01:00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
179 rows × 5 columns
\n",
- "
"
- ],
- "text/plain": [
- " id structure_id tag_id created_at \\\n",
- "0 123 187 6 2023-01-27 16:03:59.680222+01:00 \n",
- "1 2 2 2 2021-05-07 15:24:19.872895+02:00 \n",
- "2 3 3 2 2021-05-07 15:24:19.873830+02:00 \n",
- "3 4 4 2 2021-05-07 15:24:19.874628+02:00 \n",
- "4 5 5 2 2021-05-07 15:24:19.875421+02:00 \n",
- ".. ... ... ... ... \n",
- "174 184 236 10 2023-02-17 16:35:25.041114+01:00 \n",
- "175 185 237 17 2023-02-17 16:39:10.799478+01:00 \n",
- "176 186 238 19 2023-02-17 16:53:21.098690+01:00 \n",
- "177 187 239 10 2023-02-17 16:57:42.623481+01:00 \n",
- "178 188 240 10 2023-02-17 16:59:22.067723+01:00 \n",
- "\n",
- " updated_at \n",
- "0 2023-01-27 16:03:59.680222+01:00 \n",
- "1 2021-05-07 15:24:19.872895+02:00 \n",
- "2 2021-05-07 15:24:19.873830+02:00 \n",
- "3 2021-05-07 15:24:19.874628+02:00 \n",
- "4 2021-05-07 15:24:19.875421+02:00 \n",
- ".. ... \n",
- "174 2023-02-17 16:35:25.041114+01:00 \n",
- "175 2023-02-17 16:39:10.799478+01:00 \n",
- "176 2023-02-17 16:53:21.098690+01:00 \n",
- "177 2023-02-17 16:57:42.623481+01:00 \n",
- "178 2023-02-17 16:59:22.067723+01:00 \n",
- "\n",
- "[179 rows x 5 columns]"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"structure_tag_mappings"
]
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": null,
"id": "a479ceeb-0135-4899-9cbc-90ed7bf941fe",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['id', 'lastname', 'firstname', 'birthdate', 'email', 'street_id',\n",
- " 'created_at', 'updated_at', 'civility', 'is_partner', 'extra',\n",
- " 'deleted_at', 'reference', 'gender', 'is_email_true', 'extra_field',\n",
- " 'identifier', 'opt_in', 'structure_id', 'note', 'profession',\n",
- " 'language', 'mcp_contact_id', 'need_reload', 'last_buying_date',\n",
- " 'max_price', 'ticket_sum', 'average_price', 'fidelity',\n",
- " 'average_purchase_delay', 'average_price_basket',\n",
- " 'average_ticket_basket', 'total_price', 'preferred_category',\n",
- " 'preferred_supplier', 'preferred_formula', 'purchase_count',\n",
- " 'first_buying_date', 'last_visiting_date', 'zipcode', 'country', 'age',\n",
- " 'tenant_id'],\n",
- " dtype='object')\n",
- "(71307, 43)\n",
- "\n",
- "RangeIndex: 71307 entries, 0 to 71306\n",
- "Data columns (total 43 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 71307 non-null int64 \n",
- " 1 lastname 41045 non-null object \n",
- " 2 firstname 39140 non-null object \n",
- " 3 birthdate 18174 non-null object \n",
- " 4 email 58203 non-null object \n",
- " 5 street_id 71307 non-null int64 \n",
- " 6 created_at 71307 non-null object \n",
- " 7 updated_at 71307 non-null object \n",
- " 8 civility 0 non-null float64\n",
- " 9 is_partner 71307 non-null bool \n",
- " 10 extra 0 non-null float64\n",
- " 11 deleted_at 0 non-null float64\n",
- " 12 reference 0 non-null float64\n",
- " 13 gender 71307 non-null int64 \n",
- " 14 is_email_true 71307 non-null bool \n",
- " 15 extra_field 0 non-null float64\n",
- " 16 identifier 71307 non-null object \n",
- " 17 opt_in 71307 non-null bool \n",
- " 18 structure_id 616 non-null float64\n",
- " 19 note 451 non-null object \n",
- " 20 profession 812 non-null object \n",
- " 21 language 0 non-null float64\n",
- " 22 mcp_contact_id 22417 non-null float64\n",
- " 23 need_reload 71307 non-null bool \n",
- " 24 last_buying_date 34040 non-null object \n",
- " 25 max_price 34040 non-null float64\n",
- " 26 ticket_sum 71307 non-null int64 \n",
- " 27 average_price 68694 non-null float64\n",
- " 28 fidelity 71307 non-null int64 \n",
- " 29 average_purchase_delay 34040 non-null float64\n",
- " 30 average_price_basket 34040 non-null float64\n",
- " 31 average_ticket_basket 34040 non-null float64\n",
- " 32 total_price 36653 non-null float64\n",
- " 33 preferred_category 0 non-null float64\n",
- " 34 preferred_supplier 0 non-null float64\n",
- " 35 preferred_formula 0 non-null float64\n",
- " 36 purchase_count 71307 non-null int64 \n",
- " 37 first_buying_date 34040 non-null object \n",
- " 38 last_visiting_date 0 non-null float64\n",
- " 39 zipcode 33756 non-null object \n",
- " 40 country 39910 non-null object \n",
- " 41 age 18174 non-null float64\n",
- " 42 tenant_id 71307 non-null int64 \n",
- "dtypes: bool(4), float64(19), int64(7), object(13)\n",
- "memory usage: 21.5+ MB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Tags = clients\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11customersplus.csv'\n",
@@ -1868,418 +428,17 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": null,
"id": "383e892c-606a-45ce-bdd6-b503b3e0be33",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " lastname | \n",
- " firstname | \n",
- " birthdate | \n",
- " email | \n",
- " street_id | \n",
- " created_at | \n",
- " updated_at | \n",
- " civility | \n",
- " is_partner | \n",
- " ... | \n",
- " preferred_category | \n",
- " preferred_supplier | \n",
- " preferred_formula | \n",
- " purchase_count | \n",
- " first_buying_date | \n",
- " last_visiting_date | \n",
- " zipcode | \n",
- " country | \n",
- " age | \n",
- " tenant_id | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 286834 | \n",
- " lastname286834 | \n",
- " firstname286834 | \n",
- " NaN | \n",
- " email286834 | \n",
- " 6 | \n",
- " 2022-05-19 10:09:09.361137+02:00 | \n",
- " 2022-05-19 10:09:09.361137+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " fr | \n",
- " NaN | \n",
- " 1556 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 330695 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " email330695 | \n",
- " 1 | \n",
- " 2022-07-16 04:10:34.135134+02:00 | \n",
- " 2022-07-16 04:10:34.156704+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 1556 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 330978 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " email330978 | \n",
- " 1 | \n",
- " 2022-07-21 22:14:09.811721+02:00 | \n",
- " 2022-07-21 22:14:09.836051+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 1556 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 338697 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " email338697 | \n",
- " 1 | \n",
- " 2022-09-15 19:02:03.950536+02:00 | \n",
- " 2022-09-15 19:02:03.985642+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 1556 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 338726 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " email338726 | \n",
- " 1 | \n",
- " 2022-09-16 01:24:40.719882+02:00 | \n",
- " 2022-09-16 01:24:40.742753+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 1556 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 71302 | \n",
- " 27105 | \n",
- " lastname27105 | \n",
- " firstname27105 | \n",
- " 1957-01-26 | \n",
- " email27105 | \n",
- " 205024 | \n",
- " 2021-04-22 15:12:59.986534+02:00 | \n",
- " 2023-09-12 18:59:31.613235+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 2 | \n",
- " 2018-12-31 18:56:57+01:00 | \n",
- " NaN | \n",
- " 35700 | \n",
- " fr | \n",
- " 66.0 | \n",
- " 1556 | \n",
- "
\n",
- " \n",
- " 71303 | \n",
- " 27108 | \n",
- " lastname27108 | \n",
- " firstname27108 | \n",
- " NaN | \n",
- " NaN | \n",
- " 205024 | \n",
- " 2021-04-22 15:12:59.989197+02:00 | \n",
- " 2023-09-12 18:27:34.380843+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 6 | \n",
- " 2015-12-29 14:51:46+01:00 | \n",
- " NaN | \n",
- " 35700 | \n",
- " fr | \n",
- " NaN | \n",
- " 1556 | \n",
- "
\n",
- " \n",
- " 71304 | \n",
- " 27110 | \n",
- " lastname27110 | \n",
- " firstname27110 | \n",
- " NaN | \n",
- " NaN | \n",
- " 6 | \n",
- " 2021-04-22 15:12:59.991029+02:00 | \n",
- " 2022-04-14 11:41:33.738500+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 1 | \n",
- " 2018-12-31 19:12:59+01:00 | \n",
- " NaN | \n",
- " NaN | \n",
- " fr | \n",
- " NaN | \n",
- " 1556 | \n",
- "
\n",
- " \n",
- " 71305 | \n",
- " 10607 | \n",
- " lastname10607 | \n",
- " firstname10607 | \n",
- " 1963-01-04 | \n",
- " email10607 | \n",
- " 313332 | \n",
- " 2021-04-22 14:56:45.742226+02:00 | \n",
- " 2023-09-12 17:55:17.723195+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 26 | \n",
- " 2015-10-10 14:11:21+02:00 | \n",
- " NaN | \n",
- " 35850 | \n",
- " fr | \n",
- " 60.0 | \n",
- " 1556 | \n",
- "
\n",
- " \n",
- " 71306 | \n",
- " 19095 | \n",
- " lastname19095 | \n",
- " firstname19095 | \n",
- " 1979-07-16 | \n",
- " email19095 | \n",
- " 6 | \n",
- " 2021-04-22 15:06:30.120537+02:00 | \n",
- " 2023-09-12 18:27:36.904104+02:00 | \n",
- " NaN | \n",
- " False | \n",
- " ... | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 2 | \n",
- " 2019-05-19 21:18:36+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- " fr | \n",
- " 44.0 | \n",
- " 1556 | \n",
- "
\n",
- " \n",
- "
\n",
- "
71307 rows × 43 columns
\n",
- "
"
- ],
- "text/plain": [
- " id lastname firstname birthdate email \\\n",
- "0 286834 lastname286834 firstname286834 NaN email286834 \n",
- "1 330695 NaN NaN NaN email330695 \n",
- "2 330978 NaN NaN NaN email330978 \n",
- "3 338697 NaN NaN NaN email338697 \n",
- "4 338726 NaN NaN NaN email338726 \n",
- "... ... ... ... ... ... \n",
- "71302 27105 lastname27105 firstname27105 1957-01-26 email27105 \n",
- "71303 27108 lastname27108 firstname27108 NaN NaN \n",
- "71304 27110 lastname27110 firstname27110 NaN NaN \n",
- "71305 10607 lastname10607 firstname10607 1963-01-04 email10607 \n",
- "71306 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
- "\n",
- " street_id created_at \\\n",
- "0 6 2022-05-19 10:09:09.361137+02:00 \n",
- "1 1 2022-07-16 04:10:34.135134+02:00 \n",
- "2 1 2022-07-21 22:14:09.811721+02:00 \n",
- "3 1 2022-09-15 19:02:03.950536+02:00 \n",
- "4 1 2022-09-16 01:24:40.719882+02:00 \n",
- "... ... ... \n",
- "71302 205024 2021-04-22 15:12:59.986534+02:00 \n",
- "71303 205024 2021-04-22 15:12:59.989197+02:00 \n",
- "71304 6 2021-04-22 15:12:59.991029+02:00 \n",
- "71305 313332 2021-04-22 14:56:45.742226+02:00 \n",
- "71306 6 2021-04-22 15:06:30.120537+02:00 \n",
- "\n",
- " updated_at civility is_partner ... \\\n",
- "0 2022-05-19 10:09:09.361137+02:00 NaN False ... \n",
- "1 2022-07-16 04:10:34.156704+02:00 NaN False ... \n",
- "2 2022-07-21 22:14:09.836051+02:00 NaN False ... \n",
- "3 2022-09-15 19:02:03.985642+02:00 NaN False ... \n",
- "4 2022-09-16 01:24:40.742753+02:00 NaN False ... \n",
- "... ... ... ... ... \n",
- "71302 2023-09-12 18:59:31.613235+02:00 NaN False ... \n",
- "71303 2023-09-12 18:27:34.380843+02:00 NaN False ... \n",
- "71304 2022-04-14 11:41:33.738500+02:00 NaN False ... \n",
- "71305 2023-09-12 17:55:17.723195+02:00 NaN False ... \n",
- "71306 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
- "\n",
- " preferred_category preferred_supplier preferred_formula \\\n",
- "0 NaN NaN NaN \n",
- "1 NaN NaN NaN \n",
- "2 NaN NaN NaN \n",
- "3 NaN NaN NaN \n",
- "4 NaN NaN NaN \n",
- "... ... ... ... \n",
- "71302 NaN NaN NaN \n",
- "71303 NaN NaN NaN \n",
- "71304 NaN NaN NaN \n",
- "71305 NaN NaN NaN \n",
- "71306 NaN NaN NaN \n",
- "\n",
- " purchase_count first_buying_date last_visiting_date zipcode \\\n",
- "0 0 NaN NaN NaN \n",
- "1 0 NaN NaN NaN \n",
- "2 0 NaN NaN NaN \n",
- "3 0 NaN NaN NaN \n",
- "4 0 NaN NaN NaN \n",
- "... ... ... ... ... \n",
- "71302 2 2018-12-31 18:56:57+01:00 NaN 35700 \n",
- "71303 6 2015-12-29 14:51:46+01:00 NaN 35700 \n",
- "71304 1 2018-12-31 19:12:59+01:00 NaN NaN \n",
- "71305 26 2015-10-10 14:11:21+02:00 NaN 35850 \n",
- "71306 2 2019-05-19 21:18:36+02:00 NaN NaN \n",
- "\n",
- " country age tenant_id \n",
- "0 fr NaN 1556 \n",
- "1 NaN NaN 1556 \n",
- "2 NaN NaN 1556 \n",
- "3 NaN NaN 1556 \n",
- "4 NaN NaN 1556 \n",
- "... ... ... ... \n",
- "71302 fr 66.0 1556 \n",
- "71303 fr NaN 1556 \n",
- "71304 fr NaN 1556 \n",
- "71305 fr 60.0 1556 \n",
- "71306 fr 44.0 1556 \n",
- "\n",
- "[71307 rows x 43 columns]"
- ]
- },
- "execution_count": 25,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"customersplus"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "70324d06-b855-4386-a7de-eef1eb13dfdf",
"metadata": {},
"outputs": [],
@@ -2289,40 +448,10 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": null,
"id": "4bbd743d-51fe-4786-8ad3-5a4a4d09439c",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['id', 'number', 'created_at', 'updated_at', 'purchase_id', 'product_id',\n",
- " 'is_from_subscription', 'type_of', 'supplier_id', 'barcode',\n",
- " 'identifier'],\n",
- " dtype='object')\n",
- "(318969, 11)\n",
- "\n",
- "RangeIndex: 318969 entries, 0 to 318968\n",
- "Data columns (total 11 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 318969 non-null int64 \n",
- " 1 number 318969 non-null object \n",
- " 2 created_at 318969 non-null object \n",
- " 3 updated_at 318969 non-null object \n",
- " 4 purchase_id 318969 non-null int64 \n",
- " 5 product_id 318969 non-null int64 \n",
- " 6 is_from_subscription 318969 non-null bool \n",
- " 7 type_of 318969 non-null int64 \n",
- " 8 supplier_id 318969 non-null int64 \n",
- " 9 barcode 0 non-null float64\n",
- " 10 identifier 318969 non-null object \n",
- "dtypes: bool(1), float64(1), int64(5), object(4)\n",
- "memory usage: 24.6+ MB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# tickets\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11tickets.csv'\n",
@@ -2337,286 +466,20 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": null,
"id": "ea83ea5c-3d47-4a66-a523-04b69b149a20",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " number | \n",
- " created_at | \n",
- " updated_at | \n",
- " purchase_id | \n",
- " product_id | \n",
- " is_from_subscription | \n",
- " type_of | \n",
- " supplier_id | \n",
- " barcode | \n",
- " identifier | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 2119081 | \n",
- " 1433_136_212_68356 | \n",
- " 2023-09-12 17:42:45.396336+02:00 | \n",
- " 2023-09-12 17:42:45.396336+02:00 | \n",
- " 861764 | \n",
- " 209879 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " f694c255855ce5643c6fcc7fed5e9237 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2119082 | \n",
- " 1433_136_194_68356 | \n",
- " 2023-09-12 17:42:45.409056+02:00 | \n",
- " 2023-09-12 17:42:45.409056+02:00 | \n",
- " 861763 | \n",
- " 209879 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " 838d6101db2fc8bc80536d8b91b49859 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 2119083 | \n",
- " 33158_158_343_68357 | \n",
- " 2023-09-12 17:42:45.409824+02:00 | \n",
- " 2023-09-12 17:42:45.409824+02:00 | \n",
- " 861769 | \n",
- " 209880 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " 8a8d938d66a4dc57bcb44c2773c6fdfa | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 2119084 | \n",
- " 33158_158_297_68357 | \n",
- " 2023-09-12 17:42:45.410447+02:00 | \n",
- " 2023-09-12 17:42:45.410447+02:00 | \n",
- " 861767 | \n",
- " 209880 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " b7a3dd0794c0957c942d45b8913e5b96 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 2119085 | \n",
- " 33158_158_318_68357 | \n",
- " 2023-09-12 17:42:45.411059+02:00 | \n",
- " 2023-09-12 17:42:45.411059+02:00 | \n",
- " 861768 | \n",
- " 209880 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " d7ea7e443581ebe520dd13f6cad31af7 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 318964 | \n",
- " 2564021 | \n",
- " 44247_204_239_89278 | \n",
- " 2023-09-12 18:59:48.750953+02:00 | \n",
- " 2023-09-12 18:59:48.750953+02:00 | \n",
- " 1244281 | \n",
- " 210158 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " 82c9af8b2167f7ac34a5e834242b0239 | \n",
- "
\n",
- " \n",
- " 318965 | \n",
- " 2564022 | \n",
- " 44247_204_299_89278 | \n",
- " 2023-09-12 18:59:48.751441+02:00 | \n",
- " 2023-09-12 18:59:48.751441+02:00 | \n",
- " 1244284 | \n",
- " 210158 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " 235e8e608f066cb72949bbd397d0a76f | \n",
- "
\n",
- " \n",
- " 318966 | \n",
- " 2564023 | \n",
- " 44247_204_259_89278 | \n",
- " 2023-09-12 18:59:48.751924+02:00 | \n",
- " 2023-09-12 18:59:48.751924+02:00 | \n",
- " 1244282 | \n",
- " 210158 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " ec22fa828931f030f7e79a4cc5478c4b | \n",
- "
\n",
- " \n",
- " 318967 | \n",
- " 2564024 | \n",
- " 44247_204_279_89278 | \n",
- " 2023-09-12 18:59:48.752425+02:00 | \n",
- " 2023-09-12 18:59:48.752425+02:00 | \n",
- " 1244283 | \n",
- " 210158 | \n",
- " False | \n",
- " 1 | \n",
- " 1702 | \n",
- " NaN | \n",
- " 31ec4deaf718e04caf193e1ff8d621ef | \n",
- "
\n",
- " \n",
- " 318968 | \n",
- " 2513156 | \n",
- " 4854_178_2847_89170 | \n",
- " 2023-09-12 18:52:20.331807+02:00 | \n",
- " 2023-09-12 18:59:48.752904+02:00 | \n",
- " 1244285 | \n",
- " 261922 | \n",
- " False | \n",
- " 3 | \n",
- " 1702 | \n",
- " NaN | \n",
- " 48aef9efab29bfb1537656908863bcc1 | \n",
- "
\n",
- " \n",
- "
\n",
- "
318969 rows × 11 columns
\n",
- "
"
- ],
- "text/plain": [
- " id number created_at \\\n",
- "0 2119081 1433_136_212_68356 2023-09-12 17:42:45.396336+02:00 \n",
- "1 2119082 1433_136_194_68356 2023-09-12 17:42:45.409056+02:00 \n",
- "2 2119083 33158_158_343_68357 2023-09-12 17:42:45.409824+02:00 \n",
- "3 2119084 33158_158_297_68357 2023-09-12 17:42:45.410447+02:00 \n",
- "4 2119085 33158_158_318_68357 2023-09-12 17:42:45.411059+02:00 \n",
- "... ... ... ... \n",
- "318964 2564021 44247_204_239_89278 2023-09-12 18:59:48.750953+02:00 \n",
- "318965 2564022 44247_204_299_89278 2023-09-12 18:59:48.751441+02:00 \n",
- "318966 2564023 44247_204_259_89278 2023-09-12 18:59:48.751924+02:00 \n",
- "318967 2564024 44247_204_279_89278 2023-09-12 18:59:48.752425+02:00 \n",
- "318968 2513156 4854_178_2847_89170 2023-09-12 18:52:20.331807+02:00 \n",
- "\n",
- " updated_at purchase_id product_id \\\n",
- "0 2023-09-12 17:42:45.396336+02:00 861764 209879 \n",
- "1 2023-09-12 17:42:45.409056+02:00 861763 209879 \n",
- "2 2023-09-12 17:42:45.409824+02:00 861769 209880 \n",
- "3 2023-09-12 17:42:45.410447+02:00 861767 209880 \n",
- "4 2023-09-12 17:42:45.411059+02:00 861768 209880 \n",
- "... ... ... ... \n",
- "318964 2023-09-12 18:59:48.750953+02:00 1244281 210158 \n",
- "318965 2023-09-12 18:59:48.751441+02:00 1244284 210158 \n",
- "318966 2023-09-12 18:59:48.751924+02:00 1244282 210158 \n",
- "318967 2023-09-12 18:59:48.752425+02:00 1244283 210158 \n",
- "318968 2023-09-12 18:59:48.752904+02:00 1244285 261922 \n",
- "\n",
- " is_from_subscription type_of supplier_id barcode \\\n",
- "0 False 1 1702 NaN \n",
- "1 False 1 1702 NaN \n",
- "2 False 1 1702 NaN \n",
- "3 False 1 1702 NaN \n",
- "4 False 1 1702 NaN \n",
- "... ... ... ... ... \n",
- "318964 False 1 1702 NaN \n",
- "318965 False 1 1702 NaN \n",
- "318966 False 1 1702 NaN \n",
- "318967 False 1 1702 NaN \n",
- "318968 False 3 1702 NaN \n",
- "\n",
- " identifier \n",
- "0 f694c255855ce5643c6fcc7fed5e9237 \n",
- "1 838d6101db2fc8bc80536d8b91b49859 \n",
- "2 8a8d938d66a4dc57bcb44c2773c6fdfa \n",
- "3 b7a3dd0794c0957c942d45b8913e5b96 \n",
- "4 d7ea7e443581ebe520dd13f6cad31af7 \n",
- "... ... \n",
- "318964 82c9af8b2167f7ac34a5e834242b0239 \n",
- "318965 235e8e608f066cb72949bbd397d0a76f \n",
- "318966 ec22fa828931f030f7e79a4cc5478c4b \n",
- "318967 31ec4deaf718e04caf193e1ff8d621ef \n",
- "318968 48aef9efab29bfb1537656908863bcc1 \n",
- "\n",
- "[318969 rows x 11 columns]"
- ]
- },
- "execution_count": 30,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"tickets"
]
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": null,
"id": "ba15708e-eb84-4b5d-a86c-05ebed188cf6",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([1, 3, 0])"
- ]
- },
- "execution_count": 33,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"tickets['type_of'].unique()"
]
@@ -2631,41 +494,10 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "e14dcf62-2def-4ed5-834b-cf21abbc2894",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['id', 'created_at', 'updated_at', 'season_id', 'facility_id', 'name',\n",
- " 'event_type_id', 'manual_added', 'is_display', 'event_type_key_id',\n",
- " 'facility_key_id', 'identifier'],\n",
- " dtype='object')\n",
- "(403, 12)\n",
- "\n",
- "RangeIndex: 403 entries, 0 to 402\n",
- "Data columns (total 12 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 403 non-null int64 \n",
- " 1 created_at 403 non-null object\n",
- " 2 updated_at 403 non-null object\n",
- " 3 season_id 403 non-null int64 \n",
- " 4 facility_id 403 non-null int64 \n",
- " 5 name 403 non-null object\n",
- " 6 event_type_id 403 non-null int64 \n",
- " 7 manual_added 403 non-null bool \n",
- " 8 is_display 403 non-null bool \n",
- " 9 event_type_key_id 403 non-null int64 \n",
- " 10 facility_key_id 403 non-null int64 \n",
- " 11 identifier 403 non-null object\n",
- "dtypes: bool(2), int64(6), object(4)\n",
- "memory usage: 32.4+ KB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Evenement = events.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11events.csv'\n",
@@ -2680,357 +512,30 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "d1a1d63c-d7de-4b63-93a8-1c734eb5b316",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " created_at | \n",
- " updated_at | \n",
- " season_id | \n",
- " facility_id | \n",
- " name | \n",
- " event_type_id | \n",
- " manual_added | \n",
- " is_display | \n",
- " event_type_key_id | \n",
- " facility_key_id | \n",
- " identifier | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 20367 | \n",
- " 2023-09-13 03:42:45.214293+02:00 | \n",
- " 2023-09-13 03:54:30.086969+02:00 | \n",
- " 1865 | \n",
- " 1054 | \n",
- " marelle | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " 1054 | \n",
- " 26d1e9a4acad18b9cf79244334c86c93 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 20371 | \n",
- " 2023-09-13 03:42:45.218728+02:00 | \n",
- " 2023-09-13 03:54:30.103943+02:00 | \n",
- " 1865 | \n",
- " 1054 | \n",
- " dialogues | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " 1054 | \n",
- " 60356fc5e8ed6c9c1be9c5ec67e77766 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 20570 | \n",
- " 2023-10-05 04:48:29.374504+02:00 | \n",
- " 2023-10-05 04:48:36.562528+02:00 | \n",
- " 1865 | \n",
- " 1054 | \n",
- " les grandes epopees | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " 1054 | \n",
- " f8ab088e06252bf34e1b12ad2ce1a403 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 20757 | \n",
- " 2023-11-01 03:55:20.846196+01:00 | \n",
- " 2023-11-01 03:55:28.412457+01:00 | \n",
- " 1865 | \n",
- " 1054 | \n",
- " scolaire marelle | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " 1054 | \n",
- " 447fa80f9a793b7587bb85ebbda6442c | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 20364 | \n",
- " 2023-09-13 03:42:45.196791+02:00 | \n",
- " 2023-09-13 03:54:30.075456+02:00 | \n",
- " 1865 | \n",
- " 1054 | \n",
- " le couronnement de poppee | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " 1054 | \n",
- " 3b37f5d2cd354cbc422868621ac7ebc2 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 398 | \n",
- " 15603 | \n",
- " 2023-09-12 17:42:25.327618+02:00 | \n",
- " 2023-09-12 19:00:00.893400+02:00 | \n",
- " 1706 | \n",
- " 1054 | \n",
- " marelle | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " 1054 | \n",
- " fde88b72fb82b1fe42fbbfbfc3d6b4d3 | \n",
- "
\n",
- " \n",
- " 399 | \n",
- " 15621 | \n",
- " 2023-09-12 17:42:25.335792+02:00 | \n",
- " 2023-09-12 19:00:00.899622+02:00 | \n",
- " 1708 | \n",
- " 1054 | \n",
- " cartes d'adhesion | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " 1054 | \n",
- " 051b96aad2b720bad4450a59ed7dfbf6 | \n",
- "
\n",
- " \n",
- " 400 | \n",
- " 15740 | \n",
- " 2023-09-12 17:47:05.112101+02:00 | \n",
- " 2023-09-12 19:00:00.906123+02:00 | \n",
- " 1711 | \n",
- " 1054 | \n",
- " repetition le medecin malgre lui | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " 1054 | \n",
- " addd6885bea5ddf60ec3539dfc3e79e8 | \n",
- "
\n",
- " \n",
- " 401 | \n",
- " 15520 | \n",
- " 2023-09-12 17:42:25.290280+02:00 | \n",
- " 2023-09-12 19:00:00.835625+02:00 | \n",
- " 1708 | \n",
- " 1054 | \n",
- " opera au village | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " 1054 | \n",
- " 94f250d10d4a56358ceab23b384439ff | \n",
- "
\n",
- " \n",
- " 402 | \n",
- " 15439 | \n",
- " 2023-09-12 17:42:25.252747+02:00 | \n",
- " 2023-09-12 19:00:00.735990+02:00 | \n",
- " 1708 | \n",
- " 1054 | \n",
- " florilege | \n",
- " 1055 | \n",
- " False | \n",
- " True | \n",
- " 1055 | \n",
- " 1054 | \n",
- " 4f015946bcbd856aa573cadb7ac42b9f | \n",
- "
\n",
- " \n",
- "
\n",
- "
403 rows × 12 columns
\n",
- "
"
- ],
- "text/plain": [
- " id created_at \\\n",
- "0 20367 2023-09-13 03:42:45.214293+02:00 \n",
- "1 20371 2023-09-13 03:42:45.218728+02:00 \n",
- "2 20570 2023-10-05 04:48:29.374504+02:00 \n",
- "3 20757 2023-11-01 03:55:20.846196+01:00 \n",
- "4 20364 2023-09-13 03:42:45.196791+02:00 \n",
- ".. ... ... \n",
- "398 15603 2023-09-12 17:42:25.327618+02:00 \n",
- "399 15621 2023-09-12 17:42:25.335792+02:00 \n",
- "400 15740 2023-09-12 17:47:05.112101+02:00 \n",
- "401 15520 2023-09-12 17:42:25.290280+02:00 \n",
- "402 15439 2023-09-12 17:42:25.252747+02:00 \n",
- "\n",
- " updated_at season_id facility_id \\\n",
- "0 2023-09-13 03:54:30.086969+02:00 1865 1054 \n",
- "1 2023-09-13 03:54:30.103943+02:00 1865 1054 \n",
- "2 2023-10-05 04:48:36.562528+02:00 1865 1054 \n",
- "3 2023-11-01 03:55:28.412457+01:00 1865 1054 \n",
- "4 2023-09-13 03:54:30.075456+02:00 1865 1054 \n",
- ".. ... ... ... \n",
- "398 2023-09-12 19:00:00.893400+02:00 1706 1054 \n",
- "399 2023-09-12 19:00:00.899622+02:00 1708 1054 \n",
- "400 2023-09-12 19:00:00.906123+02:00 1711 1054 \n",
- "401 2023-09-12 19:00:00.835625+02:00 1708 1054 \n",
- "402 2023-09-12 19:00:00.735990+02:00 1708 1054 \n",
- "\n",
- " name event_type_id manual_added \\\n",
- "0 marelle 1055 False \n",
- "1 dialogues 1055 False \n",
- "2 les grandes epopees 1055 False \n",
- "3 scolaire marelle 1055 False \n",
- "4 le couronnement de poppee 1055 False \n",
- ".. ... ... ... \n",
- "398 marelle 1055 False \n",
- "399 cartes d'adhesion 1055 False \n",
- "400 repetition le medecin malgre lui 1055 False \n",
- "401 opera au village 1055 False \n",
- "402 florilege 1055 False \n",
- "\n",
- " is_display event_type_key_id facility_key_id \\\n",
- "0 True 1055 1054 \n",
- "1 True 1055 1054 \n",
- "2 True 1055 1054 \n",
- "3 True 1055 1054 \n",
- "4 True 1055 1054 \n",
- ".. ... ... ... \n",
- "398 True 1055 1054 \n",
- "399 True 1055 1054 \n",
- "400 True 1055 1054 \n",
- "401 True 1055 1054 \n",
- "402 True 1055 1054 \n",
- "\n",
- " identifier \n",
- "0 26d1e9a4acad18b9cf79244334c86c93 \n",
- "1 60356fc5e8ed6c9c1be9c5ec67e77766 \n",
- "2 f8ab088e06252bf34e1b12ad2ce1a403 \n",
- "3 447fa80f9a793b7587bb85ebbda6442c \n",
- "4 3b37f5d2cd354cbc422868621ac7ebc2 \n",
- ".. ... \n",
- "398 fde88b72fb82b1fe42fbbfbfc3d6b4d3 \n",
- "399 051b96aad2b720bad4450a59ed7dfbf6 \n",
- "400 addd6885bea5ddf60ec3539dfc3e79e8 \n",
- "401 94f250d10d4a56358ceab23b384439ff \n",
- "402 4f015946bcbd856aa573cadb7ac42b9f \n",
- "\n",
- "[403 rows x 12 columns]"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"events"
]
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"id": "af80eee8-f717-4159-a0fd-09d47ec96621",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "357"
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"events['name'].nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"id": "6afc6f3d-4292-4a92-a4d6-14f1edc25df2",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['id', 'serial', 'event_id', 'created_at', 'updated_at',\n",
- " 'start_date_time', 'open', 'satisfaction', 'end_date_time', 'name',\n",
- " 'is_display', 'representation_type_id', 'expected_filling',\n",
- " 'max_filling', 'extra_field', 'identifier'],\n",
- " dtype='object')\n",
- "(996, 16)\n",
- "\n",
- "RangeIndex: 996 entries, 0 to 995\n",
- "Data columns (total 16 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 996 non-null int64 \n",
- " 1 serial 0 non-null float64\n",
- " 2 event_id 996 non-null int64 \n",
- " 3 created_at 996 non-null object \n",
- " 4 updated_at 996 non-null object \n",
- " 5 start_date_time 996 non-null object \n",
- " 6 open 996 non-null bool \n",
- " 7 satisfaction 0 non-null float64\n",
- " 8 end_date_time 996 non-null object \n",
- " 9 name 0 non-null float64\n",
- " 10 is_display 996 non-null bool \n",
- " 11 representation_type_id 0 non-null float64\n",
- " 12 expected_filling 24 non-null float64\n",
- " 13 max_filling 24 non-null float64\n",
- " 14 extra_field 0 non-null float64\n",
- " 15 identifier 996 non-null object \n",
- "dtypes: bool(2), float64(7), int64(2), object(5)\n",
- "memory usage: 111.0+ KB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Représentation des évenements = representations.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11representations.csv'\n",
@@ -3045,382 +550,20 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"id": "1487402a-a49b-4737-b7d7-40c764d2f0b4",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " serial | \n",
- " event_id | \n",
- " created_at | \n",
- " updated_at | \n",
- " start_date_time | \n",
- " open | \n",
- " satisfaction | \n",
- " end_date_time | \n",
- " name | \n",
- " is_display | \n",
- " representation_type_id | \n",
- " expected_filling | \n",
- " max_filling | \n",
- " extra_field | \n",
- " identifier | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 44351 | \n",
- " NaN | \n",
- " 20371 | \n",
- " 2023-09-13 03:42:45.245879+02:00 | \n",
- " 2023-09-13 03:42:45.245879+02:00 | \n",
- " 2023-12-21 20:00:00+01:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " 550.0 | \n",
- " 550.0 | \n",
- " NaN | \n",
- " 33520762e8cc28982e3841cbc2be8ce2 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 45497 | \n",
- " NaN | \n",
- " 20757 | \n",
- " 2023-11-01 03:55:20.875712+01:00 | \n",
- " 2023-11-01 03:55:20.875712+01:00 | \n",
- " 2023-11-28 10:00:00+01:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 5c34b84e3d11276e0995d984c94cd28d | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 44383 | \n",
- " NaN | \n",
- " 20383 | \n",
- " 2023-09-13 10:41:08.964302+02:00 | \n",
- " 2023-09-13 10:41:08.964302+02:00 | \n",
- " 2023-06-04 17:00:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " bf3c65a1dfefbd747dcc2360e6887eac | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 44384 | \n",
- " NaN | \n",
- " 20383 | \n",
- " 2023-09-13 10:41:08.972401+02:00 | \n",
- " 2023-09-13 10:41:08.972401+02:00 | \n",
- " 2023-06-03 17:30:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " b0e69ae8b78ebab3066aac83de22d239 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 44385 | \n",
- " NaN | \n",
- " 20384 | \n",
- " 2023-09-13 10:41:08.973290+02:00 | \n",
- " 2023-09-13 10:41:08.973290+02:00 | \n",
- " 2023-06-03 16:15:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 9fb91c8b1cf9e444111c511e212ac5c1 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 991 | \n",
- " 33894 | \n",
- " NaN | \n",
- " 15647 | \n",
- " 2023-09-12 17:42:25.564297+02:00 | \n",
- " 2023-09-12 17:42:25.564297+02:00 | \n",
- " 2022-11-08 20:00:00+01:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 44bbcecfd007ceaad05805391beccabb | \n",
- "
\n",
- " \n",
- " 992 | \n",
- " 33873 | \n",
- " NaN | \n",
- " 15640 | \n",
- " 2023-09-12 17:42:25.554863+02:00 | \n",
- " 2023-09-12 17:42:25.554863+02:00 | \n",
- " 2022-11-14 20:00:00+01:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 151edbec8e0a3cd80071038e857f3493 | \n",
- "
\n",
- " \n",
- " 993 | \n",
- " 33610 | \n",
- " NaN | \n",
- " 15520 | \n",
- " 2023-09-12 17:42:25.442979+02:00 | \n",
- " 2023-09-12 17:42:25.442979+02:00 | \n",
- " 2023-06-19 18:00:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 9e9e38d527427e1b6f67e0c3f12b82fc | \n",
- "
\n",
- " \n",
- " 994 | \n",
- " 33953 | \n",
- " NaN | \n",
- " 15520 | \n",
- " 2023-09-12 17:42:25.590746+02:00 | \n",
- " 2023-09-12 17:42:25.590746+02:00 | \n",
- " 2023-06-19 20:00:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 7bf0978aabb6cac1bb4cd2784afb2b6b | \n",
- "
\n",
- " \n",
- " 995 | \n",
- " 33639 | \n",
- " NaN | \n",
- " 15533 | \n",
- " 2023-09-12 17:42:25.455708+02:00 | \n",
- " 2023-09-12 17:42:25.455708+02:00 | \n",
- " 2023-04-15 17:30:00+02:00 | \n",
- " True | \n",
- " NaN | \n",
- " 1901-01-01 00:09:21+00:09 | \n",
- " NaN | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " fae68f1e09710ec8747957af6e22f61d | \n",
- "
\n",
- " \n",
- "
\n",
- "
996 rows × 16 columns
\n",
- "
"
- ],
- "text/plain": [
- " id serial event_id created_at \\\n",
- "0 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n",
- "1 45497 NaN 20757 2023-11-01 03:55:20.875712+01:00 \n",
- "2 44383 NaN 20383 2023-09-13 10:41:08.964302+02:00 \n",
- "3 44384 NaN 20383 2023-09-13 10:41:08.972401+02:00 \n",
- "4 44385 NaN 20384 2023-09-13 10:41:08.973290+02:00 \n",
- ".. ... ... ... ... \n",
- "991 33894 NaN 15647 2023-09-12 17:42:25.564297+02:00 \n",
- "992 33873 NaN 15640 2023-09-12 17:42:25.554863+02:00 \n",
- "993 33610 NaN 15520 2023-09-12 17:42:25.442979+02:00 \n",
- "994 33953 NaN 15520 2023-09-12 17:42:25.590746+02:00 \n",
- "995 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n",
- "\n",
- " updated_at start_date_time open \\\n",
- "0 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n",
- "1 2023-11-01 03:55:20.875712+01:00 2023-11-28 10:00:00+01:00 True \n",
- "2 2023-09-13 10:41:08.964302+02:00 2023-06-04 17:00:00+02:00 True \n",
- "3 2023-09-13 10:41:08.972401+02:00 2023-06-03 17:30:00+02:00 True \n",
- "4 2023-09-13 10:41:08.973290+02:00 2023-06-03 16:15:00+02:00 True \n",
- ".. ... ... ... \n",
- "991 2023-09-12 17:42:25.564297+02:00 2022-11-08 20:00:00+01:00 True \n",
- "992 2023-09-12 17:42:25.554863+02:00 2022-11-14 20:00:00+01:00 True \n",
- "993 2023-09-12 17:42:25.442979+02:00 2023-06-19 18:00:00+02:00 True \n",
- "994 2023-09-12 17:42:25.590746+02:00 2023-06-19 20:00:00+02:00 True \n",
- "995 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n",
- "\n",
- " satisfaction end_date_time name is_display \\\n",
- "0 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
- "1 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
- "2 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
- "3 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
- "4 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
- ".. ... ... ... ... \n",
- "991 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
- "992 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
- "993 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
- "994 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
- "995 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
- "\n",
- " representation_type_id expected_filling max_filling extra_field \\\n",
- "0 NaN 550.0 550.0 NaN \n",
- "1 NaN NaN NaN NaN \n",
- "2 NaN NaN NaN NaN \n",
- "3 NaN NaN NaN NaN \n",
- "4 NaN NaN NaN NaN \n",
- ".. ... ... ... ... \n",
- "991 NaN NaN NaN NaN \n",
- "992 NaN NaN NaN NaN \n",
- "993 NaN NaN NaN NaN \n",
- "994 NaN NaN NaN NaN \n",
- "995 NaN NaN NaN NaN \n",
- "\n",
- " identifier \n",
- "0 33520762e8cc28982e3841cbc2be8ce2 \n",
- "1 5c34b84e3d11276e0995d984c94cd28d \n",
- "2 bf3c65a1dfefbd747dcc2360e6887eac \n",
- "3 b0e69ae8b78ebab3066aac83de22d239 \n",
- "4 9fb91c8b1cf9e444111c511e212ac5c1 \n",
- ".. ... \n",
- "991 44bbcecfd007ceaad05805391beccabb \n",
- "992 151edbec8e0a3cd80071038e857f3493 \n",
- "993 9e9e38d527427e1b6f67e0c3f12b82fc \n",
- "994 7bf0978aabb6cac1bb4cd2784afb2b6b \n",
- "995 fae68f1e09710ec8747957af6e22f61d \n",
- "\n",
- "[996 rows x 16 columns]"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"representations"
]
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"id": "99b27418-2c15-4a6e-bcf5-d329ca492085",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['id', 'amount', 'is_full_price', 'representation_id',\n",
- " 'pricing_formula_id', 'created_at', 'updated_at', 'category_id',\n",
- " 'apply_price', 'products_group_id', 'product_pack_id', 'extra_field',\n",
- " 'amount_consumption', 'identifier'],\n",
- " dtype='object')\n",
- "(14648, 14)\n",
- "\n",
- "RangeIndex: 14648 entries, 0 to 14647\n",
- "Data columns (total 14 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 14648 non-null int64 \n",
- " 1 amount 14648 non-null float64\n",
- " 2 is_full_price 14648 non-null bool \n",
- " 3 representation_id 14648 non-null int64 \n",
- " 4 pricing_formula_id 14648 non-null int64 \n",
- " 5 created_at 14648 non-null object \n",
- " 6 updated_at 14648 non-null object \n",
- " 7 category_id 14648 non-null int64 \n",
- " 8 apply_price 14648 non-null float64\n",
- " 9 products_group_id 14648 non-null int64 \n",
- " 10 product_pack_id 14648 non-null int64 \n",
- " 11 extra_field 0 non-null float64\n",
- " 12 amount_consumption 0 non-null float64\n",
- " 13 identifier 14648 non-null object \n",
- "dtypes: bool(1), float64(4), int64(6), object(3)\n",
- "memory usage: 1.5+ MB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Produits vendues = products.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11products.csv'\n",
@@ -3435,336 +578,20 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"id": "c49bcd47-672f-4e0f-aee9-a7475151b97f",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " amount | \n",
- " is_full_price | \n",
- " representation_id | \n",
- " pricing_formula_id | \n",
- " created_at | \n",
- " updated_at | \n",
- " category_id | \n",
- " apply_price | \n",
- " products_group_id | \n",
- " product_pack_id | \n",
- " extra_field | \n",
- " amount_consumption | \n",
- " identifier | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 268325 | \n",
- " 18.0 | \n",
- " False | \n",
- " 44332 | \n",
- " 20477 | \n",
- " 2023-09-13 03:42:45.415594+02:00 | \n",
- " 2023-09-13 03:42:45.415594+02:00 | \n",
- " 4972 | \n",
- " 0.0 | \n",
- " 268108 | \n",
- " 1 | \n",
- " NaN | \n",
- " NaN | \n",
- " b823bbea3ba837da2ef8efaf1287272d | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 274118 | \n",
- " 36.8 | \n",
- " False | \n",
- " 44340 | \n",
- " 20502 | \n",
- " 2023-10-25 03:26:57.430694+02:00 | \n",
- " 2023-10-25 03:26:57.430694+02:00 | \n",
- " 4969 | \n",
- " 0.0 | \n",
- " 273901 | \n",
- " 1 | \n",
- " NaN | \n",
- " NaN | \n",
- " 81e8b7991f6948e3ef7cfe5011d13532 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 268338 | \n",
- " 39.1 | \n",
- " False | \n",
- " 44340 | \n",
- " 20497 | \n",
- " 2023-09-13 03:42:45.430942+02:00 | \n",
- " 2023-09-13 03:42:45.430942+02:00 | \n",
- " 4969 | \n",
- " 0.0 | \n",
- " 268121 | \n",
- " 1 | \n",
- " NaN | \n",
- " NaN | \n",
- " be8bc0399db4d04aefa9f44afd4d5efa | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 209883 | \n",
- " 0.0 | \n",
- " False | \n",
- " 33443 | \n",
- " 20475 | \n",
- " 2023-09-12 17:42:27.595998+02:00 | \n",
- " 2023-09-12 17:42:27.595998+02:00 | \n",
- " 4970 | \n",
- " 0.0 | \n",
- " 209706 | \n",
- " 1 | \n",
- " NaN | \n",
- " NaN | \n",
- " 01a9eea5f8ad53491faa864bfac44183 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 268326 | \n",
- " 63.0 | \n",
- " False | \n",
- " 44333 | \n",
- " 20477 | \n",
- " 2023-09-13 03:42:45.417283+02:00 | \n",
- " 2023-09-13 03:42:45.417283+02:00 | \n",
- " 4969 | \n",
- " 0.0 | \n",
- " 268109 | \n",
- " 1 | \n",
- " NaN | \n",
- " NaN | \n",
- " 781a917ecfdabb14169701d7b143bbe4 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 14643 | \n",
- " 217878 | \n",
- " 33.6 | \n",
- " False | \n",
- " 33919 | \n",
- " 20489 | \n",
- " 2023-09-12 17:51:11.572882+02:00 | \n",
- " 2023-09-12 17:51:11.572882+02:00 | \n",
- " 4971 | \n",
- " 0.0 | \n",
- " 217695 | \n",
- " 1 | \n",
- " NaN | \n",
- " NaN | \n",
- " 82bba69321466069411b3023343b44a4 | \n",
- "
\n",
- " \n",
- " 14644 | \n",
- " 268315 | \n",
- " 10.0 | \n",
- " False | \n",
- " 33919 | \n",
- " 20504 | \n",
- " 2023-09-12 18:59:29.995176+02:00 | \n",
- " 2023-09-12 18:59:29.995176+02:00 | \n",
- " 4969 | \n",
- " 0.0 | \n",
- " 268098 | \n",
- " 1 | \n",
- " NaN | \n",
- " NaN | \n",
- " eae56a8eb0a4315c5713b2053103d595 | \n",
- "
\n",
- " \n",
- " 14645 | \n",
- " 210148 | \n",
- " 5.0 | \n",
- " False | \n",
- " 33531 | \n",
- " 20473 | \n",
- " 2023-09-12 17:42:27.733260+02:00 | \n",
- " 2023-09-12 17:42:27.733260+02:00 | \n",
- " 4975 | \n",
- " 0.0 | \n",
- " 209971 | \n",
- " 1 | \n",
- " NaN | \n",
- " NaN | \n",
- " 449f86c1ef2b478d3389f7d0e27d0e6b | \n",
- "
\n",
- " \n",
- " 14646 | \n",
- " 212054 | \n",
- " 30.0 | \n",
- " False | \n",
- " 33810 | \n",
- " 20473 | \n",
- " 2023-09-12 17:42:28.724681+02:00 | \n",
- " 2023-09-12 17:42:28.724681+02:00 | \n",
- " 4972 | \n",
- " 0.0 | \n",
- " 211876 | \n",
- " 1 | \n",
- " NaN | \n",
- " NaN | \n",
- " 2090203e2c0b58ea8f505089faee6d62 | \n",
- "
\n",
- " \n",
- " 14647 | \n",
- " 261922 | \n",
- " 21.0 | \n",
- " False | \n",
- " 33766 | \n",
- " 20488 | \n",
- " 2023-09-12 18:52:00.519838+02:00 | \n",
- " 2023-09-12 18:52:00.519838+02:00 | \n",
- " 4972 | \n",
- " 0.0 | \n",
- " 261709 | \n",
- " 1 | \n",
- " NaN | \n",
- " NaN | \n",
- " 9139ee36a92bed766ae95372cca77336 | \n",
- "
\n",
- " \n",
- "
\n",
- "
14648 rows × 14 columns
\n",
- "
"
- ],
- "text/plain": [
- " id amount is_full_price representation_id pricing_formula_id \\\n",
- "0 268325 18.0 False 44332 20477 \n",
- "1 274118 36.8 False 44340 20502 \n",
- "2 268338 39.1 False 44340 20497 \n",
- "3 209883 0.0 False 33443 20475 \n",
- "4 268326 63.0 False 44333 20477 \n",
- "... ... ... ... ... ... \n",
- "14643 217878 33.6 False 33919 20489 \n",
- "14644 268315 10.0 False 33919 20504 \n",
- "14645 210148 5.0 False 33531 20473 \n",
- "14646 212054 30.0 False 33810 20473 \n",
- "14647 261922 21.0 False 33766 20488 \n",
- "\n",
- " created_at updated_at \\\n",
- "0 2023-09-13 03:42:45.415594+02:00 2023-09-13 03:42:45.415594+02:00 \n",
- "1 2023-10-25 03:26:57.430694+02:00 2023-10-25 03:26:57.430694+02:00 \n",
- "2 2023-09-13 03:42:45.430942+02:00 2023-09-13 03:42:45.430942+02:00 \n",
- "3 2023-09-12 17:42:27.595998+02:00 2023-09-12 17:42:27.595998+02:00 \n",
- "4 2023-09-13 03:42:45.417283+02:00 2023-09-13 03:42:45.417283+02:00 \n",
- "... ... ... \n",
- "14643 2023-09-12 17:51:11.572882+02:00 2023-09-12 17:51:11.572882+02:00 \n",
- "14644 2023-09-12 18:59:29.995176+02:00 2023-09-12 18:59:29.995176+02:00 \n",
- "14645 2023-09-12 17:42:27.733260+02:00 2023-09-12 17:42:27.733260+02:00 \n",
- "14646 2023-09-12 17:42:28.724681+02:00 2023-09-12 17:42:28.724681+02:00 \n",
- "14647 2023-09-12 18:52:00.519838+02:00 2023-09-12 18:52:00.519838+02:00 \n",
- "\n",
- " category_id apply_price products_group_id product_pack_id \\\n",
- "0 4972 0.0 268108 1 \n",
- "1 4969 0.0 273901 1 \n",
- "2 4969 0.0 268121 1 \n",
- "3 4970 0.0 209706 1 \n",
- "4 4969 0.0 268109 1 \n",
- "... ... ... ... ... \n",
- "14643 4971 0.0 217695 1 \n",
- "14644 4969 0.0 268098 1 \n",
- "14645 4975 0.0 209971 1 \n",
- "14646 4972 0.0 211876 1 \n",
- "14647 4972 0.0 261709 1 \n",
- "\n",
- " extra_field amount_consumption identifier \n",
- "0 NaN NaN b823bbea3ba837da2ef8efaf1287272d \n",
- "1 NaN NaN 81e8b7991f6948e3ef7cfe5011d13532 \n",
- "2 NaN NaN be8bc0399db4d04aefa9f44afd4d5efa \n",
- "3 NaN NaN 01a9eea5f8ad53491faa864bfac44183 \n",
- "4 NaN NaN 781a917ecfdabb14169701d7b143bbe4 \n",
- "... ... ... ... \n",
- "14643 NaN NaN 82bba69321466069411b3023343b44a4 \n",
- "14644 NaN NaN eae56a8eb0a4315c5713b2053103d595 \n",
- "14645 NaN NaN 449f86c1ef2b478d3389f7d0e27d0e6b \n",
- "14646 NaN NaN 2090203e2c0b58ea8f505089faee6d62 \n",
- "14647 NaN NaN 9139ee36a92bed766ae95372cca77336 \n",
- "\n",
- "[14648 rows x 14 columns]"
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"products"
]
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"id": "a4aec5ce-d0c9-4625-bb29-9ac154818621",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['id', 'name', 'created_at', 'updated_at', 'street_id', 'fixed_capacity',\n",
- " 'identifier'],\n",
- " dtype='object')\n",
- "(1, 7)\n",
- "\n",
- "RangeIndex: 1 entries, 0 to 0\n",
- "Data columns (total 7 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 1 non-null int64 \n",
- " 1 name 0 non-null float64\n",
- " 2 created_at 1 non-null object \n",
- " 3 updated_at 1 non-null object \n",
- " 4 street_id 1 non-null int64 \n",
- " 5 fixed_capacity 0 non-null float64\n",
- " 6 identifier 1 non-null object \n",
- "dtypes: float64(2), int64(2), object(3)\n",
- "memory usage: 184.0+ bytes\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Lieu = facilities.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11facilities.csv'\n",
@@ -3779,105 +606,20 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": null,
"id": "b3642483-2879-442a-ad69-efcd2331a200",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " name | \n",
- " created_at | \n",
- " updated_at | \n",
- " street_id | \n",
- " fixed_capacity | \n",
- " identifier | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1054 | \n",
- " NaN | \n",
- " 2023-09-12 17:42:25.223064+02:00 | \n",
- " 2023-09-12 17:42:25.223064+02:00 | \n",
- " 1 | \n",
- " NaN | \n",
- " d41d8cd98f00b204e9800998ecf8427e | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " id name created_at \\\n",
- "0 1054 NaN 2023-09-12 17:42:25.223064+02:00 \n",
- "\n",
- " updated_at street_id fixed_capacity \\\n",
- "0 2023-09-12 17:42:25.223064+02:00 1 NaN \n",
- "\n",
- " identifier \n",
- "0 d41d8cd98f00b204e9800998ecf8427e "
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"facilities"
]
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": null,
"id": "da1e9807-2a8d-4be7-a785-55cffd734f36",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['id', 'name', 'created_at', 'updated_at', 'start_date_time',\n",
- " 'identifier'],\n",
- " dtype='object')\n",
- "(9, 6)\n",
- "\n",
- "RangeIndex: 9 entries, 0 to 8\n",
- "Data columns (total 6 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 9 non-null int64 \n",
- " 1 name 9 non-null object \n",
- " 2 created_at 9 non-null object \n",
- " 3 updated_at 9 non-null object \n",
- " 4 start_date_time 0 non-null float64\n",
- " 5 identifier 9 non-null object \n",
- "dtypes: float64(1), int64(1), object(4)\n",
- "memory usage: 560.0+ bytes\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Saisons = seasons.csv période sur deux années consécutives\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11seasons.csv'\n",
@@ -3892,59 +634,20 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": null,
"id": "ec8a37b5-2d78-4b1c-aa47-bd923fdc2ba9",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array(['saison 2023-2024', 'saison 2021-2022', 'saison 2015-2016',\n",
- " 'saison 2016-2017', 'saison 2017-2018', 'saison 2018-2019',\n",
- " 'saison 2020-2021', 'saison 2019-2020', 'saison 2022-2023'],\n",
- " dtype=object)"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"seasons['name'].unique()"
]
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": null,
"id": "abb3aa20-774b-4761-983a-df5eb2bc51c6",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['id', 'purchase_date', 'customer_id', 'created_at', 'updated_at',\n",
- " 'number', 'identifier'],\n",
- " dtype='object')\n",
- "(410695, 7)\n",
- "\n",
- "RangeIndex: 410695 entries, 0 to 410694\n",
- "Data columns (total 7 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 410695 non-null int64 \n",
- " 1 purchase_date 410695 non-null object \n",
- " 2 customer_id 410695 non-null int64 \n",
- " 3 created_at 410695 non-null object \n",
- " 4 updated_at 410695 non-null object \n",
- " 5 number 0 non-null float64\n",
- " 6 identifier 410695 non-null object \n",
- "dtypes: float64(1), int64(2), object(4)\n",
- "memory usage: 21.9+ MB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Achats = purchases.csv \n",
"FILE_PATH_S3 = 'bdc2324-data/11/11purchases.csv'\n",
@@ -3959,204 +662,10 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": null,
"id": "30e204ab-4f63-430c-a818-5c8035b6e17b",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " purchase_date | \n",
- " customer_id | \n",
- " created_at | \n",
- " updated_at | \n",
- " number | \n",
- " identifier | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 861761 | \n",
- " 2019-03-01 16:28:49+01:00 | \n",
- " 4966 | \n",
- " 2023-09-12 17:42:37.564150+02:00 | \n",
- " 2023-09-12 17:42:37.564150+02:00 | \n",
- " NaN | \n",
- " d20eb0c3a7efec0bbe338dee40dc3378 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 861762 | \n",
- " 2019-03-01 16:29:11+01:00 | \n",
- " 4966 | \n",
- " 2023-09-12 17:42:37.571159+02:00 | \n",
- " 2023-09-12 17:42:37.571159+02:00 | \n",
- " NaN | \n",
- " cff3abfc018517bce5ccfc58f5cacf40 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 861763 | \n",
- " 2019-03-01 16:29:17+01:00 | \n",
- " 4966 | \n",
- " 2023-09-12 17:42:37.571646+02:00 | \n",
- " 2023-09-12 17:42:37.571646+02:00 | \n",
- " NaN | \n",
- " e1155cf26b34f792bdb23e49244d7264 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 861764 | \n",
- " 2019-03-01 16:29:19+01:00 | \n",
- " 4966 | \n",
- " 2023-09-12 17:42:37.572063+02:00 | \n",
- " 2023-09-12 17:42:37.572063+02:00 | \n",
- " NaN | \n",
- " e8b95cc6a1a8b103ffa39755ce3bfc4d | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 861765 | \n",
- " 2019-03-01 16:32:08+01:00 | \n",
- " 405994 | \n",
- " 2023-09-12 17:42:37.572470+02:00 | \n",
- " 2023-09-12 17:42:37.572470+02:00 | \n",
- " NaN | \n",
- " 1b763278914f1309e357abe5033a3f0f | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 410690 | \n",
- " 1285964 | \n",
- " 2023-10-21 21:46:41+02:00 | \n",
- " 517309 | \n",
- " 2023-10-23 03:43:16.457501+02:00 | \n",
- " 2023-10-23 03:43:16.457501+02:00 | \n",
- " NaN | \n",
- " 72c4e90c2b151dcffc87b19ea8a0c4f1 | \n",
- "
\n",
- " \n",
- " 410691 | \n",
- " 1285965 | \n",
- " 2023-10-21 21:47:07+02:00 | \n",
- " 517309 | \n",
- " 2023-10-23 03:43:16.458458+02:00 | \n",
- " 2023-10-23 03:43:16.458458+02:00 | \n",
- " NaN | \n",
- " ee65532087132145daa6154fbae050ea | \n",
- "
\n",
- " \n",
- " 410692 | \n",
- " 1285966 | \n",
- " 2023-10-21 21:47:20+02:00 | \n",
- " 517309 | \n",
- " 2023-10-23 03:43:16.458811+02:00 | \n",
- " 2023-10-23 03:43:16.458811+02:00 | \n",
- " NaN | \n",
- " 7e825dd352bc6a11ab81cb8068e325e6 | \n",
- "
\n",
- " \n",
- " 410693 | \n",
- " 1285967 | \n",
- " 2023-10-21 23:07:06+02:00 | \n",
- " 399969 | \n",
- " 2023-10-23 03:43:16.459738+02:00 | \n",
- " 2023-10-23 03:43:16.459738+02:00 | \n",
- " NaN | \n",
- " fdb92627a48d6ba8fa817d60a83dbea8 | \n",
- "
\n",
- " \n",
- " 410694 | \n",
- " 1285968 | \n",
- " 2023-10-21 23:07:39+02:00 | \n",
- " 399969 | \n",
- " 2023-10-23 03:43:16.462409+02:00 | \n",
- " 2023-10-23 03:43:16.462409+02:00 | \n",
- " NaN | \n",
- " e9dbaff4f7037a5b0efa11263584dfad | \n",
- "
\n",
- " \n",
- "
\n",
- "
410695 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " id purchase_date customer_id \\\n",
- "0 861761 2019-03-01 16:28:49+01:00 4966 \n",
- "1 861762 2019-03-01 16:29:11+01:00 4966 \n",
- "2 861763 2019-03-01 16:29:17+01:00 4966 \n",
- "3 861764 2019-03-01 16:29:19+01:00 4966 \n",
- "4 861765 2019-03-01 16:32:08+01:00 405994 \n",
- "... ... ... ... \n",
- "410690 1285964 2023-10-21 21:46:41+02:00 517309 \n",
- "410691 1285965 2023-10-21 21:47:07+02:00 517309 \n",
- "410692 1285966 2023-10-21 21:47:20+02:00 517309 \n",
- "410693 1285967 2023-10-21 23:07:06+02:00 399969 \n",
- "410694 1285968 2023-10-21 23:07:39+02:00 399969 \n",
- "\n",
- " created_at updated_at \\\n",
- "0 2023-09-12 17:42:37.564150+02:00 2023-09-12 17:42:37.564150+02:00 \n",
- "1 2023-09-12 17:42:37.571159+02:00 2023-09-12 17:42:37.571159+02:00 \n",
- "2 2023-09-12 17:42:37.571646+02:00 2023-09-12 17:42:37.571646+02:00 \n",
- "3 2023-09-12 17:42:37.572063+02:00 2023-09-12 17:42:37.572063+02:00 \n",
- "4 2023-09-12 17:42:37.572470+02:00 2023-09-12 17:42:37.572470+02:00 \n",
- "... ... ... \n",
- "410690 2023-10-23 03:43:16.457501+02:00 2023-10-23 03:43:16.457501+02:00 \n",
- "410691 2023-10-23 03:43:16.458458+02:00 2023-10-23 03:43:16.458458+02:00 \n",
- "410692 2023-10-23 03:43:16.458811+02:00 2023-10-23 03:43:16.458811+02:00 \n",
- "410693 2023-10-23 03:43:16.459738+02:00 2023-10-23 03:43:16.459738+02:00 \n",
- "410694 2023-10-23 03:43:16.462409+02:00 2023-10-23 03:43:16.462409+02:00 \n",
- "\n",
- " number identifier \n",
- "0 NaN d20eb0c3a7efec0bbe338dee40dc3378 \n",
- "1 NaN cff3abfc018517bce5ccfc58f5cacf40 \n",
- "2 NaN e1155cf26b34f792bdb23e49244d7264 \n",
- "3 NaN e8b95cc6a1a8b103ffa39755ce3bfc4d \n",
- "4 NaN 1b763278914f1309e357abe5033a3f0f \n",
- "... ... ... \n",
- "410690 NaN 72c4e90c2b151dcffc87b19ea8a0c4f1 \n",
- "410691 NaN ee65532087132145daa6154fbae050ea \n",
- "410692 NaN 7e825dd352bc6a11ab81cb8068e325e6 \n",
- "410693 NaN fdb92627a48d6ba8fa817d60a83dbea8 \n",
- "410694 NaN e9dbaff4f7037a5b0efa11263584dfad \n",
- "\n",
- "[410695 rows x 7 columns]"
- ]
- },
- "execution_count": 28,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"purchases"
]