{ "cells": [ { "cell_type": "code", "execution_count": 3, "id": "0eefb67b-5399-44fa-9c1c-7724ec1c7cd2", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "import s3fs\n", "import warnings\n", "from datetime import date, timedelta, datetime\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": null, "id": "37977b4e-42e7-4d8e-8b9a-6843292fd128", "metadata": {}, "outputs": [], "source": [ "# Import KPI construction functions\n", "exec(open('0_KPI_functions.py').read())\n", "# exec(open('../0_KPI_functions.py').read())\n" ] }, { "cell_type": "code", "execution_count": 42, "id": "cca62d72-f809-41a9-bb06-1be7d6b09307", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n", " 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n", " 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n", " 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create filesystem object\n", "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n", "\n", "BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n", "fs.ls(BUCKET)" ] }, { "cell_type": "code", "execution_count": 49, "id": "68fb54f3-8eb3-4cd0-966b-000876912fb5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_idstreet_idstructure_idmcp_contact_idfidelitytenant_idis_partnerdeleted_atgenderis_email_true...max_priceticket_sumaverage_priceaverage_purchase_delayaverage_price_basketaverage_ticket_baskettotal_pricepurchase_countfirst_buying_datecountry
0821538139NaNNaN0875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
18091261063NaNNaN0875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNfr
2110051063NaNNaN0875FalseNaN2False...NaN00.0NaNNaNNaNNaN14NaNfr
31766312731NaNNaN0875FalseNaN0False...NaN00.0NaNNaNNaNNaN1NaNfr
43810012395NaNNaN0875FalseNaN0True...NaN00.0NaNNaNNaNNaN1NaNfr
..................................................................
98789766266139NaN181304.00875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
98790766336139NaN178189.00875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
98791766348139NaN178141.00875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
98792766363139NaN176807.00875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
98793766366139NaN176788.00875FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
\n", "

98794 rows × 22 columns

\n", "
" ], "text/plain": [ " customer_id street_id structure_id mcp_contact_id fidelity \\\n", "0 821538 139 NaN NaN 0 \n", "1 809126 1063 NaN NaN 0 \n", "2 11005 1063 NaN NaN 0 \n", "3 17663 12731 NaN NaN 0 \n", "4 38100 12395 NaN NaN 0 \n", "... ... ... ... ... ... \n", "98789 766266 139 NaN 181304.0 0 \n", "98790 766336 139 NaN 178189.0 0 \n", "98791 766348 139 NaN 178141.0 0 \n", "98792 766363 139 NaN 176807.0 0 \n", "98793 766366 139 NaN 176788.0 0 \n", "\n", " tenant_id is_partner deleted_at gender is_email_true ... \\\n", "0 875 False NaN 2 True ... \n", "1 875 False NaN 2 True ... \n", "2 875 False NaN 2 False ... \n", "3 875 False NaN 0 False ... \n", "4 875 False NaN 0 True ... \n", "... ... ... ... ... ... ... \n", "98789 875 False NaN 2 True ... \n", "98790 875 False NaN 2 True ... \n", "98791 875 False NaN 2 True ... \n", "98792 875 False NaN 2 True ... \n", "98793 875 False NaN 2 True ... \n", "\n", " max_price ticket_sum average_price average_purchase_delay \\\n", "0 NaN 0 NaN NaN \n", "1 NaN 0 NaN NaN \n", "2 NaN 0 0.0 NaN \n", "3 NaN 0 0.0 NaN \n", "4 NaN 0 0.0 NaN \n", "... ... ... ... ... \n", "98789 NaN 0 NaN NaN \n", "98790 NaN 0 NaN NaN \n", "98791 NaN 0 NaN NaN \n", "98792 NaN 0 NaN NaN \n", "98793 NaN 0 NaN NaN \n", "\n", " average_price_basket average_ticket_basket total_price \\\n", "0 NaN NaN 0.0 \n", "1 NaN NaN 0.0 \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "... ... ... ... \n", "98789 NaN NaN 0.0 \n", "98790 NaN NaN 0.0 \n", "98791 NaN NaN 0.0 \n", "98792 NaN NaN 0.0 \n", "98793 NaN NaN 0.0 \n", "\n", " purchase_count first_buying_date country \n", "0 0 NaN NaN \n", "1 0 NaN fr \n", "2 14 NaN fr \n", "3 1 NaN fr \n", "4 1 NaN fr \n", "... ... ... ... 
\n", "98789 0 NaN NaN \n", "98790 0 NaN NaN \n", "98791 0 NaN NaN \n", "98792 0 NaN NaN \n", "98793 0 NaN NaN \n", "\n", "[98794 rows x 22 columns]" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "customerplus_cleaned" ] }, { "cell_type": "code", "execution_count": 56, "id": "0e1ce56c-2e50-456c-ba97-ed4a699cc8d4", "metadata": {}, "outputs": [], "source": [ "BUCKET = \"projet-bdc2324-team1\"\n", "FILE_KEY_S3 = \"0_Input/Company_10/customerplus_cleaned.csv\"\n", "FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n", "\n", "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", " df_customerplus_cleaned = pd.read_csv(file_in, sep=\",\")\n", " \n" ] }, { "cell_type": "code", "execution_count": 50, "id": "bcdba447-90f7-450c-b4a3-6da656e38493", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_438/3710670046.py:6: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. 
Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n", " purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n" ] } ], "source": [ "BUCKET = \"projet-bdc2324-team1\"\n", "FILE_KEY_S3 = \"0_Input/Company_10/products_purchased_reduced.csv\"\n", "FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n", "\n", "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", " purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n", " \n" ] }, { "cell_type": "code", "execution_count": 44, "id": "637aa400-f49a-4d8d-802a-868b241f8a9d", "metadata": {}, "outputs": [], "source": [ "# Load each raw Company_10 CSV once into an explicit mapping instead of injecting\n", "# variables through globals(), so the origin of every DataFrame stays visible to\n", "# readers and to static tooling.\n", "dic_base = ['campaigns_information', 'customerplus_cleaned', 'products_purchased_reduced', 'target_information']\n", "raw_tables = {}\n", "for nom_base in dic_base:\n", "    FILE_PATH_S3_fanta = 'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n", "    with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n", "        raw_tables[nom_base] = pd.read_csv(file_in, sep=\",\")\n", "\n", "# Explicit bindings: other cells of this notebook refer to these tables by name.\n", "campaigns_information = raw_tables['campaigns_information']\n", "customerplus_cleaned = raw_tables['customerplus_cleaned']\n", "products_purchased_reduced = raw_tables['products_purchased_reduced']\n", "target_information = raw_tables['target_information']" ] }, { "cell_type": "code", "execution_count": 45, "id": "e60529b5-986f-4685-91e1-782c2b022e09", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idcustomer_idtarget_nametarget_type_is_importtarget_type_name
01165098618562Newsletter mensuelleFalsemanual_static_filter
11165100618559Newsletter mensuelleFalsemanual_static_filter
21165101618561Newsletter mensuelleFalsemanual_static_filter
31165102618560Newsletter mensuelleFalsemanual_static_filter
41165103618558Newsletter mensuelleFalsemanual_static_filter
..................
69253169815818580Newsletter mensuelleFalsemanual_static_filter
69254169815918569Newsletter mensuelleFalsemanual_static_filter
6925516981602962Newsletter mensuelleFalsemanual_static_filter
6925616981613825Newsletter mensuelleFalsemanual_static_filter
6925716981625731Newsletter mensuelleFalsemanual_static_filter
\n", "

69258 rows × 5 columns

\n", "
" ], "text/plain": [ " id customer_id target_name target_type_is_import \\\n", "0 1165098 618562 Newsletter mensuelle False \n", "1 1165100 618559 Newsletter mensuelle False \n", "2 1165101 618561 Newsletter mensuelle False \n", "3 1165102 618560 Newsletter mensuelle False \n", "4 1165103 618558 Newsletter mensuelle False \n", "... ... ... ... ... \n", "69253 1698158 18580 Newsletter mensuelle False \n", "69254 1698159 18569 Newsletter mensuelle False \n", "69255 1698160 2962 Newsletter mensuelle False \n", "69256 1698161 3825 Newsletter mensuelle False \n", "69257 1698162 5731 Newsletter mensuelle False \n", "\n", " target_type_name \n", "0 manual_static_filter \n", "1 manual_static_filter \n", "2 manual_static_filter \n", "3 manual_static_filter \n", "4 manual_static_filter \n", "... ... \n", "69253 manual_static_filter \n", "69254 manual_static_filter \n", "69255 manual_static_filter \n", "69256 manual_static_filter \n", "69257 manual_static_filter \n", "\n", "[69258 rows x 5 columns]" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "target_information" ] }, { "cell_type": "code", "execution_count": 46, "id": "6ece1bb3-5a2d-41f8-be96-eb70697881dc", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ ":27: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_idnb_campaignsnb_campaigns_openedtime_to_open
0294NaNNaT
1373NaNNaT
23941.00 days 05:16:38
34141.00 days 01:12:29
4444NaNNaT
...............
571388279401NaNNaT
571398279411NaNNaT
571408279421NaNNaT
571418279431NaNNaT
571428279441NaNNaT
\n", "

57143 rows × 4 columns

\n", "
" ], "text/plain": [ " customer_id nb_campaigns nb_campaigns_opened time_to_open\n", "0 29 4 NaN NaT\n", "1 37 3 NaN NaT\n", "2 39 4 1.0 0 days 05:16:38\n", "3 41 4 1.0 0 days 01:12:29\n", "4 44 4 NaN NaT\n", "... ... ... ... ...\n", "57138 827940 1 NaN NaT\n", "57139 827941 1 NaN NaT\n", "57140 827942 1 NaN NaT\n", "57141 827943 1 NaN NaT\n", "57142 827944 1 NaN NaT\n", "\n", "[57143 rows x 4 columns]" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "campaigns_kpi_function(campaigns)" ] }, { "cell_type": "code", "execution_count": 52, "id": "8c42f4a3-bdbc-44fe-a873-3192b983410d", "metadata": {}, "outputs": [], "source": [ "# KPI sur le comportement d'achat\n", "df_tickets_kpi = tickets_kpi_function(purchases)" ] }, { "cell_type": "code", "execution_count": 53, "id": "df124880-1e4f-4eaf-b0ef-72bb4f840d45", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_idnb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internet
0194828829872.0212643.092500718.1493981924.9431028.0
1194843262.0101745.0217361743.0450351.9767010.0
219485131211878.0212649.04474585.2408452563.80390084.0
31948610496.0101944.0776041742.794225201.2833800.0
4194872133.0101742.8777661742.8777660.0000000.0
.................................
2610082487711-12.0105.9561115.9561110.0000000.0
261018248781112.0105.9569215.9569210.0000000.0
2610282487921-38.0105.2262385.2262380.0000000.0
26103824991143-100.0103.0215393.0172220.0043170.0
261048249981125.0100.0727200.0727200.0000000.0
\n", "

26105 rows × 10 columns

\n", "
" ], "text/plain": [ " customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n", "0 19482 88 29 872.0 2 \n", "1 19484 3 2 62.0 1 \n", "2 19485 131 21 1878.0 2 \n", "3 19486 10 4 96.0 1 \n", "4 19487 2 1 33.0 1 \n", "... ... ... ... ... ... \n", "26100 824877 1 1 -12.0 1 \n", "26101 824878 1 1 12.0 1 \n", "26102 824879 2 1 -38.0 1 \n", "26103 824991 14 3 -100.0 1 \n", "26104 824998 1 1 25.0 1 \n", "\n", " vente_internet_max purchase_date_min purchase_date_max \\\n", "0 1 2643.092500 718.149398 \n", "1 0 1745.021736 1743.045035 \n", "2 1 2649.044745 85.240845 \n", "3 0 1944.077604 1742.794225 \n", "4 0 1742.877766 1742.877766 \n", "... ... ... ... \n", "26100 0 5.956111 5.956111 \n", "26101 0 5.956921 5.956921 \n", "26102 0 5.226238 5.226238 \n", "26103 0 3.021539 3.017222 \n", "26104 0 0.072720 0.072720 \n", "\n", " time_between_purchase nb_tickets_internet \n", "0 1924.943102 8.0 \n", "1 1.976701 0.0 \n", "2 2563.803900 84.0 \n", "3 201.283380 0.0 \n", "4 0.000000 0.0 \n", "... ... ... \n", "26100 0.000000 0.0 \n", "26101 0.000000 0.0 \n", "26102 0.000000 0.0 \n", "26103 0.004317 0.0 \n", "26104 0.000000 0.0 \n", "\n", "[26105 rows x 10 columns]" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_tickets_kpi" ] }, { "cell_type": "code", "execution_count": 57, "id": "4e8c0d75-117f-4400-8d55-b3ae3f43501b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_idstreet_idstructure_idmcp_contact_idfidelitytenant_idis_partnerdeleted_atgenderis_email_true...total_pricepurchase_countfirst_buying_datecountrygender_labelgender_femalegender_malegender_othercountry_frhas_tags
0821538139NaNNaN0875FalseNaN2True...0.00NaNNaNother001NaN0
18091261063NaNNaN0875FalseNaN2True...0.00NaNfrother0011.00
2110051063NaNNaN0875FalseNaN2False...NaN14NaNfrother0011.00
31766312731NaNNaN0875FalseNaN0False...NaN1NaNfrfemale1001.00
43810012395NaNNaN0875FalseNaN0True...NaN1NaNfrfemale1001.00
..................................................................
98789766266139NaN181304.00875FalseNaN2True...0.00NaNNaNother001NaN0
98790766336139NaN178189.00875FalseNaN2True...0.00NaNNaNother001NaN0
98791766348139NaN178141.00875FalseNaN2True...0.00NaNNaNother001NaN0
98792766363139NaN176807.00875FalseNaN2True...0.00NaNNaNother001NaN0
98793766366139NaN176788.00875FalseNaN2True...0.00NaNNaNother001NaN0
\n", "

98794 rows × 28 columns

\n", "
" ], "text/plain": [ " customer_id street_id structure_id mcp_contact_id fidelity \\\n", "0 821538 139 NaN NaN 0 \n", "1 809126 1063 NaN NaN 0 \n", "2 11005 1063 NaN NaN 0 \n", "3 17663 12731 NaN NaN 0 \n", "4 38100 12395 NaN NaN 0 \n", "... ... ... ... ... ... \n", "98789 766266 139 NaN 181304.0 0 \n", "98790 766336 139 NaN 178189.0 0 \n", "98791 766348 139 NaN 178141.0 0 \n", "98792 766363 139 NaN 176807.0 0 \n", "98793 766366 139 NaN 176788.0 0 \n", "\n", " tenant_id is_partner deleted_at gender is_email_true ... \\\n", "0 875 False NaN 2 True ... \n", "1 875 False NaN 2 True ... \n", "2 875 False NaN 2 False ... \n", "3 875 False NaN 0 False ... \n", "4 875 False NaN 0 True ... \n", "... ... ... ... ... ... ... \n", "98789 875 False NaN 2 True ... \n", "98790 875 False NaN 2 True ... \n", "98791 875 False NaN 2 True ... \n", "98792 875 False NaN 2 True ... \n", "98793 875 False NaN 2 True ... \n", "\n", " total_price purchase_count first_buying_date country gender_label \\\n", "0 0.0 0 NaN NaN other \n", "1 0.0 0 NaN fr other \n", "2 NaN 14 NaN fr other \n", "3 NaN 1 NaN fr female \n", "4 NaN 1 NaN fr female \n", "... ... ... ... ... ... \n", "98789 0.0 0 NaN NaN other \n", "98790 0.0 0 NaN NaN other \n", "98791 0.0 0 NaN NaN other \n", "98792 0.0 0 NaN NaN other \n", "98793 0.0 0 NaN NaN other \n", "\n", " gender_female gender_male gender_other country_fr has_tags \n", "0 0 0 1 NaN 0 \n", "1 0 0 1 1.0 0 \n", "2 0 0 1 1.0 0 \n", "3 1 0 0 1.0 0 \n", "4 1 0 0 1.0 0 \n", "... ... ... ... ... ... 
\n", "98789 0 0 1 NaN 0 \n", "98790 0 0 1 NaN 0 \n", "98791 0 0 1 NaN 0 \n", "98792 0 0 1 NaN 0 \n", "98793 0 0 1 NaN 0 \n", "\n", "[98794 rows x 28 columns]" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ " # KPI sur les données socio-démographiques\n", "df_customerplus_clean = customerplus_kpi_function(df_customerplus_cleaned)\n", " \n", "df_customerplus_clean" ] }, { "cell_type": "code", "execution_count": 29, "id": "484979cc-d4a4-4d9d-9701-71a4f353a372", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_438/1359829443.py:6: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n", " campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = [\"opened_at\", \"sent_at\", \"delivered_at\"], date_parser=custom_date_parser)\n" ] } ], "source": [ "BUCKET = \"projet-bdc2324-team1\"\n", "FILE_KEY_S3 = \"0_Input/Company_10/campaigns_information.csv\"\n", "FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n", "\n", "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", " campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = [\"opened_at\", \"sent_at\", \"delivered_at\"], date_parser=custom_date_parser)\n", " \n" ] }, { "cell_type": "code", "execution_count": 32, "id": "553ca2e7-ead4-4508-8247-fcc602abd249", "metadata": {}, "outputs": [], "source": [ "BUCKET = \"projet-bdc2324-team1\"\n", "FILE_KEY_S3 = \"0_Input/Company_10/target_information.csv\"\n", "FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n", "\n", "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", " targets = pd.read_csv(file_in, sep=\",\")\n", " \n" ] }, { "cell_type": "code", "execution_count": 33, "id": "17b89ca1-deea-4139-a6c0-7822cc4e7a90", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idcustomer_idtarget_nametarget_type_is_importtarget_type_name
01165098618562Newsletter mensuelleFalsemanual_static_filter
11165100618559Newsletter mensuelleFalsemanual_static_filter
21165101618561Newsletter mensuelleFalsemanual_static_filter
31165102618560Newsletter mensuelleFalsemanual_static_filter
41165103618558Newsletter mensuelleFalsemanual_static_filter
..................
69253169815818580Newsletter mensuelleFalsemanual_static_filter
69254169815918569Newsletter mensuelleFalsemanual_static_filter
6925516981602962Newsletter mensuelleFalsemanual_static_filter
6925616981613825Newsletter mensuelleFalsemanual_static_filter
6925716981625731Newsletter mensuelleFalsemanual_static_filter
\n", "

69258 rows × 5 columns

\n", "
" ], "text/plain": [ " id customer_id target_name target_type_is_import \\\n", "0 1165098 618562 Newsletter mensuelle False \n", "1 1165100 618559 Newsletter mensuelle False \n", "2 1165101 618561 Newsletter mensuelle False \n", "3 1165102 618560 Newsletter mensuelle False \n", "4 1165103 618558 Newsletter mensuelle False \n", "... ... ... ... ... \n", "69253 1698158 18580 Newsletter mensuelle False \n", "69254 1698159 18569 Newsletter mensuelle False \n", "69255 1698160 2962 Newsletter mensuelle False \n", "69256 1698161 3825 Newsletter mensuelle False \n", "69257 1698162 5731 Newsletter mensuelle False \n", "\n", " target_type_name \n", "0 manual_static_filter \n", "1 manual_static_filter \n", "2 manual_static_filter \n", "3 manual_static_filter \n", "4 manual_static_filter \n", "... ... \n", "69253 manual_static_filter \n", "69254 manual_static_filter \n", "69255 manual_static_filter \n", "69256 manual_static_filter \n", "69257 manual_static_filter \n", "\n", "[69258 rows x 5 columns]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "targets" ] }, { "cell_type": "code", "execution_count": 40, "id": "c90d94ab-cf0e-4d18-9d5e-cb1d22f4d58b", "metadata": {}, "outputs": [ { "ename": "SyntaxError", "evalue": "f-string: expecting '}' (1665996669.py, line 1)", "output_type": "error", "traceback": [ "\u001b[0;36m Cell \u001b[0;32mIn[40], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{'musee'}'\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m f-string: expecting '}'\n" ] } ], "source": [ "BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{'musee'}'" ] }, { "cell_type": "code", "execution_count": null, "id": "d6767ba6-94ef-43f9-8f67-15ecdb41a70b", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { 
"name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 5 }