This commit is contained in:
Fanta RODRIGUE 2024-03-02 11:16:24 +00:00
parent 3d4e661be9
commit a3016ce78e

View File

@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "17949e81-c30b-4fdf-9872-d7dc2b22ba9e",
"metadata": {},
"outputs": [],
@ -29,7 +29,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "9c1737a2-bad8-4266-8dec-452085d8cfe7",
"metadata": {},
"outputs": [
@ -42,7 +42,7 @@
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
]
},
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@ -58,7 +58,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "a35dc2f6-2017-4b21-abd2-2c4c112c96b2",
"metadata": {},
"outputs": [],
@ -72,7 +72,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 5,
"id": "40b705eb-fd18-436b-b150-61611a3c6a84",
"metadata": {},
"outputs": [],
@ -89,13 +89,300 @@
" return df \n"
]
},
{
"cell_type": "markdown",
"id": "e22eb500-80da-4dd9-8b20-9e4deec64831",
"metadata": {},
"source": [
"nb_compagnie=['10','11','12','13','14']\n",
"for directory_path in nb_compagnie:\n",
" df_customerplus_clean = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
" df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
" df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
" df_customerplus_clean[\"Number_compagnie\"]=int(directory_path)\n",
" df_campaigns_information[\"Number_compagnie\"]=int(directory_path)\n",
" df_products_purchased_reduced[\"Number_compagnie\"]=int(directory_path)\n",
"\n",
" if nb_compagnie.index(directory_path)>=1:\n",
" customerplus_clean_spectacle=pd.concat([customerplus_clean_spectacle,df_customerplus_clean],axis=0)\n",
" campaigns_information_spectacle=pd.concat([campaigns_information_spectacle,df_campaigns_information],axis=0)\n",
" products_purchased_reduced_spectacle=pd.concat([products_purchased_reduced_spectacle,df_products_purchased_reduced],axis=0)\n",
" else:\n",
" customerplus_clean_spectacle=df_customerplus_clean\n",
" campaigns_information_spectacle=df_campaigns_information\n",
" products_purchased_reduced_spectacle=df_products_purchased_reduced\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e56aa16f-a167-4bff-9f8b-f764d1f28ebd",
"execution_count": 6,
"id": "1c4a07ec-cf8e-420e-88f0-5eb4b83e2bc1",
"metadata": {},
"outputs": [],
"source": []
"source": [
"directory_path='10'"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "4249cdcd-ef59-4dd6-a345-4218bb90e526",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1282/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1282/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1282/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
}
],
"source": [
" # Import customerplus\n",
"df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
"df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
"df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e56aa16f-a167-4bff-9f8b-f764d1f28ebd",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
}
],
"source": [
" # Creation des KPI\n",
" # KPI sur les campagnes publicitaires\n",
"df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information) \n",
"\n",
" # KPI sur le comportement d'achat\n",
"df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
"\n",
" # KPI sur les données socio-démographiques\n",
"df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
"\n",
"#creation de la colonne Number compagnie\n",
"df_tickets_kpi[\"Number_compagnie\"]=int(directory_path)\n",
"df_campaigns_kpi[\"Number_compagnie\"]=int(directory_path)\n",
"df_customerplus_clean[\"Number_compagnie\"]=int(directory_path)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "838722c3-4c78-4ffa-a6b6-01ac60f4bdbd",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>Number_compagnie</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>29</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>37</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>39</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:16:38</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>41</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:12:29</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>44</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57138</th>\n",
" <td>827940</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57139</th>\n",
" <td>827941</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57140</th>\n",
" <td>827942</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57141</th>\n",
" <td>827943</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57142</th>\n",
" <td>827944</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>57143 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_campaigns nb_campaigns_opened time_to_open \\\n",
"0 29 4 0.0 NaT \n",
"1 37 3 0.0 NaT \n",
"2 39 4 1.0 0 days 05:16:38 \n",
"3 41 4 1.0 0 days 01:12:29 \n",
"4 44 4 0.0 NaT \n",
"... ... ... ... ... \n",
"57138 827940 1 0.0 NaT \n",
"57139 827941 1 0.0 NaT \n",
"57140 827942 1 0.0 NaT \n",
"57141 827943 1 0.0 NaT \n",
"57142 827944 1 0.0 NaT \n",
"\n",
" Number_compagnie \n",
"0 10 \n",
"1 10 \n",
"2 10 \n",
"3 10 \n",
"4 10 \n",
"... ... \n",
"57138 10 \n",
"57139 10 \n",
"57140 10 \n",
"57141 10 \n",
"57142 10 \n",
"\n",
"[57143 rows x 5 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_campaigns_kpi"
]
}
],
"metadata": {