Ajout et sauvegarde

This commit is contained in:
Antoine JOUBREL 2024-03-09 14:50:58 +00:00
parent ced4747372
commit 11e2e86583

View File

@ -93,16 +93,106 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 56, "execution_count": 4,
"id": "09daec01-9927-45c7-a6d4-9b9d0340ee02", "id": "09daec01-9927-45c7-a6d4-9b9d0340ee02",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"companies = {'musee' : ['1', '2', '3', '4', '101'],\n", "companies = {'musee' : ['1', '2', '3', '4'], # , '101'\n",
" 'sport': ['5', '6', '7', '8', '9'],\n", " 'sport': ['5', '6', '7', '8', '9'],\n",
" 'musique' : ['10', '11', '12', '13', '14']}" " 'musique' : ['10', '11', '12', '13', '14']}"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "d9ccb033-3c7a-4647-ae1a-3a439dec2ea1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_1/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_1/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
}
],
"source": [
"# Build the aggregated KPI tables for the companies listed in companies['musee'].\n",
"# NOTE(review): the aggregates keep their `_sport` suffix although the loop iterates over\n",
"# companies['musee'] - confirm the intended sector before relying on the names.\n",
"\n",
"# list of company identifiers to process (a list of ids, not a count)\n",
"nb_compagnie = companies['musee']\n",
"\n",
"# Per-company frames are collected in lists and concatenated once after the loop;\n",
"# calling pd.concat on a growing aggregate inside the loop re-copies it every iteration.\n",
"customer_frames = []\n",
"campaigns_brut_frames = []\n",
"campaigns_kpi_frames = []\n",
"products_frames = []\n",
"tickets_frames = []\n",
"\n",
"\n",
"def _stack_frames(frames):\n",
"    \"\"\"Concatenate frames with a fresh index; return an empty DataFrame for an empty list.\"\"\"\n",
"    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()\n",
"\n",
"\n",
"# one iteration per company: load, compute KPIs, tag, collect\n",
"for directory_path in nb_compagnie:\n",
"    # load the inputs of the current company\n",
"    df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
"    df_campaigns_brut = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
"    df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
"    # NOTE(review): target_information is loaded and tagged below but never aggregated.\n",
"    df_target_information = display_databases(directory_path, file_name = \"target_information\")\n",
"\n",
"    # compute the KPI tables from the loaded inputs\n",
"    df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_brut)\n",
"    df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
"    df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
"\n",
"    # number_company column, used to aggregate the results per company\n",
"    # NOTE(review): df_products_purchased_reduced does not receive number_company - confirm this is intended.\n",
"    for frame in (df_tickets_kpi, df_campaigns_brut, df_campaigns_kpi, df_customerplus_clean, df_target_information):\n",
"        frame[\"number_company\"] = int(directory_path)\n",
"\n",
"    # prefix customer_id with the company id (presumably to keep ids distinct across companies)\n",
"    for frame in (df_tickets_kpi, df_campaigns_brut, df_campaigns_kpi, df_customerplus_clean, df_products_purchased_reduced):\n",
"        frame[\"customer_id\"] = directory_path + '_' + frame['customer_id'].astype('str')\n",
"\n",
"    # collect the per-company frames\n",
"    customer_frames.append(df_customerplus_clean)\n",
"    campaigns_kpi_frames.append(df_campaigns_kpi)\n",
"    campaigns_brut_frames.append(df_campaigns_brut)\n",
"    tickets_frames.append(df_tickets_kpi)\n",
"    products_frames.append(df_products_purchased_reduced)\n",
"\n",
"# single concatenation per aggregated table\n",
"customer_sport = _stack_frames(customer_frames)\n",
"campaigns_sport_kpi = _stack_frames(campaigns_kpi_frames)\n",
"campaigns_sport_brut = _stack_frames(campaigns_brut_frames)\n",
"tickets_sport = _stack_frames(tickets_frames)\n",
"products_sport = _stack_frames(products_frames)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "105862bd-5d66-45ed-be71-ec6e1e103963",
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "ae3c0c33-55a7-4a28-9a62-3ce13496917a", "id": "ae3c0c33-55a7-4a28-9a62-3ce13496917a",
@ -1247,115 +1337,10 @@
"# 1 - Comportement d'achat" "# 1 - Comportement d'achat"
] ]
}, },
{
"cell_type": "code",
"execution_count": 34,
"id": "8917cc1b-4728-460c-8432-a633de7f039b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_1/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_2/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_3/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_4/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"<string>:13: DtypeWarning: Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_101/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_101/products_purchased_reduced_1.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
}
],
"source": [
"# Load the purchased-products table of each listed company into a module-level\n",
"# variable named df<company>_tickets (df1_tickets, ..., df101_tickets).\n",
"for company_number in ('1', '2', '3', '4', '101'):\n",
"    nom_dataframe = f'df{company_number}_tickets'\n",
"    globals()[nom_dataframe] = display_databases(\n",
"        company_number,\n",
"        file_name='products_purchased_reduced',\n",
"        datetime_col=['purchase_date'],\n",
"    )\n",
"\n",
"# Company 101 also has a products_purchased_reduced_1 file. '101' is the last id\n",
"# iterated above, so loading it after the loop keeps the same load order.\n",
"df101_tickets_1 = display_databases(\n",
"    company_number,\n",
"    file_name='products_purchased_reduced_1',\n",
"    datetime_col=['purchase_date'],\n",
")"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "3479960c-0d23-45f1-8fff-d87395205731", "id": "3479960c-0d23-45f1-8fff-d87395205731",
"metadata": { "metadata": {},
"jp-MarkdownHeadingCollapsed": true
},
"source": [ "source": [
"## Outlier" "## Outlier"
] ]