Ajout et sauvegarde

2024-03-09 14:50:58 +00:00 · 2024-03-09 14:50:58 +00:00 · 11e2e86583
commit 11e2e86583
parent ced4747372
1 changed files with 93 additions and 108 deletions
--- a/1_Descriptive_Statistics_Museum.ipynb
+++ b/1_Descriptive_Statistics_Museum.ipynb
@ -93,16 +93,106 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 4,
   "id": "09daec01-9927-45c7-a6d4-9b9d0340ee02",
   "metadata": {},
   "outputs": [],
   "source": [
-    "companies = {'musee' : ['1', '2', '3', '4', '101'],\n",
+    "companies = {'musee' : ['1', '2', '3', '4'], # , '101'\n",
    "            'sport': ['5', '6', '7', '8', '9'],\n",
    "            'musique' : ['10', '11', '12', '13', '14']}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9ccb033-3c7a-4647-ae1a-3a439dec2ea1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File path :  projet-bdc2324-team1/0_Input/Company_1/customerplus_cleaned.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File path :  projet-bdc2324-team1/0_Input/Company_1/campaigns_information.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
     ]
    }
   ],
   "source": [
    "# Build the aggregated KPI tables for the companies listed in companies['musee'].\n",
    "# NOTE(review): the result variables keep the `_sport` suffix although the museum\n",
    "# companies are loaded here; rename only together with every cell that reads them.\n",
    "\n",
    "def _concat_or_empty(frames):\n",
    "    \"\"\"Stack `frames` with a fresh index; return an empty DataFrame when the list is empty.\"\"\"\n",
    "    # pd.concat raises on an empty list, so fall back to an empty frame\n",
    "    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()\n",
    "\n",
    "# identifiers of the companies to process\n",
    "nb_compagnie = companies['musee']\n",
    "\n",
    "# Per-company frames are collected in lists and concatenated once after the loop,\n",
    "# instead of re-copying the growing result at every iteration.\n",
    "customer_frames = []\n",
    "campaigns_brut_frames = []\n",
    "campaigns_kpi_frames = []\n",
    "products_frames = []\n",
    "tickets_frames = []\n",
    "\n",
    "for directory_path in nb_compagnie:\n",
    "    # load the raw tables of the current company\n",
    "    df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
    "    df_campaigns_brut = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
    "    df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
    "    df_target_information = display_databases(directory_path, file_name = \"target_information\")\n",
    "\n",
    "    # derive the KPI tables from the raw ones\n",
    "    df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_brut)\n",
    "    df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
    "    df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
    "\n",
    "    # number_company column: records the source company of each row once the tables are stacked\n",
    "    # NOTE(review): df_target_information is tagged here but is neither re-keyed nor\n",
    "    # aggregated below - confirm this is intended.\n",
    "    for frame in (df_tickets_kpi, df_campaigns_brut, df_campaigns_kpi, df_customerplus_clean, df_target_information):\n",
    "        frame[\"number_company\"] = int(directory_path)\n",
    "\n",
    "    # prefix customer_id with the company identifier\n",
    "    for frame in (df_tickets_kpi, df_campaigns_brut, df_campaigns_kpi, df_customerplus_clean, df_products_purchased_reduced):\n",
    "        frame[\"customer_id\"] = directory_path + '_' + frame['customer_id'].astype('str')\n",
    "\n",
    "    # keep the per-company frames for the final concatenation\n",
    "    customer_frames.append(df_customerplus_clean)\n",
    "    campaigns_kpi_frames.append(df_campaigns_kpi)\n",
    "    campaigns_brut_frames.append(df_campaigns_brut)\n",
    "    tickets_frames.append(df_tickets_kpi)\n",
    "    products_frames.append(df_products_purchased_reduced)\n",
    "\n",
    "# single concatenation per table\n",
    "customer_sport = _concat_or_empty(customer_frames)\n",
    "campaigns_sport_kpi = _concat_or_empty(campaigns_kpi_frames)\n",
    "campaigns_sport_brut = _concat_or_empty(campaigns_brut_frames)\n",
    "tickets_sport = _concat_or_empty(tickets_frames)\n",
    "products_sport = _concat_or_empty(products_frames)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "105862bd-5d66-45ed-be71-ec6e1e103963",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "ae3c0c33-55a7-4a28-9a62-3ce13496917a",
@ -1247,115 +1337,10 @@
    "# 1 - Comportement d'achat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "8917cc1b-4728-460c-8432-a633de7f039b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File path :  projet-bdc2324-team1/0_Input/Company_1/products_purchased_reduced.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File path :  projet-bdc2324-team1/0_Input/Company_2/products_purchased_reduced.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File path :  projet-bdc2324-team1/0_Input/Company_3/products_purchased_reduced.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File path :  projet-bdc2324-team1/0_Input/Company_4/products_purchased_reduced.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
      "<string>:13: DtypeWarning: Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File path :  projet-bdc2324-team1/0_Input/Company_101/products_purchased_reduced.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File path :  projet-bdc2324-team1/0_Input/Company_101/products_purchased_reduced_1.csv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
     ]
    }
   ],
   "source": [
    "# Load the products_purchased_reduced table of each listed company into a\n",
    "# module-level variable named df<company>_tickets (df1_tickets, ..., df101_tickets).\n",
    "for company_number in ('1', '2', '3', '4', '101'):\n",
    "    nom_dataframe = f'df{company_number}_tickets'\n",
    "    globals()[nom_dataframe] = display_databases(company_number, file_name = 'products_purchased_reduced' , datetime_col = ['purchase_date'])\n",
    "\n",
    "    # only company 101 gets the additional products_purchased_reduced_1 file\n",
    "    if company_number != \"101\":\n",
    "        continue\n",
    "    df101_tickets_1 = display_databases(company_number, file_name = 'products_purchased_reduced_1' , datetime_col = ['purchase_date'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3479960c-0d23-45f1-8fff-d87395205731",
-   "metadata": {
+   "metadata": {},
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "## Outlier"
   ]