2721 lines
		
	
	
		
			97 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			2721 lines
		
	
	
		
			97 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
{
 | 
						||
 "cells": [
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "ad414c84-be46-4d2c-be8b-9fc4d24cc672",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "# Business Data Challenge - Team 1"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 1,
 | 
						||
   "id": "15103481-8d74-404c-aa09-7601fe7730da",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "import pandas as pd\n",
 | 
						||
    "import numpy as np\n",
 | 
						||
    "import os\n",
 | 
						||
    "import s3fs\n",
 | 
						||
    "import re"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "ee97665c-39af-4c1c-a62b-c9c79feae18f",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "Configuration de l'accès aux données"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 2,
 | 
						||
   "id": "5d83bb1a-d341-446e-91f6-1c428607f6d4",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "# Create filesystem object\n",
 | 
						||
    "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': 'https://'+'minio-simple.lab.groupe-genes.fr'},key ='WKTGH4YGUBAT3TR0OSUR', secret = 'g8ozi6ZUrBy8DzaAip4F7zOizbr4DKf4RgYNseqU', token = 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3NLZXkiOiJXS1RHSDRZR1VCQVQzVFIwT1NVUiIsImFjciI6IjAiLCJhbGxvd2VkLW9yaWdpbnMiOlsiaHR0cHM6Ly9vbnl4aWEubGFiLmdyb3VwZS1nZW5lcy5mciJdLCJhdWQiOlsibWluaW8iLCJhY2NvdW50Il0sImF1dGhfdGltZSI6MTcwNzU4NjUwMCwiYXpwIjoib255eGlhLW1pbmlvIiwiZW1haWwiOiJhbnRvaW5lLmpvdWJyZWxAZW5zYWUuZnIiLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiZXhwIjoxNzA3NjczMDQ3LCJmYW1pbHlfbmFtZSI6IkpPVUJSRUwiLCJnaXZlbl9uYW1lIjoiQW50b2luZSIsImdyb3VwcyI6WyJiZGMyMzI0LXRlYW0xIl0sImlhdCI6MTcwNzU4NjY0NywiaXNzIjoiaHR0cHM6Ly9hdXRoLmdyb3VwZS1nZW5lcy5mci9yZWFsbXMvZ2VuZXMiLCJqdGkiOiI1MjQ2MDZmMS1lYWM3LTQxZDgtYTEzMy04MGZjMDk0MGVlNzEiLCJuYW1lIjoiQW50b2luZSBKT1VCUkVMIiwicG9saWN5Ijoic3Rzb25seSIsInByZWZlcnJlZF91c2VybmFtZSI6ImFqb3VicmVsLWVuc2FlIiwicmVhbG1fYWNjZXNzIjp7InJvbGVzIjpbIm9mZmxpbmVfYWNjZXNzIiwiZGVmYXVsdC1yb2xlcy1nZW5lcyIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJvcGVuaWQgcHJvZmlsZSBlbWFpbCIsInNlc3Npb25fc3RhdGUiOiI1OTk2MWNkYy0xNmFiLTQ4MTAtYWE4Zi1iZGUyMjkwNjhiNzUiLCJzaWQiOiI1OTk2MWNkYy0xNmFiLTQ4MTAtYWE4Zi1iZGUyMjkwNjhiNzUiLCJzdWIiOiIwNWYwZDk3Mi1jNWM4LTQyNmYtODAwZC00NmQ0OGU4NjkwMzUiLCJ0eXAiOiJCZWFyZXIifQ.-imw-N4bk1uCcQGobkxhsRoeBAqxC9rT7PifElbC7ODOStnwIulc7HRR2fmtiqI2PdyrfnVvzfmIPK1g056HbA')"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "9cbd72c5-6f8e-4366-ab66-96c32c6e963a",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "# Exemple sur Company 1"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "db26e59a-927c-407e-b54b-1815473b0b34",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Chargement données"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 3,
 | 
						||
   "id": "699664b9-eee4-4f8d-a207-e524526560c5",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "BUCKET = \"bdc2324-data/1\"\n",
 | 
						||
    "liste_database = fs.ls(BUCKET)"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 4,
 | 
						||
   "id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stderr",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "/tmp/ipykernel_42764/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
 | 
						||
      "  df = pd.read_csv(file_in)\n"
 | 
						||
     ]
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "# loop to create dataframes from liste\n",
 | 
						||
    "files_path = liste_database\n",
 | 
						||
    "\n",
 | 
						||
    "client_number = files_path[0].split(\"/\")[1]\n",
 | 
						||
    "df_prefix = \"df\" + str(client_number) + \"_\"\n",
 | 
						||
    "\n",
 | 
						||
    "for i in range(len(files_path)) :\n",
 | 
						||
    "    current_path = files_path[i]\n",
 | 
						||
    "    with fs.open(current_path, mode=\"rb\") as file_in:\n",
 | 
						||
    "        df = pd.read_csv(file_in)\n",
 | 
						||
    "        # the pattern of the name is df1xxx\n",
 | 
						||
    "        nom_dataframe = df_prefix + re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', current_path).group(3)\n",
 | 
						||
    "        globals()[nom_dataframe] = df"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "4004c8bf-11d9-413d-bb42-2cb8ddde7716",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Cleaning functions"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 5,
 | 
						||
   "id": "d237be96-8c86-4a91-b7a1-487e87a16c3d",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "def cleaning_date(df, column_name):\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    Nettoie la colonne spécifiée du DataFrame en convertissant les valeurs en datetime avec le format ISO8601.\n",
 | 
						||
    "\n",
 | 
						||
    "    Parameters:\n",
 | 
						||
    "    - df: DataFrame\n",
 | 
						||
    "        Le DataFrame contenant la colonne à nettoyer.\n",
 | 
						||
    "    - column_name: str\n",
 | 
						||
    "        Le nom de la colonne à nettoyer.\n",
 | 
						||
    "\n",
 | 
						||
    "    Returns:\n",
 | 
						||
    "    - DataFrame\n",
 | 
						||
    "        Le DataFrame modifié avec la colonne nettoyée.\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
 | 
						||
    "    return df"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "398804d8-2225-4fd3-bceb-75ab1588e359",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Preprocessing"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "568cb180-0dd9-4b27-aecb-05e4c3775ba6",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## customer_plus"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 6,
 | 
						||
   "id": "7e7b90ce-da54-4f00-bc34-64c543b0858f",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "def preprocessing_customerplus(customerplus = None):\n",
 | 
						||
    "\n",
 | 
						||
    "    customerplus_copy = customerplus.copy()\n",
 | 
						||
    "    \n",
 | 
						||
    "    # Passage en format date\n",
 | 
						||
    "    cleaning_date(customerplus_copy, 'first_buying_date')\n",
 | 
						||
    "    cleaning_date(customerplus_copy, 'last_visiting_date')\n",
 | 
						||
    "    \n",
 | 
						||
    "    # Selection des variables\n",
 | 
						||
    "    customerplus_copy.drop(['lastname', 'firstname', 'email', 'civility', 'note', 'created_at', 'updated_at', 'deleted_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode', 'last_visiting_date'], axis = 1, inplace=True)\n",
 | 
						||
    "    customerplus_copy.rename(columns = {'id' : 'customer_id'}, inplace = True)\n",
 | 
						||
    "\n",
 | 
						||
    "    return customerplus_copy\n"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 7,
 | 
						||
   "id": "03329e32-00a5-42c8-9470-75f7b6216ccd",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "df1_customerplus_clean = preprocessing_customerplus(df1_customersplus)"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "bade04b1-0cdf-4d10-bcca-7dc7e4831656",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Ticket area"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 8,
 | 
						||
   "id": "b95464b1-26bc-4aac-84b4-45da83b92251",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "# Fonction de nettoyage et selection\n",
 | 
						||
    "def preprocessing_tickets_area(tickets = None, purchases = None, suppliers = None, type_ofs = None):\n",
 | 
						||
    "    # Base des tickets\n",
 | 
						||
    "    tickets = tickets[['id', 'purchase_id', 'product_id', 'is_from_subscription', 'type_of', 'supplier_id']]\n",
 | 
						||
    "    tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n",
 | 
						||
    "\n",
 | 
						||
    "    # Base des fournisseurs\n",
 | 
						||
    "    suppliers = suppliers[['id', 'name']]\n",
 | 
						||
    "    suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n",
 | 
						||
    "    suppliers['supplier_name'] = suppliers['supplier_name'].fillna('')\n",
 | 
						||
    "\n",
 | 
						||
    "    # Base des types de billets\n",
 | 
						||
    "    type_ofs = type_ofs[['id', 'name', 'children']]\n",
 | 
						||
    "    type_ofs.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n",
 | 
						||
    "\n",
 | 
						||
    "    # Base des achats\n",
 | 
						||
    "    # Nettoyage de la date d'achat\n",
 | 
						||
    "    cleaning_date(purchases, 'purchase_date')\n",
 | 
						||
    "    # Selection des variables\n",
 | 
						||
    "    purchases = purchases[['id', 'purchase_date', 'customer_id']]\n",
 | 
						||
    "\n",
 | 
						||
    "    # Fusions \n",
 | 
						||
    "    # Fusion avec fournisseurs\n",
 | 
						||
    "    ticket_information = pd.merge(tickets, suppliers, left_on = 'supplier_id', right_on = 'id', how = 'inner')\n",
 | 
						||
    "    ticket_information.drop(['supplier_id', 'id'], axis = 1, inplace=True)\n",
 | 
						||
    "    \n",
 | 
						||
    "    # Fusion avec type de tickets\n",
 | 
						||
    "    ticket_information = pd.merge(ticket_information, type_ofs, left_on = 'type_of', right_on = 'id', how = 'inner')\n",
 | 
						||
    "    ticket_information.drop(['type_of', 'id'], axis = 1, inplace=True)\n",
 | 
						||
    "    \n",
 | 
						||
    "    # Fusion avec achats\n",
 | 
						||
    "    ticket_information = pd.merge(ticket_information, purchases, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n",
 | 
						||
    "    ticket_information.drop(['purchase_id', 'id'], axis = 1, inplace=True)\n",
 | 
						||
    "\n",
 | 
						||
    "    return ticket_information"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 9,
 | 
						||
   "id": "3e1d2ba7-ff4f-48eb-93a8-2bb648c70396",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stderr",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "/tmp/ipykernel_42764/3092893564.py:5: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n",
 | 
						||
      "/tmp/ipykernel_42764/3092893564.py:9: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n",
 | 
						||
      "/tmp/ipykernel_42764/3092893564.py:10: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
 | 
						||
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  suppliers['supplier_name'] = suppliers['supplier_name'].fillna('')\n",
 | 
						||
      "/tmp/ipykernel_42764/3092893564.py:14: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  type_ofs.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n"
 | 
						||
     ]
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_ticket_information = preprocessing_tickets_area(tickets = df1_tickets, purchases = df1_purchases, suppliers = df1_suppliers, type_ofs = df1_type_ofs)"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 10,
 | 
						||
   "id": "4b18edfc-6450-4c6a-9e7b-ee5a5808c8c9",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>ticket_id</th>\n",
 | 
						||
       "      <th>product_id</th>\n",
 | 
						||
       "      <th>is_from_subscription</th>\n",
 | 
						||
       "      <th>supplier_name</th>\n",
 | 
						||
       "      <th>type_of_ticket_name</th>\n",
 | 
						||
       "      <th>children</th>\n",
 | 
						||
       "      <th>purchase_date</th>\n",
 | 
						||
       "      <th>customer_id</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>13070859</td>\n",
 | 
						||
       "      <td>225251</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>Atelier</td>\n",
 | 
						||
       "      <td>pricing_formula</td>\n",
 | 
						||
       "      <td>2018-12-28 14:47:50+00:00</td>\n",
 | 
						||
       "      <td>48187</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>13070860</td>\n",
 | 
						||
       "      <td>224914</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>Atelier</td>\n",
 | 
						||
       "      <td>pricing_formula</td>\n",
 | 
						||
       "      <td>2018-12-28 14:47:50+00:00</td>\n",
 | 
						||
       "      <td>48187</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>13070861</td>\n",
 | 
						||
       "      <td>224914</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>Atelier</td>\n",
 | 
						||
       "      <td>pricing_formula</td>\n",
 | 
						||
       "      <td>2018-12-28 14:47:50+00:00</td>\n",
 | 
						||
       "      <td>48187</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>13070862</td>\n",
 | 
						||
       "      <td>224914</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>Atelier</td>\n",
 | 
						||
       "      <td>pricing_formula</td>\n",
 | 
						||
       "      <td>2018-12-28 14:47:50+00:00</td>\n",
 | 
						||
       "      <td>48187</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>13070863</td>\n",
 | 
						||
       "      <td>224914</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>Atelier</td>\n",
 | 
						||
       "      <td>pricing_formula</td>\n",
 | 
						||
       "      <td>2018-12-28 14:47:50+00:00</td>\n",
 | 
						||
       "      <td>48187</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "   ticket_id  product_id  is_from_subscription   supplier_name  \\\n",
 | 
						||
       "0   13070859      225251                 False  vente en ligne   \n",
 | 
						||
       "1   13070860      224914                 False  vente en ligne   \n",
 | 
						||
       "2   13070861      224914                 False  vente en ligne   \n",
 | 
						||
       "3   13070862      224914                 False  vente en ligne   \n",
 | 
						||
       "4   13070863      224914                 False  vente en ligne   \n",
 | 
						||
       "\n",
 | 
						||
       "  type_of_ticket_name         children             purchase_date  customer_id  \n",
 | 
						||
       "0             Atelier  pricing_formula 2018-12-28 14:47:50+00:00        48187  \n",
 | 
						||
       "1             Atelier  pricing_formula 2018-12-28 14:47:50+00:00        48187  \n",
 | 
						||
       "2             Atelier  pricing_formula 2018-12-28 14:47:50+00:00        48187  \n",
 | 
						||
       "3             Atelier  pricing_formula 2018-12-28 14:47:50+00:00        48187  \n",
 | 
						||
       "4             Atelier  pricing_formula 2018-12-28 14:47:50+00:00        48187  "
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 10,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_ticket_information.head()"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "096e47f4-1d65-4575-989d-83227eedad2b",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Target area"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 11,
 | 
						||
   "id": "baed146a-9d3a-4397-a812-3d50c9a2f038",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "def preprocessing_target_area(targets = None, target_types = None, customer_target_mappings = None):\n",
 | 
						||
    "    # Target.csv cleaning\n",
 | 
						||
    "    targets = targets[[\"id\", \"target_type_id\", \"name\"]]\n",
 | 
						||
    "    targets.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n",
 | 
						||
    "    \n",
 | 
						||
    "    # target_type cleaning\n",
 | 
						||
    "    target_types = target_types[[\"id\",\"is_import\",\"name\"]].add_prefix(\"target_type_\")\n",
 | 
						||
    "    \n",
 | 
						||
    "    #customer_target_mappings cleaning\n",
 | 
						||
    "    customer_target_mappings = customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]]\n",
 | 
						||
    "    \n",
 | 
						||
    "    # Merge target et target_type\n",
 | 
						||
    "    targets_full = pd.merge(targets, target_types, left_on='target_type_id', right_on='target_type_id', how='inner')\n",
 | 
						||
    "    targets_full.drop(['target_type_id'], axis = 1, inplace=True)\n",
 | 
						||
    "    \n",
 | 
						||
    "    # Merge\n",
 | 
						||
    "    targets_full = pd.merge(customer_target_mappings, targets_full, left_on='target_id', right_on='target_id', how='inner')\n",
 | 
						||
    "    targets_full.drop(['target_id'], axis = 1, inplace=True)\n",
 | 
						||
    "\n",
 | 
						||
    "    return targets_full"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 12,
 | 
						||
   "id": "5fbfd88b-b94c-489c-9201-670e96e453e7",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stderr",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "/tmp/ipykernel_42764/3848597476.py:4: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  targets.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n"
 | 
						||
     ]
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_target_information = preprocessing_target_area(targets = df1_targets, target_types = df1_target_types, customer_target_mappings = df1_customer_target_mappings)"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "cdbb48b4-5e16-4ef4-8791-ed213d68d52f",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Campaings area"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 13,
 | 
						||
   "id": "d883cc7b-ac43-4485-b86f-eaf595fbad85",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "def preprocessing_campaigns_area(campaign_stats = None, campaigns = None):\n",
 | 
						||
    "    # campaign_stats cleaning \n",
 | 
						||
    "    campaign_stats = campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]]\n",
 | 
						||
    "    cleaning_date(campaign_stats, 'opened_at')\n",
 | 
						||
    "    cleaning_date(campaign_stats, 'sent_at')\n",
 | 
						||
    "    cleaning_date(campaign_stats, 'delivered_at')\n",
 | 
						||
    "    \n",
 | 
						||
    "    # campaigns cleaning\n",
 | 
						||
    "    campaigns = campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\")\n",
 | 
						||
    "    cleaning_date(campaigns, 'campaign_sent_at')\n",
 | 
						||
    "    \n",
 | 
						||
    "    # Merge \n",
 | 
						||
    "    campaigns_full = pd.merge(campaign_stats, campaigns, on = \"campaign_id\", how = \"left\")\n",
 | 
						||
    "    campaigns_full.drop(['campaign_id'], axis = 1, inplace=True)\n",
 | 
						||
    "\n",
 | 
						||
    "    return campaigns_full"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 14,
 | 
						||
   "id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stderr",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
 | 
						||
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
 | 
						||
      "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
 | 
						||
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
 | 
						||
      "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
 | 
						||
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n"
 | 
						||
     ]
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_campaigns_information = preprocessing_campaigns_area(campaign_stats = df1_campaign_stats, campaigns = df1_campaigns)"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 15,
 | 
						||
   "id": "c24457e7-3cad-451a-a65b-7373b656bd6e",
 | 
						||
   "metadata": {
 | 
						||
    "scrolled": true
 | 
						||
   },
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>id</th>\n",
 | 
						||
       "      <th>customer_id</th>\n",
 | 
						||
       "      <th>opened_at</th>\n",
 | 
						||
       "      <th>sent_at</th>\n",
 | 
						||
       "      <th>delivered_at</th>\n",
 | 
						||
       "      <th>campaign_name</th>\n",
 | 
						||
       "      <th>campaign_service_id</th>\n",
 | 
						||
       "      <th>campaign_sent_at</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>19793</td>\n",
 | 
						||
       "      <td>112597</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>2021-03-28 16:01:09+00:00</td>\n",
 | 
						||
       "      <td>2021-03-28 16:24:18+00:00</td>\n",
 | 
						||
       "      <td>Le Mucem chez vous, gardons le lien #22</td>\n",
 | 
						||
       "      <td>404</td>\n",
 | 
						||
       "      <td>2021-03-27 23:00:00+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>14211</td>\n",
 | 
						||
       "      <td>113666</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>2021-03-28 16:01:09+00:00</td>\n",
 | 
						||
       "      <td>2021-03-28 16:21:02+00:00</td>\n",
 | 
						||
       "      <td>Le Mucem chez vous, gardons le lien #22</td>\n",
 | 
						||
       "      <td>404</td>\n",
 | 
						||
       "      <td>2021-03-27 23:00:00+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>13150</td>\n",
 | 
						||
       "      <td>280561</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>2021-03-28 16:00:59+00:00</td>\n",
 | 
						||
       "      <td>2021-03-28 16:08:45+00:00</td>\n",
 | 
						||
       "      <td>Le Mucem chez vous, gardons le lien #22</td>\n",
 | 
						||
       "      <td>404</td>\n",
 | 
						||
       "      <td>2021-03-27 23:00:00+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>7073</td>\n",
 | 
						||
       "      <td>101007</td>\n",
 | 
						||
       "      <td>2021-03-28 18:11:06+00:00</td>\n",
 | 
						||
       "      <td>2021-03-28 16:00:59+00:00</td>\n",
 | 
						||
       "      <td>2021-03-28 16:09:47+00:00</td>\n",
 | 
						||
       "      <td>Le Mucem chez vous, gardons le lien #22</td>\n",
 | 
						||
       "      <td>404</td>\n",
 | 
						||
       "      <td>2021-03-27 23:00:00+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>5175</td>\n",
 | 
						||
       "      <td>103972</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>2021-03-28 16:01:06+00:00</td>\n",
 | 
						||
       "      <td>2021-03-28 16:05:03+00:00</td>\n",
 | 
						||
       "      <td>Le Mucem chez vous, gardons le lien #22</td>\n",
 | 
						||
       "      <td>404</td>\n",
 | 
						||
       "      <td>2021-03-27 23:00:00+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "      id  customer_id                 opened_at                   sent_at  \\\n",
 | 
						||
       "0  19793       112597                       NaT 2021-03-28 16:01:09+00:00   \n",
 | 
						||
       "1  14211       113666                       NaT 2021-03-28 16:01:09+00:00   \n",
 | 
						||
       "2  13150       280561                       NaT 2021-03-28 16:00:59+00:00   \n",
 | 
						||
       "3   7073       101007 2021-03-28 18:11:06+00:00 2021-03-28 16:00:59+00:00   \n",
 | 
						||
       "4   5175       103972                       NaT 2021-03-28 16:01:06+00:00   \n",
 | 
						||
       "\n",
 | 
						||
       "               delivered_at                            campaign_name  \\\n",
 | 
						||
       "0 2021-03-28 16:24:18+00:00  Le Mucem chez vous, gardons le lien #22   \n",
 | 
						||
       "1 2021-03-28 16:21:02+00:00  Le Mucem chez vous, gardons le lien #22   \n",
 | 
						||
       "2 2021-03-28 16:08:45+00:00  Le Mucem chez vous, gardons le lien #22   \n",
 | 
						||
       "3 2021-03-28 16:09:47+00:00  Le Mucem chez vous, gardons le lien #22   \n",
 | 
						||
       "4 2021-03-28 16:05:03+00:00  Le Mucem chez vous, gardons le lien #22   \n",
 | 
						||
       "\n",
 | 
						||
       "   campaign_service_id          campaign_sent_at  \n",
 | 
						||
       "0                  404 2021-03-27 23:00:00+00:00  \n",
 | 
						||
       "1                  404 2021-03-27 23:00:00+00:00  \n",
 | 
						||
       "2                  404 2021-03-27 23:00:00+00:00  \n",
 | 
						||
       "3                  404 2021-03-27 23:00:00+00:00  \n",
 | 
						||
       "4                  404 2021-03-27 23:00:00+00:00  "
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 15,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_campaigns_information.head()"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "56520a97-ede8-4920-a211-3b5b136af33d",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Product area"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "9782e9d3-ba20-46bf-8562-bd0969972ddc",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "Some useful functions"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 16,
 | 
						||
   "id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "BUCKET = \"bdc2324-data\"\n",
 | 
						||
    "directory_path = '1'"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 17,
 | 
						||
   "id": "607eb4b4-eed9-4b50-b823-f75c116dd37c",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "def display_databases(file_name):\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    This function returns the file from s3 storage\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n",
 | 
						||
    "    print(\"File path : \", file_path)\n",
 | 
						||
    "    with fs.open(file_path, mode=\"rb\") as file_in:\n",
 | 
						||
    "        df = pd.read_csv(file_in, sep=\",\")\n",
 | 
						||
    "        \n",
 | 
						||
    "    print(\"Shape : \", df.shape)\n",
 | 
						||
    "    return df\n",
 | 
						||
    "\n",
 | 
						||
    "\n",
 | 
						||
    "def remove_horodates(df):\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    this function remove horodate columns like created_at and updated_at\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    df = df.drop(columns = [\"created_at\", \"updated_at\"])\n",
 | 
						||
    "    return df\n",
 | 
						||
    "\n",
 | 
						||
    "\n",
 | 
						||
    "def order_columns_id(df):\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    this function puts all id columns at the beginning in order to read the dataset easier\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    substring = 'id'\n",
 | 
						||
    "    id_columns = [col for col in df.columns if substring in col]\n",
 | 
						||
    "    remaining_col = [col for col in df.columns if substring not in col]\n",
 | 
						||
    "    new_order = id_columns + remaining_col\n",
 | 
						||
    "    return df[new_order]\n",
 | 
						||
    "\n",
 | 
						||
    "\n",
 | 
						||
    "def process_df_2(df):\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    This function organizes dataframe\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    df = remove_horodates(df)\n",
 | 
						||
    "    print(\"Number of columns : \", len(df.columns))\n",
 | 
						||
    "    df = order_columns_id(df)\n",
 | 
						||
    "    print(\"Columns : \", df.columns)\n",
 | 
						||
    "    return df\n",
 | 
						||
    "\n",
 | 
						||
    "def load_dataset(name):\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    This function loads csv file\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    df = display_databases(name)\n",
 | 
						||
    "    df = process_df_2(df)\n",
 | 
						||
    "    # drop na :\n",
 | 
						||
    "    #df = df.dropna(axis=1, thresh=len(df))\n",
 | 
						||
    "    # if identifier in table : delete it\n",
 | 
						||
    "    if 'identifier' in df.columns:\n",
 | 
						||
    "        df = df.drop(columns = 'identifier')\n",
 | 
						||
    "    return df"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "d23f28c0-bc95-438b-8d14-5b7bb6e267bd",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "Create theme tables"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 18,
 | 
						||
   "id": "350b09b9-451f-4d47-81fe-f34b892db027",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "def create_products_table():\n",
 | 
						||
    "    # first merge products and categories\n",
 | 
						||
    "    print(\"first merge products and categories\")\n",
 | 
						||
    "    products = load_dataset(\"1products.csv\")\n",
 | 
						||
    "    categories = load_dataset(\"1categories.csv\")\n",
 | 
						||
    "    # Drop useless columns\n",
 | 
						||
    "    products = products.drop(columns = ['apply_price', 'extra_field', 'amount_consumption'])\n",
 | 
						||
    "    categories = categories.drop(columns = ['extra_field', 'quota'])\n",
 | 
						||
    "\n",
 | 
						||
    "    #Merge\n",
 | 
						||
    "    products_theme = products.merge(categories, how = 'left', left_on = 'category_id',\n",
 | 
						||
    "                                    right_on = 'id', suffixes=('_products', '_categories'))\n",
 | 
						||
    "    products_theme = products_theme.rename(columns = {\"name\" : \"name_categories\"})\n",
 | 
						||
    "    \n",
 | 
						||
    "    # Second merge products_theme and type of categories\n",
 | 
						||
    "    print(\"Second merge products_theme and type of categories\")\n",
 | 
						||
    "    type_of_categories = load_dataset(\"1type_of_categories.csv\")\n",
 | 
						||
    "    type_of_categories = type_of_categories.drop(columns = 'id')\n",
 | 
						||
    "    products_theme = products_theme.merge(type_of_categories, how = 'left', left_on = 'category_id',\n",
 | 
						||
    "                                          right_on = 'category_id' )\n",
 | 
						||
    "\n",
 | 
						||
    "    # Index cleaning\n",
 | 
						||
    "    products_theme = products_theme.drop(columns = ['id_categories'])\n",
 | 
						||
    "    products_theme  = order_columns_id(products_theme)\n",
 | 
						||
    "    return products_theme\n",
 | 
						||
    "\n",
 | 
						||
    "\n",
 | 
						||
    "def create_events_table():\n",
 | 
						||
    "    # first merge events and seasons : \n",
 | 
						||
    "    print(\"first merge events and seasons : \")\n",
 | 
						||
    "    events = load_dataset(\"1events.csv\")\n",
 | 
						||
    "    seasons = load_dataset(\"1seasons.csv\")\n",
 | 
						||
    "\n",
 | 
						||
    "    # Drop useless columns\n",
 | 
						||
    "    events = events.drop(columns = ['manual_added', 'is_display'])\n",
 | 
						||
    "    seasons = seasons.drop(columns = ['start_date_time'])\n",
 | 
						||
    "        \n",
 | 
						||
    "    events_theme = events.merge(seasons, how = 'left', left_on = 'season_id', right_on = 'id', suffixes=('_events', '_seasons'))\n",
 | 
						||
    "\n",
 | 
						||
    "    # Secondly merge events_theme and event_types\n",
 | 
						||
    "    print(\"Secondly merge events_theme and event_types : \")\n",
 | 
						||
    "    event_types = load_dataset(\"1event_types.csv\")\n",
 | 
						||
    "    event_types = event_types.drop(columns = ['fidelity_delay'])\n",
 | 
						||
    "    \n",
 | 
						||
    "    events_theme = events_theme.merge(event_types, how = 'left', left_on = 'event_type_id', right_on = 'id', suffixes=('_events', '_event_type'))\n",
 | 
						||
    "    events_theme = events_theme.rename(columns = {\"name\" : \"name_event_types\"})\n",
 | 
						||
    "    events_theme = events_theme.drop(columns = 'id')\n",
 | 
						||
    "\n",
 | 
						||
    "    # thirdly merge events_theme and facilities\n",
 | 
						||
    "    print(\"thirdly merge events_theme and facilities : \")\n",
 | 
						||
    "    facilities = load_dataset(\"1facilities.csv\")\n",
 | 
						||
    "    facilities = facilities.drop(columns = ['fixed_capacity'])\n",
 | 
						||
    "    \n",
 | 
						||
    "    events_theme = events_theme.merge(facilities, how = 'left', left_on = 'facility_id', right_on = 'id', suffixes=('_events', '_facility'))\n",
 | 
						||
    "    events_theme = events_theme.rename(columns = {\"name\" : \"name_facilities\", \"id_events\" : \"event_id\"})\n",
 | 
						||
    "    events_theme = events_theme.drop(columns = 'id')\n",
 | 
						||
    "\n",
 | 
						||
    "    # Index cleaning\n",
 | 
						||
    "    events_theme = events_theme.drop(columns = ['id_seasons'])\n",
 | 
						||
    "    events_theme  = order_columns_id(events_theme)\n",
 | 
						||
    "    return events_theme\n",
 | 
						||
    "\n",
 | 
						||
    "\n",
 | 
						||
    "def create_representations_table():\n",
 | 
						||
    "    representations = load_dataset(\"1representations.csv\")\n",
 | 
						||
    "    representations = representations.drop(columns = ['serial', 'open', 'satisfaction', 'is_display', 'expected_filling',\n",
 | 
						||
    "                                                     'max_filling', 'extra_field', 'start_date_time', 'end_date_time', 'name',\n",
 | 
						||
    "                                                     'representation_type_id'])\n",
 | 
						||
    "    \n",
 | 
						||
    "    representations_capacity = load_dataset(\"1representation_category_capacities.csv\")\n",
 | 
						||
    "    representations_capacity = representations_capacity.drop(columns = ['expected_filling', 'max_filling'])\n",
 | 
						||
    "\n",
 | 
						||
    "    representations_theme = representations.merge(representations_capacity, how='left',\n",
 | 
						||
    "                                                  left_on='id', right_on='representation_id',\n",
 | 
						||
    "                                                  suffixes=('_representation', '_representation_cap'))\n",
 | 
						||
    "    # index cleaning\n",
 | 
						||
    "    representations_theme = representations_theme.drop(columns = [\"id_representation\"])\n",
 | 
						||
    "    representations_theme = order_columns_id(representations_theme)\n",
 | 
						||
    "    return representations_theme"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 19,
 | 
						||
   "id": "0fccc8ef-e575-4857-a401-94a7274394df",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stdout",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "first merge products and categories\n",
 | 
						||
      "File path :  bdc2324-data/1/1products.csv\n",
 | 
						||
      "Shape :  (94803, 14)\n",
 | 
						||
      "Number of columns :  12\n",
 | 
						||
      "Columns :  Index(['id', 'representation_id', 'pricing_formula_id', 'category_id',\n",
 | 
						||
      "       'products_group_id', 'product_pack_id', 'identifier', 'amount',\n",
 | 
						||
      "       'is_full_price', 'apply_price', 'extra_field', 'amount_consumption'],\n",
 | 
						||
      "      dtype='object')\n",
 | 
						||
      "File path :  bdc2324-data/1/1categories.csv\n",
 | 
						||
      "Shape :  (27, 7)\n",
 | 
						||
      "Number of columns :  5\n",
 | 
						||
      "Columns :  Index(['id', 'identifier', 'name', 'extra_field', 'quota'], dtype='object')\n",
 | 
						||
      "Second merge products_theme and type of categories\n",
 | 
						||
      "File path :  bdc2324-data/1/1type_of_categories.csv\n",
 | 
						||
      "Shape :  (5, 6)\n",
 | 
						||
      "Number of columns :  4\n",
 | 
						||
      "Columns :  Index(['id', 'type_of_id', 'category_id', 'identifier'], dtype='object')\n"
 | 
						||
     ]
 | 
						||
    },
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>id_products</th>\n",
 | 
						||
       "      <th>representation_id</th>\n",
 | 
						||
       "      <th>pricing_formula_id</th>\n",
 | 
						||
       "      <th>category_id</th>\n",
 | 
						||
       "      <th>products_group_id</th>\n",
 | 
						||
       "      <th>product_pack_id</th>\n",
 | 
						||
       "      <th>type_of_id</th>\n",
 | 
						||
       "      <th>amount</th>\n",
 | 
						||
       "      <th>is_full_price</th>\n",
 | 
						||
       "      <th>name_categories</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>10682</td>\n",
 | 
						||
       "      <td>914</td>\n",
 | 
						||
       "      <td>114</td>\n",
 | 
						||
       "      <td>41</td>\n",
 | 
						||
       "      <td>10655</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>9.0</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>indiv activité tr</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>478</td>\n",
 | 
						||
       "      <td>273</td>\n",
 | 
						||
       "      <td>131</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>471</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>12.0</td>\n",
 | 
						||
       "      <td>9.5</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>indiv entrées tp</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>20873</td>\n",
 | 
						||
       "      <td>275</td>\n",
 | 
						||
       "      <td>137</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>20825</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>12.0</td>\n",
 | 
						||
       "      <td>11.5</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>indiv entrées tp</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>157142</td>\n",
 | 
						||
       "      <td>82519</td>\n",
 | 
						||
       "      <td>9</td>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "      <td>156773</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>8.0</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>indiv entrées tr</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>1341</td>\n",
 | 
						||
       "      <td>9</td>\n",
 | 
						||
       "      <td>93</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1175</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>12.0</td>\n",
 | 
						||
       "      <td>8.5</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>indiv entrées tp</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "   id_products  representation_id  pricing_formula_id  category_id  \\\n",
 | 
						||
       "0        10682                914                 114           41   \n",
 | 
						||
       "1          478                273                 131            1   \n",
 | 
						||
       "2        20873                275                 137            1   \n",
 | 
						||
       "3       157142              82519                   9            5   \n",
 | 
						||
       "4         1341                  9                  93            1   \n",
 | 
						||
       "\n",
 | 
						||
       "   products_group_id  product_pack_id  type_of_id  amount  is_full_price  \\\n",
 | 
						||
       "0              10655                1         NaN     9.0          False   \n",
 | 
						||
       "1                471                1        12.0     9.5          False   \n",
 | 
						||
       "2              20825                1        12.0    11.5          False   \n",
 | 
						||
       "3             156773                1         NaN     8.0          False   \n",
 | 
						||
       "4               1175                1        12.0     8.5          False   \n",
 | 
						||
       "\n",
 | 
						||
       "     name_categories  \n",
 | 
						||
       "0  indiv activité tr  \n",
 | 
						||
       "1   indiv entrées tp  \n",
 | 
						||
       "2   indiv entrées tp  \n",
 | 
						||
       "3   indiv entrées tr  \n",
 | 
						||
       "4   indiv entrées tp  "
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 19,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "products_theme = create_products_table()\n",
 | 
						||
    "products_theme.head()"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 20,
 | 
						||
   "id": "779d8aaf-6668-4f66-8852-847304407ea3",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stdout",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "first merge events and seasons : \n",
 | 
						||
      "File path :  bdc2324-data/1/1events.csv\n",
 | 
						||
      "Shape :  (1232, 12)\n",
 | 
						||
      "Number of columns :  10\n",
 | 
						||
      "Columns :  Index(['id', 'season_id', 'facility_id', 'event_type_id', 'event_type_key_id',\n",
 | 
						||
      "       'facility_key_id', 'identifier', 'name', 'manual_added', 'is_display'],\n",
 | 
						||
      "      dtype='object')\n",
 | 
						||
      "File path :  bdc2324-data/1/1seasons.csv\n",
 | 
						||
      "Shape :  (13, 6)\n",
 | 
						||
      "Number of columns :  4\n",
 | 
						||
      "Columns :  Index(['id', 'identifier', 'name', 'start_date_time'], dtype='object')\n",
 | 
						||
      "Secondly merge events_theme and event_types : \n",
 | 
						||
      "File path :  bdc2324-data/1/1event_types.csv\n",
 | 
						||
      "Shape :  (9, 6)\n",
 | 
						||
      "Number of columns :  4\n",
 | 
						||
      "Columns :  Index(['id', 'fidelity_delay', 'identifier', 'name'], dtype='object')\n",
 | 
						||
      "thirdly merge events_theme and facilities : \n",
 | 
						||
      "File path :  bdc2324-data/1/1facilities.csv\n",
 | 
						||
      "Shape :  (2, 7)\n",
 | 
						||
      "Number of columns :  5\n",
 | 
						||
      "Columns :  Index(['id', 'street_id', 'identifier', 'name', 'fixed_capacity'], dtype='object')\n"
 | 
						||
     ]
 | 
						||
    },
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>event_id</th>\n",
 | 
						||
       "      <th>season_id</th>\n",
 | 
						||
       "      <th>facility_id</th>\n",
 | 
						||
       "      <th>event_type_id</th>\n",
 | 
						||
       "      <th>event_type_key_id</th>\n",
 | 
						||
       "      <th>facility_key_id</th>\n",
 | 
						||
       "      <th>street_id</th>\n",
 | 
						||
       "      <th>name_events</th>\n",
 | 
						||
       "      <th>name_seasons</th>\n",
 | 
						||
       "      <th>name_event_types</th>\n",
 | 
						||
       "      <th>name_facilities</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>192</td>\n",
 | 
						||
       "      <td>16</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>frontières</td>\n",
 | 
						||
       "      <td>2018</td>\n",
 | 
						||
       "      <td>spectacle vivant</td>\n",
 | 
						||
       "      <td>mucem</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>30329</td>\n",
 | 
						||
       "      <td>2767</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>visite guidée une autre histoire du monde (1h00)</td>\n",
 | 
						||
       "      <td>2023</td>\n",
 | 
						||
       "      <td>offre muséale groupe</td>\n",
 | 
						||
       "      <td>mucem</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>161</td>\n",
 | 
						||
       "      <td>16</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>visite contée les chercheurs d'or indiv</td>\n",
 | 
						||
       "      <td>2018</td>\n",
 | 
						||
       "      <td>offre muséale individuel</td>\n",
 | 
						||
       "      <td>mucem</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>5957</td>\n",
 | 
						||
       "      <td>582</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>we dreamt of utopia and we woke up screaming.</td>\n",
 | 
						||
       "      <td>2021</td>\n",
 | 
						||
       "      <td>spectacle vivant</td>\n",
 | 
						||
       "      <td>mucem</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>8337</td>\n",
 | 
						||
       "      <td>582</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>jeff koons épisodes 4</td>\n",
 | 
						||
       "      <td>2021</td>\n",
 | 
						||
       "      <td>spectacle vivant</td>\n",
 | 
						||
       "      <td>mucem</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "   event_id  season_id  facility_id  event_type_id  event_type_key_id  \\\n",
 | 
						||
       "0       192         16            1              4                  4   \n",
 | 
						||
       "1     30329       2767            1              5                  5   \n",
 | 
						||
       "2       161         16            1              2                  2   \n",
 | 
						||
       "3      5957        582            1              4                  4   \n",
 | 
						||
       "4      8337        582            1              4                  4   \n",
 | 
						||
       "\n",
 | 
						||
       "   facility_key_id  street_id  \\\n",
 | 
						||
       "0                1          1   \n",
 | 
						||
       "1                1          1   \n",
 | 
						||
       "2                1          1   \n",
 | 
						||
       "3                1          1   \n",
 | 
						||
       "4                1          1   \n",
 | 
						||
       "\n",
 | 
						||
       "                                        name_events name_seasons  \\\n",
 | 
						||
       "0                                        frontières         2018   \n",
 | 
						||
       "1  visite guidée une autre histoire du monde (1h00)         2023   \n",
 | 
						||
       "2           visite contée les chercheurs d'or indiv         2018   \n",
 | 
						||
       "3     we dreamt of utopia and we woke up screaming.         2021   \n",
 | 
						||
       "4                             jeff koons épisodes 4         2021   \n",
 | 
						||
       "\n",
 | 
						||
       "           name_event_types name_facilities  \n",
 | 
						||
       "0          spectacle vivant           mucem  \n",
 | 
						||
       "1      offre muséale groupe           mucem  \n",
 | 
						||
       "2  offre muséale individuel           mucem  \n",
 | 
						||
       "3          spectacle vivant           mucem  \n",
 | 
						||
       "4          spectacle vivant           mucem  "
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 20,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "events_theme= create_events_table()\n",
 | 
						||
    "events_theme.head()"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 21,
 | 
						||
   "id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stdout",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "File path :  bdc2324-data/1/1representations.csv\n",
 | 
						||
      "Shape :  (36095, 16)\n",
 | 
						||
      "Number of columns :  14\n",
 | 
						||
      "Columns :  Index(['id', 'event_id', 'representation_type_id', 'identifier', 'serial',\n",
 | 
						||
      "       'start_date_time', 'open', 'satisfaction', 'end_date_time', 'name',\n",
 | 
						||
      "       'is_display', 'expected_filling', 'max_filling', 'extra_field'],\n",
 | 
						||
      "      dtype='object')\n",
 | 
						||
      "File path :  bdc2324-data/1/1representation_category_capacities.csv\n",
 | 
						||
      "Shape :  (65241, 7)\n",
 | 
						||
      "Number of columns :  5\n",
 | 
						||
      "Columns :  Index(['id', 'representation_id', 'category_id', 'expected_filling',\n",
 | 
						||
      "       'max_filling'],\n",
 | 
						||
      "      dtype='object')\n"
 | 
						||
     ]
 | 
						||
    },
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>event_id</th>\n",
 | 
						||
       "      <th>id_representation_cap</th>\n",
 | 
						||
       "      <th>representation_id</th>\n",
 | 
						||
       "      <th>category_id</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>12384</td>\n",
 | 
						||
       "      <td>123058</td>\n",
 | 
						||
       "      <td>84820</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>37</td>\n",
 | 
						||
       "      <td>2514</td>\n",
 | 
						||
       "      <td>269</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>37</td>\n",
 | 
						||
       "      <td>384</td>\n",
 | 
						||
       "      <td>269</td>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>37</td>\n",
 | 
						||
       "      <td>2515</td>\n",
 | 
						||
       "      <td>269</td>\n",
 | 
						||
       "      <td>10</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>37</td>\n",
 | 
						||
       "      <td>383</td>\n",
 | 
						||
       "      <td>269</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "   event_id  id_representation_cap  representation_id  category_id\n",
 | 
						||
       "0     12384                 123058              84820            2\n",
 | 
						||
       "1        37                   2514                269            2\n",
 | 
						||
       "2        37                    384                269            5\n",
 | 
						||
       "3        37                   2515                269           10\n",
 | 
						||
       "4        37                    383                269            1"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 21,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "representation_theme = create_representations_table()\n",
 | 
						||
    "representation_theme.head()"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "8fa191d5-c867-4d4d-bbab-f29d7d91ce6a",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "Create uniform product database "
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 22,
 | 
						||
   "id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "def uniform_product_df():\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    This function returns the uniform product dataset\n",
 | 
						||
    "    \"\"\"\n",
 | 
						||
    "    print(\"Products theme columns : \", products_theme.columns)\n",
 | 
						||
    "    print(\"\\n Representation theme columns : \", representation_theme.columns)\n",
 | 
						||
    "    print(\"\\n Events theme columns : \", events_theme.columns)\n",
 | 
						||
    "\n",
 | 
						||
    "    products_global = products_theme.merge(representation_theme, how='left',\n",
 | 
						||
    "                                           on= [\"representation_id\", \"category_id\"])\n",
 | 
						||
    "    \n",
 | 
						||
    "    products_global = products_global.merge(events_theme, how='left', on='event_id',\n",
 | 
						||
    "                                            suffixes = (\"_representation\", \"_event\"))\n",
 | 
						||
    "    \n",
 | 
						||
    "    products_global = order_columns_id(products_global)\n",
 | 
						||
    "\n",
 | 
						||
    "    # remove useless columns \n",
 | 
						||
    "    products_global = products_global.drop(columns = ['type_of_id']) # 'name_events', 'name_seasons', 'name_categories'\n",
 | 
						||
    "    return products_global"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 23,
 | 
						||
   "id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stdout",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "Products theme columns :  Index(['id_products', 'representation_id', 'pricing_formula_id', 'category_id',\n",
 | 
						||
      "       'products_group_id', 'product_pack_id', 'type_of_id', 'amount',\n",
 | 
						||
      "       'is_full_price', 'name_categories'],\n",
 | 
						||
      "      dtype='object')\n",
 | 
						||
      "\n",
 | 
						||
      " Representation theme columns :  Index(['event_id', 'id_representation_cap', 'representation_id',\n",
 | 
						||
      "       'category_id'],\n",
 | 
						||
      "      dtype='object')\n",
 | 
						||
      "\n",
 | 
						||
      " Events theme columns :  Index(['event_id', 'season_id', 'facility_id', 'event_type_id',\n",
 | 
						||
      "       'event_type_key_id', 'facility_key_id', 'street_id', 'name_events',\n",
 | 
						||
      "       'name_seasons', 'name_event_types', 'name_facilities'],\n",
 | 
						||
      "      dtype='object')\n"
 | 
						||
     ]
 | 
						||
    },
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>id_products</th>\n",
 | 
						||
       "      <th>representation_id</th>\n",
 | 
						||
       "      <th>pricing_formula_id</th>\n",
 | 
						||
       "      <th>category_id</th>\n",
 | 
						||
       "      <th>products_group_id</th>\n",
 | 
						||
       "      <th>product_pack_id</th>\n",
 | 
						||
       "      <th>event_id</th>\n",
 | 
						||
       "      <th>id_representation_cap</th>\n",
 | 
						||
       "      <th>season_id</th>\n",
 | 
						||
       "      <th>facility_id</th>\n",
 | 
						||
       "      <th>...</th>\n",
 | 
						||
       "      <th>event_type_key_id</th>\n",
 | 
						||
       "      <th>facility_key_id</th>\n",
 | 
						||
       "      <th>street_id</th>\n",
 | 
						||
       "      <th>amount</th>\n",
 | 
						||
       "      <th>is_full_price</th>\n",
 | 
						||
       "      <th>name_categories</th>\n",
 | 
						||
       "      <th>name_events</th>\n",
 | 
						||
       "      <th>name_seasons</th>\n",
 | 
						||
       "      <th>name_event_types</th>\n",
 | 
						||
       "      <th>name_facilities</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>10682</td>\n",
 | 
						||
       "      <td>914</td>\n",
 | 
						||
       "      <td>114</td>\n",
 | 
						||
       "      <td>41</td>\n",
 | 
						||
       "      <td>10655</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>132</td>\n",
 | 
						||
       "      <td>8789</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>9.0</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>indiv activité tr</td>\n",
 | 
						||
       "      <td>visite-jeu \"le classico des minots\" (1h30)</td>\n",
 | 
						||
       "      <td>2017</td>\n",
 | 
						||
       "      <td>offre muséale individuel</td>\n",
 | 
						||
       "      <td>mucem</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>478</td>\n",
 | 
						||
       "      <td>273</td>\n",
 | 
						||
       "      <td>131</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>471</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>37</td>\n",
 | 
						||
       "      <td>390</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>9.5</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>indiv entrées tp</td>\n",
 | 
						||
       "      <td>billet mucem picasso</td>\n",
 | 
						||
       "      <td>2016</td>\n",
 | 
						||
       "      <td>offre muséale individuel</td>\n",
 | 
						||
       "      <td>mucem</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>20873</td>\n",
 | 
						||
       "      <td>275</td>\n",
 | 
						||
       "      <td>137</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>20825</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>37</td>\n",
 | 
						||
       "      <td>395</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>11.5</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>indiv entrées tp</td>\n",
 | 
						||
       "      <td>billet mucem picasso</td>\n",
 | 
						||
       "      <td>2016</td>\n",
 | 
						||
       "      <td>offre muséale individuel</td>\n",
 | 
						||
       "      <td>mucem</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>157142</td>\n",
 | 
						||
       "      <td>82519</td>\n",
 | 
						||
       "      <td>9</td>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "      <td>156773</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>12365</td>\n",
 | 
						||
       "      <td>120199</td>\n",
 | 
						||
       "      <td>1754</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>8.0</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>indiv entrées tr</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>offre muséale individuel</td>\n",
 | 
						||
       "      <td>mucem</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>1341</td>\n",
 | 
						||
       "      <td>9</td>\n",
 | 
						||
       "      <td>93</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1175</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>8</td>\n",
 | 
						||
       "      <td>21</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>6</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>8.5</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>indiv entrées tp</td>\n",
 | 
						||
       "      <td>non défini</td>\n",
 | 
						||
       "      <td>2017</td>\n",
 | 
						||
       "      <td>non défini</td>\n",
 | 
						||
       "      <td>mucem</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "<p>5 rows × 21 columns</p>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "   id_products  representation_id  pricing_formula_id  category_id  \\\n",
 | 
						||
       "0        10682                914                 114           41   \n",
 | 
						||
       "1          478                273                 131            1   \n",
 | 
						||
       "2        20873                275                 137            1   \n",
 | 
						||
       "3       157142              82519                   9            5   \n",
 | 
						||
       "4         1341                  9                  93            1   \n",
 | 
						||
       "\n",
 | 
						||
       "   products_group_id  product_pack_id  event_id  id_representation_cap  \\\n",
 | 
						||
       "0              10655                1       132                   8789   \n",
 | 
						||
       "1                471                1        37                    390   \n",
 | 
						||
       "2              20825                1        37                    395   \n",
 | 
						||
       "3             156773                1     12365                 120199   \n",
 | 
						||
       "4               1175                1         8                     21   \n",
 | 
						||
       "\n",
 | 
						||
       "   season_id  facility_id  ...  event_type_key_id  facility_key_id  street_id  \\\n",
 | 
						||
       "0          4            1  ...                  5                1          1   \n",
 | 
						||
       "1          2            1  ...                  2                1          1   \n",
 | 
						||
       "2          2            1  ...                  2                1          1   \n",
 | 
						||
       "3       1754            1  ...                  4                1          1   \n",
 | 
						||
       "4          4            1  ...                  6                1          1   \n",
 | 
						||
       "\n",
 | 
						||
       "   amount  is_full_price    name_categories  \\\n",
 | 
						||
       "0     9.0          False  indiv activité tr   \n",
 | 
						||
       "1     9.5          False   indiv entrées tp   \n",
 | 
						||
       "2    11.5          False   indiv entrées tp   \n",
 | 
						||
       "3     8.0          False   indiv entrées tr   \n",
 | 
						||
       "4     8.5          False   indiv entrées tp   \n",
 | 
						||
       "\n",
 | 
						||
       "                                  name_events name_seasons  \\\n",
 | 
						||
       "0  visite-jeu \"le classico des minots\" (1h30)         2017   \n",
 | 
						||
       "1                        billet mucem picasso         2016   \n",
 | 
						||
       "2                        billet mucem picasso         2016   \n",
 | 
						||
       "3                                         NaN          NaN   \n",
 | 
						||
       "4                                  non défini         2017   \n",
 | 
						||
       "\n",
 | 
						||
       "           name_event_types name_facilities  \n",
 | 
						||
       "0  offre muséale individuel           mucem  \n",
 | 
						||
       "1  offre muséale individuel           mucem  \n",
 | 
						||
       "2  offre muséale individuel           mucem  \n",
 | 
						||
       "3  offre muséale individuel           mucem  \n",
 | 
						||
       "4                non défini           mucem  \n",
 | 
						||
       "\n",
 | 
						||
       "[5 rows x 21 columns]"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 23,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "products_global = uniform_product_df()\n",
 | 
						||
    "products_global.head()"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 24,
 | 
						||
   "id": "98f78cd5-b694-4cc6-b033-20170aa13e8d",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "# Fusion liée au product\n",
 | 
						||
    "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n",
 | 
						||
    "\n",
 | 
						||
    "# Selection des variables d'intérêts\n",
 | 
						||
    "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id',  'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "d7c3668a-c016-4bd0-837e-04af328ff14f",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "# Construction des variables explicatives"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "314f1b7f-ae48-4c6f-8469-9ce879043243",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## KPI campaigns"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 28,
 | 
						||
   "id": "e2c88552-b863-47a2-be23-8d2898fb28bc",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "def campaigns_kpi_function(campaigns_information = None):\n",
 | 
						||
    "    # Nombre de campagnes de mails\n",
 | 
						||
    "    nb_campaigns = campaigns_information[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n",
 | 
						||
    "    nb_campaigns.rename(columns = {'campaign_name' : 'nb_campaigns'}, inplace = True)\n",
 | 
						||
    "    # Temps d'ouverture en min moyen    \n",
 | 
						||
    "    campaigns_information['time_to_open'] = campaigns_information['opened_at'] - campaigns_information['delivered_at']\n",
 | 
						||
    "    time_to_open = campaigns_information[['customer_id', 'time_to_open']].groupby('customer_id').mean().reset_index()\n",
 | 
						||
    "\n",
 | 
						||
    "    # Nombre de mail ouvert    \n",
 | 
						||
    "    opened_campaign = campaigns_information[['customer_id', 'campaign_name', 'opened_at']]\n",
 | 
						||
    "    opened_campaign.dropna(subset=['opened_at'], inplace=True)\n",
 | 
						||
    "    opened_campaign = opened_campaign[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n",
 | 
						||
    "    opened_campaign.rename(columns = {'campaign_name' : 'nb_campaigns_opened' }, inplace = True)\n",
 | 
						||
    "\n",
 | 
						||
    "    # Fusion des indicateurs\n",
 | 
						||
    "    campaigns_reduced = pd.merge(nb_campaigns, opened_campaign, on = 'customer_id', how = 'left')\n",
 | 
						||
    "    campaigns_reduced = pd.merge(campaigns_reduced, time_to_open, on = 'customer_id', how = 'left')\n",
 | 
						||
    "\n",
 | 
						||
    "    # Remplir les NaN : nb_campaigns_opened\n",
 | 
						||
    "    campaigns_reduced['nb_campaigns_opened'].fillna(0, inplace=True)\n",
 | 
						||
    "\n",
 | 
						||
    "    # Remplir les NaT : time_to_open (??)\n",
 | 
						||
    "\n",
 | 
						||
    "    return campaigns_reduced\n",
 | 
						||
    "    "
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 29,
 | 
						||
   "id": "24537647-bc29-4777-9848-ac4120a4aa60",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stderr",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "/tmp/ipykernel_42764/3700263836.py:11: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  opened_campaign.dropna(subset=['opened_at'], inplace=True)\n"
 | 
						||
     ]
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_campaigns_kpi = campaigns_kpi_function(campaigns_information = df1_campaigns_information) "
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 30,
 | 
						||
   "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>customer_id</th>\n",
 | 
						||
       "      <th>nb_campaigns</th>\n",
 | 
						||
       "      <th>nb_campaigns_opened</th>\n",
 | 
						||
       "      <th>time_to_open</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>0.0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>3</td>\n",
 | 
						||
       "      <td>222</td>\n",
 | 
						||
       "      <td>124.0</td>\n",
 | 
						||
       "      <td>1 days 00:28:30.169354838</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>7</td>\n",
 | 
						||
       "      <td>7.0</td>\n",
 | 
						||
       "      <td>1 days 04:31:01.428571428</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>0.0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>6</td>\n",
 | 
						||
       "      <td>20</td>\n",
 | 
						||
       "      <td>0.0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "   customer_id  nb_campaigns  nb_campaigns_opened              time_to_open\n",
 | 
						||
       "0            2             4                  0.0                       NaT\n",
 | 
						||
       "1            3           222                124.0 1 days 00:28:30.169354838\n",
 | 
						||
       "2            4             7                  7.0 1 days 04:31:01.428571428\n",
 | 
						||
       "3            5             4                  0.0                       NaT\n",
 | 
						||
       "4            6            20                  0.0                       NaT"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 30,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_campaigns_kpi.head()"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "d4dcfbe0-c6ce-497e-b75e-dc9e938801b2",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## KPI tickets"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 31,
 | 
						||
   "id": "b913a69e-3146-4919-b5f6-a6108532bffa",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/plain": [
 | 
						||
       "array(['spectacle vivant', 'offre muséale individuel', 'formule adhésion',\n",
 | 
						||
       "       'offre muséale groupe'], dtype=object)"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 31,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_products_purchased_reduced['name_event_types'].unique()"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 32,
 | 
						||
   "id": "2bda0b97-b28b-4070-a57d-aeab0e2f7dfe",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "# Nombre de client assistant à plus de 2 type d'événement\n",
 | 
						||
    "nb_event_types = df1_products_purchased_reduced[['customer_id', 'name_event_types']].groupby('customer_id').nunique()"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 33,
 | 
						||
   "id": "043303fe-e90f-4689-a2a9-5d690555a045",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "def tickets_kpi_function(tickets_information = None):\n",
 | 
						||
    "\n",
 | 
						||
    "    tickets_information_copy = tickets_information.copy()\n",
 | 
						||
    "\n",
 | 
						||
    "    # Dummy : Canal de vente en ligne\n",
 | 
						||
    "    liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online'] # vad = vente à distance\n",
 | 
						||
    "    tickets_information_copy['vente_internet'] = tickets_information_copy['supplier_name'].str.contains('|'.join(liste_mots), case=False).astype(int)\n",
 | 
						||
    "\n",
 | 
						||
    "    # Proportion de vente en ligne\n",
 | 
						||
    "    prop_vente_internet = tickets_information_copy[tickets_information_copy['vente_internet'] == 1].groupby(['customer_id', 'event_type_id'])['ticket_id'].count().reset_index()\n",
 | 
						||
    "    prop_vente_internet.rename(columns = {'ticket_id' : 'nb_tickets_internet'}, inplace = True)\n",
 | 
						||
    "    \n",
 | 
						||
    "    tickets_kpi = (tickets_information_copy[['event_type_id', 'customer_id', 'ticket_id','supplier_name', 'purchase_date', 'amount', 'vente_internet']]\n",
 | 
						||
    "                   .groupby(['customer_id', 'event_type_id']) \n",
 | 
						||
    "                   .agg({'ticket_id': 'count', \n",
 | 
						||
    "                         'amount' : 'sum',\n",
 | 
						||
    "                         'supplier_name': 'nunique',\n",
 | 
						||
    "                         'vente_internet' : 'max',\n",
 | 
						||
    "                         'purchase_date' : ['min', 'max']})\n",
 | 
						||
    "                   .reset_index()\n",
 | 
						||
    "                  )\n",
 | 
						||
    "    \n",
 | 
						||
    "    tickets_kpi.columns = tickets_kpi.columns.map('_'.join)\n",
 | 
						||
    "    \n",
 | 
						||
    "    tickets_kpi.rename(columns = {'ticket_id_count' : 'nb_tickets', \n",
 | 
						||
    "                                  'amount_sum' : 'total_amount',\n",
 | 
						||
    "                                  'supplier_name_nunique' : 'nb_suppliers', \n",
 | 
						||
    "                                  'customer_id_' : 'customer_id',\n",
 | 
						||
    "                                  'event_type_id_' : 'event_type_id'}, inplace = True)\n",
 | 
						||
    "    \n",
 | 
						||
    "    tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']\n",
 | 
						||
    "\n",
 | 
						||
    "    tickets_kpi = tickets_kpi.merge(prop_vente_internet, on = ['customer_id', 'event_type_id'], how = 'left')\n",
 | 
						||
    "    tickets_kpi['nb_tickets_internet'] = tickets_kpi['nb_tickets_internet'].fillna(0)\n",
 | 
						||
    "        \n",
 | 
						||
    "    return tickets_kpi\n",
 | 
						||
    "    "
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 34,
 | 
						||
   "id": "5882234a-1ed5-4269-87a6-0d75613476e3",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_products_purchased_reduced)"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "597b241e-a83d-4b7c-8ad7-eec50295dff2",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "#### Exportation"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 35,
 | 
						||
   "id": "a4a2311d-8a72-4030-afd5-218004d5d2a5",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "# Exportation vers 'projet-bdc2324-team1'\n",
 | 
						||
    "BUCKET_OUT = \"projet-bdc2324-team1\"\n",
 | 
						||
    "FILE_KEY_OUT_S3 = \"0_Temp/Company 1 - Purchasing behaviour.csv\"\n",
 | 
						||
    "FILE_PATH_OUT_S3 = BUCKET_OUT + \"/\" + FILE_KEY_OUT_S3\n",
 | 
						||
    "\n",
 | 
						||
    "with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
 | 
						||
    "    df1_tickets_kpi.to_csv(file_out, index = False)"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 36,
 | 
						||
   "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>customer_id</th>\n",
 | 
						||
       "      <th>event_type_id</th>\n",
 | 
						||
       "      <th>nb_tickets</th>\n",
 | 
						||
       "      <th>total_amount</th>\n",
 | 
						||
       "      <th>nb_suppliers</th>\n",
 | 
						||
       "      <th>vente_internet_max</th>\n",
 | 
						||
       "      <th>purchase_date_min</th>\n",
 | 
						||
       "      <th>purchase_date_max</th>\n",
 | 
						||
       "      <th>time_between_purchase</th>\n",
 | 
						||
       "      <th>nb_tickets_internet</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>453242</td>\n",
 | 
						||
       "      <td>3248965.5</td>\n",
 | 
						||
       "      <td>6</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>2013-09-23 14:45:01+00:00</td>\n",
 | 
						||
       "      <td>2023-11-03 14:11:01+00:00</td>\n",
 | 
						||
       "      <td>3692 days 23:26:00</td>\n",
 | 
						||
       "      <td>2988.0</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>384226</td>\n",
 | 
						||
       "      <td>2686540.5</td>\n",
 | 
						||
       "      <td>7</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>2014-12-03 14:55:37+00:00</td>\n",
 | 
						||
       "      <td>2023-11-04 15:12:16+00:00</td>\n",
 | 
						||
       "      <td>3258 days 00:16:39</td>\n",
 | 
						||
       "      <td>51.0</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>6</td>\n",
 | 
						||
       "      <td>217356</td>\n",
 | 
						||
       "      <td>1435871.5</td>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>2017-01-01 02:20:08+00:00</td>\n",
 | 
						||
       "      <td>2019-12-31 02:20:06+00:00</td>\n",
 | 
						||
       "      <td>1093 days 23:59:58</td>\n",
 | 
						||
       "      <td>5.0</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "      <td>201750</td>\n",
 | 
						||
       "      <td>1459190.0</td>\n",
 | 
						||
       "      <td>6</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>2013-06-10 10:37:58+00:00</td>\n",
 | 
						||
       "      <td>2023-11-08 15:59:45+00:00</td>\n",
 | 
						||
       "      <td>3803 days 05:21:47</td>\n",
 | 
						||
       "      <td>9.0</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>5032</th>\n",
 | 
						||
       "      <td>6733</td>\n",
 | 
						||
       "      <td>6</td>\n",
 | 
						||
       "      <td>14208</td>\n",
 | 
						||
       "      <td>0.0</td>\n",
 | 
						||
       "      <td>3</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>2017-01-11 15:00:54+00:00</td>\n",
 | 
						||
       "      <td>2019-11-27 09:47:06+00:00</td>\n",
 | 
						||
       "      <td>1049 days 18:46:12</td>\n",
 | 
						||
       "      <td>13497.0</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "      customer_id  event_type_id  nb_tickets  total_amount  nb_suppliers  \\\n",
 | 
						||
       "1               1              4      453242     3248965.5             6   \n",
 | 
						||
       "0               1              2      384226     2686540.5             7   \n",
 | 
						||
       "3               1              6      217356     1435871.5             5   \n",
 | 
						||
       "2               1              5      201750     1459190.0             6   \n",
 | 
						||
       "5032         6733              6       14208           0.0             3   \n",
 | 
						||
       "\n",
 | 
						||
       "      vente_internet_max         purchase_date_min         purchase_date_max  \\\n",
 | 
						||
       "1                      1 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00   \n",
 | 
						||
       "0                      1 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00   \n",
 | 
						||
       "3                      1 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00   \n",
 | 
						||
       "2                      1 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00   \n",
 | 
						||
       "5032                   1 2017-01-11 15:00:54+00:00 2019-11-27 09:47:06+00:00   \n",
 | 
						||
       "\n",
 | 
						||
       "     time_between_purchase  nb_tickets_internet  \n",
 | 
						||
       "1       3692 days 23:26:00               2988.0  \n",
 | 
						||
       "0       3258 days 00:16:39                 51.0  \n",
 | 
						||
       "3       1093 days 23:59:58                  5.0  \n",
 | 
						||
       "2       3803 days 05:21:47                  9.0  \n",
 | 
						||
       "5032    1049 days 18:46:12              13497.0  "
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 36,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_tickets_kpi.sort_values(by='nb_tickets', ascending=False).head(5)"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "f1d7f7ba-361b-467d-b375-b09c149185f7",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Alexis' work"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 37,
 | 
						||
   "id": "4ab1c0d2-0097-4669-b984-b6822c976740",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>event_type_id</th>\n",
 | 
						||
       "      <th>avg_amount</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>6.150659</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>7.762474</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "      <td>4.452618</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>6</td>\n",
 | 
						||
       "      <td>6.439463</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "   event_type_id  avg_amount\n",
 | 
						||
       "0              2    6.150659\n",
 | 
						||
       "1              4    7.762474\n",
 | 
						||
       "2              5    4.452618\n",
 | 
						||
       "3              6    6.439463"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 37,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "avg_amount =  (df1_products_purchased_reduced.groupby([\"event_type_id\"])\n",
 | 
						||
    "              .agg({\"amount\" : \"mean\"}).reset_index()\n",
 | 
						||
    "              .rename(columns = {'amount' : 'avg_amount'}))\n",
 | 
						||
    "\n",
 | 
						||
    "avg_amount"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 38,
 | 
						||
   "id": "a9c62b39-389e-4dac-89a6-ac8a59fea58a",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>customer_id</th>\n",
 | 
						||
       "      <th>event_type_id</th>\n",
 | 
						||
       "      <th>nb_tickets</th>\n",
 | 
						||
       "      <th>avg_amount</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>384226</td>\n",
 | 
						||
       "      <td>6.150659</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>453242</td>\n",
 | 
						||
       "      <td>7.762474</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "      <td>201750</td>\n",
 | 
						||
       "      <td>4.452618</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>6</td>\n",
 | 
						||
       "      <td>217356</td>\n",
 | 
						||
       "      <td>6.439463</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>143</td>\n",
 | 
						||
       "      <td>6.150659</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "   customer_id  event_type_id  nb_tickets  avg_amount\n",
 | 
						||
       "0            1              2      384226    6.150659\n",
 | 
						||
       "1            1              4      453242    7.762474\n",
 | 
						||
       "2            1              5      201750    4.452618\n",
 | 
						||
       "3            1              6      217356    6.439463\n",
 | 
						||
       "4            2              2         143    6.150659"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 38,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "nb_tickets = (df1_products_purchased_reduced.groupby([\"customer_id\", \"event_type_id\"])\n",
 | 
						||
    "              .agg({\"ticket_id\" : \"count\"}).reset_index()\n",
 | 
						||
    "              .rename(columns = {'ticket_id' : 'nb_tickets'})\n",
 | 
						||
    "             .merge(avg_amount, how='left', on='event_type_id'))\n",
 | 
						||
    "nb_tickets.head()"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 39,
 | 
						||
   "id": "8710611c-7eb8-45ca-bdcc-009f4081f9e2",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>customer_id</th>\n",
 | 
						||
       "      <th>birthdate</th>\n",
 | 
						||
       "      <th>street_id</th>\n",
 | 
						||
       "      <th>is_partner</th>\n",
 | 
						||
       "      <th>gender</th>\n",
 | 
						||
       "      <th>is_email_true</th>\n",
 | 
						||
       "      <th>opt_in</th>\n",
 | 
						||
       "      <th>structure_id</th>\n",
 | 
						||
       "      <th>profession</th>\n",
 | 
						||
       "      <th>language</th>\n",
 | 
						||
       "      <th>...</th>\n",
 | 
						||
       "      <th>average_ticket_basket</th>\n",
 | 
						||
       "      <th>total_price</th>\n",
 | 
						||
       "      <th>purchase_count</th>\n",
 | 
						||
       "      <th>first_buying_date</th>\n",
 | 
						||
       "      <th>country</th>\n",
 | 
						||
       "      <th>age</th>\n",
 | 
						||
       "      <th>tenant_id</th>\n",
 | 
						||
       "      <th>nb_campaigns</th>\n",
 | 
						||
       "      <th>nb_campaigns_opened</th>\n",
 | 
						||
       "      <th>time_to_open</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>12751</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>fr</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>1311</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>12825</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>fr</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>1311</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>11261</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>fr</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>1311</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>13071</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>fr</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>1311</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>653061</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>10</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>1311</td>\n",
 | 
						||
       "      <td>80.0</td>\n",
 | 
						||
       "      <td>2.0</td>\n",
 | 
						||
       "      <td>0 days 19:53:02.500000</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "<p>5 rows × 28 columns</p>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "   customer_id birthdate  street_id  is_partner  gender  is_email_true  \\\n",
 | 
						||
       "0        12751       NaN          2       False       1           True   \n",
 | 
						||
       "1        12825       NaN          2       False       2           True   \n",
 | 
						||
       "2        11261       NaN          2       False       1           True   \n",
 | 
						||
       "3        13071       NaN          2       False       2           True   \n",
 | 
						||
       "4       653061       NaN         10       False       2           True   \n",
 | 
						||
       "\n",
 | 
						||
       "   opt_in  structure_id profession language  ...  average_ticket_basket  \\\n",
 | 
						||
       "0    True           NaN        NaN      NaN  ...                    NaN   \n",
 | 
						||
       "1    True           NaN        NaN      NaN  ...                    NaN   \n",
 | 
						||
       "2    True           NaN        NaN      NaN  ...                    NaN   \n",
 | 
						||
       "3    True           NaN        NaN      NaN  ...                    NaN   \n",
 | 
						||
       "4   False           NaN        NaN      NaN  ...                    NaN   \n",
 | 
						||
       "\n",
 | 
						||
       "  total_price  purchase_count  first_buying_date  country  age  tenant_id  \\\n",
 | 
						||
       "0         NaN               0                NaT       fr  NaN       1311   \n",
 | 
						||
       "1         NaN               0                NaT       fr  NaN       1311   \n",
 | 
						||
       "2         NaN               0                NaT       fr  NaN       1311   \n",
 | 
						||
       "3         NaN               0                NaT       fr  NaN       1311   \n",
 | 
						||
       "4         NaN               0                NaT      NaN  NaN       1311   \n",
 | 
						||
       "\n",
 | 
						||
       "   nb_campaigns  nb_campaigns_opened           time_to_open  \n",
 | 
						||
       "0           NaN                  NaN                    NaT  \n",
 | 
						||
       "1           NaN                  NaN                    NaT  \n",
 | 
						||
       "2           NaN                  NaN                    NaT  \n",
 | 
						||
       "3           NaN                  NaN                    NaT  \n",
 | 
						||
       "4          80.0                  2.0 0 days 19:53:02.500000  \n",
 | 
						||
       "\n",
 | 
						||
       "[5 rows x 28 columns]"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 39,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "# Fusion avec KPI campaigns liés au customer\n",
 | 
						||
    "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n",
 | 
						||
    "df1_customer.head()"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 40,
 | 
						||
   "id": "a89fad43-ee68-4081-9384-3e9f08ec6a59",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stdout",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "shape :  (156289, 31)\n"
 | 
						||
     ]
 | 
						||
    },
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>customer_id</th>\n",
 | 
						||
       "      <th>birthdate</th>\n",
 | 
						||
       "      <th>street_id</th>\n",
 | 
						||
       "      <th>is_partner</th>\n",
 | 
						||
       "      <th>gender</th>\n",
 | 
						||
       "      <th>is_email_true</th>\n",
 | 
						||
       "      <th>opt_in</th>\n",
 | 
						||
       "      <th>structure_id</th>\n",
 | 
						||
       "      <th>profession</th>\n",
 | 
						||
       "      <th>language</th>\n",
 | 
						||
       "      <th>...</th>\n",
 | 
						||
       "      <th>first_buying_date</th>\n",
 | 
						||
       "      <th>country</th>\n",
 | 
						||
       "      <th>age</th>\n",
 | 
						||
       "      <th>tenant_id</th>\n",
 | 
						||
       "      <th>nb_campaigns</th>\n",
 | 
						||
       "      <th>nb_campaigns_opened</th>\n",
 | 
						||
       "      <th>time_to_open</th>\n",
 | 
						||
       "      <th>event_type_id</th>\n",
 | 
						||
       "      <th>nb_tickets</th>\n",
 | 
						||
       "      <th>avg_amount</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>12751</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>fr</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>1311</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>12825</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>fr</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>1311</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>11261</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>fr</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>1311</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>13071</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>fr</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>1311</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>653061</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>10</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>1311</td>\n",
 | 
						||
       "      <td>80.0</td>\n",
 | 
						||
       "      <td>2.0</td>\n",
 | 
						||
       "      <td>0 days 19:53:02.500000</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "<p>5 rows × 31 columns</p>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "   customer_id birthdate  street_id  is_partner  gender  is_email_true  \\\n",
 | 
						||
       "0        12751       NaN          2       False       1           True   \n",
 | 
						||
       "1        12825       NaN          2       False       2           True   \n",
 | 
						||
       "2        11261       NaN          2       False       1           True   \n",
 | 
						||
       "3        13071       NaN          2       False       2           True   \n",
 | 
						||
       "4       653061       NaN         10       False       2           True   \n",
 | 
						||
       "\n",
 | 
						||
       "   opt_in  structure_id profession language  ...  first_buying_date country  \\\n",
 | 
						||
       "0    True           NaN        NaN      NaN  ...                NaT      fr   \n",
 | 
						||
       "1    True           NaN        NaN      NaN  ...                NaT      fr   \n",
 | 
						||
       "2    True           NaN        NaN      NaN  ...                NaT      fr   \n",
 | 
						||
       "3    True           NaN        NaN      NaN  ...                NaT      fr   \n",
 | 
						||
       "4   False           NaN        NaN      NaN  ...                NaT     NaN   \n",
 | 
						||
       "\n",
 | 
						||
       "   age  tenant_id  nb_campaigns  nb_campaigns_opened           time_to_open  \\\n",
 | 
						||
       "0  NaN       1311           NaN                  NaN                    NaT   \n",
 | 
						||
       "1  NaN       1311           NaN                  NaN                    NaT   \n",
 | 
						||
       "2  NaN       1311           NaN                  NaN                    NaT   \n",
 | 
						||
       "3  NaN       1311           NaN                  NaN                    NaT   \n",
 | 
						||
       "4  NaN       1311          80.0                  2.0 0 days 19:53:02.500000   \n",
 | 
						||
       "\n",
 | 
						||
       "   event_type_id  nb_tickets  avg_amount  \n",
 | 
						||
       "0            NaN         NaN         NaN  \n",
 | 
						||
       "1            NaN         NaN         NaN  \n",
 | 
						||
       "2            NaN         NaN         NaN  \n",
 | 
						||
       "3            NaN         NaN         NaN  \n",
 | 
						||
       "4            NaN         NaN         NaN  \n",
 | 
						||
       "\n",
 | 
						||
       "[5 rows x 31 columns]"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 40,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n",
 | 
						||
    "print(\"shape : \", df1_customer_product.shape)\n",
 | 
						||
    "df1_customer_product.head()"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 41,
 | 
						||
   "id": "a19fec00-4ece-400c-937c-ce5cd8daccfd",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "# df1_customer_product.to_csv(\"customer_product.csv\", index = False)"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "7c3211a5-a851-43bc-a1f0-b39d51857fb7",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "# Fusion des bases locales"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 42,
 | 
						||
   "id": "46de1912-4a66-46e5-8b9e-7768b2d2723b",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "# Fusion avec KPI liés au customer\n",
 | 
						||
    "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 43,
 | 
						||
   "id": "1e42a790-b215-4107-a969-85005da06ebd",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "# Fusion avec KPI liés au comportement d'achat\n",
 | 
						||
    "df1_customer_product = pd.merge(df1_tickets_kpi, df1_customer, on = 'customer_id', how = 'outer')"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": null,
 | 
						||
   "id": "d950f24d-a5d1-4f1e-aeaa-ca826470365f",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "# df1_customer_product"
 | 
						||
   ]
 | 
						||
  }
 | 
						||
 ],
 | 
						||
 "metadata": {
 | 
						||
  "kernelspec": {
 | 
						||
   "display_name": "Python 3 (ipykernel)",
 | 
						||
   "language": "python",
 | 
						||
   "name": "python3"
 | 
						||
  },
 | 
						||
  "language_info": {
 | 
						||
   "codemirror_mode": {
 | 
						||
    "name": "ipython",
 | 
						||
    "version": 3
 | 
						||
   },
 | 
						||
   "file_extension": ".py",
 | 
						||
   "mimetype": "text/x-python",
 | 
						||
   "name": "python",
 | 
						||
   "nbconvert_exporter": "python",
 | 
						||
   "pygments_lexer": "ipython3",
 | 
						||
   "version": "3.10.13"
 | 
						||
  }
 | 
						||
 },
 | 
						||
 "nbformat": 4,
 | 
						||
 "nbformat_minor": 5
 | 
						||
}
 |