{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ad414c84-be46-4d2c-be8b-9fc4d24cc672",
   "metadata": {},
   "source": [
    "# Business Data Challenge - Team 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "15103481-8d74-404c-aa09-7601fe7730da",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import s3fs\n",
    "import re"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ee97665c-39af-4c1c-a62b-c9c79feae18f",
   "metadata": {},
   "source": [
    "Configuration de l'accès aux données"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "5d83bb1a-d341-446e-91f6-1c428607f6d4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create filesystem object\n",
    "# The endpoint host is read from the AWS_S3_ENDPOINT environment variable\n",
    "# (indexing os.environ raises KeyError when the variable is not set).\n",
    "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
    "# Filesystem handle used by the loading cells of this notebook.\n",
    "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9cbd72c5-6f8e-4366-ab66-96c32c6e963a",
   "metadata": {},
   "source": [
    "# Exemple sur Company 1"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "db26e59a-927c-407e-b54b-1815473b0b34",
   "metadata": {},
   "source": [
    "## Chargement données"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "699664b9-eee4-4f8d-a207-e524526560c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# S3 prefix that is listed to discover the CSV files of company 1.\n",
    "# NOTE(review): BUCKET is bound again, to a different value, in the\n",
    "# 'Create Products Table' section, so its value depends on which cell ran last.\n",
    "BUCKET = \"bdc2324-data/1\"\n",
    "liste_database = fs.ls(BUCKET)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "aaf64d60-bf92-470c-8210-d09abd6a653e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['bdc2324-data/1/1campaign_stats.csv',\n",
       " 'bdc2324-data/1/1campaigns.csv',\n",
       " 'bdc2324-data/1/1categories.csv',\n",
       " 'bdc2324-data/1/1countries.csv',\n",
       " 'bdc2324-data/1/1currencies.csv',\n",
       " 'bdc2324-data/1/1customer_target_mappings.csv',\n",
       " 'bdc2324-data/1/1customersplus.csv',\n",
       " 'bdc2324-data/1/1event_types.csv',\n",
       " 'bdc2324-data/1/1events.csv',\n",
       " 'bdc2324-data/1/1facilities.csv',\n",
       " 'bdc2324-data/1/1link_stats.csv',\n",
       " 'bdc2324-data/1/1pricing_formulas.csv',\n",
       " 'bdc2324-data/1/1product_packs.csv',\n",
       " 'bdc2324-data/1/1products.csv',\n",
       " 'bdc2324-data/1/1products_groups.csv',\n",
       " 'bdc2324-data/1/1purchases.csv',\n",
       " 'bdc2324-data/1/1representation_category_capacities.csv',\n",
       " 'bdc2324-data/1/1representations.csv',\n",
       " 'bdc2324-data/1/1seasons.csv',\n",
       " 'bdc2324-data/1/1structure_tag_mappings.csv',\n",
       " 'bdc2324-data/1/1suppliers.csv',\n",
       " 'bdc2324-data/1/1tags.csv',\n",
       " 'bdc2324-data/1/1target_types.csv',\n",
       " 'bdc2324-data/1/1targets.csv',\n",
       " 'bdc2324-data/1/1tickets.csv',\n",
       " 'bdc2324-data/1/1type_of_categories.csv',\n",
       " 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n",
       " 'bdc2324-data/1/1type_ofs.csv']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the paths returned by the bucket listing\n",
    "liste_database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load every CSV of the listing into a module-level DataFrame named\n",
    "# df<client>_<table> (for instance df1_tickets); the cells below use these names.\n",
    "files_path = liste_database\n",
    "\n",
    "# The client number is the second component of a path such as 'bdc2324-data/1/1tickets.csv'\n",
    "client_number = files_path[0].split(\"/\")[1]\n",
    "df_prefix = \"df\" + str(client_number) + \"_\"\n",
    "\n",
    "# Group 3 captures the table name that follows the numeric prefix of the file name\n",
    "table_name_pattern = re.compile(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$')\n",
    "\n",
    "for current_path in files_path:\n",
    "    name_match = table_name_pattern.search(current_path)\n",
    "    if name_match is None:\n",
    "        # Not a '<n>/<n><table>.csv' object: skip it instead of failing on .group()\n",
    "        print(\"Skipped (unexpected file name) : \", current_path)\n",
    "        continue\n",
    "    # The file only needs to stay open while it is being parsed\n",
    "    with fs.open(current_path, mode=\"rb\") as file_in:\n",
    "        df = pd.read_csv(file_in)\n",
    "    nom_dataframe = df_prefix + name_match.group(3)\n",
    "    globals()[nom_dataframe] = df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4004c8bf-11d9-413d-bb42-2cb8ddde7716",
   "metadata": {},
   "source": [
    "## Cleaning functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d237be96-8c86-4a91-b7a1-487e87a16c3d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def cleaning_date(df, column_name):\n",
    "    \"\"\"\n",
    "    Parse one column of a DataFrame as UTC datetimes (ISO8601 format), in place.\n",
    "\n",
    "    Parameters:\n",
    "    - df: DataFrame\n",
    "        Frame holding the column to convert; the column is replaced in this frame.\n",
    "    - column_name: str\n",
    "        Name of the column to convert.\n",
    "\n",
    "    Returns:\n",
    "    - DataFrame\n",
    "        The same DataFrame object, with the column holding the parsed values.\n",
    "    \"\"\"\n",
    "    parsed_dates = pd.to_datetime(df[column_name], format = 'ISO8601', utc = True)\n",
    "    df[column_name] = parsed_dates\n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "398804d8-2225-4fd3-bceb-75ab1588e359",
   "metadata": {},
   "source": [
    "## Preprocessing"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "568cb180-0dd9-4b27-aecb-05e4c3775ba6",
   "metadata": {},
   "source": [
    "## customer_plus"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e7b90ce-da54-4f00-bc34-64c543b0858f",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "bade04b1-0cdf-4d10-bcca-7dc7e4831656",
   "metadata": {},
   "source": [
    "## Ticket area"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b95464b1-26bc-4aac-84b4-45da83b92251",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cleaning and selection function\n",
    "def preprocessing_tickets_area(tickets = None, purchases = None, suppliers = None, type_ofs = None):\n",
    "    \"\"\"\n",
    "    Build one row per ticket with its supplier name and its purchase information.\n",
    "\n",
    "    Parameters:\n",
    "    - tickets: DataFrame with at least the columns id, purchase_id, product_id,\n",
    "        is_from_subscription, type_of and supplier_id.\n",
    "    - purchases: DataFrame with at least the columns id, purchase_date and customer_id.\n",
    "        Its purchase_date column is converted to datetime in place by cleaning_date.\n",
    "    - suppliers: DataFrame with at least the columns id and name.\n",
    "    - type_ofs: not used by this function; kept so that existing calls keep working.\n",
    "\n",
    "    Returns:\n",
    "    - DataFrame with the columns ticket_id, product_id, is_from_subscription, type_of,\n",
    "        supplier_name, purchase_date and customer_id. Both merges are inner joins, so a\n",
    "        ticket without a matching supplier or purchase is dropped.\n",
    "    \"\"\"\n",
    "    # Tickets: rename returns a new frame, so nothing is renamed in place on a\n",
    "    # column slice of the caller's DataFrame (which raised SettingWithCopyWarning)\n",
    "    tickets = tickets[['id', 'purchase_id', 'product_id', 'is_from_subscription', 'type_of', 'supplier_id']]\n",
    "    tickets = tickets.rename(columns = {'id' : 'ticket_id'})\n",
    "\n",
    "    # Suppliers\n",
    "    suppliers = suppliers[['id', 'name']].rename(columns = {'name' : 'supplier_name'})\n",
    "\n",
    "    # Purchases: parse the purchase date (on the caller's frame, as before), then select\n",
    "    cleaning_date(purchases, 'purchase_date')\n",
    "    purchases = purchases[['id', 'purchase_date', 'customer_id']]\n",
    "\n",
    "    # Merge with suppliers; the join keys are no longer needed afterwards\n",
    "    ticket_information = pd.merge(tickets, suppliers, left_on = 'supplier_id', right_on = 'id', how = 'inner')\n",
    "    ticket_information = ticket_information.drop(columns = ['supplier_id', 'id'])\n",
    "\n",
    "    # Merge with purchases\n",
    "    ticket_information = pd.merge(ticket_information, purchases, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n",
    "    ticket_information = ticket_information.drop(columns = ['purchase_id', 'id'])\n",
    "\n",
    "    return ticket_information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3e1d2ba7-ff4f-48eb-93a8-2bb648c70396",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the ticket table of company 1 from the frames loaded above\n",
    "df1_ticket_information = preprocessing_tickets_area(tickets = df1_tickets, purchases = df1_purchases, suppliers = df1_suppliers, type_ofs = df1_type_ofs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4b18edfc-6450-4c6a-9e7b-ee5a5808c8c9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the resulting ticket table\n",
    "df1_ticket_information"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "096e47f4-1d65-4575-989d-83227eedad2b",
   "metadata": {},
   "source": [
    "## Target area"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "baed146a-9d3a-4397-a812-3d50c9a2f038",
   "metadata": {},
   "outputs": [],
   "source": [
    "def preprocessing_target_area(targets = None, target_types = None, customer_target_mappings = None):\n",
    "    \"\"\"\n",
    "    Attach to every customer/target mapping the name and the type of its target.\n",
    "\n",
    "    Parameters:\n",
    "    - targets: DataFrame with at least the columns id, target_type_id and name.\n",
    "    - target_types: DataFrame with at least the columns id, is_import and name.\n",
    "    - customer_target_mappings: DataFrame with at least the columns id, customer_id\n",
    "        and target_id.\n",
    "\n",
    "    Returns:\n",
    "    - DataFrame with the columns id, customer_id, target_name, target_type_is_import\n",
    "        and target_type_name. Both merges are inner joins, so a mapping whose target or\n",
    "        target type is unknown is dropped. The input frames are not modified.\n",
    "    \"\"\"\n",
    "    # Targets: rename returns a new frame instead of renaming in place on a column\n",
    "    # slice of the caller's DataFrame (which raised SettingWithCopyWarning)\n",
    "    targets = targets[[\"id\", \"target_type_id\", \"name\"]]\n",
    "    targets = targets.rename(columns = {'id' : 'target_id', 'name' : 'target_name'})\n",
    "\n",
    "    # Target types: every kept column gets the target_type_ prefix (id -> target_type_id)\n",
    "    target_types = target_types[[\"id\", \"is_import\", \"name\"]].add_prefix(\"target_type_\")\n",
    "\n",
    "    # Customer/target mappings\n",
    "    customer_target_mappings = customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]]\n",
    "\n",
    "    # Merge targets with their type; the join key is not needed afterwards\n",
    "    targets_full = pd.merge(targets, target_types, on = 'target_type_id', how = 'inner')\n",
    "    targets_full = targets_full.drop(columns = ['target_type_id'])\n",
    "\n",
    "    # Merge the mappings with the described targets\n",
    "    targets_full = pd.merge(customer_target_mappings, targets_full, on = 'target_id', how = 'inner')\n",
    "    targets_full = targets_full.drop(columns = ['target_id'])\n",
    "\n",
    "    return targets_full"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5fbfd88b-b94c-489c-9201-670e96e453e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the target table of company 1 from the frames loaded above\n",
    "df1_target_information = preprocessing_target_area(targets = df1_targets, target_types = df1_target_types, customer_target_mappings = df1_customer_target_mappings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4f05142-2a22-42ef-a60d-f23cc4b5cb09",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count the rows with a customer_id for each target name, largest count first\n",
    "df1_target_information[['target_name', 'customer_id']].groupby('target_name').count().sort_values(by='customer_id', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4417ff51-f501-4ab9-a192-4ab75764a8ed",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Count the rows with a customer_id per target name, then show the targets counted at least 1000 times\n",
    "df1_target_information_reduced = df1_target_information[['target_name', 'customer_id']].groupby('target_name').count()\n",
    "df1_target_information_reduced[df1_target_information_reduced['customer_id'] >= 1000]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cdbb48b4-5e16-4ef4-8791-ed213d68d52f",
   "metadata": {},
   "source": [
    "## Campaigns area"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d883cc7b-ac43-4485-b86f-eaf595fbad85",
   "metadata": {},
   "outputs": [],
   "source": [
    "def preprocessing_campaigns_area(campaign_stats = None, campaigns = None):\n",
    "    \"\"\"\n",
    "    Join the per-customer campaign statistics with the description of each campaign.\n",
    "\n",
    "    Parameters:\n",
    "    - campaign_stats: DataFrame with at least the columns id, campaign_id, customer_id,\n",
    "        opened_at, sent_at and delivered_at.\n",
    "    - campaigns: DataFrame with at least the columns id, name, service_id and sent_at.\n",
    "\n",
    "    Returns:\n",
    "    - DataFrame with the columns id, customer_id, opened_at, sent_at, delivered_at,\n",
    "        campaign_name, campaign_service_id and campaign_sent_at; the date columns are\n",
    "        parsed by cleaning_date. The merge is a left join, so every statistics row is kept.\n",
    "    \"\"\"\n",
    "    # campaign_stats cleaning: .copy() gives an independent frame, so the date\n",
    "    # conversions below do not assign into a column slice of the caller's DataFrame\n",
    "    campaign_stats = campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]].copy()\n",
    "    for date_column in ('opened_at', 'sent_at', 'delivered_at'):\n",
    "        cleaning_date(campaign_stats, date_column)\n",
    "\n",
    "    # campaigns cleaning: every kept column gets the campaign_ prefix (id -> campaign_id)\n",
    "    campaigns = campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\")\n",
    "    cleaning_date(campaigns, 'campaign_sent_at')\n",
    "\n",
    "    # Merge; the join key is not needed afterwards\n",
    "    campaigns_full = pd.merge(campaign_stats, campaigns, on = \"campaign_id\", how = \"left\")\n",
    "    campaigns_full = campaigns_full.drop(columns = ['campaign_id'])\n",
    "\n",
    "    return campaigns_full"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the campaign table of company 1 from the frames loaded above\n",
    "df1_campaigns_information = preprocessing_campaigns_area(campaign_stats = df1_campaign_stats, campaigns = df1_campaigns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c24457e7-3cad-451a-a65b-7373b656bd6e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the resulting campaign table\n",
    "df1_campaigns_information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e2c88552-b863-47a2-be23-8d2898fb28bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "def campaigns_kpi(campaigns_information = None):\n",
    "    \"\"\"\n",
    "    Aggregate the campaign table into per-customer e-mail indicators.\n",
    "\n",
    "    Parameters:\n",
    "    - campaigns_information: DataFrame with at least the columns customer_id,\n",
    "        campaign_name, opened_at and delivered_at. It is not modified.\n",
    "\n",
    "    Returns:\n",
    "    - DataFrame with one row per customer_id and the columns:\n",
    "        nb_campaigns: number of rows with a campaign name;\n",
    "        nb_campaigns_opened: number of such rows whose opened_at is not null\n",
    "            (0 when the customer has none);\n",
    "        time_to_open: mean of opened_at - delivered_at, as a Timedelta\n",
    "            (NaT when it cannot be computed for any row of the customer).\n",
    "    \"\"\"\n",
    "    # Number of e-mail campaigns per customer\n",
    "    nb_campaigns = (\n",
    "        campaigns_information[['customer_id', 'campaign_name']]\n",
    "        .groupby('customer_id').count().reset_index()\n",
    "        .rename(columns = {'campaign_name' : 'nb_campaigns'})\n",
    "    )\n",
    "\n",
    "    # Mean delay between delivery and opening (a Timedelta, not a number of minutes).\n",
    "    # It is computed on a separate frame so that no column is added to the caller's DataFrame.\n",
    "    time_to_open = (\n",
    "        campaigns_information[['customer_id', 'opened_at', 'delivered_at']]\n",
    "        .assign(time_to_open = lambda stats: stats['opened_at'] - stats['delivered_at'])\n",
    "        .loc[:, ['customer_id', 'time_to_open']]\n",
    "        .groupby('customer_id').mean().reset_index()\n",
    "    )\n",
    "\n",
    "    # Number of opened e-mails per customer; dropna returns a new frame, so nothing\n",
    "    # is dropped in place on a column slice\n",
    "    opened_campaign = (\n",
    "        campaigns_information[['customer_id', 'campaign_name', 'opened_at']]\n",
    "        .dropna(subset = ['opened_at'])\n",
    "        .loc[:, ['customer_id', 'campaign_name']]\n",
    "        .groupby('customer_id').count().reset_index()\n",
    "        .rename(columns = {'campaign_name' : 'nb_campaigns_opened'})\n",
    "    )\n",
    "\n",
    "    # Merge the indicators\n",
    "    campaigns_reduced = pd.merge(nb_campaigns, opened_campaign, on = 'customer_id', how = 'left')\n",
    "    campaigns_reduced = pd.merge(campaigns_reduced, time_to_open, on = 'customer_id', how = 'left')\n",
    "\n",
    "    # Customers who opened nothing have no row in opened_campaign: count them as 0.\n",
    "    # The column is reassigned because an in-place fillna on a selected column does not\n",
    "    # update the frame when pandas Copy-on-Write is active.\n",
    "    campaigns_reduced['nb_campaigns_opened'] = campaigns_reduced['nb_campaigns_opened'].fillna(0)\n",
    "\n",
    "    # TODO: decide how the missing (NaT) time_to_open values should be filled\n",
    "\n",
    "    return campaigns_reduced"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24537647-bc29-4777-9848-ac4120a4aa60",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute the per-customer campaign indicators of company 1\n",
    "df1_campaigns_kpi = campaigns_kpi(campaigns_information = df1_campaigns_information) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the per-customer campaign indicators\n",
    "df1_campaigns_kpi"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "56520a97-ede8-4920-a211-3b5b136af33d",
   "metadata": {},
   "source": [
    "## Create Products Table"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9782e9d3-ba20-46bf-8562-bd0969972ddc",
   "metadata": {},
   "source": [
    "Some useful functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bucket and sub-directory from which display_databases builds its file paths.\n",
    "# NOTE(review): BUCKET was bound to \"bdc2324-data/1\" in the data loading section;\n",
    "# this cell rebinds it, so the loaders below need this cell to have run last.\n",
    "BUCKET = \"bdc2324-data\"\n",
    "directory_path = '1'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "607eb4b4-eed9-4b50-b823-f75c116dd37c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def display_databases(file_name):\n",
    "    \"\"\"\n",
    "    Read one CSV file from the S3 storage and return it as a DataFrame.\n",
    "\n",
    "    The path is built from the module-level BUCKET and directory_path values and the\n",
    "    file is opened through the module-level fs filesystem. The path and the shape of\n",
    "    the loaded frame are printed.\n",
    "    \"\"\"\n",
    "    s3_path = \"/\".join([BUCKET, directory_path, file_name])\n",
    "    print(\"File path : \", s3_path)\n",
    "    with fs.open(s3_path, mode=\"rb\") as handle:\n",
    "        loaded = pd.read_csv(handle, sep=\",\")\n",
    "\n",
    "    print(\"Shape : \", loaded.shape)\n",
    "    return loaded\n",
    "\n",
    "\n",
    "def remove_horodates(df):\n",
    "    \"\"\"\n",
    "    Return a copy of df without the created_at and updated_at timestamp columns.\n",
    "\n",
    "    A column that is absent is simply skipped, so a table lacking one of them\n",
    "    does not raise KeyError. The input frame is not modified.\n",
    "    \"\"\"\n",
    "    df = df.drop(columns = [\"created_at\", \"updated_at\"], errors = \"ignore\")\n",
    "    return df\n",
    "\n",
    "\n",
    "def order_columns_id(df):\n",
    "    \"\"\"\n",
    "    Return df with its id columns moved to the front, to make the dataset easier to read.\n",
    "\n",
    "    A column is an id column when 'id' is one of the underscore-separated parts of its\n",
    "    name (id, season_id, id_events, ...). A name that merely contains the letters 'id'\n",
    "    (fidelity_delay, identifier, ...) stays with the other columns. The relative order\n",
    "    inside each group is preserved.\n",
    "    \"\"\"\n",
    "    def is_id_column(col):\n",
    "        # Token match rather than substring match: 'id' in 'fidelity_delay' is True\n",
    "        return 'id' in str(col).split('_')\n",
    "\n",
    "    id_columns = [col for col in df.columns if is_id_column(col)]\n",
    "    remaining_col = [col for col in df.columns if not is_id_column(col)]\n",
    "    return df[id_columns + remaining_col]\n",
    "\n",
    "\n",
    "def process_df_2(df):\n",
    "    \"\"\"\n",
    "    Tidy a dataframe: apply remove_horodates, then order_columns_id.\n",
    "\n",
    "    Prints the number of columns left after remove_horodates and the final column index.\n",
    "    \"\"\"\n",
    "    without_horodates = remove_horodates(df)\n",
    "    print(\"Number of columns : \", len(without_horodates.columns))\n",
    "    reordered = order_columns_id(without_horodates)\n",
    "    print(\"Columns : \", reordered.columns)\n",
    "    return reordered\n",
    "\n",
    "def load_dataset(name):\n",
    "    \"\"\"\n",
    "    Load a CSV file with display_databases, tidy it with process_df_2 and\n",
    "    return it without its identifier column.\n",
    "    \"\"\"\n",
    "    dataset = process_df_2(display_databases(name))\n",
    "    # Nothing more to remove when the table has no identifier column\n",
    "    if 'identifier' not in dataset.columns:\n",
    "        return dataset\n",
    "    return dataset.drop(columns = 'identifier')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d23f28c0-bc95-438b-8d14-5b7bb6e267bd",
   "metadata": {},
   "source": [
    "Create theme tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "350b09b9-451f-4d47-81fe-f34b892db027",
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_products_table():\n",
    "    \"\"\"\n",
    "    Assemble the products table: products left-joined with their category, then with\n",
    "    the type_of_categories rows of that category. The id columns are placed first.\n",
    "    \"\"\"\n",
    "    # Step 1: products and categories\n",
    "    print(\"first merge products and categories\")\n",
    "    products_raw = load_dataset(\"1products.csv\")\n",
    "    categories_raw = load_dataset(\"1categories.csv\")\n",
    "\n",
    "    # Columns that are not kept in the final table\n",
    "    products_kept = products_raw.drop(columns = ['apply_price', 'extra_field', 'amount_consumption'])\n",
    "    categories_kept = categories_raw.drop(columns = ['extra_field', 'quota'])\n",
    "\n",
    "    # Both frames have an id column, hence the suffixes; name only comes from categories\n",
    "    merged = products_kept.merge(\n",
    "        categories_kept,\n",
    "        how = 'left',\n",
    "        left_on = 'category_id',\n",
    "        right_on = 'id',\n",
    "        suffixes = ('_products', '_categories'),\n",
    "    ).rename(columns = {\"name\" : \"name_categories\"})\n",
    "\n",
    "    # Step 2: add the type of each category\n",
    "    print(\"Second merge products_theme and type of categories\")\n",
    "    category_types = load_dataset(\"1type_of_categories.csv\").drop(columns = 'id')\n",
    "    merged = merged.merge(category_types, how = 'left', on = 'category_id')\n",
    "\n",
    "    # The category id is already available as category_id\n",
    "    return order_columns_id(merged.drop(columns = ['id_categories']))\n",
    "\n",
    "\n",
    "def create_events_table():\n",
    "    \"\"\"\n",
    "    Assemble the events table: events left-joined with their season, their event type\n",
    "    and their facility. The id columns are placed first.\n",
    "    \"\"\"\n",
    "    # Step 1: events and seasons\n",
    "    print(\"first merge events and seasons : \")\n",
    "    events_raw = load_dataset(\"1events.csv\")\n",
    "    seasons_raw = load_dataset(\"1seasons.csv\")\n",
    "\n",
    "    # Columns that are not kept in the final table\n",
    "    events_kept = events_raw.drop(columns = ['manual_added', 'is_display'])\n",
    "    seasons_kept = seasons_raw.drop(columns = ['start_date_time'])\n",
    "\n",
    "    merged = events_kept.merge(seasons_kept, how = 'left', left_on = 'season_id', right_on = 'id',\n",
    "                               suffixes = ('_events', '_seasons'))\n",
    "\n",
    "    # Step 2: add the event type\n",
    "    print(\"Secondly merge events_theme and event_types : \")\n",
    "    event_types_kept = load_dataset(\"1event_types.csv\").drop(columns = ['fidelity_delay'])\n",
    "    merged = (\n",
    "        merged.merge(event_types_kept, how = 'left', left_on = 'event_type_id', right_on = 'id',\n",
    "                     suffixes = ('_events', '_event_type'))\n",
    "        .rename(columns = {\"name\" : \"name_event_types\"})\n",
    "        .drop(columns = 'id')\n",
    "    )\n",
    "\n",
    "    # Step 3: add the facility\n",
    "    print(\"thirdly merge events_theme and facilities : \")\n",
    "    facilities_kept = load_dataset(\"1facilities.csv\").drop(columns = ['fixed_capacity'])\n",
    "    merged = (\n",
    "        merged.merge(facilities_kept, how = 'left', left_on = 'facility_id', right_on = 'id',\n",
    "                     suffixes = ('_events', '_facility'))\n",
    "        .rename(columns = {\"name\" : \"name_facilities\", \"id_events\" : \"event_id\"})\n",
    "        .drop(columns = 'id')\n",
    "    )\n",
    "\n",
    "    # The season id is already available as season_id\n",
    "    return order_columns_id(merged.drop(columns = ['id_seasons']))\n",
    "\n",
    "\n",
    "def create_representations_table():\n",
    "    \"\"\"\n",
    "    Assemble the representations table: representations left-joined with their\n",
    "    rows of representation_category_capacities. The id columns are placed first.\n",
    "    \"\"\"\n",
    "    # Columns of the representations file that are not kept\n",
    "    unused_representation_columns = [\n",
    "        'serial', 'open', 'satisfaction', 'is_display', 'expected_filling',\n",
    "        'max_filling', 'extra_field', 'start_date_time', 'end_date_time', 'name',\n",
    "        'representation_type_id',\n",
    "    ]\n",
    "    representations_kept = load_dataset(\"1representations.csv\").drop(columns = unused_representation_columns)\n",
    "\n",
    "    capacities_kept = load_dataset(\"1representation_category_capacities.csv\").drop(\n",
    "        columns = ['expected_filling', 'max_filling'])\n",
    "\n",
    "    # Both frames have an id column, hence the suffixes\n",
    "    merged = representations_kept.merge(\n",
    "        capacities_kept,\n",
    "        how = 'left',\n",
    "        left_on = 'id',\n",
    "        right_on = 'representation_id',\n",
    "        suffixes = ('_representation', '_representation_cap'),\n",
    "    )\n",
    "\n",
    "    # The representation id is already available as representation_id\n",
    "    return order_columns_id(merged.drop(columns = [\"id_representation\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "0fccc8ef-e575-4857-a401-94a7274394df",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "first merge products and categories\n",
      "File path :  bdc2324-data/1/1products.csv\n",
      "Shape :  (94803, 14)\n",
      "Number of columns :  12\n",
      "Columns :  Index(['id', 'representation_id', 'pricing_formula_id', 'category_id',\n",
      "       'products_group_id', 'product_pack_id', 'identifier', 'amount',\n",
      "       'is_full_price', 'apply_price', 'extra_field', 'amount_consumption'],\n",
      "      dtype='object')\n",
      "File path :  bdc2324-data/1/1categories.csv\n",
      "Shape :  (27, 7)\n",
      "Number of columns :  5\n",
      "Columns :  Index(['id', 'identifier', 'name', 'extra_field', 'quota'], dtype='object')\n",
      "Second merge products_theme and type of categories\n",
      "File path :  bdc2324-data/1/1type_of_categories.csv\n",
      "Shape :  (5, 6)\n",
      "Number of columns :  4\n",
      "Columns :  Index(['id', 'type_of_id', 'category_id', 'identifier'], dtype='object')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id_products</th>\n",
       "      <th>representation_id</th>\n",
       "      <th>pricing_formula_id</th>\n",
       "      <th>category_id</th>\n",
       "      <th>products_group_id</th>\n",
       "      <th>product_pack_id</th>\n",
       "      <th>type_of_id</th>\n",
       "      <th>amount</th>\n",
       "      <th>is_full_price</th>\n",
       "      <th>name_categories</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10682</td>\n",
       "      <td>914</td>\n",
       "      <td>114</td>\n",
       "      <td>41</td>\n",
       "      <td>10655</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.0</td>\n",
       "      <td>False</td>\n",
       "      <td>indiv activité tr</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>478</td>\n",
       "      <td>273</td>\n",
       "      <td>131</td>\n",
       "      <td>1</td>\n",
       "      <td>471</td>\n",
       "      <td>1</td>\n",
       "      <td>12.0</td>\n",
       "      <td>9.5</td>\n",
       "      <td>False</td>\n",
       "      <td>indiv entrées tp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20873</td>\n",
       "      <td>275</td>\n",
       "      <td>137</td>\n",
       "      <td>1</td>\n",
       "      <td>20825</td>\n",
       "      <td>1</td>\n",
       "      <td>12.0</td>\n",
       "      <td>11.5</td>\n",
       "      <td>False</td>\n",
       "      <td>indiv entrées tp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>157142</td>\n",
       "      <td>82519</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>156773</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "      <td>False</td>\n",
       "      <td>indiv entrées tr</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1341</td>\n",
       "      <td>9</td>\n",
       "      <td>93</td>\n",
       "      <td>1</td>\n",
       "      <td>1175</td>\n",
       "      <td>1</td>\n",
       "      <td>12.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>False</td>\n",
       "      <td>indiv entrées tp</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id_products  representation_id  pricing_formula_id  category_id  \\\n",
       "0        10682                914                 114           41   \n",
       "1          478                273                 131            1   \n",
       "2        20873                275                 137            1   \n",
       "3       157142              82519                   9            5   \n",
       "4         1341                  9                  93            1   \n",
       "\n",
       "   products_group_id  product_pack_id  type_of_id  amount  is_full_price  \\\n",
       "0              10655                1         NaN     9.0          False   \n",
       "1                471                1        12.0     9.5          False   \n",
       "2              20825                1        12.0    11.5          False   \n",
       "3             156773                1         NaN     8.0          False   \n",
       "4               1175                1        12.0     8.5          False   \n",
       "\n",
       "     name_categories  \n",
       "0  indiv activité tr  \n",
       "1   indiv entrées tp  \n",
       "2   indiv entrées tp  \n",
       "3   indiv entrées tr  \n",
       "4   indiv entrées tp  "
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the products table and preview its first rows\n",
    "products_theme = create_products_table()\n",
    "products_theme.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "779d8aaf-6668-4f66-8852-847304407ea3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "first merge events and seasons : \n",
      "File path :  bdc2324-data/1/1events.csv\n",
      "Shape :  (1232, 12)\n",
      "Number of columns :  10\n",
      "Columns :  Index(['id', 'season_id', 'facility_id', 'event_type_id', 'event_type_key_id',\n",
      "       'facility_key_id', 'identifier', 'name', 'manual_added', 'is_display'],\n",
      "      dtype='object')\n",
      "File path :  bdc2324-data/1/1seasons.csv\n",
      "Shape :  (13, 6)\n",
      "Number of columns :  4\n",
      "Columns :  Index(['id', 'identifier', 'name', 'start_date_time'], dtype='object')\n",
      "Secondly merge events_theme and event_types : \n",
      "File path :  bdc2324-data/1/1event_types.csv\n",
      "Shape :  (9, 6)\n",
      "Number of columns :  4\n",
      "Columns :  Index(['id', 'fidelity_delay', 'identifier', 'name'], dtype='object')\n",
      "thirdly merge events_theme and facilities : \n",
      "File path :  bdc2324-data/1/1facilities.csv\n",
      "Shape :  (2, 7)\n",
      "Number of columns :  5\n",
      "Columns :  Index(['id', 'street_id', 'identifier', 'name', 'fixed_capacity'], dtype='object')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>season_id</th>\n",
       "      <th>facility_id</th>\n",
       "      <th>event_type_id</th>\n",
       "      <th>event_type_key_id</th>\n",
       "      <th>facility_key_id</th>\n",
       "      <th>street_id</th>\n",
       "      <th>name_events</th>\n",
       "      <th>name_seasons</th>\n",
       "      <th>name_event_types</th>\n",
       "      <th>name_facilities</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>192</td>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>frontières</td>\n",
       "      <td>2018</td>\n",
       "      <td>spectacle vivant</td>\n",
       "      <td>mucem</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>30329</td>\n",
       "      <td>2767</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>visite guidée une autre histoire du monde (1h00)</td>\n",
       "      <td>2023</td>\n",
       "      <td>offre muséale groupe</td>\n",
       "      <td>mucem</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>161</td>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>visite contée les chercheurs d'or indiv</td>\n",
       "      <td>2018</td>\n",
       "      <td>offre muséale individuel</td>\n",
       "      <td>mucem</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5957</td>\n",
       "      <td>582</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>we dreamt of utopia and we woke up screaming.</td>\n",
       "      <td>2021</td>\n",
       "      <td>spectacle vivant</td>\n",
       "      <td>mucem</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8337</td>\n",
       "      <td>582</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>jeff koons épisodes 4</td>\n",
       "      <td>2021</td>\n",
       "      <td>spectacle vivant</td>\n",
       "      <td>mucem</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   event_id  season_id  facility_id  event_type_id  event_type_key_id  \\\n",
       "0       192         16            1              4                  4   \n",
       "1     30329       2767            1              5                  5   \n",
       "2       161         16            1              2                  2   \n",
       "3      5957        582            1              4                  4   \n",
       "4      8337        582            1              4                  4   \n",
       "\n",
       "   facility_key_id  street_id  \\\n",
       "0                1          1   \n",
       "1                1          1   \n",
       "2                1          1   \n",
       "3                1          1   \n",
       "4                1          1   \n",
       "\n",
       "                                        name_events name_seasons  \\\n",
       "0                                        frontières         2018   \n",
       "1  visite guidée une autre histoire du monde (1h00)         2023   \n",
       "2           visite contée les chercheurs d'or indiv         2018   \n",
       "3     we dreamt of utopia and we woke up screaming.         2021   \n",
       "4                             jeff koons épisodes 4         2021   \n",
       "\n",
       "           name_event_types name_facilities  \n",
       "0          spectacle vivant           mucem  \n",
       "1      offre muséale groupe           mucem  \n",
       "2  offre muséale individuel           mucem  \n",
       "3          spectacle vivant           mucem  \n",
       "4          spectacle vivant           mucem  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "events_theme= create_events_table()\n",
    "events_theme.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File path :  bdc2324-data/1/1representations.csv\n",
      "Shape :  (36095, 16)\n",
      "Number of columns :  14\n",
      "Columns :  Index(['id', 'event_id', 'representation_type_id', 'identifier', 'serial',\n",
      "       'start_date_time', 'open', 'satisfaction', 'end_date_time', 'name',\n",
      "       'is_display', 'expected_filling', 'max_filling', 'extra_field'],\n",
      "      dtype='object')\n",
      "File path :  bdc2324-data/1/1representation_category_capacities.csv\n",
      "Shape :  (65241, 7)\n",
      "Number of columns :  5\n",
      "Columns :  Index(['id', 'representation_id', 'category_id', 'expected_filling',\n",
      "       'max_filling'],\n",
      "      dtype='object')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>id_representation_cap</th>\n",
       "      <th>representation_id</th>\n",
       "      <th>category_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>12384</td>\n",
       "      <td>123058</td>\n",
       "      <td>84820</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>37</td>\n",
       "      <td>2514</td>\n",
       "      <td>269</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37</td>\n",
       "      <td>384</td>\n",
       "      <td>269</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>37</td>\n",
       "      <td>2515</td>\n",
       "      <td>269</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>37</td>\n",
       "      <td>383</td>\n",
       "      <td>269</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   event_id  id_representation_cap  representation_id  category_id\n",
       "0     12384                 123058              84820            2\n",
       "1        37                   2514                269            2\n",
       "2        37                    384                269            5\n",
       "3        37                   2515                269           10\n",
       "4        37                    383                269            1"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "representation_theme = create_representations_table()\n",
    "representation_theme.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8fa191d5-c867-4d4d-bbab-f29d7d91ce6a",
   "metadata": {},
   "source": [
    "Create uniform product database "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "def uniform_product_df():\n",
    "    \"\"\"\n",
    "    This function returns the uniform product dataset\n",
    "    \"\"\"\n",
    "    print(\"Products theme columns : \", products_theme.columns)\n",
    "    print(\"\\n Representation theme columns : \", representation_theme.columns)\n",
    "    print(\"\\n Events theme columns : \", events_theme.columns)\n",
    "\n",
    "    products_global = products_theme.merge(representation_theme, how='left',\n",
    "                                           on= [\"representation_id\", \"category_id\"])\n",
    "    \n",
    "    products_global = products_global.merge(events_theme, how='left', on='event_id',\n",
    "                                            suffixes = (\"_representation\", \"_event\"))\n",
    "    \n",
    "    products_global = order_columns_id(products_global)\n",
    "\n",
    "    # remove useless columns \n",
    "    products_global = products_global.drop(columns = ['type_of_id', 'name_events', 'name_seasons', 'name_categories'])\n",
    "    return products_global"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Products theme columns :  Index(['id_products', 'representation_id', 'pricing_formula_id', 'category_id',\n",
      "       'products_group_id', 'product_pack_id', 'type_of_id', 'amount',\n",
      "       'is_full_price', 'name_categories'],\n",
      "      dtype='object')\n",
      "\n",
      " Representation theme columns :  Index(['event_id', 'id_representation_cap', 'representation_id',\n",
      "       'category_id'],\n",
      "      dtype='object')\n",
      "\n",
      " Events theme columns :  Index(['event_id', 'season_id', 'facility_id', 'event_type_id',\n",
      "       'event_type_key_id', 'facility_key_id', 'street_id', 'name_events',\n",
      "       'name_seasons', 'name_event_types', 'name_facilities'],\n",
      "      dtype='object')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id_products</th>\n",
       "      <th>representation_id</th>\n",
       "      <th>pricing_formula_id</th>\n",
       "      <th>category_id</th>\n",
       "      <th>products_group_id</th>\n",
       "      <th>product_pack_id</th>\n",
       "      <th>event_id</th>\n",
       "      <th>id_representation_cap</th>\n",
       "      <th>season_id</th>\n",
       "      <th>facility_id</th>\n",
       "      <th>event_type_id</th>\n",
       "      <th>event_type_key_id</th>\n",
       "      <th>facility_key_id</th>\n",
       "      <th>street_id</th>\n",
       "      <th>amount</th>\n",
       "      <th>is_full_price</th>\n",
       "      <th>name_event_types</th>\n",
       "      <th>name_facilities</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10682</td>\n",
       "      <td>914</td>\n",
       "      <td>114</td>\n",
       "      <td>41</td>\n",
       "      <td>10655</td>\n",
       "      <td>1</td>\n",
       "      <td>132</td>\n",
       "      <td>8789</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>False</td>\n",
       "      <td>offre muséale individuel</td>\n",
       "      <td>mucem</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>478</td>\n",
       "      <td>273</td>\n",
       "      <td>131</td>\n",
       "      <td>1</td>\n",
       "      <td>471</td>\n",
       "      <td>1</td>\n",
       "      <td>37</td>\n",
       "      <td>390</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9.5</td>\n",
       "      <td>False</td>\n",
       "      <td>offre muséale individuel</td>\n",
       "      <td>mucem</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20873</td>\n",
       "      <td>275</td>\n",
       "      <td>137</td>\n",
       "      <td>1</td>\n",
       "      <td>20825</td>\n",
       "      <td>1</td>\n",
       "      <td>37</td>\n",
       "      <td>395</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>11.5</td>\n",
       "      <td>False</td>\n",
       "      <td>offre muséale individuel</td>\n",
       "      <td>mucem</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>157142</td>\n",
       "      <td>82519</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>156773</td>\n",
       "      <td>1</td>\n",
       "      <td>12365</td>\n",
       "      <td>120199</td>\n",
       "      <td>1754</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>False</td>\n",
       "      <td>offre muséale individuel</td>\n",
       "      <td>mucem</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1341</td>\n",
       "      <td>9</td>\n",
       "      <td>93</td>\n",
       "      <td>1</td>\n",
       "      <td>1175</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8.5</td>\n",
       "      <td>False</td>\n",
       "      <td>non défini</td>\n",
       "      <td>mucem</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id_products  representation_id  pricing_formula_id  category_id  \\\n",
       "0        10682                914                 114           41   \n",
       "1          478                273                 131            1   \n",
       "2        20873                275                 137            1   \n",
       "3       157142              82519                   9            5   \n",
       "4         1341                  9                  93            1   \n",
       "\n",
       "   products_group_id  product_pack_id  event_id  id_representation_cap  \\\n",
       "0              10655                1       132                   8789   \n",
       "1                471                1        37                    390   \n",
       "2              20825                1        37                    395   \n",
       "3             156773                1     12365                 120199   \n",
       "4               1175                1         8                     21   \n",
       "\n",
       "   season_id  facility_id  event_type_id  event_type_key_id  facility_key_id  \\\n",
       "0          4            1              2                  5                1   \n",
       "1          2            1              2                  2                1   \n",
       "2          2            1              2                  2                1   \n",
       "3       1754            1              2                  4                1   \n",
       "4          4            1              3                  6                1   \n",
       "\n",
       "   street_id  amount  is_full_price          name_event_types name_facilities  \n",
       "0          1     9.0          False  offre muséale individuel           mucem  \n",
       "1          1     9.5          False  offre muséale individuel           mucem  \n",
       "2          1    11.5          False  offre muséale individuel           mucem  \n",
       "3          1     8.0          False  offre muséale individuel           mucem  \n",
       "4          1     8.5          False                non défini           mucem  "
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "products_global = uniform_product_df()\n",
    "products_global.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "117d172a-2195-4060-9245-96c6f637ebbd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}