1466 lines
		
	
	
		
			53 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			1466 lines
		
	
	
		
			53 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
{
 | 
						||
 "cells": [
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "ad414c84-be46-4d2c-be8b-9fc4d24cc672",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "# Business Data Challenge - Team 1"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 1,
 | 
						||
   "id": "15103481-8d74-404c-aa09-7601fe7730da",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "import pandas as pd\n",
 | 
						||
    "import numpy as np\n",
 | 
						||
    "import os\n",
 | 
						||
    "import s3fs\n",
 | 
						||
    "import re"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "ee97665c-39af-4c1c-a62b-c9c79feae18f",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "Configuration de l'accès aux données"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 2,
 | 
						||
   "id": "5d83bb1a-d341-446e-91f6-1c428607f6d4",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "# Build the S3 filesystem client; the endpoint host comes from the AWS_S3_ENDPOINT environment variable\n",
    "S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n",
    "fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "9cbd72c5-6f8e-4366-ab66-96c32c6e963a",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "# Exemple sur Company 1"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "db26e59a-927c-407e-b54b-1815473b0b34",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Chargement données"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 3,
 | 
						||
   "id": "699664b9-eee4-4f8d-a207-e524526560c5",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "BUCKET = \"bdc2324-data/1\"\n",
 | 
						||
    "liste_database = fs.ls(BUCKET)"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 4,
 | 
						||
   "id": "aaf64d60-bf92-470c-8210-d09abd6a653e",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/plain": [
 | 
						||
       "['bdc2324-data/1/1campaign_stats.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1campaigns.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1categories.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1countries.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1currencies.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1customer_target_mappings.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1customersplus.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1event_types.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1events.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1facilities.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1link_stats.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1pricing_formulas.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1product_packs.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1products.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1products_groups.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1purchases.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1representation_category_capacities.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1representations.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1seasons.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1structure_tag_mappings.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1suppliers.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1tags.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1target_types.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1targets.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1tickets.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1type_of_categories.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n",
 | 
						||
       " 'bdc2324-data/1/1type_ofs.csv']"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 4,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "liste_database"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 5,
 | 
						||
   "id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stderr",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "/tmp/ipykernel_50143/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
 | 
						||
      "  df = pd.read_csv(file_in)\n"
 | 
						||
     ]
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "# Load every CSV of the bucket listing into its own DataFrame (df1_tickets, df1_purchases, ...)\n",
    "files_path = liste_database\n",
    "\n",
    "# The client number is the second path component, e.g. 'bdc2324-data/1/1tickets.csv' gives '1'\n",
    "client_number = files_path[0].split(\"/\")[1]\n",
    "df_prefix = \"df\" + str(client_number) + \"_\"\n",
    "\n",
    "# Group 3 is the table name: the file name without its leading digits and '.csv' suffix\n",
    "table_name_pattern = re.compile(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$')\n",
    "\n",
    "for current_path in files_path:\n",
    "    # Check the name before downloading, so an unexpected file cannot fail on .group() after a full read\n",
    "    name_match = table_name_pattern.search(current_path)\n",
    "    if name_match is None:\n",
    "        print(f\"Skipping {current_path}: unexpected file name\")\n",
    "        continue\n",
    "    with fs.open(current_path, mode=\"rb\") as file_in:\n",
    "        # low_memory=False infers each column dtype in a single pass, which avoids the\n",
    "        # mixed-type DtypeWarning produced by chunked inference\n",
    "        df = pd.read_csv(file_in, low_memory=False)\n",
    "    # the pattern of the name is df1xxx\n",
    "    nom_dataframe = df_prefix + name_match.group(3)\n",
    "    # Later cells refer to the frames by name, so they are published as notebook-level variables\n",
    "    globals()[nom_dataframe] = df"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "4004c8bf-11d9-413d-bb42-2cb8ddde7716",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Cleaning functions"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 6,
 | 
						||
   "id": "d237be96-8c86-4a91-b7a1-487e87a16c3d",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "def cleaning_date(df, column_name):\n",
    "    \"\"\"\n",
    "    Convert one column of ISO 8601 date strings into timezone-aware (UTC) datetimes.\n",
    "\n",
    "    The converted values are written back into `df` itself, so the caller's\n",
    "    DataFrame is modified in place; the same object is also returned.\n",
    "\n",
    "    Parameters:\n",
    "    - df: DataFrame\n",
    "        The DataFrame holding the column to convert.\n",
    "    - column_name: str\n",
    "        Name of the column to convert.\n",
    "\n",
    "    Returns:\n",
    "    - DataFrame\n",
    "        The same DataFrame, with the column converted.\n",
    "    \"\"\"\n",
    "    parsed_dates = pd.to_datetime(df[column_name], format='ISO8601', utc=True)\n",
    "    df[column_name] = parsed_dates\n",
    "    return df"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "398804d8-2225-4fd3-bceb-75ab1588e359",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Preprocessing"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "568cb180-0dd9-4b27-aecb-05e4c3775ba6",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## customer_plus"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": null,
 | 
						||
   "id": "7e7b90ce-da54-4f00-bc34-64c543b0858f",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": []
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "bade04b1-0cdf-4d10-bcca-7dc7e4831656",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Ticket area"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 7,
 | 
						||
   "id": "b95464b1-26bc-4aac-84b4-45da83b92251",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "# Cleaning and column selection for the ticket area\n",
    "def preprocessing_tickets_area(tickets = None, purchases = None, suppliers = None, type_ofs = None):\n",
    "    \"\"\"\n",
    "    Build one row per ticket, enriched with its supplier name and purchase details.\n",
    "\n",
    "    Parameters:\n",
    "    - tickets: DataFrame\n",
    "        Must contain 'id', 'purchase_id', 'product_id', 'is_from_subscription',\n",
    "        'type_of' and 'supplier_id'.\n",
    "    - purchases: DataFrame\n",
    "        Must contain 'id', 'purchase_date' and 'customer_id'. Its 'purchase_date'\n",
    "        column is converted to UTC datetimes in place by cleaning_date.\n",
    "    - suppliers: DataFrame\n",
    "        Must contain 'id' and 'name'.\n",
    "    - type_ofs: DataFrame\n",
    "        Accepted for interface compatibility but not used: the join on\n",
    "        ticket-type labels is currently disabled.\n",
    "\n",
    "    Returns:\n",
    "    - DataFrame\n",
    "        Columns 'ticket_id', 'product_id', 'is_from_subscription', 'type_of',\n",
    "        'supplier_name', 'purchase_date', 'customer_id'. Both joins are inner\n",
    "        joins, so tickets without a matching supplier or purchase are dropped.\n",
    "    \"\"\"\n",
    "    # rename() returns a new frame instead of writing into a column slice, which\n",
    "    # avoids the SettingWithCopyWarning and leaves the caller's frames untouched\n",
    "    tickets = tickets[['id', 'purchase_id', 'product_id', 'is_from_subscription', 'type_of', 'supplier_id']].rename(columns = {'id' : 'ticket_id'})\n",
    "    suppliers = suppliers[['id', 'name']].rename(columns = {'name' : 'supplier_name'})\n",
    "\n",
    "    # Purchase date cleaning, applied to the frame passed in before column selection\n",
    "    cleaning_date(purchases, 'purchase_date')\n",
    "    purchases = purchases[['id', 'purchase_date', 'customer_id']]\n",
    "\n",
    "    # Attach the supplier name, then drop the join keys\n",
    "    ticket_information = pd.merge(tickets, suppliers, left_on = 'supplier_id', right_on = 'id', how = 'inner')\n",
    "    ticket_information = ticket_information.drop(columns = ['supplier_id', 'id'])\n",
    "\n",
    "    # Attach purchase date and customer, then drop the join keys\n",
    "    ticket_information = pd.merge(ticket_information, purchases, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n",
    "    ticket_information = ticket_information.drop(columns = ['purchase_id', 'id'])\n",
    "\n",
    "    return ticket_information"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 8,
 | 
						||
   "id": "3e1d2ba7-ff4f-48eb-93a8-2bb648c70396",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stderr",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "/tmp/ipykernel_50143/1320335767.py:5: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n",
 | 
						||
      "/tmp/ipykernel_50143/1320335767.py:9: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n"
 | 
						||
     ]
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_ticket_information = preprocessing_tickets_area(tickets = df1_tickets, purchases = df1_purchases, suppliers = df1_suppliers, type_ofs = df1_type_ofs)"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 70,
 | 
						||
   "id": "4b18edfc-6450-4c6a-9e7b-ee5a5808c8c9",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>ticket_id</th>\n",
 | 
						||
       "      <th>product_id</th>\n",
 | 
						||
       "      <th>is_from_subscription</th>\n",
 | 
						||
       "      <th>type_of</th>\n",
 | 
						||
       "      <th>supplier_name</th>\n",
 | 
						||
       "      <th>purchase_date</th>\n",
 | 
						||
       "      <th>customer_id</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>13070859</td>\n",
 | 
						||
       "      <td>225251</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>2018-12-28 14:47:50+00:00</td>\n",
 | 
						||
       "      <td>48187</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>13070860</td>\n",
 | 
						||
       "      <td>224914</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>2018-12-28 14:47:50+00:00</td>\n",
 | 
						||
       "      <td>48187</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>13070861</td>\n",
 | 
						||
       "      <td>224914</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>2018-12-28 14:47:50+00:00</td>\n",
 | 
						||
       "      <td>48187</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>13070862</td>\n",
 | 
						||
       "      <td>224914</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>2018-12-28 14:47:50+00:00</td>\n",
 | 
						||
       "      <td>48187</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>13070863</td>\n",
 | 
						||
       "      <td>224914</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>2018-12-28 14:47:50+00:00</td>\n",
 | 
						||
       "      <td>48187</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>...</th>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1826667</th>\n",
 | 
						||
       "      <td>20662815</td>\n",
 | 
						||
       "      <td>405689</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>2023-11-08 17:23:54+00:00</td>\n",
 | 
						||
       "      <td>1256135</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1826668</th>\n",
 | 
						||
       "      <td>20662816</td>\n",
 | 
						||
       "      <td>403658</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>2023-11-08 18:32:18+00:00</td>\n",
 | 
						||
       "      <td>1256136</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1826669</th>\n",
 | 
						||
       "      <td>20662817</td>\n",
 | 
						||
       "      <td>403658</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>2023-11-08 18:32:18+00:00</td>\n",
 | 
						||
       "      <td>1256136</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1826670</th>\n",
 | 
						||
       "      <td>20662818</td>\n",
 | 
						||
       "      <td>403658</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>2023-11-08 19:30:28+00:00</td>\n",
 | 
						||
       "      <td>1256137</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1826671</th>\n",
 | 
						||
       "      <td>20662819</td>\n",
 | 
						||
       "      <td>403658</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>vente en ligne</td>\n",
 | 
						||
       "      <td>2023-11-08 19:30:28+00:00</td>\n",
 | 
						||
       "      <td>1256137</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "<p>1826672 rows × 7 columns</p>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "         ticket_id  product_id  is_from_subscription  type_of   supplier_name  \\\n",
 | 
						||
       "0         13070859      225251                 False        1  vente en ligne   \n",
 | 
						||
       "1         13070860      224914                 False        1  vente en ligne   \n",
 | 
						||
       "2         13070861      224914                 False        1  vente en ligne   \n",
 | 
						||
       "3         13070862      224914                 False        1  vente en ligne   \n",
 | 
						||
       "4         13070863      224914                 False        1  vente en ligne   \n",
 | 
						||
       "...            ...         ...                   ...      ...             ...   \n",
 | 
						||
       "1826667   20662815      405689                 False        1  vente en ligne   \n",
 | 
						||
       "1826668   20662816      403658                 False        1  vente en ligne   \n",
 | 
						||
       "1826669   20662817      403658                 False        1  vente en ligne   \n",
 | 
						||
       "1826670   20662818      403658                 False        1  vente en ligne   \n",
 | 
						||
       "1826671   20662819      403658                 False        1  vente en ligne   \n",
 | 
						||
       "\n",
 | 
						||
       "                    purchase_date  customer_id  \n",
 | 
						||
       "0       2018-12-28 14:47:50+00:00        48187  \n",
 | 
						||
       "1       2018-12-28 14:47:50+00:00        48187  \n",
 | 
						||
       "2       2018-12-28 14:47:50+00:00        48187  \n",
 | 
						||
       "3       2018-12-28 14:47:50+00:00        48187  \n",
 | 
						||
       "4       2018-12-28 14:47:50+00:00        48187  \n",
 | 
						||
       "...                           ...          ...  \n",
 | 
						||
       "1826667 2023-11-08 17:23:54+00:00      1256135  \n",
 | 
						||
       "1826668 2023-11-08 18:32:18+00:00      1256136  \n",
 | 
						||
       "1826669 2023-11-08 18:32:18+00:00      1256136  \n",
 | 
						||
       "1826670 2023-11-08 19:30:28+00:00      1256137  \n",
 | 
						||
       "1826671 2023-11-08 19:30:28+00:00      1256137  \n",
 | 
						||
       "\n",
 | 
						||
       "[1826672 rows x 7 columns]"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 70,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_ticket_information"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "096e47f4-1d65-4575-989d-83227eedad2b",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Target area"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 9,
 | 
						||
   "id": "baed146a-9d3a-4397-a812-3d50c9a2f038",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "def preprocessing_target_area(targets = None, target_types = None, customer_target_mappings = None):\n",
    "    \"\"\"\n",
    "    Build one row per customer/target mapping, enriched with the target name\n",
    "    and the details of its target type.\n",
    "\n",
    "    Parameters:\n",
    "    - targets: DataFrame\n",
    "        Must contain 'id', 'target_type_id' and 'name'.\n",
    "    - target_types: DataFrame\n",
    "        Must contain 'id', 'is_import' and 'name'.\n",
    "    - customer_target_mappings: DataFrame\n",
    "        Must contain 'id', 'customer_id' and 'target_id'.\n",
    "\n",
    "    Returns:\n",
    "    - DataFrame\n",
    "        Columns 'id' (mapping id), 'customer_id', 'target_name',\n",
    "        'target_type_is_import', 'target_type_name'. Both joins are inner\n",
    "        joins, so mappings without a matching target or target type are dropped.\n",
    "    \"\"\"\n",
    "    # rename() returns a new frame instead of writing into a column slice, which\n",
    "    # avoids the SettingWithCopyWarning and leaves the caller's frame untouched\n",
    "    targets = targets[[\"id\", \"target_type_id\", \"name\"]].rename(columns = {'id' : 'target_id' , 'name' : 'target_name'})\n",
    "\n",
    "    # add_prefix turns 'id' into 'target_type_id', the join key shared with targets\n",
    "    target_types = target_types[[\"id\",\"is_import\",\"name\"]].add_prefix(\"target_type_\")\n",
    "\n",
    "    customer_target_mappings = customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]]\n",
    "\n",
    "    # Targets with their type, without the join key\n",
    "    targets_full = pd.merge(targets, target_types, on = 'target_type_id', how = 'inner')\n",
    "    targets_full = targets_full.drop(columns = ['target_type_id'])\n",
    "\n",
    "    # Customer mappings with their target details, without the join key\n",
    "    targets_full = pd.merge(customer_target_mappings, targets_full, on = 'target_id', how = 'inner')\n",
    "    targets_full = targets_full.drop(columns = ['target_id'])\n",
    "\n",
    "    return targets_full"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 10,
 | 
						||
   "id": "5fbfd88b-b94c-489c-9201-670e96e453e7",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stderr",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "/tmp/ipykernel_50143/3848597476.py:4: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  targets.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n"
 | 
						||
     ]
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_target_information = preprocessing_target_area(targets = df1_targets, target_types = df1_target_types, customer_target_mappings = df1_customer_target_mappings)"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 19,
 | 
						||
   "id": "b4f05142-2a22-42ef-a60d-f23cc4b5cb09",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>customer_id</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>target_name</th>\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optin mediation specialisee</th>\n",
 | 
						||
       "      <td>150000</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optin jeune public</th>\n",
 | 
						||
       "      <td>149979</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optin b2c</th>\n",
 | 
						||
       "      <td>108909</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>Arenametrix_bascule tel vers sib</th>\n",
 | 
						||
       "      <td>35216</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optout b2c</th>\n",
 | 
						||
       "      <td>34523</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>...</th>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>Automation_parrainage_newsletter_handicap_visuel</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optout mediation specialisee</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>Inscrits NL LSF formulaire</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>Market auto - contacts inactifs post-scénario</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>Inactifs - fin du scénario</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "<p>283 rows × 1 columns</p>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "                                                  customer_id\n",
 | 
						||
       "target_name                                                  \n",
 | 
						||
       "consentement optin mediation specialisee               150000\n",
 | 
						||
       "consentement optin jeune public                        149979\n",
 | 
						||
       "consentement optin b2c                                 108909\n",
 | 
						||
       "Arenametrix_bascule tel vers sib                        35216\n",
 | 
						||
       "consentement optout b2c                                 34523\n",
 | 
						||
       "...                                                       ...\n",
 | 
						||
       "Automation_parrainage_newsletter_handicap_visuel            1\n",
 | 
						||
       "consentement optout mediation specialisee                   1\n",
 | 
						||
       "Inscrits NL LSF formulaire                                  1\n",
 | 
						||
       "Market auto - contacts inactifs post-scénario               1\n",
 | 
						||
       "Inactifs - fin du scénario                                  1\n",
 | 
						||
       "\n",
 | 
						||
       "[283 rows x 1 columns]"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 19,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "# Count of non-null customer_id values per target name, largest first\n",
    "(\n",
    "    df1_target_information[['target_name', 'customer_id']]\n",
    "    .groupby('target_name')\n",
    "    .count()\n",
    "    .sort_values(by='customer_id', ascending=False)\n",
    ")"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 29,
 | 
						||
   "id": "4417ff51-f501-4ab9-a192-4ab75764a8ed",
 | 
						||
   "metadata": {
 | 
						||
    "scrolled": true
 | 
						||
   },
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>customer_id</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>target_name</th>\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>Arenametrix_bascule tel vers sib</th>\n",
 | 
						||
       "      <td>35216</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>Autres_interet_exposition</th>\n",
 | 
						||
       "      <td>1021</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>COM Inscrits NL générale (historique)</th>\n",
 | 
						||
       "      <td>23005</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>Contacts_prenomsdoubles</th>\n",
 | 
						||
       "      <td>11643</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DDCP MD Procès du Siècle</th>\n",
 | 
						||
       "      <td>1684</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DDCP Newsletter centres de loisirs</th>\n",
 | 
						||
       "      <td>1032</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DDCP Newsletter enseignants</th>\n",
 | 
						||
       "      <td>4510</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DDCP Newsletter jeune public</th>\n",
 | 
						||
       "      <td>3862</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DDCP Newsletter relais champ social</th>\n",
 | 
						||
       "      <td>2270</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DDCP PROMO Participants ateliers (adultes et enfants)</th>\n",
 | 
						||
       "      <td>1954</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DDCP billets famille</th>\n",
 | 
						||
       "      <td>3609</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DDCP promo MD pass musées dps oct 2018</th>\n",
 | 
						||
       "      <td>1785</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DDCP promo Plan B 2019 (concerts)</th>\n",
 | 
						||
       "      <td>1948</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers)</th>\n",
 | 
						||
       "      <td>1293</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DDCP rentrée culturelle 2023</th>\n",
 | 
						||
       "      <td>1757</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DDCP_marseille_jazz_2023</th>\n",
 | 
						||
       "      <td>1043</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DRE Festival Jean Rouch</th>\n",
 | 
						||
       "      <td>1502</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DRE MucemLab</th>\n",
 | 
						||
       "      <td>2302</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DRE chercheurs</th>\n",
 | 
						||
       "      <td>1557</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>DRE institutionnels</th>\n",
 | 
						||
       "      <td>2229</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>FORMATION _ acheteurs optin last year</th>\n",
 | 
						||
       "      <td>10485</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>Inscrits NL générale (export_291019 + operation_videomaton)</th>\n",
 | 
						||
       "      <td>14086</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>Inscrits NL générale site web</th>\n",
 | 
						||
       "      <td>3732</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>Inscrits NL jeune public site web</th>\n",
 | 
						||
       "      <td>1249</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>Votre première liste</th>\n",
 | 
						||
       "      <td>3715</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optin b2b</th>\n",
 | 
						||
       "      <td>12735</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optin b2c</th>\n",
 | 
						||
       "      <td>108909</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optin dre</th>\n",
 | 
						||
       "      <td>4527</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optin jeune public</th>\n",
 | 
						||
       "      <td>149979</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optin mediation specialisee</th>\n",
 | 
						||
       "      <td>150000</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optin newsletter generale</th>\n",
 | 
						||
       "      <td>22095</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optin scolaires</th>\n",
 | 
						||
       "      <td>4849</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optout b2b</th>\n",
 | 
						||
       "      <td>14219</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optout b2c</th>\n",
 | 
						||
       "      <td>34523</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optout dre</th>\n",
 | 
						||
       "      <td>14328</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optout newsletter generale</th>\n",
 | 
						||
       "      <td>18855</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>consentement optout scolaires</th>\n",
 | 
						||
       "      <td>15744</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>ddcp_md_scene_ouverte_au_talent</th>\n",
 | 
						||
       "      <td>1577</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>ddcp_promo_MD_billet_musée_oct_2019_agarder2</th>\n",
 | 
						||
       "      <td>5482</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>ddcp_promo_md_musée_dps 011019</th>\n",
 | 
						||
       "      <td>6010</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>ddcp_promo_visiteurs occasionnels_musee_8mois</th>\n",
 | 
						||
       "      <td>6640</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>ddcp_visiteurs dps 010622</th>\n",
 | 
						||
       "      <td>12355</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>festival_jean_rouch</th>\n",
 | 
						||
       "      <td>1502</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>rappel po barvalo</th>\n",
 | 
						||
       "      <td>1248</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>structures_etiquette champ social</th>\n",
 | 
						||
       "      <td>1488</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "                                                    customer_id\n",
 | 
						||
       "target_name                                                    \n",
 | 
						||
       "Arenametrix_bascule tel vers sib                          35216\n",
 | 
						||
       "Autres_interet_exposition                                  1021\n",
 | 
						||
       "COM Inscrits NL générale (historique)                     23005\n",
 | 
						||
       "Contacts_prenomsdoubles                                   11643\n",
 | 
						||
       "DDCP MD Procès du Siècle                                   1684\n",
 | 
						||
       "DDCP Newsletter centres de loisirs                         1032\n",
 | 
						||
       "DDCP Newsletter enseignants                                4510\n",
 | 
						||
       "DDCP Newsletter jeune public                               3862\n",
 | 
						||
       "DDCP Newsletter relais champ social                        2270\n",
 | 
						||
       "DDCP PROMO Participants ateliers (adultes et en...         1954\n",
 | 
						||
       "DDCP billets famille                                       3609\n",
 | 
						||
       "DDCP promo MD pass musées dps oct 2018                     1785\n",
 | 
						||
       "DDCP promo Plan B 2019 (concerts)                          1948\n",
 | 
						||
       "DDCP promo spectateurs prog 21-22 (spectacles, ...         1293\n",
 | 
						||
       "DDCP rentrée culturelle 2023                               1757\n",
 | 
						||
       "DDCP_marseille_jazz_2023                                   1043\n",
 | 
						||
       "DRE Festival Jean Rouch                                    1502\n",
 | 
						||
       "DRE MucemLab                                               2302\n",
 | 
						||
       "DRE chercheurs                                             1557\n",
 | 
						||
       "DRE institutionnels                                        2229\n",
 | 
						||
       "FORMATION _ acheteurs optin last year                     10485\n",
 | 
						||
       "Inscrits NL générale (export_291019 + operation...        14086\n",
 | 
						||
       "Inscrits NL générale site web                              3732\n",
 | 
						||
       "Inscrits NL jeune public site web                          1249\n",
 | 
						||
       "Votre première liste                                       3715\n",
 | 
						||
       "consentement optin b2b                                    12735\n",
 | 
						||
       "consentement optin b2c                                   108909\n",
 | 
						||
       "consentement optin dre                                     4527\n",
 | 
						||
       "consentement optin jeune public                          149979\n",
 | 
						||
       "consentement optin mediation specialisee                 150000\n",
 | 
						||
       "consentement optin newsletter generale                    22095\n",
 | 
						||
       "consentement optin scolaires                               4849\n",
 | 
						||
       "consentement optout b2b                                   14219\n",
 | 
						||
       "consentement optout b2c                                   34523\n",
 | 
						||
       "consentement optout dre                                   14328\n",
 | 
						||
       "consentement optout newsletter generale                   18855\n",
 | 
						||
       "consentement optout scolaires                             15744\n",
 | 
						||
       "ddcp_md_scene_ouverte_au_talent                            1577\n",
 | 
						||
       "ddcp_promo_MD_billet_musée_oct_2019_agarder2               5482\n",
 | 
						||
       "ddcp_promo_md_musée_dps 011019                             6010\n",
 | 
						||
       "ddcp_promo_visiteurs occasionnels_musee_8mois              6640\n",
 | 
						||
       "ddcp_visiteurs dps 010622                                 12355\n",
 | 
						||
       "festival_jean_rouch                                        1502\n",
 | 
						||
       "rappel po barvalo                                          1248\n",
 | 
						||
       "structures_etiquette champ social                          1488"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 29,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "# Count customer_id values per target_name, then show only the targets with at least 1000 of them\n",
    "df1_target_information_reduced = (\n",
    "    df1_target_information[['target_name', 'customer_id']]\n",
    "    .groupby('target_name')\n",
    "    .count()\n",
    ")\n",
    "df1_target_information_reduced.loc[df1_target_information_reduced['customer_id'] >= 1000]"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "cdbb48b4-5e16-4ef4-8791-ed213d68d52f",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "## Campaigns area"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 11,
 | 
						||
   "id": "d883cc7b-ac43-4485-b86f-eaf595fbad85",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "def preprocessing_campaigns_area(campaign_stats = None, campaigns = None):\n",
    "    \"\"\"Join the campaign statistics rows with the description of their campaign.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    campaign_stats : pandas.DataFrame\n",
    "        Needs the columns id, campaign_id, customer_id, opened_at, sent_at, delivered_at.\n",
    "    campaigns : pandas.DataFrame\n",
    "        Needs the columns id, name, service_id, sent_at.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The selected campaign_stats columns left-joined with the campaign columns\n",
    "        (renamed with a campaign_ prefix); the campaign_id join key is dropped.\n",
    "    \"\"\"\n",
    "    # campaign_stats cleaning\n",
    "    # Take an explicit copy of the column selection: cleaning_date (defined in another\n",
    "    # cell) is called for its effect on the frame it receives, and doing that on a bare\n",
    "    # selection raised SettingWithCopyWarning.\n",
    "    campaign_stats = campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]].copy()\n",
    "    for date_column in ('opened_at', 'sent_at', 'delivered_at'):\n",
    "        cleaning_date(campaign_stats, date_column)\n",
    "\n",
    "    # campaigns cleaning\n",
    "    # add_prefix already returns a new frame, so no extra copy is needed here.\n",
    "    campaigns = campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\")\n",
    "    cleaning_date(campaigns, 'campaign_sent_at')\n",
    "\n",
    "    # Merge: keep every statistics row, even when its campaign is not found\n",
    "    campaigns_full = pd.merge(campaign_stats, campaigns, on = \"campaign_id\", how = \"left\")\n",
    "\n",
    "    # The join key is not needed once the campaign columns are attached\n",
    "    return campaigns_full.drop(columns = ['campaign_id'])"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 12,
 | 
						||
   "id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stderr",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "/tmp/ipykernel_50143/1967867975.py:15: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
 | 
						||
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
 | 
						||
      "/tmp/ipykernel_50143/1967867975.py:15: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
 | 
						||
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
 | 
						||
      "/tmp/ipykernel_50143/1967867975.py:15: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
 | 
						||
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n"
 | 
						||
     ]
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "# Build the merged campaigns table for company 1\n",
    "df1_campaigns_information = preprocessing_campaigns_area(\n",
    "    campaign_stats=df1_campaign_stats,\n",
    "    campaigns=df1_campaigns,\n",
    ")"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 27,
 | 
						||
   "id": "c24457e7-3cad-451a-a65b-7373b656bd6e",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>id</th>\n",
 | 
						||
       "      <th>customer_id</th>\n",
 | 
						||
       "      <th>opened_at</th>\n",
 | 
						||
       "      <th>sent_at</th>\n",
 | 
						||
       "      <th>delivered_at</th>\n",
 | 
						||
       "      <th>campaign_name</th>\n",
 | 
						||
       "      <th>campaign_service_id</th>\n",
 | 
						||
       "      <th>campaign_sent_at</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>19793</td>\n",
 | 
						||
       "      <td>112597</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>2021-03-28 16:01:09+00:00</td>\n",
 | 
						||
       "      <td>2021-03-28 16:24:18+00:00</td>\n",
 | 
						||
       "      <td>Le Mucem chez vous, gardons le lien #22</td>\n",
 | 
						||
       "      <td>404</td>\n",
 | 
						||
       "      <td>2021-03-27 23:00:00+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>14211</td>\n",
 | 
						||
       "      <td>113666</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>2021-03-28 16:01:09+00:00</td>\n",
 | 
						||
       "      <td>2021-03-28 16:21:02+00:00</td>\n",
 | 
						||
       "      <td>Le Mucem chez vous, gardons le lien #22</td>\n",
 | 
						||
       "      <td>404</td>\n",
 | 
						||
       "      <td>2021-03-27 23:00:00+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>13150</td>\n",
 | 
						||
       "      <td>280561</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>2021-03-28 16:00:59+00:00</td>\n",
 | 
						||
       "      <td>2021-03-28 16:08:45+00:00</td>\n",
 | 
						||
       "      <td>Le Mucem chez vous, gardons le lien #22</td>\n",
 | 
						||
       "      <td>404</td>\n",
 | 
						||
       "      <td>2021-03-27 23:00:00+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>7073</td>\n",
 | 
						||
       "      <td>101007</td>\n",
 | 
						||
       "      <td>2021-03-28 18:11:06+00:00</td>\n",
 | 
						||
       "      <td>2021-03-28 16:00:59+00:00</td>\n",
 | 
						||
       "      <td>2021-03-28 16:09:47+00:00</td>\n",
 | 
						||
       "      <td>Le Mucem chez vous, gardons le lien #22</td>\n",
 | 
						||
       "      <td>404</td>\n",
 | 
						||
       "      <td>2021-03-27 23:00:00+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>5175</td>\n",
 | 
						||
       "      <td>103972</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>2021-03-28 16:01:06+00:00</td>\n",
 | 
						||
       "      <td>2021-03-28 16:05:03+00:00</td>\n",
 | 
						||
       "      <td>Le Mucem chez vous, gardons le lien #22</td>\n",
 | 
						||
       "      <td>404</td>\n",
 | 
						||
       "      <td>2021-03-27 23:00:00+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>...</th>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>6214803</th>\n",
 | 
						||
       "      <td>8302994</td>\n",
 | 
						||
       "      <td>266155</td>\n",
 | 
						||
       "      <td>2023-10-23 09:43:25+00:00</td>\n",
 | 
						||
       "      <td>2023-10-23 09:32:33+00:00</td>\n",
 | 
						||
       "      <td>2023-10-23 09:32:34+00:00</td>\n",
 | 
						||
       "      <td>dre_nov_2023</td>\n",
 | 
						||
       "      <td>1318</td>\n",
 | 
						||
       "      <td>2023-10-23 09:31:17+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>6214804</th>\n",
 | 
						||
       "      <td>8303307</td>\n",
 | 
						||
       "      <td>21355</td>\n",
 | 
						||
       "      <td>2023-10-23 09:44:02+00:00</td>\n",
 | 
						||
       "      <td>2023-10-23 09:32:49+00:00</td>\n",
 | 
						||
       "      <td>2023-10-23 09:32:49+00:00</td>\n",
 | 
						||
       "      <td>dre_nov_2023</td>\n",
 | 
						||
       "      <td>1318</td>\n",
 | 
						||
       "      <td>2023-10-23 09:31:17+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>6214805</th>\n",
 | 
						||
       "      <td>8304346</td>\n",
 | 
						||
       "      <td>21849</td>\n",
 | 
						||
       "      <td>2023-10-23 09:45:52+00:00</td>\n",
 | 
						||
       "      <td>2023-10-23 09:33:28+00:00</td>\n",
 | 
						||
       "      <td>2023-10-23 09:33:29+00:00</td>\n",
 | 
						||
       "      <td>dre_nov_2023</td>\n",
 | 
						||
       "      <td>1318</td>\n",
 | 
						||
       "      <td>2023-10-23 09:31:17+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>6214806</th>\n",
 | 
						||
       "      <td>8302037</td>\n",
 | 
						||
       "      <td>667789</td>\n",
 | 
						||
       "      <td>2023-10-23 09:47:32+00:00</td>\n",
 | 
						||
       "      <td>2023-10-23 09:31:53+00:00</td>\n",
 | 
						||
       "      <td>2023-10-23 09:31:54+00:00</td>\n",
 | 
						||
       "      <td>dre_nov_2023</td>\n",
 | 
						||
       "      <td>1318</td>\n",
 | 
						||
       "      <td>2023-10-23 09:31:17+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>6214807</th>\n",
 | 
						||
       "      <td>8304939</td>\n",
 | 
						||
       "      <td>294154</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "      <td>2023-10-23 09:33:54+00:00</td>\n",
 | 
						||
       "      <td>2023-10-23 09:33:55+00:00</td>\n",
 | 
						||
       "      <td>dre_nov_2023</td>\n",
 | 
						||
       "      <td>1318</td>\n",
 | 
						||
       "      <td>2023-10-23 09:31:17+00:00</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "<p>6214808 rows × 8 columns</p>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "              id  customer_id                 opened_at  \\\n",
 | 
						||
       "0          19793       112597                       NaT   \n",
 | 
						||
       "1          14211       113666                       NaT   \n",
 | 
						||
       "2          13150       280561                       NaT   \n",
 | 
						||
       "3           7073       101007 2021-03-28 18:11:06+00:00   \n",
 | 
						||
       "4           5175       103972                       NaT   \n",
 | 
						||
       "...          ...          ...                       ...   \n",
 | 
						||
       "6214803  8302994       266155 2023-10-23 09:43:25+00:00   \n",
 | 
						||
       "6214804  8303307        21355 2023-10-23 09:44:02+00:00   \n",
 | 
						||
       "6214805  8304346        21849 2023-10-23 09:45:52+00:00   \n",
 | 
						||
       "6214806  8302037       667789 2023-10-23 09:47:32+00:00   \n",
 | 
						||
       "6214807  8304939       294154                       NaT   \n",
 | 
						||
       "\n",
 | 
						||
       "                          sent_at              delivered_at  \\\n",
 | 
						||
       "0       2021-03-28 16:01:09+00:00 2021-03-28 16:24:18+00:00   \n",
 | 
						||
       "1       2021-03-28 16:01:09+00:00 2021-03-28 16:21:02+00:00   \n",
 | 
						||
       "2       2021-03-28 16:00:59+00:00 2021-03-28 16:08:45+00:00   \n",
 | 
						||
       "3       2021-03-28 16:00:59+00:00 2021-03-28 16:09:47+00:00   \n",
 | 
						||
       "4       2021-03-28 16:01:06+00:00 2021-03-28 16:05:03+00:00   \n",
 | 
						||
       "...                           ...                       ...   \n",
 | 
						||
       "6214803 2023-10-23 09:32:33+00:00 2023-10-23 09:32:34+00:00   \n",
 | 
						||
       "6214804 2023-10-23 09:32:49+00:00 2023-10-23 09:32:49+00:00   \n",
 | 
						||
       "6214805 2023-10-23 09:33:28+00:00 2023-10-23 09:33:29+00:00   \n",
 | 
						||
       "6214806 2023-10-23 09:31:53+00:00 2023-10-23 09:31:54+00:00   \n",
 | 
						||
       "6214807 2023-10-23 09:33:54+00:00 2023-10-23 09:33:55+00:00   \n",
 | 
						||
       "\n",
 | 
						||
       "                                   campaign_name  campaign_service_id  \\\n",
 | 
						||
       "0        Le Mucem chez vous, gardons le lien #22                  404   \n",
 | 
						||
       "1        Le Mucem chez vous, gardons le lien #22                  404   \n",
 | 
						||
       "2        Le Mucem chez vous, gardons le lien #22                  404   \n",
 | 
						||
       "3        Le Mucem chez vous, gardons le lien #22                  404   \n",
 | 
						||
       "4        Le Mucem chez vous, gardons le lien #22                  404   \n",
 | 
						||
       "...                                          ...                  ...   \n",
 | 
						||
       "6214803                             dre_nov_2023                 1318   \n",
 | 
						||
       "6214804                             dre_nov_2023                 1318   \n",
 | 
						||
       "6214805                             dre_nov_2023                 1318   \n",
 | 
						||
       "6214806                             dre_nov_2023                 1318   \n",
 | 
						||
       "6214807                             dre_nov_2023                 1318   \n",
 | 
						||
       "\n",
 | 
						||
       "                 campaign_sent_at  \n",
 | 
						||
       "0       2021-03-27 23:00:00+00:00  \n",
 | 
						||
       "1       2021-03-27 23:00:00+00:00  \n",
 | 
						||
       "2       2021-03-27 23:00:00+00:00  \n",
 | 
						||
       "3       2021-03-27 23:00:00+00:00  \n",
 | 
						||
       "4       2021-03-27 23:00:00+00:00  \n",
 | 
						||
       "...                           ...  \n",
 | 
						||
       "6214803 2023-10-23 09:31:17+00:00  \n",
 | 
						||
       "6214804 2023-10-23 09:31:17+00:00  \n",
 | 
						||
       "6214805 2023-10-23 09:31:17+00:00  \n",
 | 
						||
       "6214806 2023-10-23 09:31:17+00:00  \n",
 | 
						||
       "6214807 2023-10-23 09:31:17+00:00  \n",
 | 
						||
       "\n",
 | 
						||
       "[6214808 rows x 8 columns]"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 27,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_campaigns_information"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 67,
 | 
						||
   "id": "e2c88552-b863-47a2-be23-8d2898fb28bc",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": [
 | 
						||
    "def campaigns_kpi(campaigns_information = None):\n",
 | 
						||
    "    # Nombre de campagnes de mails\n",
 | 
						||
    "    nb_campaigns = campaigns_information[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n",
 | 
						||
    "    nb_campaigns.rename(columns = {'campaign_name' : 'nb_campaigns'}, inplace = True)\n",
 | 
						||
    "    # Temps d'ouverture en min moyen    \n",
 | 
						||
    "    campaigns_information['time_to_open'] = campaigns_information['opened_at'] - campaigns_information['delivered_at']\n",
 | 
						||
    "    time_to_open = campaigns_information[['customer_id', 'time_to_open']].groupby('customer_id').mean().reset_index()\n",
 | 
						||
    "\n",
 | 
						||
    "    # Nombre de mail ouvert    \n",
 | 
						||
    "    opened_campaign = campaigns_information[['customer_id', 'campaign_name', 'opened_at']]\n",
 | 
						||
    "    opened_campaign.dropna(subset=['opened_at'], inplace=True)\n",
 | 
						||
    "    opened_campaign = opened_campaign[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n",
 | 
						||
    "    opened_campaign.rename(columns = {'campaign_name' : 'nb_campaigns_opened' }, inplace = True)\n",
 | 
						||
    "\n",
 | 
						||
    "    # Fusion des indicateurs\n",
 | 
						||
    "    campaigns_reduced = pd.merge(nb_campaigns, opened_campaign, on = 'customer_id', how = 'left')\n",
 | 
						||
    "    campaigns_reduced = pd.merge(campaigns_reduced, time_to_open, on = 'customer_id', how = 'left')\n",
 | 
						||
    "\n",
 | 
						||
    "    # Remplir les NaN : nb_campaigns_opened\n",
 | 
						||
    "    campaigns_reduced['nb_campaigns_opened'].fillna(0, inplace=True)\n",
 | 
						||
    "\n",
 | 
						||
    "    # Remplir les NaT : time_to_open (??)\n",
 | 
						||
    "\n",
 | 
						||
    "    return campaigns_reduced\n",
 | 
						||
    "    "
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 69,
 | 
						||
   "id": "24537647-bc29-4777-9848-ac4120a4aa60",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stderr",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "/tmp/ipykernel_50143/2679359833.py:11: SettingWithCopyWarning: \n",
 | 
						||
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
 | 
						||
      "\n",
 | 
						||
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 | 
						||
      "  opened_campaign.dropna(subset=['opened_at'], inplace=True)\n",
 | 
						||
      "/tmp/ipykernel_50143/2679359833.py:20: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
 | 
						||
      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
 | 
						||
      "\n",
 | 
						||
      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
 | 
						||
      "\n",
 | 
						||
      "\n",
 | 
						||
      "  campaigns_reduced['nb_campaigns_opened'].fillna(0, inplace=True)\n"
 | 
						||
     ]
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "# Compute the per-customer campaign KPIs from df1_campaigns_information\n",
    "df1_campaigns_kpi = campaigns_kpi(df1_campaigns_information)"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 66,
 | 
						||
   "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>customer_id</th>\n",
 | 
						||
       "      <th>nb_campaigns</th>\n",
 | 
						||
       "      <th>nb_campaigns_opened</th>\n",
 | 
						||
       "      <th>time_to_open</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>0.0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>3</td>\n",
 | 
						||
       "      <td>222</td>\n",
 | 
						||
       "      <td>124.0</td>\n",
 | 
						||
       "      <td>1 days 00:28:30.169354838</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>7</td>\n",
 | 
						||
       "      <td>7.0</td>\n",
 | 
						||
       "      <td>1 days 04:31:01.428571428</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>0.0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>6</td>\n",
 | 
						||
       "      <td>20</td>\n",
 | 
						||
       "      <td>0.0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>...</th>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>130467</th>\n",
 | 
						||
       "      <td>1256097</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1.0</td>\n",
 | 
						||
       "      <td>0 days 02:11:15</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>130468</th>\n",
 | 
						||
       "      <td>1256098</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>0.0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>130469</th>\n",
 | 
						||
       "      <td>1256099</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>0.0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>130470</th>\n",
 | 
						||
       "      <td>1256100</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>0.0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>130471</th>\n",
 | 
						||
       "      <td>1256101</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>0.0</td>\n",
 | 
						||
       "      <td>NaT</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "<p>130472 rows × 4 columns</p>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "        customer_id  nb_campaigns  nb_campaigns_opened  \\\n",
 | 
						||
       "0                 2             4                  0.0   \n",
 | 
						||
       "1                 3           222                124.0   \n",
 | 
						||
       "2                 4             7                  7.0   \n",
 | 
						||
       "3                 5             4                  0.0   \n",
 | 
						||
       "4                 6            20                  0.0   \n",
 | 
						||
       "...             ...           ...                  ...   \n",
 | 
						||
       "130467      1256097             1                  1.0   \n",
 | 
						||
       "130468      1256098             1                  0.0   \n",
 | 
						||
       "130469      1256099             1                  0.0   \n",
 | 
						||
       "130470      1256100             1                  0.0   \n",
 | 
						||
       "130471      1256101             1                  0.0   \n",
 | 
						||
       "\n",
 | 
						||
       "                    time_to_open  \n",
 | 
						||
       "0                            NaT  \n",
 | 
						||
       "1      1 days 00:28:30.169354838  \n",
 | 
						||
       "2      1 days 04:31:01.428571428  \n",
 | 
						||
       "3                            NaT  \n",
 | 
						||
       "4                            NaT  \n",
 | 
						||
       "...                          ...  \n",
 | 
						||
       "130467           0 days 02:11:15  \n",
 | 
						||
       "130468                       NaT  \n",
 | 
						||
       "130469                       NaT  \n",
 | 
						||
       "130470                       NaT  \n",
 | 
						||
       "130471                       NaT  \n",
 | 
						||
       "\n",
 | 
						||
       "[130472 rows x 4 columns]"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 66,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df1_campaigns_kpi"
 | 
						||
   ]
 | 
						||
  }
 | 
						||
 ],
 | 
						||
 "metadata": {
 | 
						||
  "kernelspec": {
 | 
						||
   "display_name": "Python 3 (ipykernel)",
 | 
						||
   "language": "python",
 | 
						||
   "name": "python3"
 | 
						||
  },
 | 
						||
  "language_info": {
 | 
						||
   "codemirror_mode": {
 | 
						||
    "name": "ipython",
 | 
						||
    "version": 3
 | 
						||
   },
 | 
						||
   "file_extension": ".py",
 | 
						||
   "mimetype": "text/x-python",
 | 
						||
   "name": "python",
 | 
						||
   "nbconvert_exporter": "python",
 | 
						||
   "pygments_lexer": "ipython3",
 | 
						||
   "version": "3.10.13"
 | 
						||
  }
 | 
						||
 },
 | 
						||
 "nbformat": 4,
 | 
						||
 "nbformat_minor": 5
 | 
						||
}
 |