diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb
index 9f3f20b..ced5bdf 100644
--- a/0_Cleaning_and_merge.ipynb
+++ b/0_Cleaning_and_merge.ipynb
@@ -38,8 +38,7 @@
"outputs": [],
"source": [
"# Create filesystem object\n",
- "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
- "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
+ "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': 'https://' + os.environ['AWS_S3_ENDPOINT']})  # credentials come from the AWS_* environment variables; never commit keys or tokens"
]
},
{
@@ -79,7 +78,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_492/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ "/tmp/ipykernel_42764/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in)\n"
]
}
@@ -205,6 +204,7 @@
" # Base des fournisseurs\n",
" suppliers = suppliers[['id', 'name']]\n",
" suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n",
+ " suppliers['supplier_name'] = suppliers['supplier_name'].fillna('')\n",
"\n",
" # Base des types de billets\n",
" type_ofs = type_ofs[['id', 'name', 'children']]\n",
@@ -242,17 +242,23 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_492/1591303091.py:5: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/3092893564.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n",
- "/tmp/ipykernel_492/1591303091.py:9: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/3092893564.py:9: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n",
- "/tmp/ipykernel_492/1591303091.py:13: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/3092893564.py:10: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " suppliers['supplier_name'] = suppliers['supplier_name'].fillna('')\n",
+ "/tmp/ipykernel_42764/3092893564.py:14: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@@ -386,169 +392,6 @@
"df1_ticket_information.head()"
]
},
- {
- "cell_type": "markdown",
- "id": "37499eae-1a7f-4dce-83b0-ff942ccf7a9d",
- "metadata": {},
- "source": [
- "### KPI tickets"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "043303fe-e90f-4689-a2a9-5d690555a045",
- "metadata": {},
- "outputs": [],
- "source": [
- "def tickets_kpi_function(tickets_information = None):\n",
- " tickets_information_copy = tickets_information.copy()\n",
- " tickets_information_copy['purchase_date_max'] = tickets_information_copy['purchase_date']\n",
- " tickets_kpi = (tickets_information_copy[['product_id', 'customer_id', 'ticket_id','supplier_name', 'purchase_date', 'purchase_date_max']]\n",
- " .groupby(['product_id', 'customer_id'])\n",
- " .agg({'ticket_id': 'count', \n",
- " 'supplier_name': 'nunique',\n",
- " 'purchase_date_max' : 'max',\n",
- " 'purchase_date' : 'min'})\n",
- " .reset_index()\n",
- " )\n",
- " \n",
- " tickets_kpi.rename(columns = {'ticket_id' : 'nb_tickets', \n",
- " 'supplier_name' : 'nb_suppliers', \n",
- " 'purchase_date' : 'purchase_date_min'}, inplace = True)\n",
- " \n",
- " tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']\n",
- " \n",
- " return tickets_kpi\n",
- " "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "id": "5882234a-1ed5-4269-87a6-0d75613476e3",
- "metadata": {},
- "outputs": [],
- "source": [
- "df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_ticket_information)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " product_id | \n",
- " customer_id | \n",
- " nb_tickets | \n",
- " nb_suppliers | \n",
- " purchase_date_max | \n",
- " purchase_date_min | \n",
- " time_between_purchase | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 107310 | \n",
- " 2805 | \n",
- " 4 | \n",
- " 2 | \n",
- " 2019-06-05 14:37:13+00:00 | \n",
- " 2019-06-05 14:18:38+00:00 | \n",
- " 0 days 00:18:35 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 110089 | \n",
- " 54355 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2017-02-17 13:32:51+00:00 | \n",
- " 2017-02-17 13:32:51+00:00 | \n",
- " 0 days 00:00:00 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 110089 | \n",
- " 54356 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2017-03-02 14:36:16+00:00 | \n",
- " 2017-03-02 14:36:16+00:00 | \n",
- " 0 days 00:00:00 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 110089 | \n",
- " 54357 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2017-03-06 15:16:41+00:00 | \n",
- " 2017-03-06 15:16:41+00:00 | \n",
- " 0 days 00:00:00 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 110089 | \n",
- " 54358 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2017-03-13 16:07:27+00:00 | \n",
- " 2017-03-13 16:07:27+00:00 | \n",
- " 0 days 00:00:00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " product_id customer_id nb_tickets nb_suppliers \\\n",
- "0 107310 2805 4 2 \n",
- "1 110089 54355 1 1 \n",
- "2 110089 54356 1 1 \n",
- "3 110089 54357 1 1 \n",
- "4 110089 54358 1 1 \n",
- "\n",
- " purchase_date_max purchase_date_min time_between_purchase \n",
- "0 2019-06-05 14:37:13+00:00 2019-06-05 14:18:38+00:00 0 days 00:18:35 \n",
- "1 2017-02-17 13:32:51+00:00 2017-02-17 13:32:51+00:00 0 days 00:00:00 \n",
- "2 2017-03-02 14:36:16+00:00 2017-03-02 14:36:16+00:00 0 days 00:00:00 \n",
- "3 2017-03-06 15:16:41+00:00 2017-03-06 15:16:41+00:00 0 days 00:00:00 \n",
- "4 2017-03-13 16:07:27+00:00 2017-03-13 16:07:27+00:00 0 days 00:00:00 "
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df1_tickets_kpi.head()"
- ]
- },
{
"cell_type": "markdown",
"id": "096e47f4-1d65-4575-989d-83227eedad2b",
@@ -559,7 +402,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 11,
"id": "baed146a-9d3a-4397-a812-3d50c9a2f038",
"metadata": {},
"outputs": [],
@@ -588,7 +431,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 12,
"id": "5fbfd88b-b94c-489c-9201-670e96e453e7",
"metadata": {},
"outputs": [
@@ -596,7 +439,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_492/3848597476.py:4: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/3848597476.py:4: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@@ -608,165 +451,6 @@
"df1_target_information = preprocessing_target_area(targets = df1_targets, target_types = df1_target_types, customer_target_mappings = df1_customer_target_mappings)"
]
},
- {
- "cell_type": "code",
- "execution_count": 16,
- "id": "b4f05142-2a22-42ef-a60d-f23cc4b5cb09",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " customer_id | \n",
- "
\n",
- " \n",
- " target_name | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " consentement optin mediation specialisee | \n",
- " 150000 | \n",
- "
\n",
- " \n",
- " consentement optin jeune public | \n",
- " 149979 | \n",
- "
\n",
- " \n",
- " consentement optin b2c | \n",
- " 108909 | \n",
- "
\n",
- " \n",
- " Arenametrix_bascule tel vers sib | \n",
- " 35216 | \n",
- "
\n",
- " \n",
- " consentement optout b2c | \n",
- " 34523 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " customer_id\n",
- "target_name \n",
- "consentement optin mediation specialisee 150000\n",
- "consentement optin jeune public 149979\n",
- "consentement optin b2c 108909\n",
- "Arenametrix_bascule tel vers sib 35216\n",
- "consentement optout b2c 34523"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df1_target_information[['target_name', 'customer_id']].groupby('target_name').count().sort_values(by='customer_id', ascending=False).head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "id": "4417ff51-f501-4ab9-a192-4ab75764a8ed",
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " customer_id | \n",
- "
\n",
- " \n",
- " target_name | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Arenametrix_bascule tel vers sib | \n",
- " 35216 | \n",
- "
\n",
- " \n",
- " Autres_interet_exposition | \n",
- " 1021 | \n",
- "
\n",
- " \n",
- " COM Inscrits NL générale (historique) | \n",
- " 23005 | \n",
- "
\n",
- " \n",
- " Contacts_prenomsdoubles | \n",
- " 11643 | \n",
- "
\n",
- " \n",
- " DDCP MD Procès du Siècle | \n",
- " 1684 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " customer_id\n",
- "target_name \n",
- "Arenametrix_bascule tel vers sib 35216\n",
- "Autres_interet_exposition 1021\n",
- "COM Inscrits NL générale (historique) 23005\n",
- "Contacts_prenomsdoubles 11643\n",
- "DDCP MD Procès du Siècle 1684"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df1_target_information_reduced = df1_target_information[['target_name', 'customer_id']].groupby('target_name').count()\n",
- "df1_target_information_reduced[df1_target_information_reduced['customer_id'] >= 1000].head()"
- ]
- },
{
"cell_type": "markdown",
"id": "cdbb48b4-5e16-4ef4-8791-ed213d68d52f",
@@ -777,7 +461,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 13,
"id": "d883cc7b-ac43-4485-b86f-eaf595fbad85",
"metadata": {},
"outputs": [],
@@ -802,7 +486,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 14,
"id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f",
"metadata": {},
"outputs": [
@@ -810,19 +494,19 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_492/1967867975.py:15: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
- "/tmp/ipykernel_492/1967867975.py:15: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
- "/tmp/ipykernel_492/1967867975.py:15: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@@ -837,7 +521,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 15,
"id": "c24457e7-3cad-451a-a65b-7373b656bd6e",
"metadata": {
"scrolled": true
@@ -957,7 +641,7 @@
"4 404 2021-03-27 23:00:00+00:00 "
]
},
- "execution_count": 20,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -966,159 +650,12 @@
"df1_campaigns_information.head()"
]
},
- {
- "cell_type": "code",
- "execution_count": 21,
- "id": "e2c88552-b863-47a2-be23-8d2898fb28bc",
- "metadata": {},
- "outputs": [],
- "source": [
- "def campaigns_kpi_function(campaigns_information = None):\n",
- " # Nombre de campagnes de mails\n",
- " nb_campaigns = campaigns_information[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n",
- " nb_campaigns.rename(columns = {'campaign_name' : 'nb_campaigns'}, inplace = True)\n",
- " # Temps d'ouverture en min moyen \n",
- " campaigns_information['time_to_open'] = campaigns_information['opened_at'] - campaigns_information['delivered_at']\n",
- " time_to_open = campaigns_information[['customer_id', 'time_to_open']].groupby('customer_id').mean().reset_index()\n",
- "\n",
- " # Nombre de mail ouvert \n",
- " opened_campaign = campaigns_information[['customer_id', 'campaign_name', 'opened_at']]\n",
- " opened_campaign.dropna(subset=['opened_at'], inplace=True)\n",
- " opened_campaign = opened_campaign[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n",
- " opened_campaign.rename(columns = {'campaign_name' : 'nb_campaigns_opened' }, inplace = True)\n",
- "\n",
- " # Fusion des indicateurs\n",
- " campaigns_reduced = pd.merge(nb_campaigns, opened_campaign, on = 'customer_id', how = 'left')\n",
- " campaigns_reduced = pd.merge(campaigns_reduced, time_to_open, on = 'customer_id', how = 'left')\n",
- "\n",
- " # Remplir les NaN : nb_campaigns_opened\n",
- " campaigns_reduced['nb_campaigns_opened'].fillna(0, inplace=True)\n",
- "\n",
- " # Remplir les NaT : time_to_open (??)\n",
- "\n",
- " return campaigns_reduced\n",
- " "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "id": "24537647-bc29-4777-9848-ac4120a4aa60",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_492/3700263836.py:11: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " opened_campaign.dropna(subset=['opened_at'], inplace=True)\n"
- ]
- }
- ],
- "source": [
- "df1_campaigns_kpi = campaigns_kpi_function(campaigns_information = df1_campaigns_information) "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " customer_id | \n",
- " nb_campaigns | \n",
- " nb_campaigns_opened | \n",
- " time_to_open | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 2 | \n",
- " 4 | \n",
- " 0.0 | \n",
- " NaT | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 3 | \n",
- " 222 | \n",
- " 124.0 | \n",
- " 1 days 00:28:30.169354838 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 4 | \n",
- " 7 | \n",
- " 7.0 | \n",
- " 1 days 04:31:01.428571428 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 5 | \n",
- " 4 | \n",
- " 0.0 | \n",
- " NaT | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 6 | \n",
- " 20 | \n",
- " 0.0 | \n",
- " NaT | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " customer_id nb_campaigns nb_campaigns_opened time_to_open\n",
- "0 2 4 0.0 NaT\n",
- "1 3 222 124.0 1 days 00:28:30.169354838\n",
- "2 4 7 7.0 1 days 04:31:01.428571428\n",
- "3 5 4 0.0 NaT\n",
- "4 6 20 0.0 NaT"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df1_campaigns_kpi.head()"
- ]
- },
{
"cell_type": "markdown",
"id": "56520a97-ede8-4920-a211-3b5b136af33d",
"metadata": {},
"source": [
- "## Create Products Table"
+ "## Product area"
]
},
{
@@ -1131,7 +668,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 16,
"id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d",
"metadata": {},
"outputs": [],
@@ -1142,7 +679,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 17,
"id": "607eb4b4-eed9-4b50-b823-f75c116dd37c",
"metadata": {},
"outputs": [],
@@ -1213,7 +750,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 18,
"id": "350b09b9-451f-4d47-81fe-f34b892db027",
"metadata": {},
"outputs": [],
@@ -1301,7 +838,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 19,
"id": "0fccc8ef-e575-4857-a401-94a7274394df",
"metadata": {},
"outputs": [
@@ -1454,7 +991,7 @@
"4 indiv entrées tp "
]
},
- "execution_count": 27,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@@ -1466,7 +1003,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 20,
"id": "779d8aaf-6668-4f66-8852-847304407ea3",
"metadata": {},
"outputs": [
@@ -1636,7 +1173,7 @@
"4 spectacle vivant mucem "
]
},
- "execution_count": 28,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -1648,7 +1185,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 21,
"id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0",
"metadata": {},
"outputs": [
@@ -1747,7 +1284,7 @@
"4 37 383 269 1"
]
},
- "execution_count": 29,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -1767,7 +1304,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 22,
"id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba",
"metadata": {},
"outputs": [],
@@ -1789,13 +1326,13 @@
" products_global = order_columns_id(products_global)\n",
"\n",
" # remove useless columns \n",
- " products_global = products_global.drop(columns = ['type_of_id', 'name_events', 'name_seasons', 'name_categories'])\n",
+ " products_global = products_global.drop(columns = ['type_of_id']) # 'name_events', 'name_seasons', 'name_categories'\n",
" return products_global"
]
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 23,
"id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951",
"metadata": {},
"outputs": [
@@ -1849,12 +1386,15 @@
" id_representation_cap | \n",
" season_id | \n",
" facility_id | \n",
- " event_type_id | \n",
+ " ... | \n",
" event_type_key_id | \n",
" facility_key_id | \n",
" street_id | \n",
" amount | \n",
" is_full_price | \n",
+ " name_categories | \n",
+ " name_events | \n",
+ " name_seasons | \n",
" name_event_types | \n",
" name_facilities | \n",
" \n",
@@ -1872,12 +1412,15 @@
" 8789 | \n",
" 4 | \n",
" 1 | \n",
- " 2 | \n",
+ " ... | \n",
" 5 | \n",
" 1 | \n",
" 1 | \n",
" 9.0 | \n",
" False | \n",
+ " indiv activité tr | \n",
+ " visite-jeu \"le classico des minots\" (1h30) | \n",
+ " 2017 | \n",
" offre muséale individuel | \n",
" mucem | \n",
" \n",
@@ -1893,12 +1436,15 @@
" 390 | \n",
" 2 | \n",
" 1 | \n",
- " 2 | \n",
+ " ... | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" 9.5 | \n",
" False | \n",
+ " indiv entrées tp | \n",
+ " billet mucem picasso | \n",
+ " 2016 | \n",
" offre muséale individuel | \n",
" mucem | \n",
" \n",
@@ -1914,12 +1460,15 @@
" 395 | \n",
" 2 | \n",
" 1 | \n",
- " 2 | \n",
+ " ... | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" 11.5 | \n",
" False | \n",
+ " indiv entrées tp | \n",
+ " billet mucem picasso | \n",
+ " 2016 | \n",
" offre muséale individuel | \n",
" mucem | \n",
" \n",
@@ -1935,12 +1484,15 @@
" 120199 | \n",
" 1754 | \n",
" 1 | \n",
- " 2 | \n",
+ " ... | \n",
" 4 | \n",
" 1 | \n",
" 1 | \n",
" 8.0 | \n",
" False | \n",
+ " indiv entrées tr | \n",
+ " NaN | \n",
+ " NaN | \n",
" offre muséale individuel | \n",
" mucem | \n",
" \n",
@@ -1956,17 +1508,21 @@
" 21 | \n",
" 4 | \n",
" 1 | \n",
- " 3 | \n",
+ " ... | \n",
" 6 | \n",
" 1 | \n",
" 1 | \n",
" 8.5 | \n",
" False | \n",
+ " indiv entrées tp | \n",
+ " non défini | \n",
+ " 2017 | \n",
" non défini | \n",
" mucem | \n",
" \n",
" \n",
"\n",
+ "5 rows × 21 columns
\n",
""
],
"text/plain": [
@@ -1984,22 +1540,38 @@
"3 156773 1 12365 120199 \n",
"4 1175 1 8 21 \n",
"\n",
- " season_id facility_id event_type_id event_type_key_id facility_key_id \\\n",
- "0 4 1 2 5 1 \n",
- "1 2 1 2 2 1 \n",
- "2 2 1 2 2 1 \n",
- "3 1754 1 2 4 1 \n",
- "4 4 1 3 6 1 \n",
+ " season_id facility_id ... event_type_key_id facility_key_id street_id \\\n",
+ "0 4 1 ... 5 1 1 \n",
+ "1 2 1 ... 2 1 1 \n",
+ "2 2 1 ... 2 1 1 \n",
+ "3 1754 1 ... 4 1 1 \n",
+ "4 4 1 ... 6 1 1 \n",
"\n",
- " street_id amount is_full_price name_event_types name_facilities \n",
- "0 1 9.0 False offre muséale individuel mucem \n",
- "1 1 9.5 False offre muséale individuel mucem \n",
- "2 1 11.5 False offre muséale individuel mucem \n",
- "3 1 8.0 False offre muséale individuel mucem \n",
- "4 1 8.5 False non défini mucem "
+ " amount is_full_price name_categories \\\n",
+ "0 9.0 False indiv activité tr \n",
+ "1 9.5 False indiv entrées tp \n",
+ "2 11.5 False indiv entrées tp \n",
+ "3 8.0 False indiv entrées tr \n",
+ "4 8.5 False indiv entrées tp \n",
+ "\n",
+ " name_events name_seasons \\\n",
+ "0 visite-jeu \"le classico des minots\" (1h30) 2017 \n",
+ "1 billet mucem picasso 2016 \n",
+ "2 billet mucem picasso 2016 \n",
+ "3 NaN NaN \n",
+ "4 non défini 2017 \n",
+ "\n",
+ " name_event_types name_facilities \n",
+ "0 offre muséale individuel mucem \n",
+ "1 offre muséale individuel mucem \n",
+ "2 offre muséale individuel mucem \n",
+ "3 offre muséale individuel mucem \n",
+ "4 non défini mucem \n",
+ "\n",
+ "[5 rows x 21 columns]"
]
},
- "execution_count": 31,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -2011,13 +1583,1076 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 24,
"id": "98f78cd5-b694-4cc6-b033-20170aa13e8d",
"metadata": {},
"outputs": [],
"source": [
"# Fusion liée au product\n",
- "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')"
+ "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n",
+ "\n",
+ "# Keep only the variables of interest\n",
+ "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d7c3668a-c016-4bd0-837e-04af328ff14f",
+ "metadata": {},
+ "source": [
+ "# Construction des variables explicatives"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "314f1b7f-ae48-4c6f-8469-9ce879043243",
+ "metadata": {},
+ "source": [
+ "## KPI campaigns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "e2c88552-b863-47a2-be23-8d2898fb28bc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def campaigns_kpi_function(campaigns_information = None):\n",
+ "    \"\"\"Build per-customer e-mail campaign KPIs.\n",
+ "\n",
+ "    Returns one row per customer_id with nb_campaigns, nb_campaigns_opened\n",
+ "    and time_to_open (mean opened_at - delivered_at, NaT if nothing was opened).\n",
+ "    The input DataFrame is left untouched.\n",
+ "    \"\"\"\n",
+ "    # Work on a copy so the caller's frame does not gain a 'time_to_open' column\n",
+ "    campaigns = campaigns_information.copy()\n",
+ "    campaigns['time_to_open'] = campaigns['opened_at'] - campaigns['delivered_at']\n",
+ "\n",
+ "    # Number of campaigns received per customer (rows with a campaign name)\n",
+ "    nb_campaigns = campaigns.groupby('customer_id')['campaign_name'].count().reset_index(name = 'nb_campaigns')\n",
+ "\n",
+ "    # Number of opened campaigns: filter with a boolean mask instead of an in-place dropna on a slice\n",
+ "    opened = campaigns[campaigns['opened_at'].notna()]\n",
+ "    opened_campaign = opened.groupby('customer_id')['campaign_name'].count().reset_index(name = 'nb_campaigns_opened')\n",
+ "\n",
+ "    # Mean delay between delivery and opening\n",
+ "    time_to_open = campaigns.groupby('customer_id')['time_to_open'].mean().reset_index()\n",
+ "\n",
+ "    campaigns_reduced = nb_campaigns.merge(opened_campaign, on = 'customer_id', how = 'left').merge(time_to_open, on = 'customer_id', how = 'left')\n",
+ "    # Customers who never opened a mail are missing after the left merge -> 0 (plain assignment, no chained inplace fillna)\n",
+ "    campaigns_reduced['nb_campaigns_opened'] = campaigns_reduced['nb_campaigns_opened'].fillna(0)\n",
+ "    return campaigns_reduced"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "24537647-bc29-4777-9848-ac4120a4aa60",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_42764/3700263836.py:11: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " opened_campaign.dropna(subset=['opened_at'], inplace=True)\n"
+ ]
+ }
+ ],
+ "source": [
+ "df1_campaigns_kpi = campaigns_kpi_function(campaigns_information = df1_campaigns_information) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " nb_campaigns | \n",
+ " nb_campaigns_opened | \n",
+ " time_to_open | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 222 | \n",
+ " 124.0 | \n",
+ " 1 days 00:28:30.169354838 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 7 | \n",
+ " 7.0 | \n",
+ " 1 days 04:31:01.428571428 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 6 | \n",
+ " 20 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id nb_campaigns nb_campaigns_opened time_to_open\n",
+ "0 2 4 0.0 NaT\n",
+ "1 3 222 124.0 1 days 00:28:30.169354838\n",
+ "2 4 7 7.0 1 days 04:31:01.428571428\n",
+ "3 5 4 0.0 NaT\n",
+ "4 6 20 0.0 NaT"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_campaigns_kpi.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d4dcfbe0-c6ce-497e-b75e-dc9e938801b2",
+ "metadata": {},
+ "source": [
+ "## KPI tickets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "b913a69e-3146-4919-b5f6-a6108532bffa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['spectacle vivant', 'offre muséale individuel', 'formule adhésion',\n",
+ " 'offre muséale groupe'], dtype=object)"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_products_purchased_reduced['name_event_types'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "2bda0b97-b28b-4070-a57d-aeab0e2f7dfe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Number of distinct event types attended by each customer\n",
+ "nb_event_types = df1_products_purchased_reduced[['customer_id', 'name_event_types']].groupby('customer_id').nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "043303fe-e90f-4689-a2a9-5d690555a045",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def tickets_kpi_function(tickets_information = None):\n",
+ "    \"\"\"Aggregate ticket purchases into KPIs per (customer_id, event_type_id).\n",
+ "\n",
+ "    Expects the columns ticket_id, customer_id, event_type_id, supplier_name,\n",
+ "    purchase_date and amount. Returns one row per pair with nb_tickets,\n",
+ "    total_amount, nb_suppliers, vente_internet_max, purchase_date_min,\n",
+ "    purchase_date_max, time_between_purchase and nb_tickets_internet.\n",
+ "    The input DataFrame is not modified.\n",
+ "    \"\"\"\n",
+ "    group_keys = ['customer_id', 'event_type_id']\n",
+ "    tickets_information_copy = tickets_information.copy()\n",
+ "\n",
+ "    # Dummy: online sales channel, detected from keywords in the supplier name\n",
+ "    liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online'] # vad = vente à distance (distance selling)\n",
+ "    # na = False: a missing supplier name counts as offline instead of breaking astype(int)\n",
+ "    tickets_information_copy['vente_internet'] = tickets_information_copy['supplier_name'].str.contains('|'.join(liste_mots), case = False, na = False).astype(int)\n",
+ "\n",
+ "    # Number of tickets bought online per customer and event type\n",
+ "    online_tickets = tickets_information_copy[tickets_information_copy['vente_internet'] == 1]\n",
+ "    nb_tickets_internet = online_tickets.groupby(group_keys)['ticket_id'].count().reset_index(name = 'nb_tickets_internet')\n",
+ "\n",
+ "    # Named aggregation gives the final column names directly (no MultiIndex to flatten)\n",
+ "    tickets_kpi = (tickets_information_copy\n",
+ "                   .groupby(group_keys)\n",
+ "                   .agg(nb_tickets = ('ticket_id', 'count'),\n",
+ "                        total_amount = ('amount', 'sum'),\n",
+ "                        nb_suppliers = ('supplier_name', 'nunique'),\n",
+ "                        vente_internet_max = ('vente_internet', 'max'),\n",
+ "                        purchase_date_min = ('purchase_date', 'min'),\n",
+ "                        purchase_date_max = ('purchase_date', 'max'))\n",
+ "                   .reset_index())\n",
+ "    tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']\n",
+ "\n",
+ "    # Pairs without any online ticket are absent from nb_tickets_internet -> 0 after the left merge\n",
+ "    tickets_kpi = tickets_kpi.merge(nb_tickets_internet, on = group_keys, how = 'left')\n",
+ "    tickets_kpi['nb_tickets_internet'] = tickets_kpi['nb_tickets_internet'].fillna(0)\n",
+ "    return tickets_kpi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "5882234a-1ed5-4269-87a6-0d75613476e3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_products_purchased_reduced)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "597b241e-a83d-4b7c-8ad7-eec50295dff2",
+ "metadata": {},
+ "source": [
+ "#### Exportation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "a4a2311d-8a72-4030-afd5-218004d5d2a5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Exportation vers 'projet-bdc2324-team1'\n",
+ "BUCKET_OUT = \"projet-bdc2324-team1\"\n",
+ "FILE_KEY_OUT_S3 = \"0_Temp/Company 1 - Purchasing behaviour.csv\"\n",
+ "FILE_PATH_OUT_S3 = BUCKET_OUT + \"/\" + FILE_KEY_OUT_S3\n",
+ "\n",
+ "with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
+ " df1_tickets_kpi.to_csv(file_out, index = False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " event_type_id | \n",
+ " nb_tickets | \n",
+ " total_amount | \n",
+ " nb_suppliers | \n",
+ " vente_internet_max | \n",
+ " purchase_date_min | \n",
+ " purchase_date_max | \n",
+ " time_between_purchase | \n",
+ " nb_tickets_internet | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 453242 | \n",
+ " 3248965.5 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 2013-09-23 14:45:01+00:00 | \n",
+ " 2023-11-03 14:11:01+00:00 | \n",
+ " 3692 days 23:26:00 | \n",
+ " 2988.0 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 384226 | \n",
+ " 2686540.5 | \n",
+ " 7 | \n",
+ " 1 | \n",
+ " 2014-12-03 14:55:37+00:00 | \n",
+ " 2023-11-04 15:12:16+00:00 | \n",
+ " 3258 days 00:16:39 | \n",
+ " 51.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 217356 | \n",
+ " 1435871.5 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 2017-01-01 02:20:08+00:00 | \n",
+ " 2019-12-31 02:20:06+00:00 | \n",
+ " 1093 days 23:59:58 | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " 201750 | \n",
+ " 1459190.0 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 2013-06-10 10:37:58+00:00 | \n",
+ " 2023-11-08 15:59:45+00:00 | \n",
+ " 3803 days 05:21:47 | \n",
+ " 9.0 | \n",
+ "
\n",
+ " \n",
+ " 5032 | \n",
+ " 6733 | \n",
+ " 6 | \n",
+ " 14208 | \n",
+ " 0.0 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2017-01-11 15:00:54+00:00 | \n",
+ " 2019-11-27 09:47:06+00:00 | \n",
+ " 1049 days 18:46:12 | \n",
+ " 13497.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id event_type_id nb_tickets total_amount nb_suppliers \\\n",
+ "1 1 4 453242 3248965.5 6 \n",
+ "0 1 2 384226 2686540.5 7 \n",
+ "3 1 6 217356 1435871.5 5 \n",
+ "2 1 5 201750 1459190.0 6 \n",
+ "5032 6733 6 14208 0.0 3 \n",
+ "\n",
+ " vente_internet_max purchase_date_min purchase_date_max \\\n",
+ "1 1 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 \n",
+ "0 1 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 \n",
+ "3 1 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 \n",
+ "2 1 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 \n",
+ "5032 1 2017-01-11 15:00:54+00:00 2019-11-27 09:47:06+00:00 \n",
+ "\n",
+ " time_between_purchase nb_tickets_internet \n",
+ "1 3692 days 23:26:00 2988.0 \n",
+ "0 3258 days 00:16:39 51.0 \n",
+ "3 1093 days 23:59:58 5.0 \n",
+ "2 3803 days 05:21:47 9.0 \n",
+ "5032 1049 days 18:46:12 13497.0 "
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_tickets_kpi.sort_values(by='nb_tickets', ascending=False).head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f1d7f7ba-361b-467d-b375-b09c149185f7",
+ "metadata": {},
+ "source": [
+ "## Alexis' work"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "4ab1c0d2-0097-4669-b984-b6822c976740",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " event_type_id | \n",
+ " avg_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 6.150659 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 7.762474 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 4.452618 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 6 | \n",
+ " 6.439463 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " event_type_id avg_amount\n",
+ "0 2 6.150659\n",
+ "1 4 7.762474\n",
+ "2 5 4.452618\n",
+ "3 6 6.439463"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "avg_amount = (df1_products_purchased_reduced.groupby([\"event_type_id\"])\n",
+ " .agg({\"amount\" : \"mean\"}).reset_index()\n",
+ " .rename(columns = {'amount' : 'avg_amount'}))\n",
+ "\n",
+ "avg_amount"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "a9c62b39-389e-4dac-89a6-ac8a59fea58a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " event_type_id | \n",
+ " nb_tickets | \n",
+ " avg_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 384226 | \n",
+ " 6.150659 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 453242 | \n",
+ " 7.762474 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " 201750 | \n",
+ " 4.452618 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 217356 | \n",
+ " 6.439463 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 143 | \n",
+ " 6.150659 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id event_type_id nb_tickets avg_amount\n",
+ "0 1 2 384226 6.150659\n",
+ "1 1 4 453242 7.762474\n",
+ "2 1 5 201750 4.452618\n",
+ "3 1 6 217356 6.439463\n",
+ "4 2 2 143 6.150659"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "nb_tickets = (df1_products_purchased_reduced.groupby([\"customer_id\", \"event_type_id\"])\n",
+ " .agg({\"ticket_id\" : \"count\"}).reset_index()\n",
+ " .rename(columns = {'ticket_id' : 'nb_tickets'})\n",
+ " .merge(avg_amount, how='left', on='event_type_id'))\n",
+ "nb_tickets.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "8710611c-7eb8-45ca-bdcc-009f4081f9e2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " birthdate | \n",
+ " street_id | \n",
+ " is_partner | \n",
+ " gender | \n",
+ " is_email_true | \n",
+ " opt_in | \n",
+ " structure_id | \n",
+ " profession | \n",
+ " language | \n",
+ " ... | \n",
+ " average_ticket_basket | \n",
+ " total_price | \n",
+ " purchase_count | \n",
+ " first_buying_date | \n",
+ " country | \n",
+ " age | \n",
+ " tenant_id | \n",
+ " nb_campaigns | \n",
+ " nb_campaigns_opened | \n",
+ " time_to_open | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 12751 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 1 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 12825 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 11261 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 1 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 13071 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 653061 | \n",
+ " NaN | \n",
+ " 10 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " 80.0 | \n",
+ " 2.0 | \n",
+ " 0 days 19:53:02.500000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 28 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id birthdate street_id is_partner gender is_email_true \\\n",
+ "0 12751 NaN 2 False 1 True \n",
+ "1 12825 NaN 2 False 2 True \n",
+ "2 11261 NaN 2 False 1 True \n",
+ "3 13071 NaN 2 False 2 True \n",
+ "4 653061 NaN 10 False 2 True \n",
+ "\n",
+ " opt_in structure_id profession language ... average_ticket_basket \\\n",
+ "0 True NaN NaN NaN ... NaN \n",
+ "1 True NaN NaN NaN ... NaN \n",
+ "2 True NaN NaN NaN ... NaN \n",
+ "3 True NaN NaN NaN ... NaN \n",
+ "4 False NaN NaN NaN ... NaN \n",
+ "\n",
+ " total_price purchase_count first_buying_date country age tenant_id \\\n",
+ "0 NaN 0 NaT fr NaN 1311 \n",
+ "1 NaN 0 NaT fr NaN 1311 \n",
+ "2 NaN 0 NaT fr NaN 1311 \n",
+ "3 NaN 0 NaT fr NaN 1311 \n",
+ "4 NaN 0 NaT NaN NaN 1311 \n",
+ "\n",
+ " nb_campaigns nb_campaigns_opened time_to_open \n",
+ "0 NaN NaN NaT \n",
+ "1 NaN NaN NaT \n",
+ "2 NaN NaN NaT \n",
+ "3 NaN NaN NaT \n",
+ "4 80.0 2.0 0 days 19:53:02.500000 \n",
+ "\n",
+ "[5 rows x 28 columns]"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Fusion avec KPI campaigns liés au customer\n",
+ "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n",
+ "df1_customer.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "a89fad43-ee68-4081-9384-3e9f08ec6a59",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "shape : (156289, 31)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " birthdate | \n",
+ " street_id | \n",
+ " is_partner | \n",
+ " gender | \n",
+ " is_email_true | \n",
+ " opt_in | \n",
+ " structure_id | \n",
+ " profession | \n",
+ " language | \n",
+ " ... | \n",
+ " first_buying_date | \n",
+ " country | \n",
+ " age | \n",
+ " tenant_id | \n",
+ " nb_campaigns | \n",
+ " nb_campaigns_opened | \n",
+ " time_to_open | \n",
+ " event_type_id | \n",
+ " nb_tickets | \n",
+ " avg_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 12751 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 1 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 12825 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 11261 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 1 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 13071 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 653061 | \n",
+ " NaN | \n",
+ " 10 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ " 80.0 | \n",
+ " 2.0 | \n",
+ " 0 days 19:53:02.500000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 31 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id birthdate street_id is_partner gender is_email_true \\\n",
+ "0 12751 NaN 2 False 1 True \n",
+ "1 12825 NaN 2 False 2 True \n",
+ "2 11261 NaN 2 False 1 True \n",
+ "3 13071 NaN 2 False 2 True \n",
+ "4 653061 NaN 10 False 2 True \n",
+ "\n",
+ " opt_in structure_id profession language ... first_buying_date country \\\n",
+ "0 True NaN NaN NaN ... NaT fr \n",
+ "1 True NaN NaN NaN ... NaT fr \n",
+ "2 True NaN NaN NaN ... NaT fr \n",
+ "3 True NaN NaN NaN ... NaT fr \n",
+ "4 False NaN NaN NaN ... NaT NaN \n",
+ "\n",
+ " age tenant_id nb_campaigns nb_campaigns_opened time_to_open \\\n",
+ "0 NaN 1311 NaN NaN NaT \n",
+ "1 NaN 1311 NaN NaN NaT \n",
+ "2 NaN 1311 NaN NaN NaT \n",
+ "3 NaN 1311 NaN NaN NaT \n",
+ "4 NaN 1311 80.0 2.0 0 days 19:53:02.500000 \n",
+ "\n",
+ " event_type_id nb_tickets avg_amount \n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "\n",
+ "[5 rows x 31 columns]"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n",
+ "print(\"shape : \", df1_customer_product.shape)\n",
+ "df1_customer_product.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "a19fec00-4ece-400c-937c-ce5cd8daccfd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# df1_customer_product.to_csv(\"customer_product.csv\", index = False)"
]
},
{
@@ -2030,7 +2665,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 42,
"id": "46de1912-4a66-46e5-8b9e-7768b2d2723b",
"metadata": {},
"outputs": [],
@@ -2041,13 +2676,23 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 43,
"id": "1e42a790-b215-4107-a969-85005da06ebd",
"metadata": {},
"outputs": [],
"source": [
"# Fusion avec KPI liés au comportement d'achat\n",
- "# df1_customer_product = pd.merge(df1_customer, df1_products_purchased, on = 'customer_id', how = 'left')"
+ "df1_customer_product = pd.merge(df1_tickets_kpi, df1_customer, on = 'customer_id', how = 'outer')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d950f24d-a5d1-4f1e-aeaa-ca826470365f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# df1_customer_product"
]
}
],
diff --git a/Exploration_billet_AJ.ipynb b/Exploration_billet_AJ.ipynb
index 13d581c..bec456e 100644
--- a/Exploration_billet_AJ.ipynb
+++ b/Exploration_billet_AJ.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "56b3d44e-1e3f-4726-9916-0f9af107860e",
+ "id": "5bf5c226",
"metadata": {},
"source": [
"# Business Data Challenge - Team 1"
@@ -11,7 +11,7 @@
{
"cell_type": "code",
"execution_count": 1,
- "id": "15103481-8d74-404c-aa09-7601fe7730da",
+ "id": "b1a5b9d3",
"metadata": {},
"outputs": [],
"source": [
@@ -24,7 +24,7 @@
},
{
"cell_type": "markdown",
- "id": "c3bb0d13-34b2-4e1c-9985-468cd87c5a0e",
+ "id": "ecfa2219",
"metadata": {},
"source": [
"Configuration de l'accès aux données"
@@ -33,7 +33,7 @@
{
"cell_type": "code",
"execution_count": 2,
- "id": "5d83bb1a-d341-446e-91f6-1c428607f6d4",
+ "id": "1a094277",
"metadata": {},
"outputs": [],
"source": [
@@ -44,7 +44,7 @@
},
{
"cell_type": "markdown",
- "id": "f99da24f-0d93-4618-92bc-3ba81dc0445c",
+ "id": "c437eaec",
"metadata": {},
"source": [
"# Exemple sur Company 1"
@@ -52,7 +52,7 @@
},
{
"cell_type": "markdown",
- "id": "9d74b68f-ba07-4a15-9a27-dae931762d70",
+ "id": "a1c1fc39",
"metadata": {},
"source": [
"## Chargement données"
@@ -61,7 +61,7 @@
{
"cell_type": "code",
"execution_count": 3,
- "id": "699664b9-eee4-4f8d-a207-e524526560c5",
+ "id": "66f8c17b",
"metadata": {},
"outputs": [],
"source": [
@@ -72,17 +72,9 @@
{
"cell_type": "code",
"execution_count": 5,
- "id": "0cb92854-903b-4efd-ac1b-197e29f044b4",
+ "id": "c08e6798",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "['bdc2324-data/1/1purchases.csv', 'bdc2324-data/1/1suppliers.csv', 'bdc2324-data/1/1tickets.csv', 'bdc2324-data/1/1type_ofs.csv']\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"liste_database_select = ['suppliers', 'ticket', 'purchase', 'consumption', 'type_ofs']\n",
"\n",
@@ -96,15 +88,15 @@
{
"cell_type": "code",
"execution_count": 6,
- "id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed",
+ "id": "675f518d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_445/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in)\n"
]
}
],
@@ -126,7 +123,7 @@
},
{
"cell_type": "markdown",
- "id": "f01e4530-1a61-49cb-a6b0-aa188cf1c0e0",
+ "id": "e855f403",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
@@ -137,52 +134,9 @@
{
"cell_type": "code",
"execution_count": 22,
- "id": "a01f993a-0f9f-4aed-bd23-bcdec9041bb3",
+ "id": "91a8f8c4",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 151866 entries, 0 to 151865\n",
- "Data columns (total 29 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 151866 non-null int64 \n",
- " 1 birthdate 5437 non-null object \n",
- " 2 street_id 151866 non-null int64 \n",
- " 3 civility 0 non-null float64\n",
- " 4 is_partner 151866 non-null bool \n",
- " 5 deleted_at 0 non-null float64\n",
- " 6 gender 151866 non-null int64 \n",
- " 7 is_email_true 151866 non-null bool \n",
- " 8 opt_in 151866 non-null bool \n",
- " 9 structure_id 18114 non-null float64\n",
- " 10 note 906 non-null object \n",
- " 11 profession 6206 non-null object \n",
- " 12 language 1092 non-null object \n",
- " 13 mcp_contact_id 98901 non-null float64\n",
- " 14 last_buying_date 73422 non-null object \n",
- " 15 max_price 73422 non-null float64\n",
- " 16 ticket_sum 151866 non-null int64 \n",
- " 17 average_price 138746 non-null float64\n",
- " 18 fidelity 151866 non-null int64 \n",
- " 19 average_purchase_delay 73422 non-null float64\n",
- " 20 average_price_basket 73422 non-null float64\n",
- " 21 average_ticket_basket 73422 non-null float64\n",
- " 22 total_price 86542 non-null float64\n",
- " 23 purchase_count 151866 non-null int64 \n",
- " 24 first_buying_date 73422 non-null object \n",
- " 25 last_visiting_date 0 non-null float64\n",
- " 26 country 143575 non-null object \n",
- " 27 age 5437 non-null float64\n",
- " 28 tenant_id 151866 non-null int64 \n",
- "dtypes: bool(3), float64(12), int64(7), object(7)\n",
- "memory usage: 30.6+ MB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"a = pd.DataFrame(df1_customersplus.info())"
]
@@ -190,7 +144,7 @@
{
"cell_type": "code",
"execution_count": 31,
- "id": "45e82fc0-ba17-497b-9818-8be2bdc49d22",
+ "id": "2fda171d",
"metadata": {},
"outputs": [],
"source": [
@@ -219,7 +173,7 @@
{
"cell_type": "code",
"execution_count": 35,
- "id": "d237be96-8c86-4a91-b7a1-487e87a16c3d",
+ "id": "205eeeab",
"metadata": {},
"outputs": [],
"source": [
@@ -244,7 +198,7 @@
{
"cell_type": "code",
"execution_count": 32,
- "id": "4bcdb081-c34f-4d51-b93f-abbb6fa49c5e",
+ "id": "634282c5",
"metadata": {},
"outputs": [],
"source": [
@@ -254,350 +208,9 @@
{
"cell_type": "code",
"execution_count": 33,
- "id": "319c814f-0956-4a92-9c0a-c6b9f53b04b5",
+ "id": "0e8d4133",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Nom_colonne | \n",
- " Type_colonne | \n",
- " Taux_NA | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " id | \n",
- " int64 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " lastname | \n",
- " object | \n",
- " 43.461341 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " firstname | \n",
- " object | \n",
- " 44.995588 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " birthdate | \n",
- " object | \n",
- " 96.419870 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " email | \n",
- " object | \n",
- " 8.622075 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " street_id | \n",
- " int64 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " created_at | \n",
- " object | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " updated_at | \n",
- " object | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " civility | \n",
- " float64 | \n",
- " 100.000000 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " is_partner | \n",
- " bool | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " extra | \n",
- " float64 | \n",
- " 100.000000 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " deleted_at | \n",
- " float64 | \n",
- " 100.000000 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " reference | \n",
- " float64 | \n",
- " 100.000000 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " gender | \n",
- " int64 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " is_email_true | \n",
- " bool | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " extra_field | \n",
- " float64 | \n",
- " 100.000000 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " identifier | \n",
- " object | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " opt_in | \n",
- " bool | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " structure_id | \n",
- " float64 | \n",
- " 88.072380 | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " note | \n",
- " object | \n",
- " 99.403421 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " profession | \n",
- " object | \n",
- " 95.913503 | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " language | \n",
- " object | \n",
- " 99.280945 | \n",
- "
\n",
- " \n",
- " 22 | \n",
- " mcp_contact_id | \n",
- " float64 | \n",
- " 34.876141 | \n",
- "
\n",
- " \n",
- " 23 | \n",
- " need_reload | \n",
- " bool | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " last_buying_date | \n",
- " object | \n",
- " 51.653431 | \n",
- "
\n",
- " \n",
- " 25 | \n",
- " max_price | \n",
- " float64 | \n",
- " 51.653431 | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " ticket_sum | \n",
- " int64 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " average_price | \n",
- " float64 | \n",
- " 8.639195 | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " fidelity | \n",
- " int64 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 29 | \n",
- " average_purchase_delay | \n",
- " float64 | \n",
- " 51.653431 | \n",
- "
\n",
- " \n",
- " 30 | \n",
- " average_price_basket | \n",
- " float64 | \n",
- " 51.653431 | \n",
- "
\n",
- " \n",
- " 31 | \n",
- " average_ticket_basket | \n",
- " float64 | \n",
- " 51.653431 | \n",
- "
\n",
- " \n",
- " 32 | \n",
- " total_price | \n",
- " float64 | \n",
- " 43.014236 | \n",
- "
\n",
- " \n",
- " 33 | \n",
- " preferred_category | \n",
- " float64 | \n",
- " 100.000000 | \n",
- "
\n",
- " \n",
- " 34 | \n",
- " preferred_supplier | \n",
- " float64 | \n",
- " 100.000000 | \n",
- "
\n",
- " \n",
- " 35 | \n",
- " preferred_formula | \n",
- " float64 | \n",
- " 100.000000 | \n",
- "
\n",
- " \n",
- " 36 | \n",
- " purchase_count | \n",
- " int64 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 37 | \n",
- " first_buying_date | \n",
- " object | \n",
- " 51.653431 | \n",
- "
\n",
- " \n",
- " 38 | \n",
- " last_visiting_date | \n",
- " float64 | \n",
- " 100.000000 | \n",
- "
\n",
- " \n",
- " 39 | \n",
- " zipcode | \n",
- " object | \n",
- " 71.176564 | \n",
- "
\n",
- " \n",
- " 40 | \n",
- " country | \n",
- " object | \n",
- " 5.459418 | \n",
- "
\n",
- " \n",
- " 41 | \n",
- " age | \n",
- " float64 | \n",
- " 96.419870 | \n",
- "
\n",
- " \n",
- " 42 | \n",
- " tenant_id | \n",
- " int64 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Nom_colonne Type_colonne Taux_NA\n",
- "0 id int64 0.000000\n",
- "1 lastname object 43.461341\n",
- "2 firstname object 44.995588\n",
- "3 birthdate object 96.419870\n",
- "4 email object 8.622075\n",
- "5 street_id int64 0.000000\n",
- "6 created_at object 0.000000\n",
- "7 updated_at object 0.000000\n",
- "8 civility float64 100.000000\n",
- "9 is_partner bool 0.000000\n",
- "10 extra float64 100.000000\n",
- "11 deleted_at float64 100.000000\n",
- "12 reference float64 100.000000\n",
- "13 gender int64 0.000000\n",
- "14 is_email_true bool 0.000000\n",
- "15 extra_field float64 100.000000\n",
- "16 identifier object 0.000000\n",
- "17 opt_in bool 0.000000\n",
- "18 structure_id float64 88.072380\n",
- "19 note object 99.403421\n",
- "20 profession object 95.913503\n",
- "21 language object 99.280945\n",
- "22 mcp_contact_id float64 34.876141\n",
- "23 need_reload bool 0.000000\n",
- "24 last_buying_date object 51.653431\n",
- "25 max_price float64 51.653431\n",
- "26 ticket_sum int64 0.000000\n",
- "27 average_price float64 8.639195\n",
- "28 fidelity int64 0.000000\n",
- "29 average_purchase_delay float64 51.653431\n",
- "30 average_price_basket float64 51.653431\n",
- "31 average_ticket_basket float64 51.653431\n",
- "32 total_price float64 43.014236\n",
- "33 preferred_category float64 100.000000\n",
- "34 preferred_supplier float64 100.000000\n",
- "35 preferred_formula float64 100.000000\n",
- "36 purchase_count int64 0.000000\n",
- "37 first_buying_date object 51.653431\n",
- "38 last_visiting_date float64 100.000000\n",
- "39 zipcode object 71.176564\n",
- "40 country object 5.459418\n",
- "41 age float64 96.419870\n",
- "42 tenant_id int64 0.000000"
- ]
- },
- "execution_count": 33,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"a"
]
@@ -605,7 +218,7 @@
{
"cell_type": "code",
"execution_count": 16,
- "id": "e54a1170-2b10-4b22-8241-e7f5ec3fce75",
+ "id": "1268ad5a",
"metadata": {},
"outputs": [],
"source": [
@@ -615,216 +228,9 @@
{
"cell_type": "code",
"execution_count": 40,
- "id": "5c997ff6-251b-4e7f-8946-a8b722f5e97f",
+ "id": "bd41dc80",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " customer_id | \n",
- " birthdate | \n",
- " street_id | \n",
- " is_partner | \n",
- " gender | \n",
- " is_email_true | \n",
- " opt_in | \n",
- " structure_id | \n",
- " note | \n",
- " profession | \n",
- " ... | \n",
- " fidelity | \n",
- " average_purchase_delay | \n",
- " average_price_basket | \n",
- " average_ticket_basket | \n",
- " total_price | \n",
- " purchase_count | \n",
- " first_buying_date | \n",
- " country | \n",
- " age | \n",
- " tenant_id | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 12751 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 1 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " ... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 12825 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 2 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " ... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 11261 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 1 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " ... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 13071 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 2 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " ... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 653061 | \n",
- " NaN | \n",
- " 10 | \n",
- " False | \n",
- " 2 | \n",
- " True | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " ... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0 | \n",
- " NaT | \n",
- " NaN | \n",
- " NaN | \n",
- " 1311 | \n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 26 columns
\n",
- "
"
- ],
- "text/plain": [
- " customer_id birthdate street_id is_partner gender is_email_true \\\n",
- "0 12751 NaN 2 False 1 True \n",
- "1 12825 NaN 2 False 2 True \n",
- "2 11261 NaN 2 False 1 True \n",
- "3 13071 NaN 2 False 2 True \n",
- "4 653061 NaN 10 False 2 True \n",
- "\n",
- " opt_in structure_id note profession ... fidelity average_purchase_delay \\\n",
- "0 True NaN NaN NaN ... 0 NaN \n",
- "1 True NaN NaN NaN ... 0 NaN \n",
- "2 True NaN NaN NaN ... 0 NaN \n",
- "3 True NaN NaN NaN ... 0 NaN \n",
- "4 False NaN NaN NaN ... 0 NaN \n",
- "\n",
- " average_price_basket average_ticket_basket total_price purchase_count \\\n",
- "0 NaN NaN NaN 0 \n",
- "1 NaN NaN NaN 0 \n",
- "2 NaN NaN NaN 0 \n",
- "3 NaN NaN NaN 0 \n",
- "4 NaN NaN NaN 0 \n",
- "\n",
- " first_buying_date country age tenant_id \n",
- "0 NaT fr NaN 1311 \n",
- "1 NaT fr NaN 1311 \n",
- "2 NaT fr NaN 1311 \n",
- "3 NaT fr NaN 1311 \n",
- "4 NaT NaN NaN 1311 \n",
- "\n",
- "[5 rows x 26 columns]"
- ]
- },
- "execution_count": 40,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# Selection des variables\n",
"df1_customersplus_clean = df1_customersplus.copy()\n",
@@ -839,7 +245,7 @@
},
{
"cell_type": "markdown",
- "id": "e908f516-2a74-45d6-8492-7dcdc3afbe1f",
+ "id": "64d0f76b",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
@@ -850,264 +256,9 @@
{
"cell_type": "code",
"execution_count": 6,
- "id": "14f4158e-c9c0-4beb-826a-5e0f949434a4",
+ "id": "7e683711",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " number | \n",
- " created_at | \n",
- " updated_at | \n",
- " purchase_id | \n",
- " product_id | \n",
- " is_from_subscription | \n",
- " type_of | \n",
- " supplier_id | \n",
- " barcode | \n",
- " identifier | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 13070859 | \n",
- " 13593002661288 | \n",
- " 2021-12-28 20:47:10.320641+01:00 | \n",
- " 2022-02-14 18:46:53.614229+01:00 | \n",
- " 5107462 | \n",
- " 225251 | \n",
- " False | \n",
- " 1 | \n",
- " 3 | \n",
- " NaN | \n",
- " b6ad7fc36f33b5e05f58c7fca06688a6 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 13070860 | \n",
- " 13593002661399 | \n",
- " 2021-12-28 20:47:10.321037+01:00 | \n",
- " 2022-02-14 18:46:53.614761+01:00 | \n",
- " 5107462 | \n",
- " 224914 | \n",
- " False | \n",
- " 1 | \n",
- " 3 | \n",
- " NaN | \n",
- " b0903af480266f27802fe5c38c277c9e | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 13070861 | \n",
- " 13593002661419 | \n",
- " 2021-12-28 20:47:10.321629+01:00 | \n",
- " 2022-02-14 18:46:53.615521+01:00 | \n",
- " 5107462 | \n",
- " 224914 | \n",
- " False | \n",
- " 1 | \n",
- " 3 | \n",
- " NaN | \n",
- " 64ca12b7e26a65b90335c0702ea0faba | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 13070862 | \n",
- " 13593002661508 | \n",
- " 2021-12-28 20:47:10.322029+01:00 | \n",
- " 2022-02-14 18:46:53.616000+01:00 | \n",
- " 5107462 | \n",
- " 224914 | \n",
- " False | \n",
- " 1 | \n",
- " 3 | \n",
- " NaN | \n",
- " 5ac2f8150aa9f3a6b1599df08cc2f0c7 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 13070863 | \n",
- " 13593002661689 | \n",
- " 2021-12-28 20:47:10.322449+01:00 | \n",
- " 2022-02-14 18:46:53.616447+01:00 | \n",
- " 5107462 | \n",
- " 224914 | \n",
- " False | \n",
- " 1 | \n",
- " 3 | \n",
- " NaN | \n",
- " dfe30081bae020d12094279926136b9c | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 1826667 | \n",
- " 20662815 | \n",
- " 13593016154390 | \n",
- " 2023-11-09 07:51:34.935983+01:00 | \n",
- " 2023-11-09 07:51:34.935983+01:00 | \n",
- " 8007697 | \n",
- " 405689 | \n",
- " False | \n",
- " 1 | \n",
- " 3 | \n",
- " NaN | \n",
- " dba9aa428f843b79ae69dfacfe8fc579 | \n",
- "
\n",
- " \n",
- " 1826668 | \n",
- " 20662816 | \n",
- " 13593016154501 | \n",
- " 2023-11-09 07:51:34.937038+01:00 | \n",
- " 2023-11-09 07:51:34.937038+01:00 | \n",
- " 8007698 | \n",
- " 403658 | \n",
- " False | \n",
- " 1 | \n",
- " 3 | \n",
- " NaN | \n",
- " 93f1fcfc6ba4fa68f92eb4b4a619fcf0 | \n",
- "
\n",
- " \n",
- " 1826669 | \n",
- " 20662817 | \n",
- " 13593016154680 | \n",
- " 2023-11-09 07:51:34.938224+01:00 | \n",
- " 2023-11-09 07:51:34.938224+01:00 | \n",
- " 8007698 | \n",
- " 403658 | \n",
- " False | \n",
- " 1 | \n",
- " 3 | \n",
- " NaN | \n",
- " c8bbbd25df2c158767ceef42c3237f23 | \n",
- "
\n",
- " \n",
- " 1826670 | \n",
- " 20662818 | \n",
- " 13593016154899 | \n",
- " 2023-11-09 07:51:34.939328+01:00 | \n",
- " 2023-11-09 07:51:34.939328+01:00 | \n",
- " 8007699 | \n",
- " 403658 | \n",
- " False | \n",
- " 1 | \n",
- " 3 | \n",
- " NaN | \n",
- " 738f0a8b5088b5056bc3b32eff2dca1f | \n",
- "
\n",
- " \n",
- " 1826671 | \n",
- " 20662819 | \n",
- " 13593016154988 | \n",
- " 2023-11-09 07:51:34.940680+01:00 | \n",
- " 2023-11-09 07:51:34.940680+01:00 | \n",
- " 8007699 | \n",
- " 403658 | \n",
- " False | \n",
- " 1 | \n",
- " 3 | \n",
- " NaN | \n",
- " 4c5a6195434377380b4e6ae63b2e9cf6 | \n",
- "
\n",
- " \n",
- "
\n",
- "
1826672 rows × 11 columns
\n",
- "
"
- ],
- "text/plain": [
- " id number created_at \\\n",
- "0 13070859 13593002661288 2021-12-28 20:47:10.320641+01:00 \n",
- "1 13070860 13593002661399 2021-12-28 20:47:10.321037+01:00 \n",
- "2 13070861 13593002661419 2021-12-28 20:47:10.321629+01:00 \n",
- "3 13070862 13593002661508 2021-12-28 20:47:10.322029+01:00 \n",
- "4 13070863 13593002661689 2021-12-28 20:47:10.322449+01:00 \n",
- "... ... ... ... \n",
- "1826667 20662815 13593016154390 2023-11-09 07:51:34.935983+01:00 \n",
- "1826668 20662816 13593016154501 2023-11-09 07:51:34.937038+01:00 \n",
- "1826669 20662817 13593016154680 2023-11-09 07:51:34.938224+01:00 \n",
- "1826670 20662818 13593016154899 2023-11-09 07:51:34.939328+01:00 \n",
- "1826671 20662819 13593016154988 2023-11-09 07:51:34.940680+01:00 \n",
- "\n",
- " updated_at purchase_id product_id \\\n",
- "0 2022-02-14 18:46:53.614229+01:00 5107462 225251 \n",
- "1 2022-02-14 18:46:53.614761+01:00 5107462 224914 \n",
- "2 2022-02-14 18:46:53.615521+01:00 5107462 224914 \n",
- "3 2022-02-14 18:46:53.616000+01:00 5107462 224914 \n",
- "4 2022-02-14 18:46:53.616447+01:00 5107462 224914 \n",
- "... ... ... ... \n",
- "1826667 2023-11-09 07:51:34.935983+01:00 8007697 405689 \n",
- "1826668 2023-11-09 07:51:34.937038+01:00 8007698 403658 \n",
- "1826669 2023-11-09 07:51:34.938224+01:00 8007698 403658 \n",
- "1826670 2023-11-09 07:51:34.939328+01:00 8007699 403658 \n",
- "1826671 2023-11-09 07:51:34.940680+01:00 8007699 403658 \n",
- "\n",
- " is_from_subscription type_of supplier_id barcode \\\n",
- "0 False 1 3 NaN \n",
- "1 False 1 3 NaN \n",
- "2 False 1 3 NaN \n",
- "3 False 1 3 NaN \n",
- "4 False 1 3 NaN \n",
- "... ... ... ... ... \n",
- "1826667 False 1 3 NaN \n",
- "1826668 False 1 3 NaN \n",
- "1826669 False 1 3 NaN \n",
- "1826670 False 1 3 NaN \n",
- "1826671 False 1 3 NaN \n",
- "\n",
- " identifier \n",
- "0 b6ad7fc36f33b5e05f58c7fca06688a6 \n",
- "1 b0903af480266f27802fe5c38c277c9e \n",
- "2 64ca12b7e26a65b90335c0702ea0faba \n",
- "3 5ac2f8150aa9f3a6b1599df08cc2f0c7 \n",
- "4 dfe30081bae020d12094279926136b9c \n",
- "... ... \n",
- "1826667 dba9aa428f843b79ae69dfacfe8fc579 \n",
- "1826668 93f1fcfc6ba4fa68f92eb4b4a619fcf0 \n",
- "1826669 c8bbbd25df2c158767ceef42c3237f23 \n",
- "1826670 738f0a8b5088b5056bc3b32eff2dca1f \n",
- "1826671 4c5a6195434377380b4e6ae63b2e9cf6 \n",
- "\n",
- "[1826672 rows x 11 columns]"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df1_tickets"
]
@@ -1115,34 +266,9 @@
{
"cell_type": "code",
"execution_count": 7,
- "id": "f3c35394-b586-4ae4-b5ab-b03bb01bb618",
+ "id": "e7b9a52e",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 1826672 entries, 0 to 1826671\n",
- "Data columns (total 11 columns):\n",
- " # Column Dtype \n",
- "--- ------ ----- \n",
- " 0 id int64 \n",
- " 1 number object \n",
- " 2 created_at object \n",
- " 3 updated_at object \n",
- " 4 purchase_id int64 \n",
- " 5 product_id int64 \n",
- " 6 is_from_subscription bool \n",
- " 7 type_of int64 \n",
- " 8 supplier_id int64 \n",
- " 9 barcode float64\n",
- " 10 identifier object \n",
- "dtypes: bool(1), float64(1), int64(5), object(4)\n",
- "memory usage: 141.1+ MB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"df1_tickets.info()"
]
@@ -1150,31 +276,9 @@
{
"cell_type": "code",
"execution_count": 8,
- "id": "c1b42769-03c7-4785-92ce-5e1e6b41908d",
+ "id": "568280e8",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "id 0.0\n",
- "number 0.0\n",
- "created_at 0.0\n",
- "updated_at 0.0\n",
- "purchase_id 0.0\n",
- "product_id 0.0\n",
- "is_from_subscription 0.0\n",
- "type_of 0.0\n",
- "supplier_id 0.0\n",
- "barcode 100.0\n",
- "identifier 0.0\n",
- "dtype: float64"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df1_tickets.isna().sum()/len(df1_tickets)*100"
]
@@ -1182,21 +286,9 @@
{
"cell_type": "code",
"execution_count": 9,
- "id": "42896791-2d93-4725-a50b-6c7cbe535ec7",
+ "id": "29ecec90",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_619/232847087.py:3: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " df1_tickets_clean.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Selection des variables\n",
"df1_tickets_clean = df1_tickets.drop(['lastname', 'firstname', 'email', 'created_at', 'updated_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode'], axis = 1, inplace=True)\n",
@@ -1205,7 +297,7 @@
},
{
"cell_type": "markdown",
- "id": "78453f3c-4f89-44ed-a6c6-2a7443b72b52",
+ "id": "22bb5de4",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
@@ -1216,194 +308,9 @@
{
"cell_type": "code",
"execution_count": 10,
- "id": "2e0dada0-9457-484c-aa55-77e44613ecca",
+ "id": "6a9a91f4",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " name | \n",
- " manually_added | \n",
- " label | \n",
- " itr | \n",
- " updated_at | \n",
- " created_at | \n",
- " commission | \n",
- " identifier | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1617 | \n",
- " j4 administration | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " 2021-07-29 09:21:37.325772+02:00 | \n",
- " 2021-07-29 09:21:37.325772+02:00 | \n",
- " NaN | \n",
- " 5958b2a060ac3e31678b438892a1bd2e | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 8 | \n",
- " non défini | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " 2020-09-03 13:16:35.329062+02:00 | \n",
- " 2020-09-03 13:16:35.329062+02:00 | \n",
- " NaN | \n",
- " 52ff3466787b4d538407372e5f7afe0f | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 4 | \n",
- " vad | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " 2020-09-03 13:11:23.896992+02:00 | \n",
- " 2020-09-03 13:11:23.896992+02:00 | \n",
- " NaN | \n",
- " 1225483c97b36018cab2bea14ab78ea6 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 1 | \n",
- " fort saint jean | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " 2020-09-03 13:11:23.833073+02:00 | \n",
- " 2020-09-03 13:11:23.833073+02:00 | \n",
- " NaN | \n",
- " 001b9b4a524fe407150b8235b304d4ec | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 2 | \n",
- " j4 | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " 2020-09-03 13:11:23.888993+02:00 | \n",
- " 2020-09-03 13:11:23.888993+02:00 | \n",
- " NaN | \n",
- " 6a0cf6edf20060344b465706b61719aa | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 5 | \n",
- " revendeur | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " 2020-09-03 13:11:23.900987+02:00 | \n",
- " 2020-09-03 13:11:23.900987+02:00 | \n",
- " NaN | \n",
- " 931239d4acb6214d7e5c98edecfb4916 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 3 | \n",
- " vente en ligne | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " 2020-09-03 13:11:23.893097+02:00 | \n",
- " 2020-09-03 13:11:23.893097+02:00 | \n",
- " NaN | \n",
- " bde8f2ccff510df8572d3214d86b837d | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 6 | \n",
- " ccr | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " 2020-09-03 13:11:23.904974+02:00 | \n",
- " 2020-09-03 13:11:23.904974+02:00 | \n",
- " NaN | \n",
- " b48ec279411f7dbbb68393c61a9724d9 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 7 | \n",
- " dab | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " 2020-09-03 13:11:23.908970+02:00 | \n",
- " 2020-09-03 13:11:23.908970+02:00 | \n",
- " NaN | \n",
- " 11c6d471fa4e354e62e684d293694202 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " id name manually_added label itr \\\n",
- "0 1617 j4 administration False NaN NaN \n",
- "1 8 non défini False NaN NaN \n",
- "2 4 vad False NaN NaN \n",
- "3 1 fort saint jean False NaN NaN \n",
- "4 2 j4 False NaN NaN \n",
- "5 5 revendeur False NaN NaN \n",
- "6 3 vente en ligne False NaN NaN \n",
- "7 6 ccr False NaN NaN \n",
- "8 7 dab False NaN NaN \n",
- "\n",
- " updated_at created_at \\\n",
- "0 2021-07-29 09:21:37.325772+02:00 2021-07-29 09:21:37.325772+02:00 \n",
- "1 2020-09-03 13:16:35.329062+02:00 2020-09-03 13:16:35.329062+02:00 \n",
- "2 2020-09-03 13:11:23.896992+02:00 2020-09-03 13:11:23.896992+02:00 \n",
- "3 2020-09-03 13:11:23.833073+02:00 2020-09-03 13:11:23.833073+02:00 \n",
- "4 2020-09-03 13:11:23.888993+02:00 2020-09-03 13:11:23.888993+02:00 \n",
- "5 2020-09-03 13:11:23.900987+02:00 2020-09-03 13:11:23.900987+02:00 \n",
- "6 2020-09-03 13:11:23.893097+02:00 2020-09-03 13:11:23.893097+02:00 \n",
- "7 2020-09-03 13:11:23.904974+02:00 2020-09-03 13:11:23.904974+02:00 \n",
- "8 2020-09-03 13:11:23.908970+02:00 2020-09-03 13:11:23.908970+02:00 \n",
- "\n",
- " commission identifier \n",
- "0 NaN 5958b2a060ac3e31678b438892a1bd2e \n",
- "1 NaN 52ff3466787b4d538407372e5f7afe0f \n",
- "2 NaN 1225483c97b36018cab2bea14ab78ea6 \n",
- "3 NaN 001b9b4a524fe407150b8235b304d4ec \n",
- "4 NaN 6a0cf6edf20060344b465706b61719aa \n",
- "5 NaN 931239d4acb6214d7e5c98edecfb4916 \n",
- "6 NaN bde8f2ccff510df8572d3214d86b837d \n",
- "7 NaN b48ec279411f7dbbb68393c61a9724d9 \n",
- "8 NaN 11c6d471fa4e354e62e684d293694202 "
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df1_suppliers"
]
@@ -1411,32 +318,9 @@
{
"cell_type": "code",
"execution_count": 11,
- "id": "b583be02-ab60-4e14-9325-0204f203a1af",
+ "id": "bab4758a",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 9 entries, 0 to 8\n",
- "Data columns (total 9 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 9 non-null int64 \n",
- " 1 name 9 non-null object \n",
- " 2 manually_added 9 non-null bool \n",
- " 3 label 0 non-null float64\n",
- " 4 itr 0 non-null float64\n",
- " 5 updated_at 9 non-null object \n",
- " 6 created_at 9 non-null object \n",
- " 7 commission 0 non-null float64\n",
- " 8 identifier 9 non-null object \n",
- "dtypes: bool(1), float64(3), int64(1), object(4)\n",
- "memory usage: 713.0+ bytes\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"df1_suppliers.info()"
]
@@ -1444,29 +328,9 @@
{
"cell_type": "code",
"execution_count": 12,
- "id": "6d7f338e-e4d3-422b-9cdc-dec967c0b28e",
+ "id": "b5fff251",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "id 0.0\n",
- "name 0.0\n",
- "manually_added 0.0\n",
- "label 100.0\n",
- "itr 100.0\n",
- "updated_at 0.0\n",
- "created_at 0.0\n",
- "commission 100.0\n",
- "identifier 0.0\n",
- "dtype: float64"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df1_suppliers.isna().sum()/len(df1_suppliers)*100"
]
@@ -1474,21 +338,9 @@
{
"cell_type": "code",
"execution_count": 13,
- "id": "3c645ab7-16bf-4054-9ae2-15a8c32e29c6",
+ "id": "8b09e2a3",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_619/302783287.py:3: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " df1_suppliers_clean.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Selection des variables\n",
"df1_suppliers_clean = df1_suppliers[['id', 'name']]\n",
@@ -1498,109 +350,16 @@
{
"cell_type": "code",
"execution_count": 14,
- "id": "4de7e2e2-6da4-4618-8444-b524399c5493",
+ "id": "ecee7cdc",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " supplier_name | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1617 | \n",
- " j4 administration | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 8 | \n",
- " non défini | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 4 | \n",
- " vad | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 1 | \n",
- " fort saint jean | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 2 | \n",
- " j4 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 5 | \n",
- " revendeur | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 3 | \n",
- " vente en ligne | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 6 | \n",
- " ccr | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 7 | \n",
- " dab | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " id supplier_name\n",
- "0 1617 j4 administration\n",
- "1 8 non défini\n",
- "2 4 vad\n",
- "3 1 fort saint jean\n",
- "4 2 j4\n",
- "5 5 revendeur\n",
- "6 3 vente en ligne\n",
- "7 6 ccr\n",
- "8 7 dab"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df1_suppliers_clean"
]
},
{
"cell_type": "markdown",
- "id": "0a6df975-c7fc-45bc-92af-a0bdab17d795",
+ "id": "c8e6e69b",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
@@ -1611,186 +370,9 @@
{
"cell_type": "code",
"execution_count": 15,
- "id": "a02f6594-3e91-4e87-bbb6-649c28d4f7e9",
+ "id": "1a6cff1f",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " name | \n",
- " children | \n",
- " created_at | \n",
- " updated_at | \n",
- " identifier | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1 | \n",
- " Atelier | \n",
- " pricing_formula | \n",
- " 2021-01-05 11:55:51.188106+01:00 | \n",
- " 2021-01-05 11:55:51.188106+01:00 | \n",
- " 623ec4067827558b28972cf39fe81ee7 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2 | \n",
- " Billet en nombre | \n",
- " pricing_formula | \n",
- " 2021-01-11 12:13:19.286301+01:00 | \n",
- " 2021-01-11 12:13:19.286301+01:00 | \n",
- " a53d313a97296ee37caa066dbfe7a45c | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 3 | \n",
- " Groupe | \n",
- " pricing_formula | \n",
- " 2021-01-11 12:19:22.842917+01:00 | \n",
- " 2021-01-11 12:19:22.842917+01:00 | \n",
- " 1ab143efc3b85acbbc752fe8eb2b0b86 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 4 | \n",
- " Revendeur | \n",
- " pricing_formula | \n",
- " 2021-01-12 12:34:20.481236+01:00 | \n",
- " 2021-01-12 12:34:20.481236+01:00 | \n",
- " 8b332723366a07e1eef5f1c92f9ae067 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 5 | \n",
- " Cinéma scolaire | \n",
- " pricing_formula | \n",
- " 2021-01-25 19:16:05.141719+01:00 | \n",
- " 2021-01-25 19:16:05.141719+01:00 | \n",
- " a12e62cb4c4f47e7406bd8fbff2bfe30 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 6 | \n",
- " Musée famille | \n",
- " pricing_formula | \n",
- " 2021-01-25 19:23:06.692627+01:00 | \n",
- " 2021-01-25 19:23:06.692627+01:00 | \n",
- " 1ec6c19283111ccb3ed67f52d414470e | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 7 | \n",
- " Spectacle famille | \n",
- " pricing_formula | \n",
- " 2021-01-25 19:28:21.390016+01:00 | \n",
- " 2021-01-25 19:28:21.390016+01:00 | \n",
- " 05e2104f1b74ced229c06847d6e91938 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 8 | \n",
- " Masterclass | \n",
- " pricing_formula | \n",
- " 2021-01-25 19:31:05.076904+01:00 | \n",
- " 2021-01-25 19:31:05.076904+01:00 | \n",
- " 9cc946edfb25e11b4282f58db16e6ae9 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 9 | \n",
- " Spectacle | \n",
- " pricing_formula | \n",
- " 2021-01-25 19:38:41.260535+01:00 | \n",
- " 2021-01-25 19:38:41.260535+01:00 | \n",
- " d88321c347f0e0ab101184cdf25c94bf | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 10 | \n",
- " Cinema | \n",
- " pricing_formula | \n",
- " 2021-02-05 11:12:31.932576+01:00 | \n",
- " 2021-02-05 11:12:31.932576+01:00 | \n",
- " 0870fef2bfcd5b30a12e4f5c7f4aaba7 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 11 | \n",
- " Musee | \n",
- " pricing_formula | \n",
- " 2021-02-05 11:52:05.468207+01:00 | \n",
- " 2021-02-05 11:52:05.468207+01:00 | \n",
- " 8ba8934454cc62c7cdb3eb6e1b39df0c | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " 12 | \n",
- " Tarifs plein | \n",
- " category | \n",
- " 2023-03-13 11:31:50.528331+01:00 | \n",
- " 2023-03-13 11:31:50.528331+01:00 | \n",
- " a6969df76efc15d157be48e87a7bcf9a | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " id name children created_at \\\n",
- "0 1 Atelier pricing_formula 2021-01-05 11:55:51.188106+01:00 \n",
- "1 2 Billet en nombre pricing_formula 2021-01-11 12:13:19.286301+01:00 \n",
- "2 3 Groupe pricing_formula 2021-01-11 12:19:22.842917+01:00 \n",
- "3 4 Revendeur pricing_formula 2021-01-12 12:34:20.481236+01:00 \n",
- "4 5 Cinéma scolaire pricing_formula 2021-01-25 19:16:05.141719+01:00 \n",
- "5 6 Musée famille pricing_formula 2021-01-25 19:23:06.692627+01:00 \n",
- "6 7 Spectacle famille pricing_formula 2021-01-25 19:28:21.390016+01:00 \n",
- "7 8 Masterclass pricing_formula 2021-01-25 19:31:05.076904+01:00 \n",
- "8 9 Spectacle pricing_formula 2021-01-25 19:38:41.260535+01:00 \n",
- "9 10 Cinema pricing_formula 2021-02-05 11:12:31.932576+01:00 \n",
- "10 11 Musee pricing_formula 2021-02-05 11:52:05.468207+01:00 \n",
- "11 12 Tarifs plein category 2023-03-13 11:31:50.528331+01:00 \n",
- "\n",
- " updated_at identifier \n",
- "0 2021-01-05 11:55:51.188106+01:00 623ec4067827558b28972cf39fe81ee7 \n",
- "1 2021-01-11 12:13:19.286301+01:00 a53d313a97296ee37caa066dbfe7a45c \n",
- "2 2021-01-11 12:19:22.842917+01:00 1ab143efc3b85acbbc752fe8eb2b0b86 \n",
- "3 2021-01-12 12:34:20.481236+01:00 8b332723366a07e1eef5f1c92f9ae067 \n",
- "4 2021-01-25 19:16:05.141719+01:00 a12e62cb4c4f47e7406bd8fbff2bfe30 \n",
- "5 2021-01-25 19:23:06.692627+01:00 1ec6c19283111ccb3ed67f52d414470e \n",
- "6 2021-01-25 19:28:21.390016+01:00 05e2104f1b74ced229c06847d6e91938 \n",
- "7 2021-01-25 19:31:05.076904+01:00 9cc946edfb25e11b4282f58db16e6ae9 \n",
- "8 2021-01-25 19:38:41.260535+01:00 d88321c347f0e0ab101184cdf25c94bf \n",
- "9 2021-02-05 11:12:31.932576+01:00 0870fef2bfcd5b30a12e4f5c7f4aaba7 \n",
- "10 2021-02-05 11:52:05.468207+01:00 8ba8934454cc62c7cdb3eb6e1b39df0c \n",
- "11 2023-03-13 11:31:50.528331+01:00 a6969df76efc15d157be48e87a7bcf9a "
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df1_type_ofs"
]
@@ -1798,29 +380,9 @@
{
"cell_type": "code",
"execution_count": 16,
- "id": "e9c8d32b-22f4-4581-8af7-31cc1c31fa0e",
+ "id": "93630b41",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 12 entries, 0 to 11\n",
- "Data columns (total 6 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 12 non-null int64 \n",
- " 1 name 12 non-null object\n",
- " 2 children 12 non-null object\n",
- " 3 created_at 12 non-null object\n",
- " 4 updated_at 12 non-null object\n",
- " 5 identifier 12 non-null object\n",
- "dtypes: int64(1), object(5)\n",
- "memory usage: 704.0+ bytes\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"df1_type_ofs.info()"
]
@@ -1828,21 +390,9 @@
{
"cell_type": "code",
"execution_count": 17,
- "id": "cbb5e614-1fe5-4da0-bca0-8a242e0885da",
+ "id": "4f94481a",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_619/81842251.py:3: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " df1_type_ofs_clean.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Selection des variables\n",
"df1_type_ofs_clean = df1_type_ofs[['id', 'name', 'children']]\n",
@@ -1851,7 +401,7 @@
},
{
"cell_type": "markdown",
- "id": "676a9869-9a8b-4cd2-8b1c-0644b5229c72",
+ "id": "1b2811e2",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
@@ -1862,205 +412,11 @@
{
"cell_type": "code",
"execution_count": 18,
- "id": "f8d36b72-f8e7-45e5-b4fa-e0803493fd3c",
+ "id": "2455d2e1",
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " purchase_date | \n",
- " customer_id | \n",
- " created_at | \n",
- " updated_at | \n",
- " number | \n",
- " identifier | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 5145662 | \n",
- " 2019-07-17 11:17:53+02:00 | \n",
- " 6632 | \n",
- " 2021-12-28 20:48:51.569237+01:00 | \n",
- " 2021-12-28 20:48:51.569237+01:00 | \n",
- " fa80c83b29a268b45728c910a8afcf79 | \n",
- " 82877c41df26f832eb823a83acd1a172 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 4941642 | \n",
- " 2018-10-31 11:59:00+01:00 | \n",
- " 1 | \n",
- " 2021-12-28 20:31:48.196681+01:00 | \n",
- " 2022-03-03 17:52:21.958861+01:00 | \n",
- " 597b6c06adfe6acc539b29b657b80da0 | \n",
- " e7102ebe65526c427245533ebabe66e5 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 5088860 | \n",
- " 2018-10-31 12:45:12+01:00 | \n",
- " 1 | \n",
- " 2021-12-28 20:46:34.703542+01:00 | \n",
- " 2021-12-28 20:46:34.703542+01:00 | \n",
- " 4a7f6baaf9be6a99e3fead7f7e981fa8 | \n",
- " af75c4ae53d1b6957875538355b162e1 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 5088862 | \n",
- " 2018-10-31 13:07:12+01:00 | \n",
- " 1 | \n",
- " 2021-12-28 20:46:34.704773+01:00 | \n",
- " 2021-12-28 20:46:34.704773+01:00 | \n",
- " 1d83dfad44b73070d1c6d5875d0edd2d | \n",
- " 4b2fe34659b177209b07270ae1043b40 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 5088863 | \n",
- " 2018-10-31 13:08:50+01:00 | \n",
- " 1 | \n",
- " 2021-12-28 20:46:34.705453+01:00 | \n",
- " 2021-12-28 20:46:34.705453+01:00 | \n",
- " 7bfe2bc9c1670c973d0960e3fd408cf8 | \n",
- " b115f04a99b94df9e4a32185844f0998 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 742245 | \n",
- " 8007695 | \n",
- " 2023-11-08 17:51:19+01:00 | \n",
- " 1256133 | \n",
- " 2023-11-09 07:51:33.920187+01:00 | \n",
- " 2023-11-09 07:51:33.920187+01:00 | \n",
- " 99ad774dedbad43feb73514765d2f0ba | \n",
- " d68558180b4bf2e8a945724843655775 | \n",
- "
\n",
- " \n",
- " 742246 | \n",
- " 8007696 | \n",
- " 2023-11-08 18:17:51+01:00 | \n",
- " 1256134 | \n",
- " 2023-11-09 07:51:33.921967+01:00 | \n",
- " 2023-11-09 07:51:33.921967+01:00 | \n",
- " c1511614c511c5f95980172690179102 | \n",
- " f5102d910a7731091f239ad7b0df35b4 | \n",
- "
\n",
- " \n",
- " 742247 | \n",
- " 8007697 | \n",
- " 2023-11-08 18:23:54+01:00 | \n",
- " 1256135 | \n",
- " 2023-11-09 07:51:33.923034+01:00 | \n",
- " 2023-11-09 07:51:33.923034+01:00 | \n",
- " 33b64b39cc53428b4f17d65ff5b93104 | \n",
- " e2b917626be60cc2c3207cc037fe69e4 | \n",
- "
\n",
- " \n",
- " 742248 | \n",
- " 8007698 | \n",
- " 2023-11-08 19:32:18+01:00 | \n",
- " 1256136 | \n",
- " 2023-11-09 07:51:33.924135+01:00 | \n",
- " 2023-11-09 07:51:33.924135+01:00 | \n",
- " 9ae0b129e704b3d9c093ce9c7c4e5039 | \n",
- " 5bfa23236c31f8562c3a0233c1b53b31 | \n",
- "
\n",
- " \n",
- " 742249 | \n",
- " 8007699 | \n",
- " 2023-11-08 20:30:28+01:00 | \n",
- " 1256137 | \n",
- " 2023-11-09 07:51:33.925382+01:00 | \n",
- " 2023-11-09 07:51:33.925382+01:00 | \n",
- " d31ced089c2b1f90479257a4686f9306 | \n",
- " d86b1e0de3ff01eaf04fbcd031ac5fef | \n",
- "
\n",
- " \n",
- "
\n",
- "
742250 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " id purchase_date customer_id \\\n",
- "0 5145662 2019-07-17 11:17:53+02:00 6632 \n",
- "1 4941642 2018-10-31 11:59:00+01:00 1 \n",
- "2 5088860 2018-10-31 12:45:12+01:00 1 \n",
- "3 5088862 2018-10-31 13:07:12+01:00 1 \n",
- "4 5088863 2018-10-31 13:08:50+01:00 1 \n",
- "... ... ... ... \n",
- "742245 8007695 2023-11-08 17:51:19+01:00 1256133 \n",
- "742246 8007696 2023-11-08 18:17:51+01:00 1256134 \n",
- "742247 8007697 2023-11-08 18:23:54+01:00 1256135 \n",
- "742248 8007698 2023-11-08 19:32:18+01:00 1256136 \n",
- "742249 8007699 2023-11-08 20:30:28+01:00 1256137 \n",
- "\n",
- " created_at updated_at \\\n",
- "0 2021-12-28 20:48:51.569237+01:00 2021-12-28 20:48:51.569237+01:00 \n",
- "1 2021-12-28 20:31:48.196681+01:00 2022-03-03 17:52:21.958861+01:00 \n",
- "2 2021-12-28 20:46:34.703542+01:00 2021-12-28 20:46:34.703542+01:00 \n",
- "3 2021-12-28 20:46:34.704773+01:00 2021-12-28 20:46:34.704773+01:00 \n",
- "4 2021-12-28 20:46:34.705453+01:00 2021-12-28 20:46:34.705453+01:00 \n",
- "... ... ... \n",
- "742245 2023-11-09 07:51:33.920187+01:00 2023-11-09 07:51:33.920187+01:00 \n",
- "742246 2023-11-09 07:51:33.921967+01:00 2023-11-09 07:51:33.921967+01:00 \n",
- "742247 2023-11-09 07:51:33.923034+01:00 2023-11-09 07:51:33.923034+01:00 \n",
- "742248 2023-11-09 07:51:33.924135+01:00 2023-11-09 07:51:33.924135+01:00 \n",
- "742249 2023-11-09 07:51:33.925382+01:00 2023-11-09 07:51:33.925382+01:00 \n",
- "\n",
- " number identifier \n",
- "0 fa80c83b29a268b45728c910a8afcf79 82877c41df26f832eb823a83acd1a172 \n",
- "1 597b6c06adfe6acc539b29b657b80da0 e7102ebe65526c427245533ebabe66e5 \n",
- "2 4a7f6baaf9be6a99e3fead7f7e981fa8 af75c4ae53d1b6957875538355b162e1 \n",
- "3 1d83dfad44b73070d1c6d5875d0edd2d 4b2fe34659b177209b07270ae1043b40 \n",
- "4 7bfe2bc9c1670c973d0960e3fd408cf8 b115f04a99b94df9e4a32185844f0998 \n",
- "... ... ... \n",
- "742245 99ad774dedbad43feb73514765d2f0ba d68558180b4bf2e8a945724843655775 \n",
- "742246 c1511614c511c5f95980172690179102 f5102d910a7731091f239ad7b0df35b4 \n",
- "742247 33b64b39cc53428b4f17d65ff5b93104 e2b917626be60cc2c3207cc037fe69e4 \n",
- "742248 9ae0b129e704b3d9c093ce9c7c4e5039 5bfa23236c31f8562c3a0233c1b53b31 \n",
- "742249 d31ced089c2b1f90479257a4686f9306 d86b1e0de3ff01eaf04fbcd031ac5fef \n",
- "\n",
- "[742250 rows x 7 columns]"
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df1_purchases"
]
@@ -2068,30 +424,9 @@
{
"cell_type": "code",
"execution_count": 19,
- "id": "3f266a9d-6eee-4b27-b6cc-d401bc2fa0b8",
+ "id": "5f9a159d",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 742250 entries, 0 to 742249\n",
- "Data columns (total 7 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 742250 non-null int64 \n",
- " 1 purchase_date 742250 non-null object\n",
- " 2 customer_id 742250 non-null int64 \n",
- " 3 created_at 742250 non-null object\n",
- " 4 updated_at 742250 non-null object\n",
- " 5 number 742250 non-null object\n",
- " 6 identifier 742250 non-null object\n",
- "dtypes: int64(2), object(5)\n",
- "memory usage: 39.6+ MB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"df1_purchases.info()"
]
@@ -2099,7 +434,7 @@
{
"cell_type": "code",
"execution_count": 20,
- "id": "8b24ccbc-ccf0-4722-8cd9-8ee8aa90d1fd",
+ "id": "db201bf7",
"metadata": {},
"outputs": [],
"source": [
@@ -2111,30 +446,9 @@
{
"cell_type": "code",
"execution_count": 21,
- "id": "27d18584-228f-4698-85d6-4d23151ea5ed",
+ "id": "bd436fca",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 742250 entries, 0 to 742249\n",
- "Data columns (total 7 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 id 742250 non-null int64 \n",
- " 1 purchase_date 742250 non-null datetime64[ns, UTC]\n",
- " 2 customer_id 742250 non-null int64 \n",
- " 3 created_at 742250 non-null object \n",
- " 4 updated_at 742250 non-null object \n",
- " 5 number 742250 non-null object \n",
- " 6 identifier 742250 non-null object \n",
- "dtypes: datetime64[ns, UTC](1), int64(2), object(4)\n",
- "memory usage: 39.6+ MB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"df1_purchases.info()"
]
@@ -2142,7 +456,7 @@
{
"cell_type": "code",
"execution_count": 22,
- "id": "ea22e3a2-2b25-481d-8ebc-194e11a06cd9",
+ "id": "83435862",
"metadata": {},
"outputs": [],
"source": [
@@ -2152,7 +466,7 @@
},
{
"cell_type": "markdown",
- "id": "53227600-c1c5-48aa-9f5d-db5a23a8a22a",
+ "id": "f210e730",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
@@ -2163,7 +477,7 @@
{
"cell_type": "code",
"execution_count": 23,
- "id": "e0b8b47a-b321-4a79-823c-36a131a78ac7",
+ "id": "1f8b3aa7",
"metadata": {},
"outputs": [],
"source": [
@@ -2183,224 +497,18 @@
{
"cell_type": "code",
"execution_count": 24,
- "id": "7572e6e7-f28d-43ba-b045-b9fa09e68e1d",
+ "id": "83a4d021",
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " ticket_id | \n",
- " product_id | \n",
- " is_from_subscription | \n",
- " supplier_name | \n",
- " type_of_ticket_name | \n",
- " children | \n",
- " purchase_date | \n",
- " customer_id | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 13070859 | \n",
- " 225251 | \n",
- " False | \n",
- " vente en ligne | \n",
- " Atelier | \n",
- " pricing_formula | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " 48187 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 13070860 | \n",
- " 224914 | \n",
- " False | \n",
- " vente en ligne | \n",
- " Atelier | \n",
- " pricing_formula | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " 48187 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 13070861 | \n",
- " 224914 | \n",
- " False | \n",
- " vente en ligne | \n",
- " Atelier | \n",
- " pricing_formula | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " 48187 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 13070862 | \n",
- " 224914 | \n",
- " False | \n",
- " vente en ligne | \n",
- " Atelier | \n",
- " pricing_formula | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " 48187 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 13070863 | \n",
- " 224914 | \n",
- " False | \n",
- " vente en ligne | \n",
- " Atelier | \n",
- " pricing_formula | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " 48187 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 1826667 | \n",
- " 20662815 | \n",
- " 405689 | \n",
- " False | \n",
- " vente en ligne | \n",
- " Atelier | \n",
- " pricing_formula | \n",
- " 2023-11-08 17:23:54+00:00 | \n",
- " 1256135 | \n",
- "
\n",
- " \n",
- " 1826668 | \n",
- " 20662816 | \n",
- " 403658 | \n",
- " False | \n",
- " vente en ligne | \n",
- " Atelier | \n",
- " pricing_formula | \n",
- " 2023-11-08 18:32:18+00:00 | \n",
- " 1256136 | \n",
- "
\n",
- " \n",
- " 1826669 | \n",
- " 20662817 | \n",
- " 403658 | \n",
- " False | \n",
- " vente en ligne | \n",
- " Atelier | \n",
- " pricing_formula | \n",
- " 2023-11-08 18:32:18+00:00 | \n",
- " 1256136 | \n",
- "
\n",
- " \n",
- " 1826670 | \n",
- " 20662818 | \n",
- " 403658 | \n",
- " False | \n",
- " vente en ligne | \n",
- " Atelier | \n",
- " pricing_formula | \n",
- " 2023-11-08 19:30:28+00:00 | \n",
- " 1256137 | \n",
- "
\n",
- " \n",
- " 1826671 | \n",
- " 20662819 | \n",
- " 403658 | \n",
- " False | \n",
- " vente en ligne | \n",
- " Atelier | \n",
- " pricing_formula | \n",
- " 2023-11-08 19:30:28+00:00 | \n",
- " 1256137 | \n",
- "
\n",
- " \n",
- "
\n",
- "
1826672 rows × 8 columns
\n",
- "
"
- ],
- "text/plain": [
- " ticket_id product_id is_from_subscription supplier_name \\\n",
- "0 13070859 225251 False vente en ligne \n",
- "1 13070860 224914 False vente en ligne \n",
- "2 13070861 224914 False vente en ligne \n",
- "3 13070862 224914 False vente en ligne \n",
- "4 13070863 224914 False vente en ligne \n",
- "... ... ... ... ... \n",
- "1826667 20662815 405689 False vente en ligne \n",
- "1826668 20662816 403658 False vente en ligne \n",
- "1826669 20662817 403658 False vente en ligne \n",
- "1826670 20662818 403658 False vente en ligne \n",
- "1826671 20662819 403658 False vente en ligne \n",
- "\n",
- " type_of_ticket_name children purchase_date \\\n",
- "0 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
- "1 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
- "2 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
- "3 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
- "4 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
- "... ... ... ... \n",
- "1826667 Atelier pricing_formula 2023-11-08 17:23:54+00:00 \n",
- "1826668 Atelier pricing_formula 2023-11-08 18:32:18+00:00 \n",
- "1826669 Atelier pricing_formula 2023-11-08 18:32:18+00:00 \n",
- "1826670 Atelier pricing_formula 2023-11-08 19:30:28+00:00 \n",
- "1826671 Atelier pricing_formula 2023-11-08 19:30:28+00:00 \n",
- "\n",
- " customer_id \n",
- "0 48187 \n",
- "1 48187 \n",
- "2 48187 \n",
- "3 48187 \n",
- "4 48187 \n",
- "... ... \n",
- "1826667 1256135 \n",
- "1826668 1256136 \n",
- "1826669 1256136 \n",
- "1826670 1256137 \n",
- "1826671 1256137 \n",
- "\n",
- "[1826672 rows x 8 columns]"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df1_ticket_information"
]
},
{
"cell_type": "markdown",
- "id": "ad2d0059-76d3-44b9-b0eb-0b0ca4d4ba75",
+ "id": "56e6ebd1",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
@@ -2411,25 +519,9 @@
{
"cell_type": "code",
"execution_count": 51,
- "id": "c1afe322-ff41-4760-819e-0195fed5b27d",
+ "id": "88fcde4b",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 20 entries, 0 to 19\n",
- "Data columns (total 2 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 opened_at 8 non-null object \n",
- " 1 opened_at_clean 8 non-null datetime64[ns, UTC]\n",
- "dtypes: datetime64[ns, UTC](1), object(1)\n",
- "memory usage: 448.0+ bytes\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Créer un DataFrame exemple\n",
"df_not_clean = df1_campaign_stats[['opened_at']].head(20)\n",
@@ -2445,7 +537,7 @@
},
{
"cell_type": "markdown",
- "id": "27ecf058-23eb-4018-abbd-68c4ebe7c786",
+ "id": "818f69db",
"metadata": {},
"source": [
"## Nettoyage, selection et fusion"
@@ -2454,190 +546,9 @@
{
"cell_type": "code",
"execution_count": 23,
- "id": "d887898c-6a21-41ed-901d-4d6fdbca5372",
+ "id": "c9654eda",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " ticket_id | \n",
- " product_id | \n",
- " is_from_subscription | \n",
- " type_of | \n",
- " supplier_name | \n",
- " purchase_date | \n",
- " customer_id | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 13070859 | \n",
- " 225251 | \n",
- " False | \n",
- " 1 | \n",
- " vente en ligne | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " 48187 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 13070860 | \n",
- " 224914 | \n",
- " False | \n",
- " 1 | \n",
- " vente en ligne | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " 48187 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 13070861 | \n",
- " 224914 | \n",
- " False | \n",
- " 1 | \n",
- " vente en ligne | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " 48187 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 13070862 | \n",
- " 224914 | \n",
- " False | \n",
- " 1 | \n",
- " vente en ligne | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " 48187 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 13070863 | \n",
- " 224914 | \n",
- " False | \n",
- " 1 | \n",
- " vente en ligne | \n",
- " 2018-12-28 14:47:50+00:00 | \n",
- " 48187 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 1826667 | \n",
- " 20662815 | \n",
- " 405689 | \n",
- " False | \n",
- " 1 | \n",
- " vente en ligne | \n",
- " 2023-11-08 17:23:54+00:00 | \n",
- " 1256135 | \n",
- "
\n",
- " \n",
- " 1826668 | \n",
- " 20662816 | \n",
- " 403658 | \n",
- " False | \n",
- " 1 | \n",
- " vente en ligne | \n",
- " 2023-11-08 18:32:18+00:00 | \n",
- " 1256136 | \n",
- "
\n",
- " \n",
- " 1826669 | \n",
- " 20662817 | \n",
- " 403658 | \n",
- " False | \n",
- " 1 | \n",
- " vente en ligne | \n",
- " 2023-11-08 18:32:18+00:00 | \n",
- " 1256136 | \n",
- "
\n",
- " \n",
- " 1826670 | \n",
- " 20662818 | \n",
- " 403658 | \n",
- " False | \n",
- " 1 | \n",
- " vente en ligne | \n",
- " 2023-11-08 19:30:28+00:00 | \n",
- " 1256137 | \n",
- "
\n",
- " \n",
- " 1826671 | \n",
- " 20662819 | \n",
- " 403658 | \n",
- " False | \n",
- " 1 | \n",
- " vente en ligne | \n",
- " 2023-11-08 19:30:28+00:00 | \n",
- " 1256137 | \n",
- "
\n",
- " \n",
- "
\n",
- "
1826672 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " ticket_id product_id is_from_subscription type_of supplier_name \\\n",
- "0 13070859 225251 False 1 vente en ligne \n",
- "1 13070860 224914 False 1 vente en ligne \n",
- "2 13070861 224914 False 1 vente en ligne \n",
- "3 13070862 224914 False 1 vente en ligne \n",
- "4 13070863 224914 False 1 vente en ligne \n",
- "... ... ... ... ... ... \n",
- "1826667 20662815 405689 False 1 vente en ligne \n",
- "1826668 20662816 403658 False 1 vente en ligne \n",
- "1826669 20662817 403658 False 1 vente en ligne \n",
- "1826670 20662818 403658 False 1 vente en ligne \n",
- "1826671 20662819 403658 False 1 vente en ligne \n",
- "\n",
- " purchase_date customer_id \n",
- "0 2018-12-28 14:47:50+00:00 48187 \n",
- "1 2018-12-28 14:47:50+00:00 48187 \n",
- "2 2018-12-28 14:47:50+00:00 48187 \n",
- "3 2018-12-28 14:47:50+00:00 48187 \n",
- "4 2018-12-28 14:47:50+00:00 48187 \n",
- "... ... ... \n",
- "1826667 2023-11-08 17:23:54+00:00 1256135 \n",
- "1826668 2023-11-08 18:32:18+00:00 1256136 \n",
- "1826669 2023-11-08 18:32:18+00:00 1256136 \n",
- "1826670 2023-11-08 19:30:28+00:00 1256137 \n",
- "1826671 2023-11-08 19:30:28+00:00 1256137 \n",
- "\n",
- "[1826672 rows x 7 columns]"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df1_ticket_information"
]
@@ -2645,37 +556,16 @@
{
"cell_type": "code",
"execution_count": 14,
- "id": "ac9a6373-c1c6-46b5-873b-dc22f17bcbdb",
+ "id": "7f2b620c",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 1826672 entries, 0 to 1826671\n",
- "Data columns (total 7 columns):\n",
- " # Column Dtype \n",
- "--- ------ ----- \n",
- " 0 ticket_id int64 \n",
- " 1 product_id int64 \n",
- " 2 is_from_subscription bool \n",
- " 3 type_of int64 \n",
- " 4 supplier_name object \n",
- " 5 purchase_date datetime64[ns, UTC]\n",
- " 6 customer_id int64 \n",
- "dtypes: bool(1), datetime64[ns, UTC](1), int64(4), object(1)\n",
- "memory usage: 85.4+ MB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"df1_ticket_information.info()"
]
},
{
"cell_type": "markdown",
- "id": "b1719943-89eb-4ba0-a107-2f96d5d01ec9",
+ "id": "637bdb72",
"metadata": {},
"source": [
"# Customer information"
@@ -2683,7 +573,7 @@
},
{
"cell_type": "markdown",
- "id": "a2132ee2-3f22-45fd-b65b-72689c8b672c",
+ "id": "14c52894",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
@@ -2693,15 +583,15 @@
},
{
"cell_type": "code",
- "execution_count": 60,
- "id": "da5d4708-7147-4cc8-8686-52d4bcba5a7a",
+ "execution_count": 8,
+ "id": "d83abfbf",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_619/2625134041.py:3: SettingWithCopyWarning: \n",
+ "/tmp/ipykernel_15285/2625134041.py:3: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@@ -2732,21 +622,9 @@
{
"cell_type": "code",
"execution_count": 62,
- "id": "b4fa5fe3-ce8e-4b0a-af94-fb468d241bad",
+ "id": "90d71b2c",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "id 5.080902\n",
- "dtype: float64"
- ]
- },
- "execution_count": 62,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df1_targets_test = df1_targets_full[['id', 'customer_id']].groupby(['customer_id']).count()\n",
"len(df1_targets_test[df1_targets_test['id'] > 1]) / len(df1_targets_test)\n",
@@ -2757,11 +635,9 @@
},
{
"cell_type": "code",
- "execution_count": 57,
- "id": "8072bbb7-1360-4882-bb2b-2f43b6beea0d",
- "metadata": {
- "scrolled": true
- },
+ "execution_count": 10,
+ "id": "2301de1e",
+ "metadata": {},
"outputs": [
{
"data": {
@@ -2793,226 +669,42 @@
" \n",
" \n",
" \n",
- " 8793 | \n",
- " 4584599 | \n",
- " 1 | \n",
- " consentement optin jeune public | \n",
+ " 0 | \n",
+ " 1184824 | \n",
+ " 645400 | \n",
+ " DDCP PROMO Réseau livres | \n",
" False | \n",
" manual_static_filter | \n",
"
\n",
" \n",
- " 13249 | \n",
- " 4567465 | \n",
- " 1 | \n",
- " DDCP rentrée culturelle 2023 | \n",
+ " 1 | \n",
+ " 210571 | \n",
+ " 2412 | \n",
+ " DDCP PROMO Réseau livres | \n",
" False | \n",
" manual_static_filter | \n",
"
\n",
" \n",
- " 21424 | \n",
- " 4544805 | \n",
- " 1 | \n",
- " spectateurs cine dimanche_cine concert_2122 | \n",
+ " 2 | \n",
+ " 210572 | \n",
+ " 4536 | \n",
+ " DDCP PROMO Réseau livres | \n",
" False | \n",
" manual_static_filter | \n",
"
\n",
" \n",
- " 21665 | \n",
- " 4544911 | \n",
- " 1 | \n",
- " DDCP Cine 2023 | \n",
+ " 3 | \n",
+ " 210573 | \n",
+ " 6736 | \n",
+ " DDCP PROMO Réseau livres | \n",
" False | \n",
" manual_static_filter | \n",
"
\n",
" \n",
- " 22811 | \n",
- " 4545766 | \n",
- " 1 | \n",
- " DDCP OLBJ! 2023 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 57305 | \n",
- " 4457909 | \n",
- " 1 | \n",
- " ddcp_promo_visiteurs occasionnels_musee_8mois | \n",
- " False | \n",
- " manual_dynamic_filter | \n",
- "
\n",
- " \n",
- " 58843 | \n",
- " 3688872 | \n",
- " 1 | \n",
- " DDCP promo livemag | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 66813 | \n",
- " 4313646 | \n",
- " 1 | \n",
- " DDCP spectateurs Classique mais pas que 2022 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 68367 | \n",
- " 4547662 | \n",
- " 1 | \n",
- " ddcp_promo_musee_au moins 3 achats_dps8mois | \n",
- " False | \n",
- " manual_dynamic_filter | \n",
- "
\n",
- " \n",
- " 77320 | \n",
- " 4285520 | \n",
- " 1 | \n",
- " DDCP spectateurs Iminente | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 84350 | \n",
- " 4037805 | \n",
- " 1 | \n",
- " DDCP spectateurs Marseille Jazz 18-19-21 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 85383 | \n",
- " 4569504 | \n",
- " 1 | \n",
- " DDCP rendez-vous de septembre offre spéciale | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 92868 | \n",
- " 4433064 | \n",
- " 1 | \n",
- " ddcp_promo_plein air_ateliers_jardins | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 99670 | \n",
- " 3858684 | \n",
- " 1 | \n",
- " Acid Arab | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 105477 | \n",
- " 4321810 | \n",
- " 1 | \n",
- " Arenametrix_bascule tel vers sib | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 169513 | \n",
- " 3697992 | \n",
- " 1 | \n",
- " ddcp_achats billets nb dps 19052021 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 214421 | \n",
- " 2925324 | \n",
- " 1 | \n",
- " consentement optout scolaires | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 234546 | \n",
- " 4575957 | \n",
- " 1 | \n",
- " Portrait de Leila shahid | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 259808 | \n",
- " 3722259 | \n",
- " 1 | \n",
- " consentement optin b2b | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 274380 | \n",
- " 4510423 | \n",
- " 1 | \n",
- " DDCP_marseille_jazz_2023 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 307511 | \n",
- " 5174466 | \n",
- " 1 | \n",
- " ddcp actoral 21-22 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 357509 | \n",
- " 4442526 | \n",
- " 1 | \n",
- " ddcp musique barvalo | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 392920 | \n",
- " 4390642 | \n",
- " 1 | \n",
- " ddcp_md_promo_spectateurs theatre contempo | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 449620 | \n",
- " 4411897 | \n",
- " 1 | \n",
- " FORMATION _ acheteurs optin last year | \n",
- " False | \n",
- " manual_dynamic_filter | \n",
- "
\n",
- " \n",
- " 503809 | \n",
- " 4734591 | \n",
- " 1 | \n",
- " consentement optin mediation specialisee | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 651222 | \n",
- " 3554426 | \n",
- " 1 | \n",
- " consentement optin b2c | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 654246 | \n",
- " 5182212 | \n",
- " 1 | \n",
- " DDCP spectateurs Festival de Marseille 2023 | \n",
- " False | \n",
- " manual_static_filter | \n",
- "
\n",
- " \n",
- " 654395 | \n",
- " 5182456 | \n",
- " 1 | \n",
- " rencontres_echelle_spectateurs_2021_2023 | \n",
+ " 4 | \n",
+ " 210574 | \n",
+ " 38210 | \n",
+ " DDCP PROMO Réseau livres | \n",
" False | \n",
" manual_static_filter | \n",
"
\n",
@@ -3021,79 +713,238 @@
""
],
"text/plain": [
- " id customer_id target_name \\\n",
- "8793 4584599 1 consentement optin jeune public \n",
- "13249 4567465 1 DDCP rentrée culturelle 2023 \n",
- "21424 4544805 1 spectateurs cine dimanche_cine concert_2122 \n",
- "21665 4544911 1 DDCP Cine 2023 \n",
- "22811 4545766 1 DDCP OLBJ! 2023 \n",
- "57305 4457909 1 ddcp_promo_visiteurs occasionnels_musee_8mois \n",
- "58843 3688872 1 DDCP promo livemag \n",
- "66813 4313646 1 DDCP spectateurs Classique mais pas que 2022 \n",
- "68367 4547662 1 ddcp_promo_musee_au moins 3 achats_dps8mois \n",
- "77320 4285520 1 DDCP spectateurs Iminente \n",
- "84350 4037805 1 DDCP spectateurs Marseille Jazz 18-19-21 \n",
- "85383 4569504 1 DDCP rendez-vous de septembre offre spéciale \n",
- "92868 4433064 1 ddcp_promo_plein air_ateliers_jardins \n",
- "99670 3858684 1 Acid Arab \n",
- "105477 4321810 1 Arenametrix_bascule tel vers sib \n",
- "169513 3697992 1 ddcp_achats billets nb dps 19052021 \n",
- "214421 2925324 1 consentement optout scolaires \n",
- "234546 4575957 1 Portrait de Leila shahid \n",
- "259808 3722259 1 consentement optin b2b \n",
- "274380 4510423 1 DDCP_marseille_jazz_2023 \n",
- "307511 5174466 1 ddcp actoral 21-22 \n",
- "357509 4442526 1 ddcp musique barvalo \n",
- "392920 4390642 1 ddcp_md_promo_spectateurs theatre contempo \n",
- "449620 4411897 1 FORMATION _ acheteurs optin last year \n",
- "503809 4734591 1 consentement optin mediation specialisee \n",
- "651222 3554426 1 consentement optin b2c \n",
- "654246 5182212 1 DDCP spectateurs Festival de Marseille 2023 \n",
- "654395 5182456 1 rencontres_echelle_spectateurs_2021_2023 \n",
+ " id customer_id target_name target_type_is_import \\\n",
+ "0 1184824 645400 DDCP PROMO Réseau livres False \n",
+ "1 210571 2412 DDCP PROMO Réseau livres False \n",
+ "2 210572 4536 DDCP PROMO Réseau livres False \n",
+ "3 210573 6736 DDCP PROMO Réseau livres False \n",
+ "4 210574 38210 DDCP PROMO Réseau livres False \n",
"\n",
- " target_type_is_import target_type_name \n",
- "8793 False manual_static_filter \n",
- "13249 False manual_static_filter \n",
- "21424 False manual_static_filter \n",
- "21665 False manual_static_filter \n",
- "22811 False manual_static_filter \n",
- "57305 False manual_dynamic_filter \n",
- "58843 False manual_static_filter \n",
- "66813 False manual_static_filter \n",
- "68367 False manual_dynamic_filter \n",
- "77320 False manual_static_filter \n",
- "84350 False manual_static_filter \n",
- "85383 False manual_static_filter \n",
- "92868 False manual_static_filter \n",
- "99670 False manual_static_filter \n",
- "105477 False manual_static_filter \n",
- "169513 False manual_static_filter \n",
- "214421 False manual_static_filter \n",
- "234546 False manual_static_filter \n",
- "259808 False manual_static_filter \n",
- "274380 False manual_static_filter \n",
- "307511 False manual_static_filter \n",
- "357509 False manual_static_filter \n",
- "392920 False manual_static_filter \n",
- "449620 False manual_dynamic_filter \n",
- "503809 False manual_static_filter \n",
- "651222 False manual_static_filter \n",
- "654246 False manual_static_filter \n",
- "654395 False manual_static_filter "
+ " target_type_name \n",
+ "0 manual_static_filter \n",
+ "1 manual_static_filter \n",
+ "2 manual_static_filter \n",
+ "3 manual_static_filter \n",
+ "4 manual_static_filter "
]
},
- "execution_count": 57,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df1_targets_full[df1_targets_full['customer_id'] == 1]"
+ "df1_targets_full.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "75fbc2f7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package punkt to /home/onyxia/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package stopwords to /home/onyxia/nltk_data...\n",
+ "[nltk_data] Package stopwords is already up-to-date!\n",
+ "[nltk_data] Downloading package wordnet to /home/onyxia/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Catégorisation des target_name\n",
+ "import pandas as pd\n",
+ "import nltk\n",
+ "from nltk.tokenize import word_tokenize\n",
+ "from nltk.corpus import stopwords\n",
+ "from nltk.stem import WordNetLemmatizer\n",
+ "from nltk.probability import FreqDist\n",
+ "\n",
+ "# Téléchargement des ressources nécessaires\n",
+ "nltk.download('punkt')\n",
+ "nltk.download('stopwords')\n",
+ "nltk.download('wordnet')\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "55cddf92",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Mots les plus fréquents:\n",
+ "consentement: 550777\n",
+ "optin: 463579\n",
+ "jeune: 155103\n",
+ "public: 155103\n",
+ "mediation: 150001\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Text preprocessing helper: tokenise, drop French stop words, lemmatise\n",
+ "def preprocess_text(texte):\n",
+ "    # A str is used as-is: ' '.join(str) would insert a space between every character\n",
+ "    texte_concat = texte if isinstance(texte, str) else ' '.join(texte)\n",
+ "\n",
+ "    # Split the lower-cased text into word tokens\n",
+ "    tokens = word_tokenize(texte_concat.lower())\n",
+ "\n",
+ "    # Remove French stop words\n",
+ "    stop_words = set(stopwords.words('french'))\n",
+ "    filtered_tokens = [word for word in tokens if word not in stop_words]\n",
+ "\n",
+ "    # NOTE(review): WordNetLemmatizer relies on the English WordNet - confirm it suits French text\n",
+ "    lemmatizer = WordNetLemmatizer()\n",
+ "    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n",
+ "\n",
+ "    return lemmatized_tokens\n",
+ "\n",
+ "\n",
+ "# Appliquer le prétraitement à la colonne de texte\n",
+ "df1_targets_full['target_name_tokened'] = df1_targets_full['target_name'].apply(preprocess_text)\n",
+ "\n",
+ "# Concaténer les listes de mots pour obtenir une liste de tous les mots dans le corpus\n",
+ "all_words = [word for tokens in df1_targets_full['target_name_tokened'] for word in tokens]\n",
+ "\n",
+ "# Calculer la fréquence des mots\n",
+ "freq_dist = FreqDist(all_words)\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "7fd98a85",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Mots les plus fréquents:\n",
+ "consentement: 550777\n",
+ "optin: 463579\n",
+ "jeune: 155103\n",
+ "public: 155103\n",
+ "mediation: 150001\n",
+ "specialisee: 150001\n",
+ "b2c: 143432\n",
+ "optout: 97683\n",
+ "newsletter: 56022\n",
+ "(: 46084\n",
+ "): 46084\n",
+ "inscrits: 42296\n",
+ "nl: 42294\n",
+ "générale: 41037\n",
+ "generale: 40950\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Affichage des mots les plus fréquents\n",
+ "print(\"Mots les plus fréquents:\")\n",
+ "for mot, freq in freq_dist.most_common(15):\n",
+ " print(f\"{mot}: {freq}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "cf94bb1d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " texte \\\n",
+ "0 Le chat noir mange une souris. \n",
+ "1 Le chien blanc aboie. \n",
+ "\n",
+ " texte_preprocessed \n",
+ "0 [e, h, a, o, i, r, a, g, e, u, e, o, u, r, i, .] \n",
+ "1 [e, h, i, e, b, a, a, b, o, i, e, .] \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package punkt to /home/onyxia/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package stopwords to /home/onyxia/nltk_data...\n",
+ "[nltk_data] Package stopwords is already up-to-date!\n",
+ "[nltk_data] Downloading package wordnet to /home/onyxia/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import nltk\n",
+ "from nltk.tokenize import word_tokenize\n",
+ "from nltk.corpus import stopwords\n",
+ "from nltk.stem import WordNetLemmatizer\n",
+ "\n",
+ "# Téléchargement des ressources nécessaires\n",
+ "nltk.download('punkt')\n",
+ "nltk.download('stopwords')\n",
+ "nltk.download('wordnet')\n",
+ "\n",
+ "# Création de la DataFrame d'exemple\n",
+ "data = {'texte': [\"Le chat noir mange une souris.\", \"Le chien blanc aboie.\"]}\n",
+ "df = pd.DataFrame(data)\n",
+ "\n",
+ "# Preprocess one text: tokenise, drop French stop words, lemmatise\n",
+ "def preprocess_text(texte):\n",
+ "    # Keep a plain string intact; joining a str with ' ' would split it into single characters\n",
+ "    texte_concat = texte if isinstance(texte, str) else ' '.join(texte)\n",
+ "\n",
+ "    # Tokenise the lower-cased text into words\n",
+ "    tokens = word_tokenize(texte_concat.lower())\n",
+ "\n",
+ "    # Filter out French stop words\n",
+ "    stop_words = set(stopwords.words('french'))\n",
+ "    filtered_tokens = [word for word in tokens if word not in stop_words]\n",
+ "\n",
+ "    # NOTE(review): WordNetLemmatizer is backed by the English WordNet - verify it is appropriate here\n",
+ "    lemmatizer = WordNetLemmatizer()\n",
+ "    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n",
+ "\n",
+ "    return lemmatized_tokens\n",
+ "\n",
+ "# Appliquer la fonction de prétraitement à la colonne de texte\n",
+ "df['texte_preprocessed'] = df['texte'].apply(preprocess_text)\n",
+ "\n",
+ "# Afficher le résultat\n",
+ "print(df)\n"
]
},
{
"cell_type": "markdown",
- "id": "2f665824-a026-4acd-8358-b408a61854b4",
+ "id": "711d3884",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
@@ -3104,34 +955,9 @@
{
"cell_type": "code",
"execution_count": 52,
- "id": "5d05203c-ea30-4208-a29f-fef7737c672e",
+ "id": "c25b5295",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_9792/1967867975.py:15: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
- "/tmp/ipykernel_9792/1967867975.py:15: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
- "/tmp/ipykernel_9792/1967867975.py:15: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# campaign_stats cleaning \n",
"df1_campaign_stats_clean = df1_campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]]\n",
@@ -3151,31 +977,9 @@
{
"cell_type": "code",
"execution_count": 53,
- "id": "8ac634cf-2a30-4ccc-a34d-0fd401a49aaa",
+ "id": "2a3de6a5",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 6214808 entries, 0 to 6214807\n",
- "Data columns (total 8 columns):\n",
- " # Column Dtype \n",
- "--- ------ ----- \n",
- " 0 id int64 \n",
- " 1 customer_id int64 \n",
- " 2 opened_at datetime64[ns, UTC]\n",
- " 3 sent_at datetime64[ns, UTC]\n",
- " 4 delivered_at datetime64[ns, UTC]\n",
- " 5 campaign_name object \n",
- " 6 campaign_service_id int64 \n",
- " 7 campaign_sent_at datetime64[ns, UTC]\n",
- "dtypes: datetime64[ns, UTC](4), int64(3), object(1)\n",
- "memory usage: 379.3+ MB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"df1_campaigns_full.info()"
]
@@ -3183,235 +987,16 @@
{
"cell_type": "code",
"execution_count": 56,
- "id": "7d22cdd5-2060-4922-8e04-27b613d4ee27",
+ "id": "3fc1f446",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " customer_id | \n",
- " opened_at | \n",
- " sent_at | \n",
- " delivered_at | \n",
- " campaign_name | \n",
- " campaign_service_id | \n",
- " campaign_sent_at | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 19793 | \n",
- " 112597 | \n",
- " NaT | \n",
- " 2021-03-28 16:01:09+00:00 | \n",
- " 2021-03-28 16:24:18+00:00 | \n",
- " Le Mucem chez vous, gardons le lien #22 | \n",
- " 404 | \n",
- " 2021-03-27 23:00:00+00:00 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 14211 | \n",
- " 113666 | \n",
- " NaT | \n",
- " 2021-03-28 16:01:09+00:00 | \n",
- " 2021-03-28 16:21:02+00:00 | \n",
- " Le Mucem chez vous, gardons le lien #22 | \n",
- " 404 | \n",
- " 2021-03-27 23:00:00+00:00 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 13150 | \n",
- " 280561 | \n",
- " NaT | \n",
- " 2021-03-28 16:00:59+00:00 | \n",
- " 2021-03-28 16:08:45+00:00 | \n",
- " Le Mucem chez vous, gardons le lien #22 | \n",
- " 404 | \n",
- " 2021-03-27 23:00:00+00:00 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 7073 | \n",
- " 101007 | \n",
- " 2021-03-28 18:11:06+00:00 | \n",
- " 2021-03-28 16:00:59+00:00 | \n",
- " 2021-03-28 16:09:47+00:00 | \n",
- " Le Mucem chez vous, gardons le lien #22 | \n",
- " 404 | \n",
- " 2021-03-27 23:00:00+00:00 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 5175 | \n",
- " 103972 | \n",
- " NaT | \n",
- " 2021-03-28 16:01:06+00:00 | \n",
- " 2021-03-28 16:05:03+00:00 | \n",
- " Le Mucem chez vous, gardons le lien #22 | \n",
- " 404 | \n",
- " 2021-03-27 23:00:00+00:00 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 6214803 | \n",
- " 8302994 | \n",
- " 266155 | \n",
- " 2023-10-23 09:43:25+00:00 | \n",
- " 2023-10-23 09:32:33+00:00 | \n",
- " 2023-10-23 09:32:34+00:00 | \n",
- " dre_nov_2023 | \n",
- " 1318 | \n",
- " 2023-10-23 09:31:17+00:00 | \n",
- "
\n",
- " \n",
- " 6214804 | \n",
- " 8303307 | \n",
- " 21355 | \n",
- " 2023-10-23 09:44:02+00:00 | \n",
- " 2023-10-23 09:32:49+00:00 | \n",
- " 2023-10-23 09:32:49+00:00 | \n",
- " dre_nov_2023 | \n",
- " 1318 | \n",
- " 2023-10-23 09:31:17+00:00 | \n",
- "
\n",
- " \n",
- " 6214805 | \n",
- " 8304346 | \n",
- " 21849 | \n",
- " 2023-10-23 09:45:52+00:00 | \n",
- " 2023-10-23 09:33:28+00:00 | \n",
- " 2023-10-23 09:33:29+00:00 | \n",
- " dre_nov_2023 | \n",
- " 1318 | \n",
- " 2023-10-23 09:31:17+00:00 | \n",
- "
\n",
- " \n",
- " 6214806 | \n",
- " 8302037 | \n",
- " 667789 | \n",
- " 2023-10-23 09:47:32+00:00 | \n",
- " 2023-10-23 09:31:53+00:00 | \n",
- " 2023-10-23 09:31:54+00:00 | \n",
- " dre_nov_2023 | \n",
- " 1318 | \n",
- " 2023-10-23 09:31:17+00:00 | \n",
- "
\n",
- " \n",
- " 6214807 | \n",
- " 8304939 | \n",
- " 294154 | \n",
- " NaT | \n",
- " 2023-10-23 09:33:54+00:00 | \n",
- " 2023-10-23 09:33:55+00:00 | \n",
- " dre_nov_2023 | \n",
- " 1318 | \n",
- " 2023-10-23 09:31:17+00:00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
6214808 rows × 8 columns
\n",
- "
"
- ],
- "text/plain": [
- " id customer_id opened_at \\\n",
- "0 19793 112597 NaT \n",
- "1 14211 113666 NaT \n",
- "2 13150 280561 NaT \n",
- "3 7073 101007 2021-03-28 18:11:06+00:00 \n",
- "4 5175 103972 NaT \n",
- "... ... ... ... \n",
- "6214803 8302994 266155 2023-10-23 09:43:25+00:00 \n",
- "6214804 8303307 21355 2023-10-23 09:44:02+00:00 \n",
- "6214805 8304346 21849 2023-10-23 09:45:52+00:00 \n",
- "6214806 8302037 667789 2023-10-23 09:47:32+00:00 \n",
- "6214807 8304939 294154 NaT \n",
- "\n",
- " sent_at delivered_at \\\n",
- "0 2021-03-28 16:01:09+00:00 2021-03-28 16:24:18+00:00 \n",
- "1 2021-03-28 16:01:09+00:00 2021-03-28 16:21:02+00:00 \n",
- "2 2021-03-28 16:00:59+00:00 2021-03-28 16:08:45+00:00 \n",
- "3 2021-03-28 16:00:59+00:00 2021-03-28 16:09:47+00:00 \n",
- "4 2021-03-28 16:01:06+00:00 2021-03-28 16:05:03+00:00 \n",
- "... ... ... \n",
- "6214803 2023-10-23 09:32:33+00:00 2023-10-23 09:32:34+00:00 \n",
- "6214804 2023-10-23 09:32:49+00:00 2023-10-23 09:32:49+00:00 \n",
- "6214805 2023-10-23 09:33:28+00:00 2023-10-23 09:33:29+00:00 \n",
- "6214806 2023-10-23 09:31:53+00:00 2023-10-23 09:31:54+00:00 \n",
- "6214807 2023-10-23 09:33:54+00:00 2023-10-23 09:33:55+00:00 \n",
- "\n",
- " campaign_name campaign_service_id \\\n",
- "0 Le Mucem chez vous, gardons le lien #22 404 \n",
- "1 Le Mucem chez vous, gardons le lien #22 404 \n",
- "2 Le Mucem chez vous, gardons le lien #22 404 \n",
- "3 Le Mucem chez vous, gardons le lien #22 404 \n",
- "4 Le Mucem chez vous, gardons le lien #22 404 \n",
- "... ... ... \n",
- "6214803 dre_nov_2023 1318 \n",
- "6214804 dre_nov_2023 1318 \n",
- "6214805 dre_nov_2023 1318 \n",
- "6214806 dre_nov_2023 1318 \n",
- "6214807 dre_nov_2023 1318 \n",
- "\n",
- " campaign_sent_at \n",
- "0 2021-03-27 23:00:00+00:00 \n",
- "1 2021-03-27 23:00:00+00:00 \n",
- "2 2021-03-27 23:00:00+00:00 \n",
- "3 2021-03-27 23:00:00+00:00 \n",
- "4 2021-03-27 23:00:00+00:00 \n",
- "... ... \n",
- "6214803 2023-10-23 09:31:17+00:00 \n",
- "6214804 2023-10-23 09:31:17+00:00 \n",
- "6214805 2023-10-23 09:31:17+00:00 \n",
- "6214806 2023-10-23 09:31:17+00:00 \n",
- "6214807 2023-10-23 09:31:17+00:00 \n",
- "\n",
- "[6214808 rows x 8 columns]"
- ]
- },
- "execution_count": 56,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df1_campaigns_information"
]
},
{
"cell_type": "markdown",
- "id": "0a5b24f0-4bca-4cde-a6ba-eb130b38cac4",
+ "id": "20e69ee3",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
@@ -3422,264 +1007,9 @@
{
"cell_type": "code",
"execution_count": 37,
- "id": "bc63bc4e-6cc1-4d35-9635-faf55339e186",
+ "id": "d9cbdbce",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " name | \n",
- " service_id | \n",
- " created_at | \n",
- " updated_at | \n",
- " process_id | \n",
- " report_url | \n",
- " category | \n",
- " to_be_synced | \n",
- " identifier | \n",
- " sent_at | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1319613 | \n",
- " newsletter enseignants janvier 2022 | \n",
- " 721 | \n",
- " 2022-01-14 16:06:42.586321+01:00 | \n",
- " 2022-02-03 14:17:27.112963+01:00 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " False | \n",
- " aba3b6fd5d186d28e06ff97135cade7f | \n",
- " 2022-01-14 00:00:00+01:00 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 1319586 | \n",
- " lsf_janvier_2022 | \n",
- " 717 | \n",
- " 2022-01-07 11:30:35.315895+01:00 | \n",
- " 2022-02-03 14:17:27.116171+01:00 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " False | \n",
- " 788d986905533aba051261497ecffcbb | \n",
- " 2022-01-07 00:00:00+01:00 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 1319282 | \n",
- " Invitation à déjeuner au Mucem | Vernissage « ... | \n",
- " 591 | \n",
- " 2021-09-28 12:50:24.448752+02:00 | \n",
- " 2022-02-03 14:17:27.119582+01:00 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " False | \n",
- " 3493894fa4ea036cfc6433c3e2ee63b0 | \n",
- " 2021-09-28 00:00:00+02:00 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 1319283 | \n",
- " Vacances de la Toussaint - centres des loisirs | \n",
- " 590 | \n",
- " 2021-09-28 18:01:04.692073+02:00 | \n",
- " 2022-02-03 14:17:27.124408+01:00 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " False | \n",
- " 08b255a5d42b89b0585260b6f2360bdd | \n",
- " 2021-09-28 00:00:00+02:00 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 1319636 | \n",
- " ddcp_promo_md_livemag | \n",
- " 730 | \n",
- " 2022-01-27 18:00:41.053069+01:00 | \n",
- " 2022-02-03 14:17:27.127607+01:00 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " False | \n",
- " d5cfead94f5350c12c322b5b664544c1 | \n",
- " 2022-01-27 00:00:00+01:00 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 952 | \n",
- " 1320072 | \n",
- " dre_gaza0106 | \n",
- " 881 | \n",
- " 2022-05-26 09:01:35.523639+02:00 | \n",
- " 2022-12-02 17:51:22.614046+01:00 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " False | \n",
- " 7504adad8bb96320eb3afdd4df6e1f60 | \n",
- " 2022-05-26 00:00:00+02:00 | \n",
- "
\n",
- " \n",
- " 953 | \n",
- " 661398 | \n",
- " DDCP Plan Bis 4 - Marketing direct - MJ5C | \n",
- " 183 | \n",
- " 2021-06-18 10:30:01.259578+02:00 | \n",
- " 2021-09-24 11:56:09.082785+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " False | \n",
- " cedebb6e872f539bef8c3f919874e9d7 | \n",
- " 2020-07-27 00:00:00+02:00 | \n",
- "
\n",
- " \n",
- " 954 | \n",
- " 1320487 | \n",
- " Invitation portes ouvertes amitiés | \n",
- " 988 | \n",
- " 2022-09-29 18:01:33.834090+02:00 | \n",
- " 2022-12-02 17:51:23.258324+01:00 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " False | \n",
- " 9908279ebbf1f9b250ba689db6a0222b | \n",
- " 2022-09-29 00:00:00+02:00 | \n",
- "
\n",
- " \n",
- " 955 | \n",
- " 906903 | \n",
- " DDCP PROMO La méditerranée des philosophes #3 ... | \n",
- " 310 | \n",
- " 2021-07-19 14:07:16.177390+02:00 | \n",
- " 2021-09-24 11:56:09.086101+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " False | \n",
- " 06eb61b839a0cefee4967c67ccb099dc | \n",
- " 2020-12-23 00:00:00+01:00 | \n",
- "
\n",
- " \n",
- " 956 | \n",
- " 579313 | \n",
- " ddcp_promo_automation_manuel_pre_visit | \n",
- " 481 | \n",
- " 2021-06-08 17:38:54.041310+02:00 | \n",
- " 2021-09-24 11:56:09.089394+02:00 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " False | \n",
- " 9461cce28ebe3e76fb4b931c35a169b0 | \n",
- " 2021-06-08 00:00:00+02:00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
957 rows × 11 columns
\n",
- "
"
- ],
- "text/plain": [
- " id name service_id \\\n",
- "0 1319613 newsletter enseignants janvier 2022 721 \n",
- "1 1319586 lsf_janvier_2022 717 \n",
- "2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n",
- "3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n",
- "4 1319636 ddcp_promo_md_livemag 730 \n",
- ".. ... ... ... \n",
- "952 1320072 dre_gaza0106 881 \n",
- "953 661398 DDCP Plan Bis 4 - Marketing direct - MJ5C 183 \n",
- "954 1320487 Invitation portes ouvertes amitiés 988 \n",
- "955 906903 DDCP PROMO La méditerranée des philosophes #3 ... 310 \n",
- "956 579313 ddcp_promo_automation_manuel_pre_visit 481 \n",
- "\n",
- " created_at updated_at \\\n",
- "0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n",
- "1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n",
- "2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n",
- "3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n",
- "4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n",
- ".. ... ... \n",
- "952 2022-05-26 09:01:35.523639+02:00 2022-12-02 17:51:22.614046+01:00 \n",
- "953 2021-06-18 10:30:01.259578+02:00 2021-09-24 11:56:09.082785+02:00 \n",
- "954 2022-09-29 18:01:33.834090+02:00 2022-12-02 17:51:23.258324+01:00 \n",
- "955 2021-07-19 14:07:16.177390+02:00 2021-09-24 11:56:09.086101+02:00 \n",
- "956 2021-06-08 17:38:54.041310+02:00 2021-09-24 11:56:09.089394+02:00 \n",
- "\n",
- " process_id report_url category to_be_synced \\\n",
- "0 NaN NaN 0.0 False \n",
- "1 NaN NaN 0.0 False \n",
- "2 NaN NaN 0.0 False \n",
- "3 NaN NaN 0.0 False \n",
- "4 NaN NaN 0.0 False \n",
- ".. ... ... ... ... \n",
- "952 NaN NaN 0.0 False \n",
- "953 NaN NaN 0.0 False \n",
- "954 NaN NaN 0.0 False \n",
- "955 NaN NaN 0.0 False \n",
- "956 NaN NaN 0.0 False \n",
- "\n",
- " identifier sent_at \n",
- "0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n",
- "1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n",
- "2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n",
- "3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n",
- "4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 \n",
- ".. ... ... \n",
- "952 7504adad8bb96320eb3afdd4df6e1f60 2022-05-26 00:00:00+02:00 \n",
- "953 cedebb6e872f539bef8c3f919874e9d7 2020-07-27 00:00:00+02:00 \n",
- "954 9908279ebbf1f9b250ba689db6a0222b 2022-09-29 00:00:00+02:00 \n",
- "955 06eb61b839a0cefee4967c67ccb099dc 2020-12-23 00:00:00+01:00 \n",
- "956 9461cce28ebe3e76fb4b931c35a169b0 2021-06-08 00:00:00+02:00 \n",
- "\n",
- "[957 rows x 11 columns]"
- ]
- },
- "execution_count": 37,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df1_campaigns"
]
@@ -3687,185 +1017,16 @@
{
"cell_type": "code",
"execution_count": 38,
- "id": "c19b321f-65f9-4d6c-8c1f-edb2eb9d70e7",
+ "id": "c07459f0",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " clicked_at | \n",
- " link_id | \n",
- " customer_id | \n",
- " created_at | \n",
- " updated_at | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1 | \n",
- " 2021-03-26 16:30:36+01:00 | \n",
- " 1 | \n",
- " 284033 | \n",
- " 2021-03-26 15:30:37.050161+01:00 | \n",
- " 2021-03-26 15:30:37.050161+01:00 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2 | \n",
- " 2021-03-26 17:16:34+01:00 | \n",
- " 2 | \n",
- " 119768 | \n",
- " 2021-03-26 16:16:34.950871+01:00 | \n",
- " 2021-03-26 16:16:34.950871+01:00 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 272 | \n",
- " 2021-03-28 20:03:32+02:00 | \n",
- " 42 | \n",
- " 113105 | \n",
- " 2021-03-28 18:03:32.736394+02:00 | \n",
- " 2021-03-28 18:03:32.736394+02:00 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 4 | \n",
- " 2021-03-26 17:43:19+01:00 | \n",
- " 3 | \n",
- " 272280 | \n",
- " 2021-03-26 16:43:19.338321+01:00 | \n",
- " 2021-03-26 16:43:19.338321+01:00 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 5 | \n",
- " 2021-03-26 17:46:00+01:00 | \n",
- " 3 | \n",
- " 105095 | \n",
- " 2021-03-26 16:46:00.502945+01:00 | \n",
- " 2021-03-26 16:46:00.502945+01:00 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 151046 | \n",
- " 243553 | \n",
- " 2023-11-09 16:34:27+01:00 | \n",
- " 14666 | \n",
- " 998 | \n",
- " 2023-11-09 15:34:29.425425+01:00 | \n",
- " 2023-11-09 15:34:29.425425+01:00 | \n",
- "
\n",
- " \n",
- " 151047 | \n",
- " 243554 | \n",
- " 2023-11-09 16:34:35+01:00 | \n",
- " 14670 | \n",
- " 998 | \n",
- " 2023-11-09 15:34:37.505505+01:00 | \n",
- " 2023-11-09 15:34:37.505505+01:00 | \n",
- "
\n",
- " \n",
- " 151048 | \n",
- " 243559 | \n",
- " 2023-11-09 16:51:15+01:00 | \n",
- " 14686 | \n",
- " 82923 | \n",
- " 2023-11-09 15:51:17.439518+01:00 | \n",
- " 2023-11-09 15:51:17.439518+01:00 | \n",
- "
\n",
- " \n",
- " 151049 | \n",
- " 243561 | \n",
- " 2023-11-09 16:59:42+01:00 | \n",
- " 14677 | \n",
- " 82923 | \n",
- " 2023-11-09 15:59:44.030922+01:00 | \n",
- " 2023-11-09 15:59:44.030922+01:00 | \n",
- "
\n",
- " \n",
- " 151050 | \n",
- " 243564 | \n",
- " 2023-11-09 17:16:41+01:00 | \n",
- " 14691 | \n",
- " 1254355 | \n",
- " 2023-11-09 16:16:43.012932+01:00 | \n",
- " 2023-11-09 16:16:43.012932+01:00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
151051 rows × 6 columns
\n",
- "
"
- ],
- "text/plain": [
- " id clicked_at link_id customer_id \\\n",
- "0 1 2021-03-26 16:30:36+01:00 1 284033 \n",
- "1 2 2021-03-26 17:16:34+01:00 2 119768 \n",
- "2 272 2021-03-28 20:03:32+02:00 42 113105 \n",
- "3 4 2021-03-26 17:43:19+01:00 3 272280 \n",
- "4 5 2021-03-26 17:46:00+01:00 3 105095 \n",
- "... ... ... ... ... \n",
- "151046 243553 2023-11-09 16:34:27+01:00 14666 998 \n",
- "151047 243554 2023-11-09 16:34:35+01:00 14670 998 \n",
- "151048 243559 2023-11-09 16:51:15+01:00 14686 82923 \n",
- "151049 243561 2023-11-09 16:59:42+01:00 14677 82923 \n",
- "151050 243564 2023-11-09 17:16:41+01:00 14691 1254355 \n",
- "\n",
- " created_at updated_at \n",
- "0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n",
- "1 2021-03-26 16:16:34.950871+01:00 2021-03-26 16:16:34.950871+01:00 \n",
- "2 2021-03-28 18:03:32.736394+02:00 2021-03-28 18:03:32.736394+02:00 \n",
- "3 2021-03-26 16:43:19.338321+01:00 2021-03-26 16:43:19.338321+01:00 \n",
- "4 2021-03-26 16:46:00.502945+01:00 2021-03-26 16:46:00.502945+01:00 \n",
- "... ... ... \n",
- "151046 2023-11-09 15:34:29.425425+01:00 2023-11-09 15:34:29.425425+01:00 \n",
- "151047 2023-11-09 15:34:37.505505+01:00 2023-11-09 15:34:37.505505+01:00 \n",
- "151048 2023-11-09 15:51:17.439518+01:00 2023-11-09 15:51:17.439518+01:00 \n",
- "151049 2023-11-09 15:59:44.030922+01:00 2023-11-09 15:59:44.030922+01:00 \n",
- "151050 2023-11-09 16:16:43.012932+01:00 2023-11-09 16:16:43.012932+01:00 \n",
- "\n",
- "[151051 rows x 6 columns]"
- ]
- },
- "execution_count": 38,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df1_link_stats"
]
},
{
"cell_type": "markdown",
- "id": "96ea2523-38dc-47ef-a49e-2c2d9ad0b1c6",
+ "id": "80ae4c42",
"metadata": {},
"source": [
"## Exploration variables"
@@ -3874,7 +1035,7 @@
{
"cell_type": "code",
"execution_count": 7,
- "id": "aaa41688-ea7e-4dba-851c-1f0b0ec43c71",
+ "id": "b50b8f95",
"metadata": {},
"outputs": [],
"source": [
@@ -3897,7 +1058,7 @@
{
"cell_type": "code",
"execution_count": 8,
- "id": "2fecc2e1-113f-46ed-9065-0b9ee416166e",
+ "id": "7e292935",
"metadata": {},
"outputs": [],
"source": [
@@ -3907,7 +1068,7 @@
{
"cell_type": "code",
"execution_count": 9,
- "id": "55f6170a-36fb-4efb-9810-f982883660cf",
+ "id": "05b6f2b0",
"metadata": {},
"outputs": [
{
@@ -3966,7 +1127,7 @@
{
"cell_type": "code",
"execution_count": 10,
- "id": "0030fd02-09e3-42f5-9c83-290458a38c29",
+ "id": "c9324d80",
"metadata": {},
"outputs": [],
"source": [
@@ -3981,7 +1142,7 @@
{
"cell_type": "code",
"execution_count": 11,
- "id": "6b1736d1-8fd7-4fcc-9431-b8bf0c7b4f2b",
+ "id": "10304058",
"metadata": {},
"outputs": [
{
@@ -4005,7 +1166,7 @@
{
"cell_type": "code",
"execution_count": 32,
- "id": "226b694b-0b00-4167-b69f-3178902254eb",
+ "id": "ffa423e5",
"metadata": {},
"outputs": [],
"source": [
@@ -4026,7 +1187,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "f51d8836-6eef-47d5-873d-4327e12a3245",
+ "id": "70bdc88d",
"metadata": {},
"outputs": [],
"source": []
@@ -4034,7 +1195,7 @@
{
"cell_type": "code",
"execution_count": 45,
- "id": "90b94363-a562-4633-ba27-622422e2368c",
+ "id": "6a0f567d",
"metadata": {},
"outputs": [],
"source": [
@@ -4053,7 +1214,7 @@
{
"cell_type": "code",
"execution_count": 63,
- "id": "fedbfbd2-698b-4846-9618-84a3c8d087c7",
+ "id": "1522d8cd",
"metadata": {},
"outputs": [],
"source": [
@@ -4063,7 +1224,7 @@
{
"cell_type": "code",
"execution_count": 66,
- "id": "8d365bb5-2ddc-4f68-b415-e21f960c2c0f",
+ "id": "b0e42a61",
"metadata": {},
"outputs": [],
"source": [
@@ -4077,7 +1238,7 @@
{
"cell_type": "code",
"execution_count": 68,
- "id": "fc37348d-b282-42ad-b768-c882148d8f66",
+ "id": "d299ae91",
"metadata": {},
"outputs": [
{