diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb index 9f3f20b..ced5bdf 100644 --- a/0_Cleaning_and_merge.ipynb +++ b/0_Cleaning_and_merge.ipynb @@ -38,8 +38,7 @@ "outputs": [], "source": [ "# Create filesystem object\n", - "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", - "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})" + "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': 'https://'+'minio-simple.lab.groupe-genes.fr'},key ='WKTGH4YGUBAT3TR0OSUR', secret = 'g8ozi6ZUrBy8DzaAip4F7zOizbr4DKf4RgYNseqU', token = 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3NLZXkiOiJXS1RHSDRZR1VCQVQzVFIwT1NVUiIsImFjciI6IjAiLCJhbGxvd2VkLW9yaWdpbnMiOlsiaHR0cHM6Ly9vbnl4aWEubGFiLmdyb3VwZS1nZW5lcy5mciJdLCJhdWQiOlsibWluaW8iLCJhY2NvdW50Il0sImF1dGhfdGltZSI6MTcwNzU4NjUwMCwiYXpwIjoib255eGlhLW1pbmlvIiwiZW1haWwiOiJhbnRvaW5lLmpvdWJyZWxAZW5zYWUuZnIiLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiZXhwIjoxNzA3NjczMDQ3LCJmYW1pbHlfbmFtZSI6IkpPVUJSRUwiLCJnaXZlbl9uYW1lIjoiQW50b2luZSIsImdyb3VwcyI6WyJiZGMyMzI0LXRlYW0xIl0sImlhdCI6MTcwNzU4NjY0NywiaXNzIjoiaHR0cHM6Ly9hdXRoLmdyb3VwZS1nZW5lcy5mci9yZWFsbXMvZ2VuZXMiLCJqdGkiOiI1MjQ2MDZmMS1lYWM3LTQxZDgtYTEzMy04MGZjMDk0MGVlNzEiLCJuYW1lIjoiQW50b2luZSBKT1VCUkVMIiwicG9saWN5Ijoic3Rzb25seSIsInByZWZlcnJlZF91c2VybmFtZSI6ImFqb3VicmVsLWVuc2FlIiwicmVhbG1fYWNjZXNzIjp7InJvbGVzIjpbIm9mZmxpbmVfYWNjZXNzIiwiZGVmYXVsdC1yb2xlcy1nZW5lcyIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJvcGVuaWQgcHJvZmlsZSBlbWFpbCIsInNlc3Npb25fc3RhdGUiOiI1OTk2MWNkYy0xNmFiLTQ4MTAtYWE4Zi1iZGUyMjkwNjhiNzUiLCJzaWQiOiI1OTk2MWNkYy0xNmFiLTQ4MTAtYWE4Zi1iZGUyMjkwNjhiNzUiLCJzdWIiOiIwNWYwZDk3Mi1jNWM4LTQyNmYtODAwZC00NmQ0OGU4NjkwMzUiLCJ0eXAiOiJCZWFyZXIifQ.-imw-N4bk1uCcQGobkxhsRoeBAqxC9rT7PifElbC7ODOStnwIulc7HRR2fmtiqI2PdyrfnVvzfmIPK1g056HbA')" ] }, { @@ -79,7 +78,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_492/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_42764/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.read_csv(file_in)\n" ] } @@ -205,6 +204,7 @@ " # Base des fournisseurs\n", " suppliers = suppliers[['id', 'name']]\n", " suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n", + " suppliers['supplier_name'] = suppliers['supplier_name'].fillna('')\n", "\n", " # Base des types de billets\n", " type_ofs = type_ofs[['id', 'name', 'children']]\n", @@ -242,17 +242,23 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_492/1591303091.py:5: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/3092893564.py:5: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n", - "/tmp/ipykernel_492/1591303091.py:9: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/3092893564.py:9: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n", - "/tmp/ipykernel_492/1591303091.py:13: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/3092893564.py:10: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " suppliers['supplier_name'] = suppliers['supplier_name'].fillna('')\n", + "/tmp/ipykernel_42764/3092893564.py:14: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -386,169 +392,6 @@ "df1_ticket_information.head()" ] }, - { - "cell_type": "markdown", - "id": "37499eae-1a7f-4dce-83b0-ff942ccf7a9d", - "metadata": {}, - "source": [ - "### KPI tickets" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "043303fe-e90f-4689-a2a9-5d690555a045", - "metadata": {}, - "outputs": [], - "source": [ - "def tickets_kpi_function(tickets_information = None):\n", - " tickets_information_copy = tickets_information.copy()\n", - " tickets_information_copy['purchase_date_max'] = tickets_information_copy['purchase_date']\n", - " tickets_kpi = (tickets_information_copy[['product_id', 'customer_id', 'ticket_id','supplier_name', 'purchase_date', 'purchase_date_max']]\n", - " .groupby(['product_id', 'customer_id'])\n", - " .agg({'ticket_id': 'count', \n", - " 'supplier_name': 'nunique',\n", - " 'purchase_date_max' : 'max',\n", - " 'purchase_date' : 'min'})\n", - " .reset_index()\n", - " )\n", - " \n", - " tickets_kpi.rename(columns = {'ticket_id' : 'nb_tickets', \n", - " 'supplier_name' : 'nb_suppliers', \n", - " 'purchase_date' : 'purchase_date_min'}, inplace = True)\n", - " \n", - " tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']\n", - " \n", - " return tickets_kpi\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "5882234a-1ed5-4269-87a6-0d75613476e3", - "metadata": {}, - "outputs": [], - "source": [ - "df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_ticket_information)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
product_idcustomer_idnb_ticketsnb_supplierspurchase_date_maxpurchase_date_mintime_between_purchase
01073102805422019-06-05 14:37:13+00:002019-06-05 14:18:38+00:000 days 00:18:35
111008954355112017-02-17 13:32:51+00:002017-02-17 13:32:51+00:000 days 00:00:00
211008954356112017-03-02 14:36:16+00:002017-03-02 14:36:16+00:000 days 00:00:00
311008954357112017-03-06 15:16:41+00:002017-03-06 15:16:41+00:000 days 00:00:00
411008954358112017-03-13 16:07:27+00:002017-03-13 16:07:27+00:000 days 00:00:00
\n", - "
" - ], - "text/plain": [ - " product_id customer_id nb_tickets nb_suppliers \\\n", - "0 107310 2805 4 2 \n", - "1 110089 54355 1 1 \n", - "2 110089 54356 1 1 \n", - "3 110089 54357 1 1 \n", - "4 110089 54358 1 1 \n", - "\n", - " purchase_date_max purchase_date_min time_between_purchase \n", - "0 2019-06-05 14:37:13+00:00 2019-06-05 14:18:38+00:00 0 days 00:18:35 \n", - "1 2017-02-17 13:32:51+00:00 2017-02-17 13:32:51+00:00 0 days 00:00:00 \n", - "2 2017-03-02 14:36:16+00:00 2017-03-02 14:36:16+00:00 0 days 00:00:00 \n", - "3 2017-03-06 15:16:41+00:00 2017-03-06 15:16:41+00:00 0 days 00:00:00 \n", - "4 2017-03-13 16:07:27+00:00 2017-03-13 16:07:27+00:00 0 days 00:00:00 " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_tickets_kpi.head()" - ] - }, { "cell_type": "markdown", "id": "096e47f4-1d65-4575-989d-83227eedad2b", @@ -559,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "id": "baed146a-9d3a-4397-a812-3d50c9a2f038", "metadata": {}, "outputs": [], @@ -588,7 +431,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "id": "5fbfd88b-b94c-489c-9201-670e96e453e7", "metadata": {}, "outputs": [ @@ -596,7 +439,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_492/3848597476.py:4: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/3848597476.py:4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -608,165 +451,6 @@ "df1_target_information = preprocessing_target_area(targets = df1_targets, target_types = df1_target_types, customer_target_mappings = df1_customer_target_mappings)" ] }, - { - "cell_type": "code", - "execution_count": 16, - "id": "b4f05142-2a22-42ef-a60d-f23cc4b5cb09", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_id
target_name
consentement optin mediation specialisee150000
consentement optin jeune public149979
consentement optin b2c108909
Arenametrix_bascule tel vers sib35216
consentement optout b2c34523
\n", - "
" - ], - "text/plain": [ - " customer_id\n", - "target_name \n", - "consentement optin mediation specialisee 150000\n", - "consentement optin jeune public 149979\n", - "consentement optin b2c 108909\n", - "Arenametrix_bascule tel vers sib 35216\n", - "consentement optout b2c 34523" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_target_information[['target_name', 'customer_id']].groupby('target_name').count().sort_values(by='customer_id', ascending=False).head()" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "4417ff51-f501-4ab9-a192-4ab75764a8ed", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_id
target_name
Arenametrix_bascule tel vers sib35216
Autres_interet_exposition1021
COM Inscrits NL générale (historique)23005
Contacts_prenomsdoubles11643
DDCP MD Procès du Siècle1684
\n", - "
" - ], - "text/plain": [ - " customer_id\n", - "target_name \n", - "Arenametrix_bascule tel vers sib 35216\n", - "Autres_interet_exposition 1021\n", - "COM Inscrits NL générale (historique) 23005\n", - "Contacts_prenomsdoubles 11643\n", - "DDCP MD Procès du Siècle 1684" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_target_information_reduced = df1_target_information[['target_name', 'customer_id']].groupby('target_name').count()\n", - "df1_target_information_reduced[df1_target_information_reduced['customer_id'] >= 1000].head()" - ] - }, { "cell_type": "markdown", "id": "cdbb48b4-5e16-4ef4-8791-ed213d68d52f", @@ -777,7 +461,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 13, "id": "d883cc7b-ac43-4485-b86f-eaf595fbad85", "metadata": {}, "outputs": [], @@ -802,7 +486,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 14, "id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f", "metadata": {}, "outputs": [ @@ -810,19 +494,19 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_492/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", - "/tmp/ipykernel_492/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", - "/tmp/ipykernel_492/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_42764/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -837,7 +521,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 15, "id": "c24457e7-3cad-451a-a65b-7373b656bd6e", "metadata": { "scrolled": true @@ -957,7 +641,7 @@ "4 404 2021-03-27 23:00:00+00:00 " ] }, - "execution_count": 20, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -966,159 +650,12 @@ "df1_campaigns_information.head()" ] }, - { - "cell_type": "code", - "execution_count": 21, - "id": "e2c88552-b863-47a2-be23-8d2898fb28bc", - "metadata": {}, - "outputs": [], - "source": [ - "def campaigns_kpi_function(campaigns_information = None):\n", - " # Nombre de campagnes de mails\n", - " nb_campaigns = campaigns_information[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n", - " nb_campaigns.rename(columns = {'campaign_name' : 'nb_campaigns'}, inplace = True)\n", - " # Temps d'ouverture en min moyen \n", - " campaigns_information['time_to_open'] = campaigns_information['opened_at'] - campaigns_information['delivered_at']\n", - " time_to_open = campaigns_information[['customer_id', 'time_to_open']].groupby('customer_id').mean().reset_index()\n", - "\n", - " # Nombre de mail ouvert \n", - " opened_campaign = campaigns_information[['customer_id', 'campaign_name', 'opened_at']]\n", - " opened_campaign.dropna(subset=['opened_at'], inplace=True)\n", - " opened_campaign = opened_campaign[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n", - " opened_campaign.rename(columns = {'campaign_name' : 'nb_campaigns_opened' }, inplace = True)\n", - "\n", - " # Fusion des indicateurs\n", - " campaigns_reduced = pd.merge(nb_campaigns, opened_campaign, on = 'customer_id', how = 'left')\n", - " campaigns_reduced = pd.merge(campaigns_reduced, time_to_open, on = 'customer_id', how = 'left')\n", - "\n", - " # Remplir les NaN : nb_campaigns_opened\n", - " campaigns_reduced['nb_campaigns_opened'].fillna(0, inplace=True)\n", - "\n", - " # Remplir les NaT : time_to_open (??)\n", - "\n", - " return campaigns_reduced\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "24537647-bc29-4777-9848-ac4120a4aa60", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_492/3700263836.py:11: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " opened_campaign.dropna(subset=['opened_at'], inplace=True)\n" - ] - } - ], - "source": [ - "df1_campaigns_kpi = campaigns_kpi_function(campaigns_information = df1_campaigns_information) " - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_idnb_campaignsnb_campaigns_openedtime_to_open
0240.0NaT
13222124.01 days 00:28:30.169354838
2477.01 days 04:31:01.428571428
3540.0NaT
46200.0NaT
\n", - "
" - ], - "text/plain": [ - " customer_id nb_campaigns nb_campaigns_opened time_to_open\n", - "0 2 4 0.0 NaT\n", - "1 3 222 124.0 1 days 00:28:30.169354838\n", - "2 4 7 7.0 1 days 04:31:01.428571428\n", - "3 5 4 0.0 NaT\n", - "4 6 20 0.0 NaT" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns_kpi.head()" - ] - }, { "cell_type": "markdown", "id": "56520a97-ede8-4920-a211-3b5b136af33d", "metadata": {}, "source": [ - "## Create Products Table" + "## Product area" ] }, { @@ -1131,7 +668,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 16, "id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d", "metadata": {}, "outputs": [], @@ -1142,7 +679,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 17, "id": "607eb4b4-eed9-4b50-b823-f75c116dd37c", "metadata": {}, "outputs": [], @@ -1213,7 +750,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 18, "id": "350b09b9-451f-4d47-81fe-f34b892db027", "metadata": {}, "outputs": [], @@ -1301,7 +838,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 19, "id": "0fccc8ef-e575-4857-a401-94a7274394df", "metadata": {}, "outputs": [ @@ -1454,7 +991,7 @@ "4 indiv entrées tp " ] }, - "execution_count": 27, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1466,7 +1003,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 20, "id": "779d8aaf-6668-4f66-8852-847304407ea3", "metadata": {}, "outputs": [ @@ -1636,7 +1173,7 @@ "4 spectacle vivant mucem " ] }, - "execution_count": 28, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1648,7 +1185,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 21, "id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0", "metadata": {}, "outputs": [ @@ -1747,7 +1284,7 @@ "4 37 383 269 1" ] }, - "execution_count": 29, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1767,7 +1304,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 22, "id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba", "metadata": {}, "outputs": [], @@ -1789,13 +1326,13 @@ " products_global = order_columns_id(products_global)\n", "\n", " # remove useless columns \n", - " products_global = products_global.drop(columns = ['type_of_id', 'name_events', 'name_seasons', 'name_categories'])\n", + " products_global = products_global.drop(columns = ['type_of_id']) # 'name_events', 'name_seasons', 'name_categories'\n", " return products_global" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 23, "id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951", "metadata": {}, "outputs": [ @@ -1849,12 +1386,15 @@ " id_representation_cap\n", " season_id\n", " facility_id\n", - " event_type_id\n", + " ...\n", " event_type_key_id\n", " facility_key_id\n", " street_id\n", " amount\n", " is_full_price\n", + " name_categories\n", + " name_events\n", + " name_seasons\n", " name_event_types\n", " name_facilities\n", " \n", @@ -1872,12 +1412,15 @@ " 8789\n", " 4\n", " 1\n", - " 2\n", + " ...\n", " 5\n", " 1\n", " 1\n", " 9.0\n", " False\n", + " indiv activité tr\n", + " visite-jeu \"le classico des minots\" (1h30)\n", + " 2017\n", " offre muséale individuel\n", " mucem\n", " \n", @@ -1893,12 +1436,15 @@ " 390\n", " 2\n", " 1\n", - " 2\n", + " ...\n", " 2\n", " 1\n", " 1\n", " 9.5\n", " False\n", + " indiv entrées tp\n", + " billet mucem picasso\n", + " 2016\n", " offre muséale individuel\n", " mucem\n", " \n", @@ -1914,12 +1460,15 @@ " 395\n", " 2\n", " 1\n", - " 2\n", + " ...\n", " 2\n", " 1\n", " 1\n", " 11.5\n", " False\n", + " indiv entrées tp\n", + " billet mucem picasso\n", + " 2016\n", " offre muséale individuel\n", " mucem\n", " \n", @@ -1935,12 +1484,15 @@ " 120199\n", " 1754\n", " 1\n", - " 2\n", + " ...\n", " 4\n", " 1\n", " 1\n", " 8.0\n", " False\n", + " indiv entrées tr\n", + " NaN\n", + " NaN\n", " offre muséale individuel\n", " mucem\n", " \n", @@ -1956,17 +1508,21 @@ " 21\n", " 4\n", " 1\n", - " 3\n", + " ...\n", " 6\n", " 1\n", " 1\n", " 8.5\n", " False\n", + " indiv entrées tp\n", + " non défini\n", + " 2017\n", " non défini\n", " mucem\n", " \n", " \n", "\n", + "

5 rows × 21 columns

\n", "" ], "text/plain": [ @@ -1984,22 +1540,38 @@ "3 156773 1 12365 120199 \n", "4 1175 1 8 21 \n", "\n", - " season_id facility_id event_type_id event_type_key_id facility_key_id \\\n", - "0 4 1 2 5 1 \n", - "1 2 1 2 2 1 \n", - "2 2 1 2 2 1 \n", - "3 1754 1 2 4 1 \n", - "4 4 1 3 6 1 \n", + " season_id facility_id ... event_type_key_id facility_key_id street_id \\\n", + "0 4 1 ... 5 1 1 \n", + "1 2 1 ... 2 1 1 \n", + "2 2 1 ... 2 1 1 \n", + "3 1754 1 ... 4 1 1 \n", + "4 4 1 ... 6 1 1 \n", "\n", - " street_id amount is_full_price name_event_types name_facilities \n", - "0 1 9.0 False offre muséale individuel mucem \n", - "1 1 9.5 False offre muséale individuel mucem \n", - "2 1 11.5 False offre muséale individuel mucem \n", - "3 1 8.0 False offre muséale individuel mucem \n", - "4 1 8.5 False non défini mucem " + " amount is_full_price name_categories \\\n", + "0 9.0 False indiv activité tr \n", + "1 9.5 False indiv entrées tp \n", + "2 11.5 False indiv entrées tp \n", + "3 8.0 False indiv entrées tr \n", + "4 8.5 False indiv entrées tp \n", + "\n", + " name_events name_seasons \\\n", + "0 visite-jeu \"le classico des minots\" (1h30) 2017 \n", + "1 billet mucem picasso 2016 \n", + "2 billet mucem picasso 2016 \n", + "3 NaN NaN \n", + "4 non défini 2017 \n", + "\n", + " name_event_types name_facilities \n", + "0 offre muséale individuel mucem \n", + "1 offre muséale individuel mucem \n", + "2 offre muséale individuel mucem \n", + "3 offre muséale individuel mucem \n", + "4 non défini mucem \n", + "\n", + "[5 rows x 21 columns]" ] }, - "execution_count": 31, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -2011,13 +1583,1076 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 24, "id": "98f78cd5-b694-4cc6-b033-20170aa13e8d", "metadata": {}, "outputs": [], "source": [ "# Fusion liée au product\n", - "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')" + "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n", + "\n", + "# Selection des variables d'intérêts\n", + "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]" + ] + }, + { + "cell_type": "markdown", + "id": "d7c3668a-c016-4bd0-837e-04af328ff14f", + "metadata": {}, + "source": [ + "# Construction des variables explicatives" + ] + }, + { + "cell_type": "markdown", + "id": "314f1b7f-ae48-4c6f-8469-9ce879043243", + "metadata": {}, + "source": [ + "## KPI campaigns" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "e2c88552-b863-47a2-be23-8d2898fb28bc", + "metadata": {}, + "outputs": [], + "source": [ + "def campaigns_kpi_function(campaigns_information = None):\n", + " # Nombre de campagnes de mails\n", + " nb_campaigns = campaigns_information[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n", + " nb_campaigns.rename(columns = {'campaign_name' : 'nb_campaigns'}, inplace = True)\n", + " # Temps d'ouverture en min moyen \n", + " campaigns_information['time_to_open'] = campaigns_information['opened_at'] - campaigns_information['delivered_at']\n", + " time_to_open = campaigns_information[['customer_id', 'time_to_open']].groupby('customer_id').mean().reset_index()\n", + "\n", + " # Nombre de mail ouvert \n", + " opened_campaign = campaigns_information[['customer_id', 'campaign_name', 'opened_at']]\n", + " opened_campaign.dropna(subset=['opened_at'], inplace=True)\n", + " opened_campaign = opened_campaign[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n", + " opened_campaign.rename(columns = {'campaign_name' : 'nb_campaigns_opened' }, inplace = True)\n", + "\n", + " # Fusion des indicateurs\n", + " campaigns_reduced = pd.merge(nb_campaigns, opened_campaign, on = 'customer_id', how = 'left')\n", + " campaigns_reduced = pd.merge(campaigns_reduced, time_to_open, on = 'customer_id', how = 'left')\n", + "\n", + " # Remplir les NaN : nb_campaigns_opened\n", + " campaigns_reduced['nb_campaigns_opened'].fillna(0, inplace=True)\n", + "\n", + " # Remplir les NaT : time_to_open (??)\n", + "\n", + " return campaigns_reduced\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "24537647-bc29-4777-9848-ac4120a4aa60", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_42764/3700263836.py:11: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " opened_campaign.dropna(subset=['opened_at'], inplace=True)\n" + ] + } + ], + "source": [ + "df1_campaigns_kpi = campaigns_kpi_function(campaigns_information = df1_campaigns_information) " + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idnb_campaignsnb_campaigns_openedtime_to_open
0240.0NaT
13222124.01 days 00:28:30.169354838
2477.01 days 04:31:01.428571428
3540.0NaT
46200.0NaT
\n", + "
" + ], + "text/plain": [ + " customer_id nb_campaigns nb_campaigns_opened time_to_open\n", + "0 2 4 0.0 NaT\n", + "1 3 222 124.0 1 days 00:28:30.169354838\n", + "2 4 7 7.0 1 days 04:31:01.428571428\n", + "3 5 4 0.0 NaT\n", + "4 6 20 0.0 NaT" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_campaigns_kpi.head()" + ] + }, + { + "cell_type": "markdown", + "id": "d4dcfbe0-c6ce-497e-b75e-dc9e938801b2", + "metadata": {}, + "source": [ + "## KPI tickets" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "b913a69e-3146-4919-b5f6-a6108532bffa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['spectacle vivant', 'offre muséale individuel', 'formule adhésion',\n", + " 'offre muséale groupe'], dtype=object)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_products_purchased_reduced['name_event_types'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "2bda0b97-b28b-4070-a57d-aeab0e2f7dfe", + "metadata": {}, + "outputs": [], + "source": [ + "# Nombre de client assistant à plus de 2 type d'événement\n", + "nb_event_types = df1_products_purchased_reduced[['customer_id', 'name_event_types']].groupby('customer_id').nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "043303fe-e90f-4689-a2a9-5d690555a045", + "metadata": {}, + "outputs": [], + "source": [ + "def tickets_kpi_function(tickets_information = None):\n", + "\n", + " tickets_information_copy = tickets_information.copy()\n", + "\n", + " # Dummy : Canal de vente en ligne\n", + " liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online'] # vad = vente à distance\n", + " tickets_information_copy['vente_internet'] = tickets_information_copy['supplier_name'].str.contains('|'.join(liste_mots), case=False).astype(int)\n", + "\n", + " # Proportion de vente en ligne\n", + " prop_vente_internet = tickets_information_copy[tickets_information_copy['vente_internet'] == 1].groupby(['customer_id', 'event_type_id'])['ticket_id'].count().reset_index()\n", + " prop_vente_internet.rename(columns = {'ticket_id' : 'nb_tickets_internet'}, inplace = True)\n", + " \n", + " tickets_kpi = (tickets_information_copy[['event_type_id', 'customer_id', 'ticket_id','supplier_name', 'purchase_date', 'amount', 'vente_internet']]\n", + " .groupby(['customer_id', 'event_type_id']) \n", + " .agg({'ticket_id': 'count', \n", + " 'amount' : 'sum',\n", + " 'supplier_name': 'nunique',\n", + " 'vente_internet' : 'max',\n", + " 'purchase_date' : ['min', 'max']})\n", + " .reset_index()\n", + " )\n", + " \n", + " tickets_kpi.columns = tickets_kpi.columns.map('_'.join)\n", + " \n", + " tickets_kpi.rename(columns = {'ticket_id_count' : 'nb_tickets', \n", + " 'amount_sum' : 'total_amount',\n", + " 'supplier_name_nunique' : 'nb_suppliers', \n", + " 'customer_id_' : 'customer_id',\n", + " 'event_type_id_' : 'event_type_id'}, inplace = True)\n", + " \n", + " tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']\n", + "\n", + " tickets_kpi = tickets_kpi.merge(prop_vente_internet, on = ['customer_id', 'event_type_id'], how = 'left')\n", + " tickets_kpi['nb_tickets_internet'] = tickets_kpi['nb_tickets_internet'].fillna(0)\n", + " \n", + " return tickets_kpi\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "5882234a-1ed5-4269-87a6-0d75613476e3", + "metadata": {}, + "outputs": [], + "source": [ + "df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_products_purchased_reduced)" + ] + }, + { + "cell_type": "markdown", + "id": "597b241e-a83d-4b7c-8ad7-eec50295dff2", + "metadata": {}, + "source": [ + "#### Exportation" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "a4a2311d-8a72-4030-afd5-218004d5d2a5", + "metadata": {}, + "outputs": [], + "source": [ + "# Exportation vers 'projet-bdc2324-team1'\n", + "BUCKET_OUT = \"projet-bdc2324-team1\"\n", + "FILE_KEY_OUT_S3 = \"0_Temp/Company 1 - Purchasing behaviour.csv\"\n", + "FILE_PATH_OUT_S3 = BUCKET_OUT + \"/\" + FILE_KEY_OUT_S3\n", + "\n", + "with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n", + " df1_tickets_kpi.to_csv(file_out, index = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idevent_type_idnb_ticketstotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internet
1144532423248965.5612013-09-23 14:45:01+00:002023-11-03 14:11:01+00:003692 days 23:26:002988.0
0123842262686540.5712014-12-03 14:55:37+00:002023-11-04 15:12:16+00:003258 days 00:16:3951.0
3162173561435871.5512017-01-01 02:20:08+00:002019-12-31 02:20:06+00:001093 days 23:59:585.0
2152017501459190.0612013-06-10 10:37:58+00:002023-11-08 15:59:45+00:003803 days 05:21:479.0
503267336142080.0312017-01-11 15:00:54+00:002019-11-27 09:47:06+00:001049 days 18:46:1213497.0
\n", + "
" + ], + "text/plain": [ + " customer_id event_type_id nb_tickets total_amount nb_suppliers \\\n", + "1 1 4 453242 3248965.5 6 \n", + "0 1 2 384226 2686540.5 7 \n", + "3 1 6 217356 1435871.5 5 \n", + "2 1 5 201750 1459190.0 6 \n", + "5032 6733 6 14208 0.0 3 \n", + "\n", + " vente_internet_max purchase_date_min purchase_date_max \\\n", + "1 1 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 \n", + "0 1 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 \n", + "3 1 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 \n", + "2 1 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 \n", + "5032 1 2017-01-11 15:00:54+00:00 2019-11-27 09:47:06+00:00 \n", + "\n", + " time_between_purchase nb_tickets_internet \n", + "1 3692 days 23:26:00 2988.0 \n", + "0 3258 days 00:16:39 51.0 \n", + "3 1093 days 23:59:58 5.0 \n", + "2 3803 days 05:21:47 9.0 \n", + "5032 1049 days 18:46:12 13497.0 " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_tickets_kpi.sort_values(by='nb_tickets', ascending=False).head(5)" + ] + }, + { + "cell_type": "markdown", + "id": "f1d7f7ba-361b-467d-b375-b09c149185f7", + "metadata": {}, + "source": [ + "## Alexis' work" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "4ab1c0d2-0097-4669-b984-b6822c976740", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
event_type_idavg_amount
026.150659
147.762474
254.452618
366.439463
\n", + "
" + ], + "text/plain": [ + " event_type_id avg_amount\n", + "0 2 6.150659\n", + "1 4 7.762474\n", + "2 5 4.452618\n", + "3 6 6.439463" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg_amount = (df1_products_purchased_reduced.groupby([\"event_type_id\"])\n", + " .agg({\"amount\" : \"mean\"}).reset_index()\n", + " .rename(columns = {'amount' : 'avg_amount'}))\n", + "\n", + "avg_amount" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "a9c62b39-389e-4dac-89a6-ac8a59fea58a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idevent_type_idnb_ticketsavg_amount
0123842266.150659
1144532427.762474
2152017504.452618
3162173566.439463
4221436.150659
\n", + "
" + ], + "text/plain": [ + " customer_id event_type_id nb_tickets avg_amount\n", + "0 1 2 384226 6.150659\n", + "1 1 4 453242 7.762474\n", + "2 1 5 201750 4.452618\n", + "3 1 6 217356 6.439463\n", + "4 2 2 143 6.150659" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nb_tickets = (df1_products_purchased_reduced.groupby([\"customer_id\", \"event_type_id\"])\n", + " .agg({\"ticket_id\" : \"count\"}).reset_index()\n", + " .rename(columns = {'ticket_id' : 'nb_tickets'})\n", + " .merge(avg_amount, how='left', on='event_type_id'))\n", + "nb_tickets.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "8710611c-7eb8-45ca-bdcc-009f4081f9e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idbirthdatestreet_idis_partnergenderis_email_trueopt_instructure_idprofessionlanguage...average_ticket_baskettotal_pricepurchase_countfirst_buying_datecountryagetenant_idnb_campaignsnb_campaigns_openedtime_to_open
012751NaN2False1TrueTrueNaNNaNNaN...NaNNaN0NaTfrNaN1311NaNNaNNaT
112825NaN2False2TrueTrueNaNNaNNaN...NaNNaN0NaTfrNaN1311NaNNaNNaT
211261NaN2False1TrueTrueNaNNaNNaN...NaNNaN0NaTfrNaN1311NaNNaNNaT
313071NaN2False2TrueTrueNaNNaNNaN...NaNNaN0NaTfrNaN1311NaNNaNNaT
4653061NaN10False2TrueFalseNaNNaNNaN...NaNNaN0NaTNaNNaN131180.02.00 days 19:53:02.500000
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " customer_id birthdate street_id is_partner gender is_email_true \\\n", + "0 12751 NaN 2 False 1 True \n", + "1 12825 NaN 2 False 2 True \n", + "2 11261 NaN 2 False 1 True \n", + "3 13071 NaN 2 False 2 True \n", + "4 653061 NaN 10 False 2 True \n", + "\n", + " opt_in structure_id profession language ... average_ticket_basket \\\n", + "0 True NaN NaN NaN ... NaN \n", + "1 True NaN NaN NaN ... NaN \n", + "2 True NaN NaN NaN ... NaN \n", + "3 True NaN NaN NaN ... NaN \n", + "4 False NaN NaN NaN ... NaN \n", + "\n", + " total_price purchase_count first_buying_date country age tenant_id \\\n", + "0 NaN 0 NaT fr NaN 1311 \n", + "1 NaN 0 NaT fr NaN 1311 \n", + "2 NaN 0 NaT fr NaN 1311 \n", + "3 NaN 0 NaT fr NaN 1311 \n", + "4 NaN 0 NaT NaN NaN 1311 \n", + "\n", + " nb_campaigns nb_campaigns_opened time_to_open \n", + "0 NaN NaN NaT \n", + "1 NaN NaN NaT \n", + "2 NaN NaN NaT \n", + "3 NaN NaN NaT \n", + "4 80.0 2.0 0 days 19:53:02.500000 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Fusion avec KPI campaigns liés au customer\n", + "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n", + "df1_customer.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "a89fad43-ee68-4081-9384-3e9f08ec6a59", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape : (156289, 31)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idbirthdatestreet_idis_partnergenderis_email_trueopt_instructure_idprofessionlanguage...first_buying_datecountryagetenant_idnb_campaignsnb_campaigns_openedtime_to_openevent_type_idnb_ticketsavg_amount
012751NaN2False1TrueTrueNaNNaNNaN...NaTfrNaN1311NaNNaNNaTNaNNaNNaN
112825NaN2False2TrueTrueNaNNaNNaN...NaTfrNaN1311NaNNaNNaTNaNNaNNaN
211261NaN2False1TrueTrueNaNNaNNaN...NaTfrNaN1311NaNNaNNaTNaNNaNNaN
313071NaN2False2TrueTrueNaNNaNNaN...NaTfrNaN1311NaNNaNNaTNaNNaNNaN
4653061NaN10False2TrueFalseNaNNaNNaN...NaTNaNNaN131180.02.00 days 19:53:02.500000NaNNaNNaN
\n", + "

5 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " customer_id birthdate street_id is_partner gender is_email_true \\\n", + "0 12751 NaN 2 False 1 True \n", + "1 12825 NaN 2 False 2 True \n", + "2 11261 NaN 2 False 1 True \n", + "3 13071 NaN 2 False 2 True \n", + "4 653061 NaN 10 False 2 True \n", + "\n", + " opt_in structure_id profession language ... first_buying_date country \\\n", + "0 True NaN NaN NaN ... NaT fr \n", + "1 True NaN NaN NaN ... NaT fr \n", + "2 True NaN NaN NaN ... NaT fr \n", + "3 True NaN NaN NaN ... NaT fr \n", + "4 False NaN NaN NaN ... NaT NaN \n", + "\n", + " age tenant_id nb_campaigns nb_campaigns_opened time_to_open \\\n", + "0 NaN 1311 NaN NaN NaT \n", + "1 NaN 1311 NaN NaN NaT \n", + "2 NaN 1311 NaN NaN NaT \n", + "3 NaN 1311 NaN NaN NaT \n", + "4 NaN 1311 80.0 2.0 0 days 19:53:02.500000 \n", + "\n", + " event_type_id nb_tickets avg_amount \n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n", + "print(\"shape : \", df1_customer_product.shape)\n", + "df1_customer_product.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "a19fec00-4ece-400c-937c-ce5cd8daccfd", + "metadata": {}, + "outputs": [], + "source": [ + "# df1_customer_product.to_csv(\"customer_product.csv\", index = False)" ] }, { @@ -2030,7 +2665,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 42, "id": "46de1912-4a66-46e5-8b9e-7768b2d2723b", "metadata": {}, "outputs": [], @@ -2041,13 +2676,23 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 43, "id": "1e42a790-b215-4107-a969-85005da06ebd", "metadata": {}, "outputs": [], "source": [ "# Fusion avec KPI liés au comportement d'achat\n", - "# df1_customer_product = pd.merge(df1_customer, df1_products_purchased, on = 'customer_id', how = 'left')" + "df1_customer_product = pd.merge(df1_tickets_kpi, df1_customer, on = 'customer_id', how = 'outer')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d950f24d-a5d1-4f1e-aeaa-ca826470365f", + "metadata": {}, + "outputs": [], + "source": [ + "# df1_customer_product" ] } ], diff --git a/Exploration_billet_AJ.ipynb b/Exploration_billet_AJ.ipynb index 13d581c..bec456e 100644 --- a/Exploration_billet_AJ.ipynb +++ b/Exploration_billet_AJ.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "56b3d44e-1e3f-4726-9916-0f9af107860e", + "id": "5bf5c226", "metadata": {}, "source": [ "# Business Data Challenge - Team 1" @@ -11,7 +11,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "15103481-8d74-404c-aa09-7601fe7730da", + "id": "b1a5b9d3", "metadata": {}, "outputs": [], "source": [ @@ -24,7 +24,7 @@ }, { "cell_type": "markdown", - "id": "c3bb0d13-34b2-4e1c-9985-468cd87c5a0e", + "id": "ecfa2219", "metadata": {}, "source": [ "Configuration de l'accès aux données" @@ -33,7 +33,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "5d83bb1a-d341-446e-91f6-1c428607f6d4", + "id": "1a094277", "metadata": {}, "outputs": [], "source": [ @@ -44,7 +44,7 @@ }, { "cell_type": "markdown", - "id": "f99da24f-0d93-4618-92bc-3ba81dc0445c", + "id": "c437eaec", "metadata": {}, "source": [ "# Exemple sur Company 1" @@ -52,7 +52,7 @@ }, { "cell_type": "markdown", - "id": "9d74b68f-ba07-4a15-9a27-dae931762d70", + "id": "a1c1fc39", "metadata": {}, "source": [ "## Chargement données" @@ -61,7 +61,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "699664b9-eee4-4f8d-a207-e524526560c5", + "id": "66f8c17b", "metadata": {}, "outputs": [], "source": [ @@ -72,17 +72,9 @@ { "cell_type": "code", "execution_count": 5, - "id": "0cb92854-903b-4efd-ac1b-197e29f044b4", + "id": "c08e6798", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['bdc2324-data/1/1purchases.csv', 'bdc2324-data/1/1suppliers.csv', 'bdc2324-data/1/1tickets.csv', 'bdc2324-data/1/1type_ofs.csv']\n" - ] - } - ], + "outputs": [], "source": [ "liste_database_select = ['suppliers', 'ticket', 'purchase', 'consumption', 'type_ofs']\n", "\n", @@ -96,15 +88,20 @@ { "cell_type": "code", "execution_count": 6, - "id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed", + "id": "675f518d", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ + "<<<<<<< local \n", "/tmp/ipykernel_445/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(file_in)\n" + " df = pd.read_csv(file_in)\n", + "=======\n", + "/tmp/ipykernel_15285/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(file_in)\n", + ">>>>>>> remote \n" ] } ], @@ -126,7 +123,7 @@ }, { "cell_type": "markdown", - "id": "f01e4530-1a61-49cb-a6b0-aa188cf1c0e0", + "id": "e855f403", "metadata": { "jp-MarkdownHeadingCollapsed": true }, @@ -137,52 +134,9 @@ { "cell_type": "code", "execution_count": 22, - "id": "a01f993a-0f9f-4aed-bd23-bcdec9041bb3", + "id": "91a8f8c4", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 151866 entries, 0 to 151865\n", - "Data columns (total 29 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 151866 non-null int64 \n", - " 1 birthdate 5437 non-null object \n", - " 2 street_id 151866 non-null int64 \n", - " 3 civility 0 non-null float64\n", - " 4 is_partner 151866 non-null bool \n", - " 5 deleted_at 0 non-null float64\n", - " 6 gender 151866 non-null int64 \n", - " 7 is_email_true 151866 non-null bool \n", - " 8 opt_in 151866 non-null bool \n", - " 9 structure_id 18114 non-null float64\n", - " 10 note 906 non-null object \n", - " 11 profession 6206 non-null object \n", - " 12 language 1092 non-null object \n", - " 13 mcp_contact_id 98901 non-null float64\n", - " 14 last_buying_date 73422 non-null object \n", - " 15 max_price 73422 non-null float64\n", - " 16 ticket_sum 151866 non-null int64 \n", - " 17 average_price 138746 non-null float64\n", - " 18 fidelity 151866 non-null int64 \n", - " 19 average_purchase_delay 73422 non-null float64\n", - " 20 average_price_basket 73422 non-null float64\n", - " 21 average_ticket_basket 73422 non-null float64\n", - " 22 total_price 86542 non-null float64\n", - " 23 purchase_count 151866 non-null int64 \n", - " 24 first_buying_date 73422 non-null object \n", - " 25 last_visiting_date 0 non-null float64\n", - " 26 country 143575 non-null object \n", - " 27 age 5437 non-null float64\n", - " 28 tenant_id 151866 non-null int64 \n", - "dtypes: bool(3), float64(12), int64(7), object(7)\n", - "memory usage: 30.6+ MB\n" - ] - } - ], + "outputs": [], "source": [ "a = pd.DataFrame(df1_customersplus.info())" ] @@ -190,7 +144,7 @@ { "cell_type": "code", "execution_count": 31, - "id": "45e82fc0-ba17-497b-9818-8be2bdc49d22", + "id": "2fda171d", "metadata": {}, "outputs": [], "source": [ @@ -219,7 +173,7 @@ { "cell_type": "code", "execution_count": 35, - "id": "d237be96-8c86-4a91-b7a1-487e87a16c3d", + "id": "205eeeab", "metadata": {}, "outputs": [], "source": [ @@ -244,7 +198,7 @@ { "cell_type": "code", "execution_count": 32, - "id": "4bcdb081-c34f-4d51-b93f-abbb6fa49c5e", + "id": "634282c5", "metadata": {}, "outputs": [], "source": [ @@ -254,350 +208,9 @@ { "cell_type": "code", "execution_count": 33, - "id": "319c814f-0956-4a92-9c0a-c6b9f53b04b5", + "id": "0e8d4133", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Nom_colonneType_colonneTaux_NA
0idint640.000000
1lastnameobject43.461341
2firstnameobject44.995588
3birthdateobject96.419870
4emailobject8.622075
5street_idint640.000000
6created_atobject0.000000
7updated_atobject0.000000
8civilityfloat64100.000000
9is_partnerbool0.000000
10extrafloat64100.000000
11deleted_atfloat64100.000000
12referencefloat64100.000000
13genderint640.000000
14is_email_truebool0.000000
15extra_fieldfloat64100.000000
16identifierobject0.000000
17opt_inbool0.000000
18structure_idfloat6488.072380
19noteobject99.403421
20professionobject95.913503
21languageobject99.280945
22mcp_contact_idfloat6434.876141
23need_reloadbool0.000000
24last_buying_dateobject51.653431
25max_pricefloat6451.653431
26ticket_sumint640.000000
27average_pricefloat648.639195
28fidelityint640.000000
29average_purchase_delayfloat6451.653431
30average_price_basketfloat6451.653431
31average_ticket_basketfloat6451.653431
32total_pricefloat6443.014236
33preferred_categoryfloat64100.000000
34preferred_supplierfloat64100.000000
35preferred_formulafloat64100.000000
36purchase_countint640.000000
37first_buying_dateobject51.653431
38last_visiting_datefloat64100.000000
39zipcodeobject71.176564
40countryobject5.459418
41agefloat6496.419870
42tenant_idint640.000000
\n", - "
" - ], - "text/plain": [ - " Nom_colonne Type_colonne Taux_NA\n", - "0 id int64 0.000000\n", - "1 lastname object 43.461341\n", - "2 firstname object 44.995588\n", - "3 birthdate object 96.419870\n", - "4 email object 8.622075\n", - "5 street_id int64 0.000000\n", - "6 created_at object 0.000000\n", - "7 updated_at object 0.000000\n", - "8 civility float64 100.000000\n", - "9 is_partner bool 0.000000\n", - "10 extra float64 100.000000\n", - "11 deleted_at float64 100.000000\n", - "12 reference float64 100.000000\n", - "13 gender int64 0.000000\n", - "14 is_email_true bool 0.000000\n", - "15 extra_field float64 100.000000\n", - "16 identifier object 0.000000\n", - "17 opt_in bool 0.000000\n", - "18 structure_id float64 88.072380\n", - "19 note object 99.403421\n", - "20 profession object 95.913503\n", - "21 language object 99.280945\n", - "22 mcp_contact_id float64 34.876141\n", - "23 need_reload bool 0.000000\n", - "24 last_buying_date object 51.653431\n", - "25 max_price float64 51.653431\n", - "26 ticket_sum int64 0.000000\n", - "27 average_price float64 8.639195\n", - "28 fidelity int64 0.000000\n", - "29 average_purchase_delay float64 51.653431\n", - "30 average_price_basket float64 51.653431\n", - "31 average_ticket_basket float64 51.653431\n", - "32 total_price float64 43.014236\n", - "33 preferred_category float64 100.000000\n", - "34 preferred_supplier float64 100.000000\n", - "35 preferred_formula float64 100.000000\n", - "36 purchase_count int64 0.000000\n", - "37 first_buying_date object 51.653431\n", - "38 last_visiting_date float64 100.000000\n", - "39 zipcode object 71.176564\n", - "40 country object 5.459418\n", - "41 age float64 96.419870\n", - "42 tenant_id int64 0.000000" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "a" ] @@ -605,7 +218,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "e54a1170-2b10-4b22-8241-e7f5ec3fce75", + "id": "1268ad5a", "metadata": {}, "outputs": [], "source": [ @@ -615,216 +228,9 @@ { "cell_type": "code", "execution_count": 40, - "id": "5c997ff6-251b-4e7f-8946-a8b722f5e97f", + "id": "bd41dc80", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_idbirthdatestreet_idis_partnergenderis_email_trueopt_instructure_idnoteprofession...fidelityaverage_purchase_delayaverage_price_basketaverage_ticket_baskettotal_pricepurchase_countfirst_buying_datecountryagetenant_id
012751NaN2False1TrueTrueNaNNaNNaN...0NaNNaNNaNNaN0NaTfrNaN1311
112825NaN2False2TrueTrueNaNNaNNaN...0NaNNaNNaNNaN0NaTfrNaN1311
211261NaN2False1TrueTrueNaNNaNNaN...0NaNNaNNaNNaN0NaTfrNaN1311
313071NaN2False2TrueTrueNaNNaNNaN...0NaNNaNNaNNaN0NaTfrNaN1311
4653061NaN10False2TrueFalseNaNNaNNaN...0NaNNaNNaNNaN0NaTNaNNaN1311
\n", - "

5 rows × 26 columns

\n", - "
" - ], - "text/plain": [ - " customer_id birthdate street_id is_partner gender is_email_true \\\n", - "0 12751 NaN 2 False 1 True \n", - "1 12825 NaN 2 False 2 True \n", - "2 11261 NaN 2 False 1 True \n", - "3 13071 NaN 2 False 2 True \n", - "4 653061 NaN 10 False 2 True \n", - "\n", - " opt_in structure_id note profession ... fidelity average_purchase_delay \\\n", - "0 True NaN NaN NaN ... 0 NaN \n", - "1 True NaN NaN NaN ... 0 NaN \n", - "2 True NaN NaN NaN ... 0 NaN \n", - "3 True NaN NaN NaN ... 0 NaN \n", - "4 False NaN NaN NaN ... 0 NaN \n", - "\n", - " average_price_basket average_ticket_basket total_price purchase_count \\\n", - "0 NaN NaN NaN 0 \n", - "1 NaN NaN NaN 0 \n", - "2 NaN NaN NaN 0 \n", - "3 NaN NaN NaN 0 \n", - "4 NaN NaN NaN 0 \n", - "\n", - " first_buying_date country age tenant_id \n", - "0 NaT fr NaN 1311 \n", - "1 NaT fr NaN 1311 \n", - "2 NaT fr NaN 1311 \n", - "3 NaT fr NaN 1311 \n", - "4 NaT NaN NaN 1311 \n", - "\n", - "[5 rows x 26 columns]" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Selection des variables\n", "df1_customersplus_clean = df1_customersplus.copy()\n", @@ -839,7 +245,7 @@ }, { "cell_type": "markdown", - "id": "e908f516-2a74-45d6-8492-7dcdc3afbe1f", + "id": "64d0f76b", "metadata": { "jp-MarkdownHeadingCollapsed": true }, @@ -850,264 +256,9 @@ { "cell_type": "code", "execution_count": 6, - "id": "14f4158e-c9c0-4beb-826a-5e0f949434a4", + "id": "7e683711", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnumbercreated_atupdated_atpurchase_idproduct_idis_from_subscriptiontype_ofsupplier_idbarcodeidentifier
013070859135930026612882021-12-28 20:47:10.320641+01:002022-02-14 18:46:53.614229+01:005107462225251False13NaNb6ad7fc36f33b5e05f58c7fca06688a6
113070860135930026613992021-12-28 20:47:10.321037+01:002022-02-14 18:46:53.614761+01:005107462224914False13NaNb0903af480266f27802fe5c38c277c9e
213070861135930026614192021-12-28 20:47:10.321629+01:002022-02-14 18:46:53.615521+01:005107462224914False13NaN64ca12b7e26a65b90335c0702ea0faba
313070862135930026615082021-12-28 20:47:10.322029+01:002022-02-14 18:46:53.616000+01:005107462224914False13NaN5ac2f8150aa9f3a6b1599df08cc2f0c7
413070863135930026616892021-12-28 20:47:10.322449+01:002022-02-14 18:46:53.616447+01:005107462224914False13NaNdfe30081bae020d12094279926136b9c
....................................
182666720662815135930161543902023-11-09 07:51:34.935983+01:002023-11-09 07:51:34.935983+01:008007697405689False13NaNdba9aa428f843b79ae69dfacfe8fc579
182666820662816135930161545012023-11-09 07:51:34.937038+01:002023-11-09 07:51:34.937038+01:008007698403658False13NaN93f1fcfc6ba4fa68f92eb4b4a619fcf0
182666920662817135930161546802023-11-09 07:51:34.938224+01:002023-11-09 07:51:34.938224+01:008007698403658False13NaNc8bbbd25df2c158767ceef42c3237f23
182667020662818135930161548992023-11-09 07:51:34.939328+01:002023-11-09 07:51:34.939328+01:008007699403658False13NaN738f0a8b5088b5056bc3b32eff2dca1f
182667120662819135930161549882023-11-09 07:51:34.940680+01:002023-11-09 07:51:34.940680+01:008007699403658False13NaN4c5a6195434377380b4e6ae63b2e9cf6
\n", - "

1826672 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " id number created_at \\\n", - "0 13070859 13593002661288 2021-12-28 20:47:10.320641+01:00 \n", - "1 13070860 13593002661399 2021-12-28 20:47:10.321037+01:00 \n", - "2 13070861 13593002661419 2021-12-28 20:47:10.321629+01:00 \n", - "3 13070862 13593002661508 2021-12-28 20:47:10.322029+01:00 \n", - "4 13070863 13593002661689 2021-12-28 20:47:10.322449+01:00 \n", - "... ... ... ... \n", - "1826667 20662815 13593016154390 2023-11-09 07:51:34.935983+01:00 \n", - "1826668 20662816 13593016154501 2023-11-09 07:51:34.937038+01:00 \n", - "1826669 20662817 13593016154680 2023-11-09 07:51:34.938224+01:00 \n", - "1826670 20662818 13593016154899 2023-11-09 07:51:34.939328+01:00 \n", - "1826671 20662819 13593016154988 2023-11-09 07:51:34.940680+01:00 \n", - "\n", - " updated_at purchase_id product_id \\\n", - "0 2022-02-14 18:46:53.614229+01:00 5107462 225251 \n", - "1 2022-02-14 18:46:53.614761+01:00 5107462 224914 \n", - "2 2022-02-14 18:46:53.615521+01:00 5107462 224914 \n", - "3 2022-02-14 18:46:53.616000+01:00 5107462 224914 \n", - "4 2022-02-14 18:46:53.616447+01:00 5107462 224914 \n", - "... ... ... ... \n", - "1826667 2023-11-09 07:51:34.935983+01:00 8007697 405689 \n", - "1826668 2023-11-09 07:51:34.937038+01:00 8007698 403658 \n", - "1826669 2023-11-09 07:51:34.938224+01:00 8007698 403658 \n", - "1826670 2023-11-09 07:51:34.939328+01:00 8007699 403658 \n", - "1826671 2023-11-09 07:51:34.940680+01:00 8007699 403658 \n", - "\n", - " is_from_subscription type_of supplier_id barcode \\\n", - "0 False 1 3 NaN \n", - "1 False 1 3 NaN \n", - "2 False 1 3 NaN \n", - "3 False 1 3 NaN \n", - "4 False 1 3 NaN \n", - "... ... ... ... ... \n", - "1826667 False 1 3 NaN \n", - "1826668 False 1 3 NaN \n", - "1826669 False 1 3 NaN \n", - "1826670 False 1 3 NaN \n", - "1826671 False 1 3 NaN \n", - "\n", - " identifier \n", - "0 b6ad7fc36f33b5e05f58c7fca06688a6 \n", - "1 b0903af480266f27802fe5c38c277c9e \n", - "2 64ca12b7e26a65b90335c0702ea0faba \n", - "3 5ac2f8150aa9f3a6b1599df08cc2f0c7 \n", - "4 dfe30081bae020d12094279926136b9c \n", - "... ... \n", - "1826667 dba9aa428f843b79ae69dfacfe8fc579 \n", - "1826668 93f1fcfc6ba4fa68f92eb4b4a619fcf0 \n", - "1826669 c8bbbd25df2c158767ceef42c3237f23 \n", - "1826670 738f0a8b5088b5056bc3b32eff2dca1f \n", - "1826671 4c5a6195434377380b4e6ae63b2e9cf6 \n", - "\n", - "[1826672 rows x 11 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1_tickets" ] @@ -1115,34 +266,9 @@ { "cell_type": "code", "execution_count": 7, - "id": "f3c35394-b586-4ae4-b5ab-b03bb01bb618", + "id": "e7b9a52e", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 1826672 entries, 0 to 1826671\n", - "Data columns (total 11 columns):\n", - " # Column Dtype \n", - "--- ------ ----- \n", - " 0 id int64 \n", - " 1 number object \n", - " 2 created_at object \n", - " 3 updated_at object \n", - " 4 purchase_id int64 \n", - " 5 product_id int64 \n", - " 6 is_from_subscription bool \n", - " 7 type_of int64 \n", - " 8 supplier_id int64 \n", - " 9 barcode float64\n", - " 10 identifier object \n", - "dtypes: bool(1), float64(1), int64(5), object(4)\n", - "memory usage: 141.1+ MB\n" - ] - } - ], + "outputs": [], "source": [ "df1_tickets.info()" ] @@ -1150,31 +276,9 @@ { "cell_type": "code", "execution_count": 8, - "id": "c1b42769-03c7-4785-92ce-5e1e6b41908d", + "id": "568280e8", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id 0.0\n", - "number 0.0\n", - "created_at 0.0\n", - "updated_at 0.0\n", - "purchase_id 0.0\n", - "product_id 0.0\n", - "is_from_subscription 0.0\n", - "type_of 0.0\n", - "supplier_id 0.0\n", - "barcode 100.0\n", - "identifier 0.0\n", - "dtype: float64" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1_tickets.isna().sum()/len(df1_tickets)*100" ] @@ -1182,21 +286,9 @@ { "cell_type": "code", "execution_count": 9, - "id": "42896791-2d93-4725-a50b-6c7cbe535ec7", + "id": "29ecec90", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_619/232847087.py:3: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df1_tickets_clean.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n" - ] - } - ], + "outputs": [], "source": [ "# Selection des variables\n", "df1_tickets_clean = df1_tickets.drop(['lastname', 'firstname', 'email', 'created_at', 'updated_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode'], axis = 1, inplace=True)\n", @@ -1205,7 +297,7 @@ }, { "cell_type": "markdown", - "id": "78453f3c-4f89-44ed-a6c6-2a7443b72b52", + "id": "22bb5de4", "metadata": { "jp-MarkdownHeadingCollapsed": true }, @@ -1216,194 +308,9 @@ { "cell_type": "code", "execution_count": 10, - "id": "2e0dada0-9457-484c-aa55-77e44613ecca", + "id": "6a9a91f4", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamemanually_addedlabelitrupdated_atcreated_atcommissionidentifier
01617j4 administrationFalseNaNNaN2021-07-29 09:21:37.325772+02:002021-07-29 09:21:37.325772+02:00NaN5958b2a060ac3e31678b438892a1bd2e
18non définiFalseNaNNaN2020-09-03 13:16:35.329062+02:002020-09-03 13:16:35.329062+02:00NaN52ff3466787b4d538407372e5f7afe0f
24vadFalseNaNNaN2020-09-03 13:11:23.896992+02:002020-09-03 13:11:23.896992+02:00NaN1225483c97b36018cab2bea14ab78ea6
31fort saint jeanFalseNaNNaN2020-09-03 13:11:23.833073+02:002020-09-03 13:11:23.833073+02:00NaN001b9b4a524fe407150b8235b304d4ec
42j4FalseNaNNaN2020-09-03 13:11:23.888993+02:002020-09-03 13:11:23.888993+02:00NaN6a0cf6edf20060344b465706b61719aa
55revendeurFalseNaNNaN2020-09-03 13:11:23.900987+02:002020-09-03 13:11:23.900987+02:00NaN931239d4acb6214d7e5c98edecfb4916
63vente en ligneFalseNaNNaN2020-09-03 13:11:23.893097+02:002020-09-03 13:11:23.893097+02:00NaNbde8f2ccff510df8572d3214d86b837d
76ccrFalseNaNNaN2020-09-03 13:11:23.904974+02:002020-09-03 13:11:23.904974+02:00NaNb48ec279411f7dbbb68393c61a9724d9
87dabFalseNaNNaN2020-09-03 13:11:23.908970+02:002020-09-03 13:11:23.908970+02:00NaN11c6d471fa4e354e62e684d293694202
\n", - "
" - ], - "text/plain": [ - " id name manually_added label itr \\\n", - "0 1617 j4 administration False NaN NaN \n", - "1 8 non défini False NaN NaN \n", - "2 4 vad False NaN NaN \n", - "3 1 fort saint jean False NaN NaN \n", - "4 2 j4 False NaN NaN \n", - "5 5 revendeur False NaN NaN \n", - "6 3 vente en ligne False NaN NaN \n", - "7 6 ccr False NaN NaN \n", - "8 7 dab False NaN NaN \n", - "\n", - " updated_at created_at \\\n", - "0 2021-07-29 09:21:37.325772+02:00 2021-07-29 09:21:37.325772+02:00 \n", - "1 2020-09-03 13:16:35.329062+02:00 2020-09-03 13:16:35.329062+02:00 \n", - "2 2020-09-03 13:11:23.896992+02:00 2020-09-03 13:11:23.896992+02:00 \n", - "3 2020-09-03 13:11:23.833073+02:00 2020-09-03 13:11:23.833073+02:00 \n", - "4 2020-09-03 13:11:23.888993+02:00 2020-09-03 13:11:23.888993+02:00 \n", - "5 2020-09-03 13:11:23.900987+02:00 2020-09-03 13:11:23.900987+02:00 \n", - "6 2020-09-03 13:11:23.893097+02:00 2020-09-03 13:11:23.893097+02:00 \n", - "7 2020-09-03 13:11:23.904974+02:00 2020-09-03 13:11:23.904974+02:00 \n", - "8 2020-09-03 13:11:23.908970+02:00 2020-09-03 13:11:23.908970+02:00 \n", - "\n", - " commission identifier \n", - "0 NaN 5958b2a060ac3e31678b438892a1bd2e \n", - "1 NaN 52ff3466787b4d538407372e5f7afe0f \n", - "2 NaN 1225483c97b36018cab2bea14ab78ea6 \n", - "3 NaN 001b9b4a524fe407150b8235b304d4ec \n", - "4 NaN 6a0cf6edf20060344b465706b61719aa \n", - "5 NaN 931239d4acb6214d7e5c98edecfb4916 \n", - "6 NaN bde8f2ccff510df8572d3214d86b837d \n", - "7 NaN b48ec279411f7dbbb68393c61a9724d9 \n", - "8 NaN 11c6d471fa4e354e62e684d293694202 " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1_suppliers" ] @@ -1411,32 +318,9 @@ { "cell_type": "code", "execution_count": 11, - "id": "b583be02-ab60-4e14-9325-0204f203a1af", + "id": "bab4758a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 9 entries, 0 to 8\n", - "Data columns (total 9 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 9 non-null int64 \n", - " 1 name 9 non-null object \n", - " 2 manually_added 9 non-null bool \n", - " 3 label 0 non-null float64\n", - " 4 itr 0 non-null float64\n", - " 5 updated_at 9 non-null object \n", - " 6 created_at 9 non-null object \n", - " 7 commission 0 non-null float64\n", - " 8 identifier 9 non-null object \n", - "dtypes: bool(1), float64(3), int64(1), object(4)\n", - "memory usage: 713.0+ bytes\n" - ] - } - ], + "outputs": [], "source": [ "df1_suppliers.info()" ] @@ -1444,29 +328,9 @@ { "cell_type": "code", "execution_count": 12, - "id": "6d7f338e-e4d3-422b-9cdc-dec967c0b28e", + "id": "b5fff251", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id 0.0\n", - "name 0.0\n", - "manually_added 0.0\n", - "label 100.0\n", - "itr 100.0\n", - "updated_at 0.0\n", - "created_at 0.0\n", - "commission 100.0\n", - "identifier 0.0\n", - "dtype: float64" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1_suppliers.isna().sum()/len(df1_suppliers)*100" ] @@ -1474,21 +338,9 @@ { "cell_type": "code", "execution_count": 13, - "id": "3c645ab7-16bf-4054-9ae2-15a8c32e29c6", + "id": "8b09e2a3", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_619/302783287.py:3: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df1_suppliers_clean.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n" - ] - } - ], + "outputs": [], "source": [ "# Selection des variables\n", "df1_suppliers_clean = df1_suppliers[['id', 'name']]\n", @@ -1498,109 +350,16 @@ { "cell_type": "code", "execution_count": 14, - "id": "4de7e2e2-6da4-4618-8444-b524399c5493", + "id": "ecee7cdc", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idsupplier_name
01617j4 administration
18non défini
24vad
31fort saint jean
42j4
55revendeur
63vente en ligne
76ccr
87dab
\n", - "
" - ], - "text/plain": [ - " id supplier_name\n", - "0 1617 j4 administration\n", - "1 8 non défini\n", - "2 4 vad\n", - "3 1 fort saint jean\n", - "4 2 j4\n", - "5 5 revendeur\n", - "6 3 vente en ligne\n", - "7 6 ccr\n", - "8 7 dab" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1_suppliers_clean" ] }, { "cell_type": "markdown", - "id": "0a6df975-c7fc-45bc-92af-a0bdab17d795", + "id": "c8e6e69b", "metadata": { "jp-MarkdownHeadingCollapsed": true }, @@ -1611,186 +370,9 @@ { "cell_type": "code", "execution_count": 15, - "id": "a02f6594-3e91-4e87-bbb6-649c28d4f7e9", + "id": "1a6cff1f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamechildrencreated_atupdated_atidentifier
01Atelierpricing_formula2021-01-05 11:55:51.188106+01:002021-01-05 11:55:51.188106+01:00623ec4067827558b28972cf39fe81ee7
12Billet en nombrepricing_formula2021-01-11 12:13:19.286301+01:002021-01-11 12:13:19.286301+01:00a53d313a97296ee37caa066dbfe7a45c
23Groupepricing_formula2021-01-11 12:19:22.842917+01:002021-01-11 12:19:22.842917+01:001ab143efc3b85acbbc752fe8eb2b0b86
34Revendeurpricing_formula2021-01-12 12:34:20.481236+01:002021-01-12 12:34:20.481236+01:008b332723366a07e1eef5f1c92f9ae067
45Cinéma scolairepricing_formula2021-01-25 19:16:05.141719+01:002021-01-25 19:16:05.141719+01:00a12e62cb4c4f47e7406bd8fbff2bfe30
56Musée famillepricing_formula2021-01-25 19:23:06.692627+01:002021-01-25 19:23:06.692627+01:001ec6c19283111ccb3ed67f52d414470e
67Spectacle famillepricing_formula2021-01-25 19:28:21.390016+01:002021-01-25 19:28:21.390016+01:0005e2104f1b74ced229c06847d6e91938
78Masterclasspricing_formula2021-01-25 19:31:05.076904+01:002021-01-25 19:31:05.076904+01:009cc946edfb25e11b4282f58db16e6ae9
89Spectaclepricing_formula2021-01-25 19:38:41.260535+01:002021-01-25 19:38:41.260535+01:00d88321c347f0e0ab101184cdf25c94bf
910Cinemapricing_formula2021-02-05 11:12:31.932576+01:002021-02-05 11:12:31.932576+01:000870fef2bfcd5b30a12e4f5c7f4aaba7
1011Museepricing_formula2021-02-05 11:52:05.468207+01:002021-02-05 11:52:05.468207+01:008ba8934454cc62c7cdb3eb6e1b39df0c
1112Tarifs pleincategory2023-03-13 11:31:50.528331+01:002023-03-13 11:31:50.528331+01:00a6969df76efc15d157be48e87a7bcf9a
\n", - "
" - ], - "text/plain": [ - " id name children created_at \\\n", - "0 1 Atelier pricing_formula 2021-01-05 11:55:51.188106+01:00 \n", - "1 2 Billet en nombre pricing_formula 2021-01-11 12:13:19.286301+01:00 \n", - "2 3 Groupe pricing_formula 2021-01-11 12:19:22.842917+01:00 \n", - "3 4 Revendeur pricing_formula 2021-01-12 12:34:20.481236+01:00 \n", - "4 5 Cinéma scolaire pricing_formula 2021-01-25 19:16:05.141719+01:00 \n", - "5 6 Musée famille pricing_formula 2021-01-25 19:23:06.692627+01:00 \n", - "6 7 Spectacle famille pricing_formula 2021-01-25 19:28:21.390016+01:00 \n", - "7 8 Masterclass pricing_formula 2021-01-25 19:31:05.076904+01:00 \n", - "8 9 Spectacle pricing_formula 2021-01-25 19:38:41.260535+01:00 \n", - "9 10 Cinema pricing_formula 2021-02-05 11:12:31.932576+01:00 \n", - "10 11 Musee pricing_formula 2021-02-05 11:52:05.468207+01:00 \n", - "11 12 Tarifs plein category 2023-03-13 11:31:50.528331+01:00 \n", - "\n", - " updated_at identifier \n", - "0 2021-01-05 11:55:51.188106+01:00 623ec4067827558b28972cf39fe81ee7 \n", - "1 2021-01-11 12:13:19.286301+01:00 a53d313a97296ee37caa066dbfe7a45c \n", - "2 2021-01-11 12:19:22.842917+01:00 1ab143efc3b85acbbc752fe8eb2b0b86 \n", - "3 2021-01-12 12:34:20.481236+01:00 8b332723366a07e1eef5f1c92f9ae067 \n", - "4 2021-01-25 19:16:05.141719+01:00 a12e62cb4c4f47e7406bd8fbff2bfe30 \n", - "5 2021-01-25 19:23:06.692627+01:00 1ec6c19283111ccb3ed67f52d414470e \n", - "6 2021-01-25 19:28:21.390016+01:00 05e2104f1b74ced229c06847d6e91938 \n", - "7 2021-01-25 19:31:05.076904+01:00 9cc946edfb25e11b4282f58db16e6ae9 \n", - "8 2021-01-25 19:38:41.260535+01:00 d88321c347f0e0ab101184cdf25c94bf \n", - "9 2021-02-05 11:12:31.932576+01:00 0870fef2bfcd5b30a12e4f5c7f4aaba7 \n", - "10 2021-02-05 11:52:05.468207+01:00 8ba8934454cc62c7cdb3eb6e1b39df0c \n", - "11 2023-03-13 11:31:50.528331+01:00 a6969df76efc15d157be48e87a7bcf9a " - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1_type_ofs" ] @@ -1798,29 +380,9 @@ { "cell_type": "code", "execution_count": 16, - "id": "e9c8d32b-22f4-4581-8af7-31cc1c31fa0e", + "id": "93630b41", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 12 entries, 0 to 11\n", - "Data columns (total 6 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 12 non-null int64 \n", - " 1 name 12 non-null object\n", - " 2 children 12 non-null object\n", - " 3 created_at 12 non-null object\n", - " 4 updated_at 12 non-null object\n", - " 5 identifier 12 non-null object\n", - "dtypes: int64(1), object(5)\n", - "memory usage: 704.0+ bytes\n" - ] - } - ], + "outputs": [], "source": [ "df1_type_ofs.info()" ] @@ -1828,21 +390,9 @@ { "cell_type": "code", "execution_count": 17, - "id": "cbb5e614-1fe5-4da0-bca0-8a242e0885da", + "id": "4f94481a", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_619/81842251.py:3: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df1_type_ofs_clean.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n" - ] - } - ], + "outputs": [], "source": [ "# Selection des variables\n", "df1_type_ofs_clean = df1_type_ofs[['id', 'name', 'children']]\n", @@ -1851,7 +401,7 @@ }, { "cell_type": "markdown", - "id": "676a9869-9a8b-4cd2-8b1c-0644b5229c72", + "id": "1b2811e2", "metadata": { "jp-MarkdownHeadingCollapsed": true }, @@ -1862,205 +412,11 @@ { "cell_type": "code", "execution_count": 18, - "id": "f8d36b72-f8e7-45e5-b4fa-e0803493fd3c", + "id": "2455d2e1", "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idpurchase_datecustomer_idcreated_atupdated_atnumberidentifier
051456622019-07-17 11:17:53+02:0066322021-12-28 20:48:51.569237+01:002021-12-28 20:48:51.569237+01:00fa80c83b29a268b45728c910a8afcf7982877c41df26f832eb823a83acd1a172
149416422018-10-31 11:59:00+01:0012021-12-28 20:31:48.196681+01:002022-03-03 17:52:21.958861+01:00597b6c06adfe6acc539b29b657b80da0e7102ebe65526c427245533ebabe66e5
250888602018-10-31 12:45:12+01:0012021-12-28 20:46:34.703542+01:002021-12-28 20:46:34.703542+01:004a7f6baaf9be6a99e3fead7f7e981fa8af75c4ae53d1b6957875538355b162e1
350888622018-10-31 13:07:12+01:0012021-12-28 20:46:34.704773+01:002021-12-28 20:46:34.704773+01:001d83dfad44b73070d1c6d5875d0edd2d4b2fe34659b177209b07270ae1043b40
450888632018-10-31 13:08:50+01:0012021-12-28 20:46:34.705453+01:002021-12-28 20:46:34.705453+01:007bfe2bc9c1670c973d0960e3fd408cf8b115f04a99b94df9e4a32185844f0998
........................
74224580076952023-11-08 17:51:19+01:0012561332023-11-09 07:51:33.920187+01:002023-11-09 07:51:33.920187+01:0099ad774dedbad43feb73514765d2f0bad68558180b4bf2e8a945724843655775
74224680076962023-11-08 18:17:51+01:0012561342023-11-09 07:51:33.921967+01:002023-11-09 07:51:33.921967+01:00c1511614c511c5f95980172690179102f5102d910a7731091f239ad7b0df35b4
74224780076972023-11-08 18:23:54+01:0012561352023-11-09 07:51:33.923034+01:002023-11-09 07:51:33.923034+01:0033b64b39cc53428b4f17d65ff5b93104e2b917626be60cc2c3207cc037fe69e4
74224880076982023-11-08 19:32:18+01:0012561362023-11-09 07:51:33.924135+01:002023-11-09 07:51:33.924135+01:009ae0b129e704b3d9c093ce9c7c4e50395bfa23236c31f8562c3a0233c1b53b31
74224980076992023-11-08 20:30:28+01:0012561372023-11-09 07:51:33.925382+01:002023-11-09 07:51:33.925382+01:00d31ced089c2b1f90479257a4686f9306d86b1e0de3ff01eaf04fbcd031ac5fef
\n", - "

742250 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " id purchase_date customer_id \\\n", - "0 5145662 2019-07-17 11:17:53+02:00 6632 \n", - "1 4941642 2018-10-31 11:59:00+01:00 1 \n", - "2 5088860 2018-10-31 12:45:12+01:00 1 \n", - "3 5088862 2018-10-31 13:07:12+01:00 1 \n", - "4 5088863 2018-10-31 13:08:50+01:00 1 \n", - "... ... ... ... \n", - "742245 8007695 2023-11-08 17:51:19+01:00 1256133 \n", - "742246 8007696 2023-11-08 18:17:51+01:00 1256134 \n", - "742247 8007697 2023-11-08 18:23:54+01:00 1256135 \n", - "742248 8007698 2023-11-08 19:32:18+01:00 1256136 \n", - "742249 8007699 2023-11-08 20:30:28+01:00 1256137 \n", - "\n", - " created_at updated_at \\\n", - "0 2021-12-28 20:48:51.569237+01:00 2021-12-28 20:48:51.569237+01:00 \n", - "1 2021-12-28 20:31:48.196681+01:00 2022-03-03 17:52:21.958861+01:00 \n", - "2 2021-12-28 20:46:34.703542+01:00 2021-12-28 20:46:34.703542+01:00 \n", - "3 2021-12-28 20:46:34.704773+01:00 2021-12-28 20:46:34.704773+01:00 \n", - "4 2021-12-28 20:46:34.705453+01:00 2021-12-28 20:46:34.705453+01:00 \n", - "... ... ... \n", - "742245 2023-11-09 07:51:33.920187+01:00 2023-11-09 07:51:33.920187+01:00 \n", - "742246 2023-11-09 07:51:33.921967+01:00 2023-11-09 07:51:33.921967+01:00 \n", - "742247 2023-11-09 07:51:33.923034+01:00 2023-11-09 07:51:33.923034+01:00 \n", - "742248 2023-11-09 07:51:33.924135+01:00 2023-11-09 07:51:33.924135+01:00 \n", - "742249 2023-11-09 07:51:33.925382+01:00 2023-11-09 07:51:33.925382+01:00 \n", - "\n", - " number identifier \n", - "0 fa80c83b29a268b45728c910a8afcf79 82877c41df26f832eb823a83acd1a172 \n", - "1 597b6c06adfe6acc539b29b657b80da0 e7102ebe65526c427245533ebabe66e5 \n", - "2 4a7f6baaf9be6a99e3fead7f7e981fa8 af75c4ae53d1b6957875538355b162e1 \n", - "3 1d83dfad44b73070d1c6d5875d0edd2d 4b2fe34659b177209b07270ae1043b40 \n", - "4 7bfe2bc9c1670c973d0960e3fd408cf8 b115f04a99b94df9e4a32185844f0998 \n", - "... ... ... \n", - "742245 99ad774dedbad43feb73514765d2f0ba d68558180b4bf2e8a945724843655775 \n", - "742246 c1511614c511c5f95980172690179102 f5102d910a7731091f239ad7b0df35b4 \n", - "742247 33b64b39cc53428b4f17d65ff5b93104 e2b917626be60cc2c3207cc037fe69e4 \n", - "742248 9ae0b129e704b3d9c093ce9c7c4e5039 5bfa23236c31f8562c3a0233c1b53b31 \n", - "742249 d31ced089c2b1f90479257a4686f9306 d86b1e0de3ff01eaf04fbcd031ac5fef \n", - "\n", - "[742250 rows x 7 columns]" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1_purchases" ] @@ -2068,30 +424,9 @@ { "cell_type": "code", "execution_count": 19, - "id": "3f266a9d-6eee-4b27-b6cc-d401bc2fa0b8", + "id": "5f9a159d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 742250 entries, 0 to 742249\n", - "Data columns (total 7 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 742250 non-null int64 \n", - " 1 purchase_date 742250 non-null object\n", - " 2 customer_id 742250 non-null int64 \n", - " 3 created_at 742250 non-null object\n", - " 4 updated_at 742250 non-null object\n", - " 5 number 742250 non-null object\n", - " 6 identifier 742250 non-null object\n", - "dtypes: int64(2), object(5)\n", - "memory usage: 39.6+ MB\n" - ] - } - ], + "outputs": [], "source": [ "df1_purchases.info()" ] @@ -2099,7 +434,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "8b24ccbc-ccf0-4722-8cd9-8ee8aa90d1fd", + "id": "db201bf7", "metadata": {}, "outputs": [], "source": [ @@ -2111,30 +446,9 @@ { "cell_type": "code", "execution_count": 21, - "id": "27d18584-228f-4698-85d6-4d23151ea5ed", + "id": "bd436fca", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 742250 entries, 0 to 742249\n", - "Data columns (total 7 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 742250 non-null int64 \n", - " 1 purchase_date 742250 non-null datetime64[ns, UTC]\n", - " 2 customer_id 742250 non-null int64 \n", - " 3 created_at 742250 non-null object \n", - " 4 updated_at 742250 non-null object \n", - " 5 number 742250 non-null object \n", - " 6 identifier 742250 non-null object \n", - "dtypes: datetime64[ns, UTC](1), int64(2), object(4)\n", - "memory usage: 39.6+ MB\n" - ] - } - ], + "outputs": [], "source": [ "df1_purchases.info()" ] @@ -2142,7 +456,7 @@ { "cell_type": "code", "execution_count": 22, - "id": "ea22e3a2-2b25-481d-8ebc-194e11a06cd9", + "id": "83435862", "metadata": {}, "outputs": [], "source": [ @@ -2152,7 +466,7 @@ }, { "cell_type": "markdown", - "id": "53227600-c1c5-48aa-9f5d-db5a23a8a22a", + "id": "f210e730", "metadata": { "jp-MarkdownHeadingCollapsed": true }, @@ -2163,7 +477,7 @@ { "cell_type": "code", "execution_count": 23, - "id": "e0b8b47a-b321-4a79-823c-36a131a78ac7", + "id": "1f8b3aa7", "metadata": {}, "outputs": [], "source": [ @@ -2183,224 +497,18 @@ { "cell_type": "code", "execution_count": 24, - "id": "7572e6e7-f28d-43ba-b045-b9fa09e68e1d", + "id": "83a4d021", "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ticket_idproduct_idis_from_subscriptionsupplier_nametype_of_ticket_namechildrenpurchase_datecustomer_id
013070859225251Falsevente en ligneAtelierpricing_formula2018-12-28 14:47:50+00:0048187
113070860224914Falsevente en ligneAtelierpricing_formula2018-12-28 14:47:50+00:0048187
213070861224914Falsevente en ligneAtelierpricing_formula2018-12-28 14:47:50+00:0048187
313070862224914Falsevente en ligneAtelierpricing_formula2018-12-28 14:47:50+00:0048187
413070863224914Falsevente en ligneAtelierpricing_formula2018-12-28 14:47:50+00:0048187
...........................
182666720662815405689Falsevente en ligneAtelierpricing_formula2023-11-08 17:23:54+00:001256135
182666820662816403658Falsevente en ligneAtelierpricing_formula2023-11-08 18:32:18+00:001256136
182666920662817403658Falsevente en ligneAtelierpricing_formula2023-11-08 18:32:18+00:001256136
182667020662818403658Falsevente en ligneAtelierpricing_formula2023-11-08 19:30:28+00:001256137
182667120662819403658Falsevente en ligneAtelierpricing_formula2023-11-08 19:30:28+00:001256137
\n", - "

1826672 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " ticket_id product_id is_from_subscription supplier_name \\\n", - "0 13070859 225251 False vente en ligne \n", - "1 13070860 224914 False vente en ligne \n", - "2 13070861 224914 False vente en ligne \n", - "3 13070862 224914 False vente en ligne \n", - "4 13070863 224914 False vente en ligne \n", - "... ... ... ... ... \n", - "1826667 20662815 405689 False vente en ligne \n", - "1826668 20662816 403658 False vente en ligne \n", - "1826669 20662817 403658 False vente en ligne \n", - "1826670 20662818 403658 False vente en ligne \n", - "1826671 20662819 403658 False vente en ligne \n", - "\n", - " type_of_ticket_name children purchase_date \\\n", - "0 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "1 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "2 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "3 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "4 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "... ... ... ... \n", - "1826667 Atelier pricing_formula 2023-11-08 17:23:54+00:00 \n", - "1826668 Atelier pricing_formula 2023-11-08 18:32:18+00:00 \n", - "1826669 Atelier pricing_formula 2023-11-08 18:32:18+00:00 \n", - "1826670 Atelier pricing_formula 2023-11-08 19:30:28+00:00 \n", - "1826671 Atelier pricing_formula 2023-11-08 19:30:28+00:00 \n", - "\n", - " customer_id \n", - "0 48187 \n", - "1 48187 \n", - "2 48187 \n", - "3 48187 \n", - "4 48187 \n", - "... ... \n", - "1826667 1256135 \n", - "1826668 1256136 \n", - "1826669 1256136 \n", - "1826670 1256137 \n", - "1826671 1256137 \n", - "\n", - "[1826672 rows x 8 columns]" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1_ticket_information" ] }, { "cell_type": "markdown", - "id": "ad2d0059-76d3-44b9-b0eb-0b0ca4d4ba75", + "id": "56e6ebd1", "metadata": { "jp-MarkdownHeadingCollapsed": true }, @@ -2411,25 +519,9 @@ { "cell_type": "code", "execution_count": 51, - "id": "c1afe322-ff41-4760-819e-0195fed5b27d", + "id": "88fcde4b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 20 entries, 0 to 19\n", - "Data columns (total 2 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 opened_at 8 non-null object \n", - " 1 opened_at_clean 8 non-null datetime64[ns, UTC]\n", - "dtypes: datetime64[ns, UTC](1), object(1)\n", - "memory usage: 448.0+ bytes\n" - ] - } - ], + "outputs": [], "source": [ "# Créer un DataFrame exemple\n", "df_not_clean = df1_campaign_stats[['opened_at']].head(20)\n", @@ -2445,7 +537,7 @@ }, { "cell_type": "markdown", - "id": "27ecf058-23eb-4018-abbd-68c4ebe7c786", + "id": "818f69db", "metadata": {}, "source": [ "## Nettoyage, selection et fusion" @@ -2454,190 +546,9 @@ { "cell_type": "code", "execution_count": 23, - "id": "d887898c-6a21-41ed-901d-4d6fdbca5372", + "id": "c9654eda", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ticket_idproduct_idis_from_subscriptiontype_ofsupplier_namepurchase_datecustomer_id
013070859225251False1vente en ligne2018-12-28 14:47:50+00:0048187
113070860224914False1vente en ligne2018-12-28 14:47:50+00:0048187
213070861224914False1vente en ligne2018-12-28 14:47:50+00:0048187
313070862224914False1vente en ligne2018-12-28 14:47:50+00:0048187
413070863224914False1vente en ligne2018-12-28 14:47:50+00:0048187
........................
182666720662815405689False1vente en ligne2023-11-08 17:23:54+00:001256135
182666820662816403658False1vente en ligne2023-11-08 18:32:18+00:001256136
182666920662817403658False1vente en ligne2023-11-08 18:32:18+00:001256136
182667020662818403658False1vente en ligne2023-11-08 19:30:28+00:001256137
182667120662819403658False1vente en ligne2023-11-08 19:30:28+00:001256137
\n", - "

1826672 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " ticket_id product_id is_from_subscription type_of supplier_name \\\n", - "0 13070859 225251 False 1 vente en ligne \n", - "1 13070860 224914 False 1 vente en ligne \n", - "2 13070861 224914 False 1 vente en ligne \n", - "3 13070862 224914 False 1 vente en ligne \n", - "4 13070863 224914 False 1 vente en ligne \n", - "... ... ... ... ... ... \n", - "1826667 20662815 405689 False 1 vente en ligne \n", - "1826668 20662816 403658 False 1 vente en ligne \n", - "1826669 20662817 403658 False 1 vente en ligne \n", - "1826670 20662818 403658 False 1 vente en ligne \n", - "1826671 20662819 403658 False 1 vente en ligne \n", - "\n", - " purchase_date customer_id \n", - "0 2018-12-28 14:47:50+00:00 48187 \n", - "1 2018-12-28 14:47:50+00:00 48187 \n", - "2 2018-12-28 14:47:50+00:00 48187 \n", - "3 2018-12-28 14:47:50+00:00 48187 \n", - "4 2018-12-28 14:47:50+00:00 48187 \n", - "... ... ... \n", - "1826667 2023-11-08 17:23:54+00:00 1256135 \n", - "1826668 2023-11-08 18:32:18+00:00 1256136 \n", - "1826669 2023-11-08 18:32:18+00:00 1256136 \n", - "1826670 2023-11-08 19:30:28+00:00 1256137 \n", - "1826671 2023-11-08 19:30:28+00:00 1256137 \n", - "\n", - "[1826672 rows x 7 columns]" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1_ticket_information" ] @@ -2645,37 +556,16 @@ { "cell_type": "code", "execution_count": 14, - "id": "ac9a6373-c1c6-46b5-873b-dc22f17bcbdb", + "id": "7f2b620c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 1826672 entries, 0 to 1826671\n", - "Data columns (total 7 columns):\n", - " # Column Dtype \n", - "--- ------ ----- \n", - " 0 ticket_id int64 \n", - " 1 product_id int64 \n", - " 2 is_from_subscription bool \n", - " 3 type_of int64 \n", - " 4 supplier_name object \n", - " 5 purchase_date datetime64[ns, UTC]\n", - " 6 customer_id int64 \n", - "dtypes: bool(1), datetime64[ns, UTC](1), int64(4), object(1)\n", - "memory usage: 85.4+ MB\n" - ] - } - ], + "outputs": [], "source": [ "df1_ticket_information.info()" ] }, { "cell_type": "markdown", - "id": "b1719943-89eb-4ba0-a107-2f96d5d01ec9", + "id": "637bdb72", "metadata": {}, "source": [ "# Customer information" @@ -2683,7 +573,7 @@ }, { "cell_type": "markdown", - "id": "a2132ee2-3f22-45fd-b65b-72689c8b672c", + "id": "14c52894", "metadata": { "jp-MarkdownHeadingCollapsed": true }, @@ -2693,15 +583,15 @@ }, { "cell_type": "code", - "execution_count": 60, - "id": "da5d4708-7147-4cc8-8686-52d4bcba5a7a", + "execution_count": 8, + "id": "d83abfbf", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_619/2625134041.py:3: SettingWithCopyWarning: \n", + "/tmp/ipykernel_15285/2625134041.py:3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -2732,21 +622,9 @@ { "cell_type": "code", "execution_count": 62, - "id": "b4fa5fe3-ce8e-4b0a-af94-fb468d241bad", + "id": "90d71b2c", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id 5.080902\n", - "dtype: float64" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1_targets_test = df1_targets_full[['id', 'customer_id']].groupby(['customer_id']).count()\n", "len(df1_targets_test[df1_targets_test['id'] > 1]) / len(df1_targets_test)\n", @@ -2757,11 +635,9 @@ }, { "cell_type": "code", - "execution_count": 57, - "id": "8072bbb7-1360-4882-bb2b-2f43b6beea0d", - "metadata": { - "scrolled": true - }, + "execution_count": 10, + "id": "2301de1e", + "metadata": {}, "outputs": [ { "data": { @@ -2793,226 +669,42 @@ " \n", " \n", " \n", - " 8793\n", - " 4584599\n", - " 1\n", - " consentement optin jeune public\n", + " 0\n", + " 1184824\n", + " 645400\n", + " DDCP PROMO Réseau livres\n", " False\n", " manual_static_filter\n", " \n", " \n", - " 13249\n", - " 4567465\n", - " 1\n", - " DDCP rentrée culturelle 2023\n", + " 1\n", + " 210571\n", + " 2412\n", + " DDCP PROMO Réseau livres\n", " False\n", " manual_static_filter\n", " \n", " \n", - " 21424\n", - " 4544805\n", - " 1\n", - " spectateurs cine dimanche_cine concert_2122\n", + " 2\n", + " 210572\n", + " 4536\n", + " DDCP PROMO Réseau livres\n", " False\n", " manual_static_filter\n", " \n", " \n", - " 21665\n", - " 4544911\n", - " 1\n", - " DDCP Cine 2023\n", + " 3\n", + " 210573\n", + " 6736\n", + " DDCP PROMO Réseau livres\n", " False\n", " manual_static_filter\n", " \n", " \n", - " 22811\n", - " 4545766\n", - " 1\n", - " DDCP OLBJ! 2023\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 57305\n", - " 4457909\n", - " 1\n", - " ddcp_promo_visiteurs occasionnels_musee_8mois\n", - " False\n", - " manual_dynamic_filter\n", - " \n", - " \n", - " 58843\n", - " 3688872\n", - " 1\n", - " DDCP promo livemag\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 66813\n", - " 4313646\n", - " 1\n", - " DDCP spectateurs Classique mais pas que 2022\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 68367\n", - " 4547662\n", - " 1\n", - " ddcp_promo_musee_au moins 3 achats_dps8mois\n", - " False\n", - " manual_dynamic_filter\n", - " \n", - " \n", - " 77320\n", - " 4285520\n", - " 1\n", - " DDCP spectateurs Iminente\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 84350\n", - " 4037805\n", - " 1\n", - " DDCP spectateurs Marseille Jazz 18-19-21\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 85383\n", - " 4569504\n", - " 1\n", - " DDCP rendez-vous de septembre offre spéciale\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 92868\n", - " 4433064\n", - " 1\n", - " ddcp_promo_plein air_ateliers_jardins\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 99670\n", - " 3858684\n", - " 1\n", - " Acid Arab\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 105477\n", - " 4321810\n", - " 1\n", - " Arenametrix_bascule tel vers sib\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 169513\n", - " 3697992\n", - " 1\n", - " ddcp_achats billets nb dps 19052021\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 214421\n", - " 2925324\n", - " 1\n", - " consentement optout scolaires\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 234546\n", - " 4575957\n", - " 1\n", - " Portrait de Leila shahid\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 259808\n", - " 3722259\n", - " 1\n", - " consentement optin b2b\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 274380\n", - " 4510423\n", - " 1\n", - " DDCP_marseille_jazz_2023\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 307511\n", - " 5174466\n", - " 1\n", - " ddcp actoral 21-22\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 357509\n", - " 4442526\n", - " 1\n", - " ddcp musique barvalo\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 392920\n", - " 4390642\n", - " 1\n", - " ddcp_md_promo_spectateurs theatre contempo\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 449620\n", - " 4411897\n", - " 1\n", - " FORMATION _ acheteurs optin last year\n", - " False\n", - " manual_dynamic_filter\n", - " \n", - " \n", - " 503809\n", - " 4734591\n", - " 1\n", - " consentement optin mediation specialisee\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 651222\n", - " 3554426\n", - " 1\n", - " consentement optin b2c\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 654246\n", - " 5182212\n", - " 1\n", - " DDCP spectateurs Festival de Marseille 2023\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 654395\n", - " 5182456\n", - " 1\n", - " rencontres_echelle_spectateurs_2021_2023\n", + " 4\n", + " 210574\n", + " 38210\n", + " DDCP PROMO Réseau livres\n", " False\n", " manual_static_filter\n", " \n", @@ -3021,79 +713,238 @@ "" ], "text/plain": [ - " id customer_id target_name \\\n", - "8793 4584599 1 consentement optin jeune public \n", - "13249 4567465 1 DDCP rentrée culturelle 2023 \n", - "21424 4544805 1 spectateurs cine dimanche_cine concert_2122 \n", - "21665 4544911 1 DDCP Cine 2023 \n", - "22811 4545766 1 DDCP OLBJ! 2023 \n", - "57305 4457909 1 ddcp_promo_visiteurs occasionnels_musee_8mois \n", - "58843 3688872 1 DDCP promo livemag \n", - "66813 4313646 1 DDCP spectateurs Classique mais pas que 2022 \n", - "68367 4547662 1 ddcp_promo_musee_au moins 3 achats_dps8mois \n", - "77320 4285520 1 DDCP spectateurs Iminente \n", - "84350 4037805 1 DDCP spectateurs Marseille Jazz 18-19-21 \n", - "85383 4569504 1 DDCP rendez-vous de septembre offre spéciale \n", - "92868 4433064 1 ddcp_promo_plein air_ateliers_jardins \n", - "99670 3858684 1 Acid Arab \n", - "105477 4321810 1 Arenametrix_bascule tel vers sib \n", - "169513 3697992 1 ddcp_achats billets nb dps 19052021 \n", - "214421 2925324 1 consentement optout scolaires \n", - "234546 4575957 1 Portrait de Leila shahid \n", - "259808 3722259 1 consentement optin b2b \n", - "274380 4510423 1 DDCP_marseille_jazz_2023 \n", - "307511 5174466 1 ddcp actoral 21-22 \n", - "357509 4442526 1 ddcp musique barvalo \n", - "392920 4390642 1 ddcp_md_promo_spectateurs theatre contempo \n", - "449620 4411897 1 FORMATION _ acheteurs optin last year \n", - "503809 4734591 1 consentement optin mediation specialisee \n", - "651222 3554426 1 consentement optin b2c \n", - "654246 5182212 1 DDCP spectateurs Festival de Marseille 2023 \n", - "654395 5182456 1 rencontres_echelle_spectateurs_2021_2023 \n", + " id customer_id target_name target_type_is_import \\\n", + "0 1184824 645400 DDCP PROMO Réseau livres False \n", + "1 210571 2412 DDCP PROMO Réseau livres False \n", + "2 210572 4536 DDCP PROMO Réseau livres False \n", + "3 210573 6736 DDCP PROMO Réseau livres False \n", + "4 210574 38210 DDCP PROMO Réseau livres False \n", "\n", - " target_type_is_import target_type_name \n", - "8793 False manual_static_filter \n", - "13249 False manual_static_filter \n", - "21424 False manual_static_filter \n", - "21665 False manual_static_filter \n", - "22811 False manual_static_filter \n", - "57305 False manual_dynamic_filter \n", - "58843 False manual_static_filter \n", - "66813 False manual_static_filter \n", - "68367 False manual_dynamic_filter \n", - "77320 False manual_static_filter \n", - "84350 False manual_static_filter \n", - "85383 False manual_static_filter \n", - "92868 False manual_static_filter \n", - "99670 False manual_static_filter \n", - "105477 False manual_static_filter \n", - "169513 False manual_static_filter \n", - "214421 False manual_static_filter \n", - "234546 False manual_static_filter \n", - "259808 False manual_static_filter \n", - "274380 False manual_static_filter \n", - "307511 False manual_static_filter \n", - "357509 False manual_static_filter \n", - "392920 False manual_static_filter \n", - "449620 False manual_dynamic_filter \n", - "503809 False manual_static_filter \n", - "651222 False manual_static_filter \n", - "654246 False manual_static_filter \n", - "654395 False manual_static_filter " + " target_type_name \n", + "0 manual_static_filter \n", + "1 manual_static_filter \n", + "2 manual_static_filter \n", + "3 manual_static_filter \n", + "4 manual_static_filter " ] }, - "execution_count": 57, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df1_targets_full[df1_targets_full['customer_id'] == 1]" + "df1_targets_full.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "75fbc2f7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /home/onyxia/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package stopwords to /home/onyxia/nltk_data...\n", + "[nltk_data] Package stopwords is already up-to-date!\n", + "[nltk_data] Downloading package wordnet to /home/onyxia/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Catégorisation des target_name\n", + "import pandas as pd\n", + "import nltk\n", + "from nltk.tokenize import word_tokenize\n", + "from nltk.corpus import stopwords\n", + "from nltk.stem import WordNetLemmatizer\n", + "from nltk.probability import FreqDist\n", + "\n", + "# Téléchargement des ressources nécessaires\n", + "nltk.download('punkt')\n", + "nltk.download('stopwords')\n", + "nltk.download('wordnet')\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "55cddf92", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mots les plus fréquents:\n", + "consentement: 550777\n", + "optin: 463579\n", + "jeune: 155103\n", + "public: 155103\n", + "mediation: 150001\n" + ] + } + ], + "source": [ + "# Définition des fonctions de tokenisation, suppression des mots vides et lemmatisation\n", + "def preprocess_text(texte):\n", + " # Concaténation des éléments de la liste en une seule chaîne de caractères\n", + " texte_concat = ' '.join(texte)\n", + " \n", + " # Tokenisation des mots\n", + " tokens = word_tokenize(texte_concat.lower())\n", + " \n", + " # Suppression des mots vides (stopwords)\n", + " stop_words = set(stopwords.words('french'))\n", + " filtered_tokens = [word for word in tokens if word not in stop_words]\n", + " \n", + " # Lemmatisation des mots\n", + " lemmatizer = WordNetLemmatizer()\n", + " lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n", + " \n", + " return lemmatized_tokens\n", + "\n", + "\n", + "# Appliquer le prétraitement à la colonne de texte\n", + "df1_targets_full['target_name_tokened'] = df1_targets_full['target_name'].apply(preprocess_text)\n", + "\n", + "# Concaténer les listes de mots pour obtenir une liste de tous les mots dans le corpus\n", + "all_words = [word for tokens in df1_targets_full['target_name_tokened'] for word in tokens]\n", + "\n", + "# Calculer la fréquence des mots\n", + "freq_dist = FreqDist(all_words)\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "7fd98a85", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mots les plus fréquents:\n", + "consentement: 550777\n", + "optin: 463579\n", + "jeune: 155103\n", + "public: 155103\n", + "mediation: 150001\n", + "specialisee: 150001\n", + "b2c: 143432\n", + "optout: 97683\n", + "newsletter: 56022\n", + "(: 46084\n", + "): 46084\n", + "inscrits: 42296\n", + "nl: 42294\n", + "générale: 41037\n", + "generale: 40950\n" + ] + } + ], + "source": [ + "# Affichage des mots les plus fréquents\n", + "print(\"Mots les plus fréquents:\")\n", + "for mot, freq in freq_dist.most_common(15):\n", + " print(f\"{mot}: {freq}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "cf94bb1d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " texte \\\n", + "0 Le chat noir mange une souris. \n", + "1 Le chien blanc aboie. \n", + "\n", + " texte_preprocessed \n", + "0 [e, h, a, o, i, r, a, g, e, u, e, o, u, r, i, .] \n", + "1 [e, h, i, e, b, a, a, b, o, i, e, .] \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /home/onyxia/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package stopwords to /home/onyxia/nltk_data...\n", + "[nltk_data] Package stopwords is already up-to-date!\n", + "[nltk_data] Downloading package wordnet to /home/onyxia/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import nltk\n", + "from nltk.tokenize import word_tokenize\n", + "from nltk.corpus import stopwords\n", + "from nltk.stem import WordNetLemmatizer\n", + "\n", + "# Téléchargement des ressources nécessaires\n", + "nltk.download('punkt')\n", + "nltk.download('stopwords')\n", + "nltk.download('wordnet')\n", + "\n", + "# Création de la DataFrame d'exemple\n", + "data = {'texte': [\"Le chat noir mange une souris.\", \"Le chien blanc aboie.\"]}\n", + "df = pd.DataFrame(data)\n", + "\n", + "# Fonction pour prétraiter le texte\n", + "def preprocess_text(texte):\n", + " # Concaténation des éléments de la liste en une seule chaîne de caractères\n", + " texte_concat = ' '.join(texte)\n", + " \n", + " # Tokenisation des mots\n", + " tokens = word_tokenize(texte_concat.lower())\n", + " \n", + " # Suppression des mots vides (stopwords)\n", + " stop_words = set(stopwords.words('french'))\n", + " filtered_tokens = [word for word in tokens if word not in stop_words]\n", + " \n", + " # Lemmatisation des mots\n", + " lemmatizer = WordNetLemmatizer()\n", + " lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n", + " \n", + " return lemmatized_tokens\n", + "\n", + "# Appliquer la fonction de prétraitement à la colonne de texte\n", + "df['texte_preprocessed'] = df['texte'].apply(preprocess_text)\n", + "\n", + "# Afficher le résultat\n", + "print(df)\n" ] }, { "cell_type": "markdown", - "id": "2f665824-a026-4acd-8358-b408a61854b4", + "id": "711d3884", "metadata": { "jp-MarkdownHeadingCollapsed": true }, @@ -3104,34 +955,9 @@ { "cell_type": "code", "execution_count": 52, - "id": "5d05203c-ea30-4208-a29f-fef7737c672e", + "id": "c25b5295", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_9792/1967867975.py:15: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", - "/tmp/ipykernel_9792/1967867975.py:15: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", - "/tmp/ipykernel_9792/1967867975.py:15: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n" - ] - } - ], + "outputs": [], "source": [ "# campaign_stats cleaning \n", "df1_campaign_stats_clean = df1_campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]]\n", @@ -3151,31 +977,9 @@ { "cell_type": "code", "execution_count": 53, - "id": "8ac634cf-2a30-4ccc-a34d-0fd401a49aaa", + "id": "2a3de6a5", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 6214808 entries, 0 to 6214807\n", - "Data columns (total 8 columns):\n", - " # Column Dtype \n", - "--- ------ ----- \n", - " 0 id int64 \n", - " 1 customer_id int64 \n", - " 2 opened_at datetime64[ns, UTC]\n", - " 3 sent_at datetime64[ns, UTC]\n", - " 4 delivered_at datetime64[ns, UTC]\n", - " 5 campaign_name object \n", - " 6 campaign_service_id int64 \n", - " 7 campaign_sent_at datetime64[ns, UTC]\n", - "dtypes: datetime64[ns, UTC](4), int64(3), object(1)\n", - "memory usage: 379.3+ MB\n" - ] - } - ], + "outputs": [], "source": [ "df1_campaigns_full.info()" ] @@ -3183,235 +987,16 @@ { "cell_type": "code", "execution_count": 56, - "id": "7d22cdd5-2060-4922-8e04-27b613d4ee27", + "id": "3fc1f446", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idopened_atsent_atdelivered_atcampaign_namecampaign_service_idcampaign_sent_at
019793112597NaT2021-03-28 16:01:09+00:002021-03-28 16:24:18+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
114211113666NaT2021-03-28 16:01:09+00:002021-03-28 16:21:02+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
213150280561NaT2021-03-28 16:00:59+00:002021-03-28 16:08:45+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
370731010072021-03-28 18:11:06+00:002021-03-28 16:00:59+00:002021-03-28 16:09:47+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
45175103972NaT2021-03-28 16:01:06+00:002021-03-28 16:05:03+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
...........................
621480383029942661552023-10-23 09:43:25+00:002023-10-23 09:32:33+00:002023-10-23 09:32:34+00:00dre_nov_202313182023-10-23 09:31:17+00:00
62148048303307213552023-10-23 09:44:02+00:002023-10-23 09:32:49+00:002023-10-23 09:32:49+00:00dre_nov_202313182023-10-23 09:31:17+00:00
62148058304346218492023-10-23 09:45:52+00:002023-10-23 09:33:28+00:002023-10-23 09:33:29+00:00dre_nov_202313182023-10-23 09:31:17+00:00
621480683020376677892023-10-23 09:47:32+00:002023-10-23 09:31:53+00:002023-10-23 09:31:54+00:00dre_nov_202313182023-10-23 09:31:17+00:00
62148078304939294154NaT2023-10-23 09:33:54+00:002023-10-23 09:33:55+00:00dre_nov_202313182023-10-23 09:31:17+00:00
\n", - "

6214808 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " id customer_id opened_at \\\n", - "0 19793 112597 NaT \n", - "1 14211 113666 NaT \n", - "2 13150 280561 NaT \n", - "3 7073 101007 2021-03-28 18:11:06+00:00 \n", - "4 5175 103972 NaT \n", - "... ... ... ... \n", - "6214803 8302994 266155 2023-10-23 09:43:25+00:00 \n", - "6214804 8303307 21355 2023-10-23 09:44:02+00:00 \n", - "6214805 8304346 21849 2023-10-23 09:45:52+00:00 \n", - "6214806 8302037 667789 2023-10-23 09:47:32+00:00 \n", - "6214807 8304939 294154 NaT \n", - "\n", - " sent_at delivered_at \\\n", - "0 2021-03-28 16:01:09+00:00 2021-03-28 16:24:18+00:00 \n", - "1 2021-03-28 16:01:09+00:00 2021-03-28 16:21:02+00:00 \n", - "2 2021-03-28 16:00:59+00:00 2021-03-28 16:08:45+00:00 \n", - "3 2021-03-28 16:00:59+00:00 2021-03-28 16:09:47+00:00 \n", - "4 2021-03-28 16:01:06+00:00 2021-03-28 16:05:03+00:00 \n", - "... ... ... \n", - "6214803 2023-10-23 09:32:33+00:00 2023-10-23 09:32:34+00:00 \n", - "6214804 2023-10-23 09:32:49+00:00 2023-10-23 09:32:49+00:00 \n", - "6214805 2023-10-23 09:33:28+00:00 2023-10-23 09:33:29+00:00 \n", - "6214806 2023-10-23 09:31:53+00:00 2023-10-23 09:31:54+00:00 \n", - "6214807 2023-10-23 09:33:54+00:00 2023-10-23 09:33:55+00:00 \n", - "\n", - " campaign_name campaign_service_id \\\n", - "0 Le Mucem chez vous, gardons le lien #22 404 \n", - "1 Le Mucem chez vous, gardons le lien #22 404 \n", - "2 Le Mucem chez vous, gardons le lien #22 404 \n", - "3 Le Mucem chez vous, gardons le lien #22 404 \n", - "4 Le Mucem chez vous, gardons le lien #22 404 \n", - "... ... ... \n", - "6214803 dre_nov_2023 1318 \n", - "6214804 dre_nov_2023 1318 \n", - "6214805 dre_nov_2023 1318 \n", - "6214806 dre_nov_2023 1318 \n", - "6214807 dre_nov_2023 1318 \n", - "\n", - " campaign_sent_at \n", - "0 2021-03-27 23:00:00+00:00 \n", - "1 2021-03-27 23:00:00+00:00 \n", - "2 2021-03-27 23:00:00+00:00 \n", - "3 2021-03-27 23:00:00+00:00 \n", - "4 2021-03-27 23:00:00+00:00 \n", - "... ... \n", - "6214803 2023-10-23 09:31:17+00:00 \n", - "6214804 2023-10-23 09:31:17+00:00 \n", - "6214805 2023-10-23 09:31:17+00:00 \n", - "6214806 2023-10-23 09:31:17+00:00 \n", - "6214807 2023-10-23 09:31:17+00:00 \n", - "\n", - "[6214808 rows x 8 columns]" - ] - }, - "execution_count": 56, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1_campaigns_information" ] }, { "cell_type": "markdown", - "id": "0a5b24f0-4bca-4cde-a6ba-eb130b38cac4", + "id": "20e69ee3", "metadata": { "jp-MarkdownHeadingCollapsed": true }, @@ -3422,264 +1007,9 @@ { "cell_type": "code", "execution_count": 37, - "id": "bc63bc4e-6cc1-4d35-9635-faf55339e186", + "id": "d9cbdbce", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnameservice_idcreated_atupdated_atprocess_idreport_urlcategoryto_be_syncedidentifiersent_at
01319613newsletter enseignants janvier 20227212022-01-14 16:06:42.586321+01:002022-02-03 14:17:27.112963+01:00NaNNaN0.0Falseaba3b6fd5d186d28e06ff97135cade7f2022-01-14 00:00:00+01:00
11319586lsf_janvier_20227172022-01-07 11:30:35.315895+01:002022-02-03 14:17:27.116171+01:00NaNNaN0.0False788d986905533aba051261497ecffcbb2022-01-07 00:00:00+01:00
21319282Invitation à déjeuner au Mucem | Vernissage « ...5912021-09-28 12:50:24.448752+02:002022-02-03 14:17:27.119582+01:00NaNNaN0.0False3493894fa4ea036cfc6433c3e2ee63b02021-09-28 00:00:00+02:00
31319283Vacances de la Toussaint - centres des loisirs5902021-09-28 18:01:04.692073+02:002022-02-03 14:17:27.124408+01:00NaNNaN0.0False08b255a5d42b89b0585260b6f2360bdd2021-09-28 00:00:00+02:00
41319636ddcp_promo_md_livemag7302022-01-27 18:00:41.053069+01:002022-02-03 14:17:27.127607+01:00NaNNaN0.0Falsed5cfead94f5350c12c322b5b664544c12022-01-27 00:00:00+01:00
....................................
9521320072dre_gaza01068812022-05-26 09:01:35.523639+02:002022-12-02 17:51:22.614046+01:00NaNNaN0.0False7504adad8bb96320eb3afdd4df6e1f602022-05-26 00:00:00+02:00
953661398DDCP Plan Bis 4 - Marketing direct - MJ5C1832021-06-18 10:30:01.259578+02:002021-09-24 11:56:09.082785+02:00NaNNaN0.0Falsecedebb6e872f539bef8c3f919874e9d72020-07-27 00:00:00+02:00
9541320487Invitation portes ouvertes amitiés9882022-09-29 18:01:33.834090+02:002022-12-02 17:51:23.258324+01:00NaNNaN0.0False9908279ebbf1f9b250ba689db6a0222b2022-09-29 00:00:00+02:00
955906903DDCP PROMO La méditerranée des philosophes #3 ...3102021-07-19 14:07:16.177390+02:002021-09-24 11:56:09.086101+02:00NaNNaN0.0False06eb61b839a0cefee4967c67ccb099dc2020-12-23 00:00:00+01:00
956579313ddcp_promo_automation_manuel_pre_visit4812021-06-08 17:38:54.041310+02:002021-09-24 11:56:09.089394+02:00NaNNaN0.0False9461cce28ebe3e76fb4b931c35a169b02021-06-08 00:00:00+02:00
\n", - "

957 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " id name service_id \\\n", - "0 1319613 newsletter enseignants janvier 2022 721 \n", - "1 1319586 lsf_janvier_2022 717 \n", - "2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n", - "3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n", - "4 1319636 ddcp_promo_md_livemag 730 \n", - ".. ... ... ... \n", - "952 1320072 dre_gaza0106 881 \n", - "953 661398 DDCP Plan Bis 4 - Marketing direct - MJ5C 183 \n", - "954 1320487 Invitation portes ouvertes amitiés 988 \n", - "955 906903 DDCP PROMO La méditerranée des philosophes #3 ... 310 \n", - "956 579313 ddcp_promo_automation_manuel_pre_visit 481 \n", - "\n", - " created_at updated_at \\\n", - "0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n", - "1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n", - "2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n", - "3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n", - "4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n", - ".. ... ... \n", - "952 2022-05-26 09:01:35.523639+02:00 2022-12-02 17:51:22.614046+01:00 \n", - "953 2021-06-18 10:30:01.259578+02:00 2021-09-24 11:56:09.082785+02:00 \n", - "954 2022-09-29 18:01:33.834090+02:00 2022-12-02 17:51:23.258324+01:00 \n", - "955 2021-07-19 14:07:16.177390+02:00 2021-09-24 11:56:09.086101+02:00 \n", - "956 2021-06-08 17:38:54.041310+02:00 2021-09-24 11:56:09.089394+02:00 \n", - "\n", - " process_id report_url category to_be_synced \\\n", - "0 NaN NaN 0.0 False \n", - "1 NaN NaN 0.0 False \n", - "2 NaN NaN 0.0 False \n", - "3 NaN NaN 0.0 False \n", - "4 NaN NaN 0.0 False \n", - ".. ... ... ... ... \n", - "952 NaN NaN 0.0 False \n", - "953 NaN NaN 0.0 False \n", - "954 NaN NaN 0.0 False \n", - "955 NaN NaN 0.0 False \n", - "956 NaN NaN 0.0 False \n", - "\n", - " identifier sent_at \n", - "0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n", - "1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n", - "2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n", - "3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n", - "4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 \n", - ".. ... ... \n", - "952 7504adad8bb96320eb3afdd4df6e1f60 2022-05-26 00:00:00+02:00 \n", - "953 cedebb6e872f539bef8c3f919874e9d7 2020-07-27 00:00:00+02:00 \n", - "954 9908279ebbf1f9b250ba689db6a0222b 2022-09-29 00:00:00+02:00 \n", - "955 06eb61b839a0cefee4967c67ccb099dc 2020-12-23 00:00:00+01:00 \n", - "956 9461cce28ebe3e76fb4b931c35a169b0 2021-06-08 00:00:00+02:00 \n", - "\n", - "[957 rows x 11 columns]" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1_campaigns" ] @@ -3687,185 +1017,16 @@ { "cell_type": "code", "execution_count": 38, - "id": "c19b321f-65f9-4d6c-8c1f-edb2eb9d70e7", + "id": "c07459f0", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idclicked_atlink_idcustomer_idcreated_atupdated_at
012021-03-26 16:30:36+01:0012840332021-03-26 15:30:37.050161+01:002021-03-26 15:30:37.050161+01:00
122021-03-26 17:16:34+01:0021197682021-03-26 16:16:34.950871+01:002021-03-26 16:16:34.950871+01:00
22722021-03-28 20:03:32+02:00421131052021-03-28 18:03:32.736394+02:002021-03-28 18:03:32.736394+02:00
342021-03-26 17:43:19+01:0032722802021-03-26 16:43:19.338321+01:002021-03-26 16:43:19.338321+01:00
452021-03-26 17:46:00+01:0031050952021-03-26 16:46:00.502945+01:002021-03-26 16:46:00.502945+01:00
.....................
1510462435532023-11-09 16:34:27+01:00146669982023-11-09 15:34:29.425425+01:002023-11-09 15:34:29.425425+01:00
1510472435542023-11-09 16:34:35+01:00146709982023-11-09 15:34:37.505505+01:002023-11-09 15:34:37.505505+01:00
1510482435592023-11-09 16:51:15+01:0014686829232023-11-09 15:51:17.439518+01:002023-11-09 15:51:17.439518+01:00
1510492435612023-11-09 16:59:42+01:0014677829232023-11-09 15:59:44.030922+01:002023-11-09 15:59:44.030922+01:00
1510502435642023-11-09 17:16:41+01:001469112543552023-11-09 16:16:43.012932+01:002023-11-09 16:16:43.012932+01:00
\n", - "

151051 rows × 6 columns

\n", - "
" - ], - "text/plain": [ - " id clicked_at link_id customer_id \\\n", - "0 1 2021-03-26 16:30:36+01:00 1 284033 \n", - "1 2 2021-03-26 17:16:34+01:00 2 119768 \n", - "2 272 2021-03-28 20:03:32+02:00 42 113105 \n", - "3 4 2021-03-26 17:43:19+01:00 3 272280 \n", - "4 5 2021-03-26 17:46:00+01:00 3 105095 \n", - "... ... ... ... ... \n", - "151046 243553 2023-11-09 16:34:27+01:00 14666 998 \n", - "151047 243554 2023-11-09 16:34:35+01:00 14670 998 \n", - "151048 243559 2023-11-09 16:51:15+01:00 14686 82923 \n", - "151049 243561 2023-11-09 16:59:42+01:00 14677 82923 \n", - "151050 243564 2023-11-09 17:16:41+01:00 14691 1254355 \n", - "\n", - " created_at updated_at \n", - "0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n", - "1 2021-03-26 16:16:34.950871+01:00 2021-03-26 16:16:34.950871+01:00 \n", - "2 2021-03-28 18:03:32.736394+02:00 2021-03-28 18:03:32.736394+02:00 \n", - "3 2021-03-26 16:43:19.338321+01:00 2021-03-26 16:43:19.338321+01:00 \n", - "4 2021-03-26 16:46:00.502945+01:00 2021-03-26 16:46:00.502945+01:00 \n", - "... ... ... \n", - "151046 2023-11-09 15:34:29.425425+01:00 2023-11-09 15:34:29.425425+01:00 \n", - "151047 2023-11-09 15:34:37.505505+01:00 2023-11-09 15:34:37.505505+01:00 \n", - "151048 2023-11-09 15:51:17.439518+01:00 2023-11-09 15:51:17.439518+01:00 \n", - "151049 2023-11-09 15:59:44.030922+01:00 2023-11-09 15:59:44.030922+01:00 \n", - "151050 2023-11-09 16:16:43.012932+01:00 2023-11-09 16:16:43.012932+01:00 \n", - "\n", - "[151051 rows x 6 columns]" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1_link_stats" ] }, { "cell_type": "markdown", - "id": "96ea2523-38dc-47ef-a49e-2c2d9ad0b1c6", + "id": "80ae4c42", "metadata": {}, "source": [ "## Exploration variables" @@ -3874,7 +1035,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "aaa41688-ea7e-4dba-851c-1f0b0ec43c71", + "id": "b50b8f95", "metadata": {}, "outputs": [], "source": [ @@ -3897,7 +1058,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "2fecc2e1-113f-46ed-9065-0b9ee416166e", + "id": "7e292935", "metadata": {}, "outputs": [], "source": [ @@ -3907,7 +1068,7 @@ { "cell_type": "code", "execution_count": 9, - "id": "55f6170a-36fb-4efb-9810-f982883660cf", + "id": "05b6f2b0", "metadata": {}, "outputs": [ { @@ -3966,7 +1127,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "0030fd02-09e3-42f5-9c83-290458a38c29", + "id": "c9324d80", "metadata": {}, "outputs": [], "source": [ @@ -3981,7 +1142,7 @@ { "cell_type": "code", "execution_count": 11, - "id": "6b1736d1-8fd7-4fcc-9431-b8bf0c7b4f2b", + "id": "10304058", "metadata": {}, "outputs": [ { @@ -4005,7 +1166,7 @@ { "cell_type": "code", "execution_count": 32, - "id": "226b694b-0b00-4167-b69f-3178902254eb", + "id": "ffa423e5", "metadata": {}, "outputs": [], "source": [ @@ -4026,7 +1187,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f51d8836-6eef-47d5-873d-4327e12a3245", + "id": "70bdc88d", "metadata": {}, "outputs": [], "source": [] @@ -4034,7 +1195,7 @@ { "cell_type": "code", "execution_count": 45, - "id": "90b94363-a562-4633-ba27-622422e2368c", + "id": "6a0f567d", "metadata": {}, "outputs": [], "source": [ @@ -4053,7 +1214,7 @@ { "cell_type": "code", "execution_count": 63, - "id": "fedbfbd2-698b-4846-9618-84a3c8d087c7", + "id": "1522d8cd", "metadata": {}, "outputs": [], "source": [ @@ -4063,7 +1224,7 @@ { "cell_type": "code", "execution_count": 66, - "id": "8d365bb5-2ddc-4f68-b415-e21f960c2c0f", + "id": "b0e42a61", "metadata": {}, "outputs": [], "source": [ @@ -4077,7 +1238,7 @@ { "cell_type": "code", "execution_count": 68, - "id": "fc37348d-b282-42ad-b768-c882148d8f66", + "id": "d299ae91", "metadata": {}, "outputs": [ {