Ajout régression logistique

This commit is contained in:
Antoine JOUBREL 2024-02-12 22:49:13 +00:00
parent 197a847085
commit eea21201e9
3 changed files with 383 additions and 394 deletions

View File

@ -1710,7 +1710,7 @@
"def tickets_kpi_function(tickets_information = None):\n",
"\n",
" tickets_information_copy = tickets_information.copy()\n",
"\n",
" \n",
" # Dummy : Canal de vente en ligne\n",
" liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online'] # vad = vente à distance\n",
" tickets_information_copy['vente_internet'] = tickets_information_copy['supplier_name'].str.contains('|'.join(liste_mots), case=False).astype(int)\n",
@ -2457,24 +2457,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 37,
"id": "a89fad43-ee68-4081-9384-3e9f08ec6a59",
"metadata": {},
"outputs": [],
"source": [
"df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n",
"print(\"shape : \", df1_customer_product.shape)\n",
"df1_customer_product.head()"
"# df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n",
"# print(\"shape : \", df1_customer_product.shape)\n",
"# df1_customer_product.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 38,
"id": "a19fec00-4ece-400c-937c-ce5cd8daccfd",
"metadata": {},
"outputs": [],
"source": [
"df1_customer_product.to_csv(\"customer_product.csv\", index = False)"
"# df1_customer_product.to_csv(\"customer_product.csv\", index = False)"
]
},
{
@ -2487,7 +2487,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 39,
"id": "46de1912-4a66-46e5-8b9e-7768b2d2723b",
"metadata": {},
"outputs": [],
@ -2501,7 +2501,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 40,
"id": "d53825e4-6453-45bc-94f2-7b2504ec4afb",
"metadata": {},
"outputs": [
@ -2707,7 +2707,7 @@
"[5 rows x 28 columns]"
]
},
"execution_count": 39,
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
@ -2718,7 +2718,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 41,
"id": "1e42a790-b215-4107-a969-85005da06ebd",
"metadata": {},
"outputs": [],
@ -2732,405 +2732,68 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 42,
"id": "d950f24d-a5d1-4f1e-aeaa-ca826470365f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>event_type_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>...</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>age</th>\n",
" <th>tenant_id</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2.0</td>\n",
" <td>384226.0</td>\n",
" <td>194790.0</td>\n",
" <td>2686540.5</td>\n",
" <td>7.0</td>\n",
" <td>1.0</td>\n",
" <td>3262.190868</td>\n",
" <td>4.179306</td>\n",
" <td>3258.011562</td>\n",
" <td>...</td>\n",
" <td>1.956087</td>\n",
" <td>8821221.5</td>\n",
" <td>641472.0</td>\n",
" <td>2013-06-10 10:37:58+00:00</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1311.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>4.0</td>\n",
" <td>453242.0</td>\n",
" <td>228945.0</td>\n",
" <td>3248965.5</td>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" <td>3698.198229</td>\n",
" <td>5.221840</td>\n",
" <td>3692.976389</td>\n",
" <td>...</td>\n",
" <td>1.956087</td>\n",
" <td>8821221.5</td>\n",
" <td>641472.0</td>\n",
" <td>2013-06-10 10:37:58+00:00</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1311.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>5.0</td>\n",
" <td>201750.0</td>\n",
" <td>107110.0</td>\n",
" <td>1459190.0</td>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" <td>3803.369792</td>\n",
" <td>0.146331</td>\n",
" <td>3803.223461</td>\n",
" <td>...</td>\n",
" <td>1.956087</td>\n",
" <td>8821221.5</td>\n",
" <td>641472.0</td>\n",
" <td>2013-06-10 10:37:58+00:00</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1311.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>6.0</td>\n",
" <td>217356.0</td>\n",
" <td>111786.0</td>\n",
" <td>1435871.5</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>2502.715509</td>\n",
" <td>1408.715532</td>\n",
" <td>1093.999977</td>\n",
" <td>...</td>\n",
" <td>1.956087</td>\n",
" <td>8821221.5</td>\n",
" <td>641472.0</td>\n",
" <td>2013-06-10 10:37:58+00:00</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1311.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>2.0</td>\n",
" <td>143.0</td>\n",
" <td>143.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>2041.274549</td>\n",
" <td>1340.308160</td>\n",
" <td>700.966389</td>\n",
" <td>...</td>\n",
" <td>1.000000</td>\n",
" <td>0.0</td>\n",
" <td>307.0</td>\n",
" <td>2018-04-07 12:55:07+00:00</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1311.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156291</th>\n",
" <td>1256133</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>33.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.110521</td>\n",
" <td>0.110521</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156292</th>\n",
" <td>1256134</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>44.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.092095</td>\n",
" <td>0.092095</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156293</th>\n",
" <td>1256135</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.087894</td>\n",
" <td>0.087894</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156294</th>\n",
" <td>1256136</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>22.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.040394</td>\n",
" <td>0.040394</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156295</th>\n",
" <td>1256137</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>22.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>156296 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id event_type_id nb_tickets nb_purchases total_amount \\\n",
"0 1 2.0 384226.0 194790.0 2686540.5 \n",
"1 1 4.0 453242.0 228945.0 3248965.5 \n",
"2 1 5.0 201750.0 107110.0 1459190.0 \n",
"3 1 6.0 217356.0 111786.0 1435871.5 \n",
"4 2 2.0 143.0 143.0 0.0 \n",
"... ... ... ... ... ... \n",
"156291 1256133 5.0 3.0 1.0 33.0 \n",
"156292 1256134 5.0 4.0 1.0 44.0 \n",
"156293 1256135 5.0 1.0 1.0 11.0 \n",
"156294 1256136 5.0 2.0 1.0 22.0 \n",
"156295 1256137 5.0 2.0 1.0 22.0 \n",
"\n",
" nb_suppliers vente_internet_max purchase_date_min \\\n",
"0 7.0 1.0 3262.190868 \n",
"1 6.0 1.0 3698.198229 \n",
"2 6.0 1.0 3803.369792 \n",
"3 5.0 1.0 2502.715509 \n",
"4 1.0 0.0 2041.274549 \n",
"... ... ... ... \n",
"156291 1.0 1.0 0.110521 \n",
"156292 1.0 1.0 0.092095 \n",
"156293 1.0 1.0 0.087894 \n",
"156294 1.0 1.0 0.040394 \n",
"156295 1.0 1.0 0.000000 \n",
"\n",
" purchase_date_max time_between_purchase ... average_ticket_basket \\\n",
"0 4.179306 3258.011562 ... 1.956087 \n",
"1 5.221840 3692.976389 ... 1.956087 \n",
"2 0.146331 3803.223461 ... 1.956087 \n",
"3 1408.715532 1093.999977 ... 1.956087 \n",
"4 1340.308160 700.966389 ... 1.000000 \n",
"... ... ... ... ... \n",
"156291 0.110521 0.000000 ... NaN \n",
"156292 0.092095 0.000000 ... NaN \n",
"156293 0.087894 0.000000 ... NaN \n",
"156294 0.040394 0.000000 ... NaN \n",
"156295 0.000000 0.000000 ... NaN \n",
"\n",
" total_price purchase_count first_buying_date country age \\\n",
"0 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
"1 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
"2 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
"3 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
"4 0.0 307.0 2018-04-07 12:55:07+00:00 fr NaN \n",
"... ... ... ... ... .. \n",
"156291 NaN NaN NaT NaN NaN \n",
"156292 NaN NaN NaT NaN NaN \n",
"156293 NaN NaN NaT NaN NaN \n",
"156294 NaN NaN NaT NaN NaN \n",
"156295 NaN NaN NaT NaN NaN \n",
"\n",
" tenant_id nb_campaigns nb_campaigns_opened time_to_open \n",
"0 1311.0 0.0 0.0 NaT \n",
"1 1311.0 0.0 0.0 NaT \n",
"2 1311.0 0.0 0.0 NaT \n",
"3 1311.0 0.0 0.0 NaT \n",
"4 1311.0 4.0 0.0 NaT \n",
"... ... ... ... ... \n",
"156291 NaN NaN NaN NaT \n",
"156292 NaN NaN NaN NaT \n",
"156293 NaN NaN NaN NaT \n",
"156294 NaN NaN NaN NaT \n",
"156295 NaN NaN NaN NaT \n",
"\n",
"[156296 rows x 40 columns]"
"customer_id 0\n",
"event_type_id 78355\n",
"nb_tickets 0\n",
"nb_purchases 0\n",
"total_amount 0\n",
"nb_suppliers 0\n",
"vente_internet_max 0\n",
"purchase_date_min 78355\n",
"purchase_date_max 78355\n",
"time_between_purchase 78355\n",
"nb_tickets_internet 0\n",
"name_event_types 78355\n",
"avg_amount 78355\n",
"birthdate 149382\n",
"street_id 7\n",
"is_partner 7\n",
"gender 7\n",
"is_email_true 7\n",
"opt_in 7\n",
"structure_id 136874\n",
"profession 150011\n",
"language 155191\n",
"mcp_contact_id 53526\n",
"last_buying_date 78452\n",
"max_price 78452\n",
"ticket_sum 7\n",
"average_price 13127\n",
"fidelity 7\n",
"average_purchase_delay 78452\n",
"average_price_basket 78452\n",
"average_ticket_basket 78452\n",
"total_price 65332\n",
"purchase_count 7\n",
"first_buying_date 78452\n",
"country 8311\n",
"age 149382\n",
"tenant_id 7\n",
"nb_campaigns 7\n",
"nb_campaigns_opened 7\n",
"time_to_open 69024\n",
"dtype: int64"
]
},
"execution_count": 41,
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1_customer_product"
"df1_customer_product.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 43,
"id": "ebf6d843-dcc0-4e83-b063-94806c0bac17",
"metadata": {},
"outputs": [],

View File

@ -80,3 +80,5 @@ def tickets_kpi_function(tickets_information = None):
return tickets_kpi

File diff suppressed because one or more lines are too long