Ajout régression logistique

This commit is contained in:
Antoine JOUBREL 2024-02-12 22:49:13 +00:00
parent 197a847085
commit eea21201e9
3 changed files with 383 additions and 394 deletions

View File

@ -1710,7 +1710,7 @@
"def tickets_kpi_function(tickets_information = None):\n", "def tickets_kpi_function(tickets_information = None):\n",
"\n", "\n",
" tickets_information_copy = tickets_information.copy()\n", " tickets_information_copy = tickets_information.copy()\n",
"\n", " \n",
" # Dummy : Canal de vente en ligne\n", " # Dummy : Canal de vente en ligne\n",
" liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online'] # vad = vente à distance\n", " liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online'] # vad = vente à distance\n",
" tickets_information_copy['vente_internet'] = tickets_information_copy['supplier_name'].str.contains('|'.join(liste_mots), case=False).astype(int)\n", " tickets_information_copy['vente_internet'] = tickets_information_copy['supplier_name'].str.contains('|'.join(liste_mots), case=False).astype(int)\n",
@ -2457,24 +2457,24 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 37,
"id": "a89fad43-ee68-4081-9384-3e9f08ec6a59", "id": "a89fad43-ee68-4081-9384-3e9f08ec6a59",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n", "# df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n",
"print(\"shape : \", df1_customer_product.shape)\n", "# print(\"shape : \", df1_customer_product.shape)\n",
"df1_customer_product.head()" "# df1_customer_product.head()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 38,
"id": "a19fec00-4ece-400c-937c-ce5cd8daccfd", "id": "a19fec00-4ece-400c-937c-ce5cd8daccfd",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"df1_customer_product.to_csv(\"customer_product.csv\", index = False)" "# df1_customer_product.to_csv(\"customer_product.csv\", index = False)"
] ]
}, },
{ {
@ -2487,7 +2487,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 38, "execution_count": 39,
"id": "46de1912-4a66-46e5-8b9e-7768b2d2723b", "id": "46de1912-4a66-46e5-8b9e-7768b2d2723b",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -2501,7 +2501,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 39, "execution_count": 40,
"id": "d53825e4-6453-45bc-94f2-7b2504ec4afb", "id": "d53825e4-6453-45bc-94f2-7b2504ec4afb",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -2707,7 +2707,7 @@
"[5 rows x 28 columns]" "[5 rows x 28 columns]"
] ]
}, },
"execution_count": 39, "execution_count": 40,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -2718,7 +2718,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 40, "execution_count": 41,
"id": "1e42a790-b215-4107-a969-85005da06ebd", "id": "1e42a790-b215-4107-a969-85005da06ebd",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -2732,405 +2732,68 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 41, "execution_count": 42,
"id": "d950f24d-a5d1-4f1e-aeaa-ca826470365f", "id": "d950f24d-a5d1-4f1e-aeaa-ca826470365f",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>event_type_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>...</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>age</th>\n",
" <th>tenant_id</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2.0</td>\n",
" <td>384226.0</td>\n",
" <td>194790.0</td>\n",
" <td>2686540.5</td>\n",
" <td>7.0</td>\n",
" <td>1.0</td>\n",
" <td>3262.190868</td>\n",
" <td>4.179306</td>\n",
" <td>3258.011562</td>\n",
" <td>...</td>\n",
" <td>1.956087</td>\n",
" <td>8821221.5</td>\n",
" <td>641472.0</td>\n",
" <td>2013-06-10 10:37:58+00:00</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1311.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>4.0</td>\n",
" <td>453242.0</td>\n",
" <td>228945.0</td>\n",
" <td>3248965.5</td>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" <td>3698.198229</td>\n",
" <td>5.221840</td>\n",
" <td>3692.976389</td>\n",
" <td>...</td>\n",
" <td>1.956087</td>\n",
" <td>8821221.5</td>\n",
" <td>641472.0</td>\n",
" <td>2013-06-10 10:37:58+00:00</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1311.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>5.0</td>\n",
" <td>201750.0</td>\n",
" <td>107110.0</td>\n",
" <td>1459190.0</td>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" <td>3803.369792</td>\n",
" <td>0.146331</td>\n",
" <td>3803.223461</td>\n",
" <td>...</td>\n",
" <td>1.956087</td>\n",
" <td>8821221.5</td>\n",
" <td>641472.0</td>\n",
" <td>2013-06-10 10:37:58+00:00</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1311.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>6.0</td>\n",
" <td>217356.0</td>\n",
" <td>111786.0</td>\n",
" <td>1435871.5</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>2502.715509</td>\n",
" <td>1408.715532</td>\n",
" <td>1093.999977</td>\n",
" <td>...</td>\n",
" <td>1.956087</td>\n",
" <td>8821221.5</td>\n",
" <td>641472.0</td>\n",
" <td>2013-06-10 10:37:58+00:00</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1311.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>2.0</td>\n",
" <td>143.0</td>\n",
" <td>143.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>2041.274549</td>\n",
" <td>1340.308160</td>\n",
" <td>700.966389</td>\n",
" <td>...</td>\n",
" <td>1.000000</td>\n",
" <td>0.0</td>\n",
" <td>307.0</td>\n",
" <td>2018-04-07 12:55:07+00:00</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1311.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156291</th>\n",
" <td>1256133</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>33.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.110521</td>\n",
" <td>0.110521</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156292</th>\n",
" <td>1256134</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>44.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.092095</td>\n",
" <td>0.092095</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156293</th>\n",
" <td>1256135</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.087894</td>\n",
" <td>0.087894</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156294</th>\n",
" <td>1256136</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>22.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.040394</td>\n",
" <td>0.040394</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156295</th>\n",
" <td>1256137</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>22.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>156296 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [ "text/plain": [
" customer_id event_type_id nb_tickets nb_purchases total_amount \\\n", "customer_id 0\n",
"0 1 2.0 384226.0 194790.0 2686540.5 \n", "event_type_id 78355\n",
"1 1 4.0 453242.0 228945.0 3248965.5 \n", "nb_tickets 0\n",
"2 1 5.0 201750.0 107110.0 1459190.0 \n", "nb_purchases 0\n",
"3 1 6.0 217356.0 111786.0 1435871.5 \n", "total_amount 0\n",
"4 2 2.0 143.0 143.0 0.0 \n", "nb_suppliers 0\n",
"... ... ... ... ... ... \n", "vente_internet_max 0\n",
"156291 1256133 5.0 3.0 1.0 33.0 \n", "purchase_date_min 78355\n",
"156292 1256134 5.0 4.0 1.0 44.0 \n", "purchase_date_max 78355\n",
"156293 1256135 5.0 1.0 1.0 11.0 \n", "time_between_purchase 78355\n",
"156294 1256136 5.0 2.0 1.0 22.0 \n", "nb_tickets_internet 0\n",
"156295 1256137 5.0 2.0 1.0 22.0 \n", "name_event_types 78355\n",
"\n", "avg_amount 78355\n",
" nb_suppliers vente_internet_max purchase_date_min \\\n", "birthdate 149382\n",
"0 7.0 1.0 3262.190868 \n", "street_id 7\n",
"1 6.0 1.0 3698.198229 \n", "is_partner 7\n",
"2 6.0 1.0 3803.369792 \n", "gender 7\n",
"3 5.0 1.0 2502.715509 \n", "is_email_true 7\n",
"4 1.0 0.0 2041.274549 \n", "opt_in 7\n",
"... ... ... ... \n", "structure_id 136874\n",
"156291 1.0 1.0 0.110521 \n", "profession 150011\n",
"156292 1.0 1.0 0.092095 \n", "language 155191\n",
"156293 1.0 1.0 0.087894 \n", "mcp_contact_id 53526\n",
"156294 1.0 1.0 0.040394 \n", "last_buying_date 78452\n",
"156295 1.0 1.0 0.000000 \n", "max_price 78452\n",
"\n", "ticket_sum 7\n",
" purchase_date_max time_between_purchase ... average_ticket_basket \\\n", "average_price 13127\n",
"0 4.179306 3258.011562 ... 1.956087 \n", "fidelity 7\n",
"1 5.221840 3692.976389 ... 1.956087 \n", "average_purchase_delay 78452\n",
"2 0.146331 3803.223461 ... 1.956087 \n", "average_price_basket 78452\n",
"3 1408.715532 1093.999977 ... 1.956087 \n", "average_ticket_basket 78452\n",
"4 1340.308160 700.966389 ... 1.000000 \n", "total_price 65332\n",
"... ... ... ... ... \n", "purchase_count 7\n",
"156291 0.110521 0.000000 ... NaN \n", "first_buying_date 78452\n",
"156292 0.092095 0.000000 ... NaN \n", "country 8311\n",
"156293 0.087894 0.000000 ... NaN \n", "age 149382\n",
"156294 0.040394 0.000000 ... NaN \n", "tenant_id 7\n",
"156295 0.000000 0.000000 ... NaN \n", "nb_campaigns 7\n",
"\n", "nb_campaigns_opened 7\n",
" total_price purchase_count first_buying_date country age \\\n", "time_to_open 69024\n",
"0 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n", "dtype: int64"
"1 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
"2 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
"3 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
"4 0.0 307.0 2018-04-07 12:55:07+00:00 fr NaN \n",
"... ... ... ... ... .. \n",
"156291 NaN NaN NaT NaN NaN \n",
"156292 NaN NaN NaT NaN NaN \n",
"156293 NaN NaN NaT NaN NaN \n",
"156294 NaN NaN NaT NaN NaN \n",
"156295 NaN NaN NaT NaN NaN \n",
"\n",
" tenant_id nb_campaigns nb_campaigns_opened time_to_open \n",
"0 1311.0 0.0 0.0 NaT \n",
"1 1311.0 0.0 0.0 NaT \n",
"2 1311.0 0.0 0.0 NaT \n",
"3 1311.0 0.0 0.0 NaT \n",
"4 1311.0 4.0 0.0 NaT \n",
"... ... ... ... ... \n",
"156291 NaN NaN NaN NaT \n",
"156292 NaN NaN NaN NaT \n",
"156293 NaN NaN NaN NaT \n",
"156294 NaN NaN NaN NaT \n",
"156295 NaN NaN NaN NaT \n",
"\n",
"[156296 rows x 40 columns]"
] ]
}, },
"execution_count": 41, "execution_count": 42,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"df1_customer_product" "df1_customer_product.isna().sum()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 42, "execution_count": 43,
"id": "ebf6d843-dcc0-4e83-b063-94806c0bac17", "id": "ebf6d843-dcc0-4e83-b063-94806c0bac17",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],

View File

@ -79,4 +79,6 @@ def tickets_kpi_function(tickets_information = None):
tickets_kpi = tickets_kpi.merge(avg_amount, how='left', on= 'event_type_id') tickets_kpi = tickets_kpi.merge(avg_amount, how='left', on= 'event_type_id')
return tickets_kpi return tickets_kpi

File diff suppressed because one or more lines are too long