Ajout régression logistique
This commit is contained in:
parent
197a847085
commit
eea21201e9
|
@ -2457,24 +2457,24 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 37,
|
||||
"id": "a89fad43-ee68-4081-9384-3e9f08ec6a59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n",
|
||||
"print(\"shape : \", df1_customer_product.shape)\n",
|
||||
"df1_customer_product.head()"
|
||||
"# df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n",
|
||||
"# print(\"shape : \", df1_customer_product.shape)\n",
|
||||
"# df1_customer_product.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 38,
|
||||
"id": "a19fec00-4ece-400c-937c-ce5cd8daccfd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df1_customer_product.to_csv(\"customer_product.csv\", index = False)"
|
||||
"# df1_customer_product.to_csv(\"customer_product.csv\", index = False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2487,7 +2487,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"execution_count": 39,
|
||||
"id": "46de1912-4a66-46e5-8b9e-7768b2d2723b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -2501,7 +2501,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"execution_count": 40,
|
||||
"id": "d53825e4-6453-45bc-94f2-7b2504ec4afb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -2707,7 +2707,7 @@
|
|||
"[5 rows x 28 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 39,
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -2718,7 +2718,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"execution_count": 41,
|
||||
"id": "1e42a790-b215-4107-a969-85005da06ebd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -2732,405 +2732,68 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"execution_count": 42,
|
||||
"id": "d950f24d-a5d1-4f1e-aeaa-ca826470365f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>customer_id</th>\n",
|
||||
" <th>event_type_id</th>\n",
|
||||
" <th>nb_tickets</th>\n",
|
||||
" <th>nb_purchases</th>\n",
|
||||
" <th>total_amount</th>\n",
|
||||
" <th>nb_suppliers</th>\n",
|
||||
" <th>vente_internet_max</th>\n",
|
||||
" <th>purchase_date_min</th>\n",
|
||||
" <th>purchase_date_max</th>\n",
|
||||
" <th>time_between_purchase</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>average_ticket_basket</th>\n",
|
||||
" <th>total_price</th>\n",
|
||||
" <th>purchase_count</th>\n",
|
||||
" <th>first_buying_date</th>\n",
|
||||
" <th>country</th>\n",
|
||||
" <th>age</th>\n",
|
||||
" <th>tenant_id</th>\n",
|
||||
" <th>nb_campaigns</th>\n",
|
||||
" <th>nb_campaigns_opened</th>\n",
|
||||
" <th>time_to_open</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>384226.0</td>\n",
|
||||
" <td>194790.0</td>\n",
|
||||
" <td>2686540.5</td>\n",
|
||||
" <td>7.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>3262.190868</td>\n",
|
||||
" <td>4.179306</td>\n",
|
||||
" <td>3258.011562</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.956087</td>\n",
|
||||
" <td>8821221.5</td>\n",
|
||||
" <td>641472.0</td>\n",
|
||||
" <td>2013-06-10 10:37:58+00:00</td>\n",
|
||||
" <td>fr</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1311.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>453242.0</td>\n",
|
||||
" <td>228945.0</td>\n",
|
||||
" <td>3248965.5</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>3698.198229</td>\n",
|
||||
" <td>5.221840</td>\n",
|
||||
" <td>3692.976389</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.956087</td>\n",
|
||||
" <td>8821221.5</td>\n",
|
||||
" <td>641472.0</td>\n",
|
||||
" <td>2013-06-10 10:37:58+00:00</td>\n",
|
||||
" <td>fr</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1311.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>201750.0</td>\n",
|
||||
" <td>107110.0</td>\n",
|
||||
" <td>1459190.0</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>3803.369792</td>\n",
|
||||
" <td>0.146331</td>\n",
|
||||
" <td>3803.223461</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.956087</td>\n",
|
||||
" <td>8821221.5</td>\n",
|
||||
" <td>641472.0</td>\n",
|
||||
" <td>2013-06-10 10:37:58+00:00</td>\n",
|
||||
" <td>fr</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1311.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>217356.0</td>\n",
|
||||
" <td>111786.0</td>\n",
|
||||
" <td>1435871.5</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>2502.715509</td>\n",
|
||||
" <td>1408.715532</td>\n",
|
||||
" <td>1093.999977</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.956087</td>\n",
|
||||
" <td>8821221.5</td>\n",
|
||||
" <td>641472.0</td>\n",
|
||||
" <td>2013-06-10 10:37:58+00:00</td>\n",
|
||||
" <td>fr</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1311.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>143.0</td>\n",
|
||||
" <td>143.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>2041.274549</td>\n",
|
||||
" <td>1340.308160</td>\n",
|
||||
" <td>700.966389</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>307.0</td>\n",
|
||||
" <td>2018-04-07 12:55:07+00:00</td>\n",
|
||||
" <td>fr</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1311.0</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>156291</th>\n",
|
||||
" <td>1256133</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>33.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.110521</td>\n",
|
||||
" <td>0.110521</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>156292</th>\n",
|
||||
" <td>1256134</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>44.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.092095</td>\n",
|
||||
" <td>0.092095</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>156293</th>\n",
|
||||
" <td>1256135</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>11.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.087894</td>\n",
|
||||
" <td>0.087894</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>156294</th>\n",
|
||||
" <td>1256136</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>22.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.040394</td>\n",
|
||||
" <td>0.040394</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>156295</th>\n",
|
||||
" <td>1256137</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>22.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>156296 rows × 40 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" customer_id event_type_id nb_tickets nb_purchases total_amount \\\n",
|
||||
"0 1 2.0 384226.0 194790.0 2686540.5 \n",
|
||||
"1 1 4.0 453242.0 228945.0 3248965.5 \n",
|
||||
"2 1 5.0 201750.0 107110.0 1459190.0 \n",
|
||||
"3 1 6.0 217356.0 111786.0 1435871.5 \n",
|
||||
"4 2 2.0 143.0 143.0 0.0 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"156291 1256133 5.0 3.0 1.0 33.0 \n",
|
||||
"156292 1256134 5.0 4.0 1.0 44.0 \n",
|
||||
"156293 1256135 5.0 1.0 1.0 11.0 \n",
|
||||
"156294 1256136 5.0 2.0 1.0 22.0 \n",
|
||||
"156295 1256137 5.0 2.0 1.0 22.0 \n",
|
||||
"\n",
|
||||
" nb_suppliers vente_internet_max purchase_date_min \\\n",
|
||||
"0 7.0 1.0 3262.190868 \n",
|
||||
"1 6.0 1.0 3698.198229 \n",
|
||||
"2 6.0 1.0 3803.369792 \n",
|
||||
"3 5.0 1.0 2502.715509 \n",
|
||||
"4 1.0 0.0 2041.274549 \n",
|
||||
"... ... ... ... \n",
|
||||
"156291 1.0 1.0 0.110521 \n",
|
||||
"156292 1.0 1.0 0.092095 \n",
|
||||
"156293 1.0 1.0 0.087894 \n",
|
||||
"156294 1.0 1.0 0.040394 \n",
|
||||
"156295 1.0 1.0 0.000000 \n",
|
||||
"\n",
|
||||
" purchase_date_max time_between_purchase ... average_ticket_basket \\\n",
|
||||
"0 4.179306 3258.011562 ... 1.956087 \n",
|
||||
"1 5.221840 3692.976389 ... 1.956087 \n",
|
||||
"2 0.146331 3803.223461 ... 1.956087 \n",
|
||||
"3 1408.715532 1093.999977 ... 1.956087 \n",
|
||||
"4 1340.308160 700.966389 ... 1.000000 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"156291 0.110521 0.000000 ... NaN \n",
|
||||
"156292 0.092095 0.000000 ... NaN \n",
|
||||
"156293 0.087894 0.000000 ... NaN \n",
|
||||
"156294 0.040394 0.000000 ... NaN \n",
|
||||
"156295 0.000000 0.000000 ... NaN \n",
|
||||
"\n",
|
||||
" total_price purchase_count first_buying_date country age \\\n",
|
||||
"0 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
|
||||
"1 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
|
||||
"2 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
|
||||
"3 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
|
||||
"4 0.0 307.0 2018-04-07 12:55:07+00:00 fr NaN \n",
|
||||
"... ... ... ... ... .. \n",
|
||||
"156291 NaN NaN NaT NaN NaN \n",
|
||||
"156292 NaN NaN NaT NaN NaN \n",
|
||||
"156293 NaN NaN NaT NaN NaN \n",
|
||||
"156294 NaN NaN NaT NaN NaN \n",
|
||||
"156295 NaN NaN NaT NaN NaN \n",
|
||||
"\n",
|
||||
" tenant_id nb_campaigns nb_campaigns_opened time_to_open \n",
|
||||
"0 1311.0 0.0 0.0 NaT \n",
|
||||
"1 1311.0 0.0 0.0 NaT \n",
|
||||
"2 1311.0 0.0 0.0 NaT \n",
|
||||
"3 1311.0 0.0 0.0 NaT \n",
|
||||
"4 1311.0 4.0 0.0 NaT \n",
|
||||
"... ... ... ... ... \n",
|
||||
"156291 NaN NaN NaN NaT \n",
|
||||
"156292 NaN NaN NaN NaT \n",
|
||||
"156293 NaN NaN NaN NaT \n",
|
||||
"156294 NaN NaN NaN NaT \n",
|
||||
"156295 NaN NaN NaN NaT \n",
|
||||
"\n",
|
||||
"[156296 rows x 40 columns]"
|
||||
"customer_id 0\n",
|
||||
"event_type_id 78355\n",
|
||||
"nb_tickets 0\n",
|
||||
"nb_purchases 0\n",
|
||||
"total_amount 0\n",
|
||||
"nb_suppliers 0\n",
|
||||
"vente_internet_max 0\n",
|
||||
"purchase_date_min 78355\n",
|
||||
"purchase_date_max 78355\n",
|
||||
"time_between_purchase 78355\n",
|
||||
"nb_tickets_internet 0\n",
|
||||
"name_event_types 78355\n",
|
||||
"avg_amount 78355\n",
|
||||
"birthdate 149382\n",
|
||||
"street_id 7\n",
|
||||
"is_partner 7\n",
|
||||
"gender 7\n",
|
||||
"is_email_true 7\n",
|
||||
"opt_in 7\n",
|
||||
"structure_id 136874\n",
|
||||
"profession 150011\n",
|
||||
"language 155191\n",
|
||||
"mcp_contact_id 53526\n",
|
||||
"last_buying_date 78452\n",
|
||||
"max_price 78452\n",
|
||||
"ticket_sum 7\n",
|
||||
"average_price 13127\n",
|
||||
"fidelity 7\n",
|
||||
"average_purchase_delay 78452\n",
|
||||
"average_price_basket 78452\n",
|
||||
"average_ticket_basket 78452\n",
|
||||
"total_price 65332\n",
|
||||
"purchase_count 7\n",
|
||||
"first_buying_date 78452\n",
|
||||
"country 8311\n",
|
||||
"age 149382\n",
|
||||
"tenant_id 7\n",
|
||||
"nb_campaigns 7\n",
|
||||
"nb_campaigns_opened 7\n",
|
||||
"time_to_open 69024\n",
|
||||
"dtype: int64"
|
||||
]
|
||||
},
|
||||
"execution_count": 41,
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df1_customer_product"
|
||||
"df1_customer_product.isna().sum()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"execution_count": 43,
|
||||
"id": "ebf6d843-dcc0-4e83-b063-94806c0bac17",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
|
@ -80,3 +80,5 @@ def tickets_kpi_function(tickets_information = None):
|
|||
|
||||
return tickets_kpi
|
||||
|
||||
|
||||
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user