This commit is contained in:
Fanta RODRIGUE 2024-03-05 01:44:01 +00:00
parent dbb90fb364
commit 1ec5b8743f

View File

@ -310,7 +310,50 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 10,
"id": "f8546992-f425-4d1e-ad75-ad26a8052a18",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'projet' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[10], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mprojet\u001b[49m\u001b[38;5;241m-\u001b[39mbdc2324\u001b[38;5;241m-\u001b[39mteam1\u001b[38;5;241m/\u001b[39mGeneralization\u001b[38;5;241m/\u001b[39mmusique\u001b[38;5;241m/\u001b[39mTrain_test\n",
"\u001b[0;31mNameError\u001b[0m: name 'projet' is not defined"
]
}
],
"source": [
"projet-bdc2324-team1/Generalization/musique/Train_test"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "0dd34710-6da2-4438-9e1d-0ac092c1d28c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(343126, 41)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "a3bfeeb6-2db0-4f1d-866c-8721343e97c5", "id": "a3bfeeb6-2db0-4f1d-866c-8721343e97c5",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -361,7 +404,7 @@
"dtype: float64" "dtype: float64"
] ]
}, },
"execution_count": 6, "execution_count": 9,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -372,422 +415,25 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 8,
"id": "a4c4e994-231b-4467-aa1b-0a5283c59dd5", "id": "75f9a672-641f-49a2-a8d6-7673845506f5",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>14_1</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>70.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1464.938449</td>\n",
" <td>1464.938021</td>\n",
" <td>0.000428</td>\n",
" <td>2.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>9.0</td>\n",
" <td>1.0</td>\n",
" <td>0 days 00:36:13</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>14_2</td>\n",
" <td>7.0</td>\n",
" <td>3.0</td>\n",
" <td>145.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>1466.030116</td>\n",
" <td>365.335000</td>\n",
" <td>1100.695116</td>\n",
" <td>7.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>9.0</td>\n",
" <td>4.0</td>\n",
" <td>0 days 02:30:09.250000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>14_3</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>70.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1476.907894</td>\n",
" <td>1476.907662</td>\n",
" <td>0.000231</td>\n",
" <td>2.0</td>\n",
" <td>...</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" <td>0 days 20:58:45</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14_4</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>32.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1465.907894</td>\n",
" <td>1465.907465</td>\n",
" <td>0.000428</td>\n",
" <td>2.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>6.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14_5</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>70.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1465.373866</td>\n",
" <td>1465.373819</td>\n",
" <td>0.000046</td>\n",
" <td>2.0</td>\n",
" <td>...</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>7.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343121</th>\n",
" <td>14_6884748</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343122</th>\n",
" <td>14_6884749</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343123</th>\n",
" <td>14_6884750</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343124</th>\n",
" <td>14_6884751</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343125</th>\n",
" <td>14_6884753</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>343126 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 14_1 2.0 2.0 70.0 1.0 \n",
"1 14_2 7.0 3.0 145.0 2.0 \n",
"2 14_3 2.0 2.0 70.0 1.0 \n",
"3 14_4 2.0 2.0 32.0 1.0 \n",
"4 14_5 2.0 2.0 70.0 1.0 \n",
"... ... ... ... ... ... \n",
"343121 14_6884748 0.0 0.0 0.0 0.0 \n",
"343122 14_6884749 0.0 0.0 0.0 0.0 \n",
"343123 14_6884750 0.0 0.0 0.0 0.0 \n",
"343124 14_6884751 0.0 0.0 0.0 0.0 \n",
"343125 14_6884753 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 1.0 1464.938449 1464.938021 \n",
"1 1.0 1466.030116 365.335000 \n",
"2 1.0 1476.907894 1476.907662 \n",
"3 1.0 1465.907894 1465.907465 \n",
"4 1.0 1465.373866 1465.373819 \n",
"... ... ... ... \n",
"343121 0.0 NaN NaN \n",
"343122 0.0 NaN NaN \n",
"343123 0.0 NaN NaN \n",
"343124 0.0 NaN NaN \n",
"343125 0.0 NaN NaN \n",
"\n",
" time_between_purchase nb_tickets_internet ... gender_label \\\n",
"0 0.000428 2.0 ... other \n",
"1 1100.695116 7.0 ... male \n",
"2 0.000231 2.0 ... female \n",
"3 0.000428 2.0 ... male \n",
"4 0.000046 2.0 ... female \n",
"... ... ... ... ... \n",
"343121 NaN 0.0 ... male \n",
"343122 NaN 0.0 ... male \n",
"343123 NaN 0.0 ... male \n",
"343124 NaN 0.0 ... female \n",
"343125 NaN 0.0 ... male \n",
"\n",
" gender_female gender_male gender_other country_fr has_tags \\\n",
"0 0 0 1 1.0 0 \n",
"1 0 1 0 1.0 1 \n",
"2 1 0 0 1.0 0 \n",
"3 0 1 0 1.0 0 \n",
"4 1 0 0 1.0 0 \n",
"... ... ... ... ... ... \n",
"343121 0 1 0 1.0 0 \n",
"343122 0 1 0 1.0 0 \n",
"343123 0 1 0 1.0 0 \n",
"343124 1 0 0 1.0 0 \n",
"343125 0 1 0 1.0 0 \n",
"\n",
" nb_campaigns nb_campaigns_opened time_to_open \\\n",
"0 9.0 1.0 0 days 00:36:13 \n",
"1 9.0 4.0 0 days 02:30:09.250000 \n",
"2 6.0 1.0 0 days 20:58:45 \n",
"3 6.0 0.0 NaT \n",
"4 7.0 0.0 NaT \n",
"... ... ... ... \n",
"343121 0.0 0.0 NaT \n",
"343122 0.0 0.0 NaT \n",
"343123 0.0 0.0 NaT \n",
"343124 0.0 0.0 NaT \n",
"343125 0.0 0.0 NaT \n",
"\n",
" y_has_purchased \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"343121 NaN \n",
"343122 NaN \n",
"343123 NaN \n",
"343124 NaN \n",
"343125 NaN \n",
"\n",
"[343126 rows x 41 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"dataset_test" "#Creation de la variable dependante fictive: 1 si l'individu a effectué un achat au cours de la periode de train et 0 sinon\n",
"\n",
"dataset_train_modif=dataset_train\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "75f9a672-641f-49a2-a8d6-7673845506f5", "id": "c121c1e2-d8e4-4b93-a882-9385581b63c9",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": [
"dataset_train_modif[\""
]
} }
], ],
"metadata": { "metadata": {