This commit is contained in:
Fanta RODRIGUE 2024-03-05 01:43:40 +00:00
parent 66754f957e
commit dbb90fb364

View File

@ -3855,163 +3855,74 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 229, "execution_count": 22,
"id": "4c02ae98-9184-41d9-a9ea-ed9eaf719bd3", "id": "d64979ba-fccf-45f2-8a15-40ef1b49c74f",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "name": "stderr",
"text/html": [ "output_type": "stream",
"<div>\n", "text": [
"<style scoped>\n", "/tmp/ipykernel_430/3239820253.py:6: DtypeWarning: Columns (39) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" .dataframe tbody tr th:only-of-type {\n", " dataset_train = pd.read_csv(file_in, sep=\",\")\n"
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19482</td>\n",
" <td>88</td>\n",
" <td>29</td>\n",
" <td>872.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2643.092500</td>\n",
" <td>718.149398</td>\n",
" <td>1924.943102</td>\n",
" <td>8.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>19484</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>62.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1745.021736</td>\n",
" <td>1743.045035</td>\n",
" <td>1.976701</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19485</td>\n",
" <td>131</td>\n",
" <td>21</td>\n",
" <td>1878.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2649.044745</td>\n",
" <td>85.240845</td>\n",
" <td>2563.803900</td>\n",
" <td>84.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>19486</td>\n",
" <td>10</td>\n",
" <td>4</td>\n",
" <td>96.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1944.077604</td>\n",
" <td>1742.794225</td>\n",
" <td>201.283380</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>19487</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>33.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1742.877766</td>\n",
" <td>1742.877766</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 19482 88 29 872.0 2 \n",
"1 19484 3 2 62.0 1 \n",
"2 19485 131 21 1878.0 2 \n",
"3 19486 10 4 96.0 1 \n",
"4 19487 2 1 33.0 1 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 1 2643.092500 718.149398 \n",
"1 0 1745.021736 1743.045035 \n",
"2 1 2649.044745 85.240845 \n",
"3 0 1944.077604 1742.794225 \n",
"4 0 1742.877766 1742.877766 \n",
"\n",
" time_between_purchase nb_tickets_internet number_compagny \n",
"0 1924.943102 8.0 10 \n",
"1 1.976701 0.0 10 \n",
"2 2563.803900 84.0 10 \n",
"3 201.283380 0.0 10 \n",
"4 0.000000 0.0 10 "
] ]
},
"execution_count": 229,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
"products_purchased_reduced_spectacle.head()" "#base d'entrainement\n",
"\n",
"#FILE_PATH_S3='projet-bdc2324-team1/Generalization/musique/Train_test/dataset_train14.csv'\n",
"\n",
"#with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" #dataset_train = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "635d60cd-2dbc-49da-b0f4-94e16667882f",
"metadata": {},
"outputs": [],
"source": [
"#Création de la variable dépendante fictive : 1 si l'individu a effectué un achat au cours de la période de train et 0 sinon\n",
"\n",
"#dataset_train_modif=dataset_train\n",
"\n",
"#dataset_train_modif[\"y_purchase_fictive\"]=np.random.randint(2, size=dataset_train_modif.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "ea63e5d6-70f9-4685-8b08-673a47108954",
"metadata": {},
"outputs": [],
"source": [
"#dataset_train_modif[\"y_purchase_fictive\"].value_counts(normalize=True)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "98f7645f-ffe6-4d7b-8032-15e65f36af87",
"metadata": {},
"outputs": [],
"source": [
"\n",
"#dataset_train_modif[\"y_purchase_fictive\"]=dataset_train_modif[\"y_purchase_fictive\"].replace([0,1],[\"Purchase_train\",\"no_purchase_train\"])"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "e1a95b8f-6539-48bd-b09d-6f8f63d25fb2", "id": "eb6355e0-3f8c-47d9-a5ee-d349040dcf51",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"#statistiques des campagnes par compagnie\n", "#répartition du chiffre d'affaires selon la catégorie\n",
"\n", "\n",
"company_campaigns_stats = campaigns_information_spectacle.groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n", "sns.boxplot(data=chiffre_affaire_categ, y=\"chiffre_affaire\",x=\"categ\",showfliers=False,showmeans=True)\n",
"company_campaigns_stats[\"ratio_campaigns_opened\"] = company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"]\n", "plt.title(\"Boite à moustache du chiffre d'affaire selon les categories de livre\")"
"company_campaigns_stats"
] ]
}, },
{ {
@ -4240,7 +4151,15 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "0a180f0a-c6de-4e66-9ae8-fdbfdf8837c9", "id": "93a8ae1f-6fbd-4210-a857-728ae472d1c5",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "904cbf32-77b6-49dd-a96c-9e7e5a0175c3",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": []