diff --git a/Spectacle/Stat_desc.ipynb b/Spectacle/Stat_desc.ipynb
index 2fb2c0d..c8e2dce 100644
--- a/Spectacle/Stat_desc.ipynb
+++ b/Spectacle/Stat_desc.ipynb
@@ -3855,163 +3855,74 @@
},
{
"cell_type": "code",
- "execution_count": 229,
- "id": "4c02ae98-9184-41d9-a9ea-ed9eaf719bd3",
+ "execution_count": 22,
+ "id": "d64979ba-fccf-45f2-8a15-40ef1b49c74f",
"metadata": {},
"outputs": [
{
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " customer_id | \n",
- " nb_tickets | \n",
- " nb_purchases | \n",
- " total_amount | \n",
- " nb_suppliers | \n",
- " vente_internet_max | \n",
- " purchase_date_min | \n",
- " purchase_date_max | \n",
- " time_between_purchase | \n",
- " nb_tickets_internet | \n",
- " number_compagny | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 19482 | \n",
- " 88 | \n",
- " 29 | \n",
- " 872.0 | \n",
- " 2 | \n",
- " 1 | \n",
- " 2643.092500 | \n",
- " 718.149398 | \n",
- " 1924.943102 | \n",
- " 8.0 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 19484 | \n",
- " 3 | \n",
- " 2 | \n",
- " 62.0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1745.021736 | \n",
- " 1743.045035 | \n",
- " 1.976701 | \n",
- " 0.0 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 19485 | \n",
- " 131 | \n",
- " 21 | \n",
- " 1878.0 | \n",
- " 2 | \n",
- " 1 | \n",
- " 2649.044745 | \n",
- " 85.240845 | \n",
- " 2563.803900 | \n",
- " 84.0 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 19486 | \n",
- " 10 | \n",
- " 4 | \n",
- " 96.0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1944.077604 | \n",
- " 1742.794225 | \n",
- " 201.283380 | \n",
- " 0.0 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 19487 | \n",
- " 2 | \n",
- " 1 | \n",
- " 33.0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1742.877766 | \n",
- " 1742.877766 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
- "0 19482 88 29 872.0 2 \n",
- "1 19484 3 2 62.0 1 \n",
- "2 19485 131 21 1878.0 2 \n",
- "3 19486 10 4 96.0 1 \n",
- "4 19487 2 1 33.0 1 \n",
- "\n",
- " vente_internet_max purchase_date_min purchase_date_max \\\n",
- "0 1 2643.092500 718.149398 \n",
- "1 0 1745.021736 1743.045035 \n",
- "2 1 2649.044745 85.240845 \n",
- "3 0 1944.077604 1742.794225 \n",
- "4 0 1742.877766 1742.877766 \n",
- "\n",
- " time_between_purchase nb_tickets_internet number_compagny \n",
- "0 1924.943102 8.0 10 \n",
- "1 1.976701 0.0 10 \n",
- "2 2563.803900 84.0 10 \n",
- "3 201.283380 0.0 10 \n",
- "4 0.000000 0.0 10 "
- ]
- },
- "execution_count": 229,
- "metadata": {},
- "output_type": "execute_result"
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_430/3239820253.py:6: DtypeWarning: Columns (39) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ " dataset_train = pd.read_csv(file_in, sep=\",\")\n"
+ ]
}
],
"source": [
- "products_purchased_reduced_spectacle.head()"
+ "#base d'entrainement\n",
+ "\n",
+ "#FILE_PATH_S3='projet-bdc2324-team1/Generalization/musique/Train_test/dataset_train14.csv'\n",
+ "\n",
+ "#with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
+ " #dataset_train = pd.read_csv(file_in, sep=\",\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "635d60cd-2dbc-49da-b0f4-94e16667882f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Création de la variable dépendante fictive : 1 si l'individu a effectué un achat au cours de la période de train et 0 sinon\n",
+ "\n",
+ "#dataset_train_modif=dataset_train\n",
+ "\n",
+ "#dataset_train_modif[\"y_purchase_fictive\"]=np.random.randint(2, size=dataset_train_modif.shape[0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "ea63e5d6-70f9-4685-8b08-673a47108954",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#dataset_train_modif[\"y_purchase_fictive\"].value_counts(normalize=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "98f7645f-ffe6-4d7b-8032-15e65f36af87",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "#dataset_train_modif[\"y_purchase_fictive\"]=dataset_train_modif[\"y_purchase_fictive\"].replace([0,1],[\"no_purchase_train\",\"Purchase_train\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "e1a95b8f-6539-48bd-b09d-6f8f63d25fb2",
+ "id": "eb6355e0-3f8c-47d9-a5ee-d349040dcf51",
"metadata": {},
"outputs": [],
"source": [
- "#nombre de ticket par compagnie\n",
+ "#répartition du chiffre d'affaires selon la catégorie\n",
"\n",
- "company_campaigns_stats = campaigns_information_spectacle.groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
- "company_campaigns_stats[\"ratio_campaigns_opened\"] = company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"]\n",
- "company_campaigns_stats"
+ "sns.boxplot(data=chiffre_affaire_categ, y=\"chiffre_affaire\",x=\"categ\",showfliers=False,showmeans=True)\n",
+ "plt.title(\"Boite à moustache du chiffre d'affaire selon les categories de livre\")"
]
},
{
@@ -4240,7 +4151,15 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "0a180f0a-c6de-4e66-9ae8-fdbfdf8837c9",
+ "id": "93a8ae1f-6fbd-4210-a857-728ae472d1c5",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "904cbf32-77b6-49dd-a96c-9e7e5a0175c3",
"metadata": {},
"outputs": [],
"source": []