diff --git a/Spectacle/Stat_desc.ipynb b/Spectacle/Stat_desc.ipynb index 1ed0aba..7882b36 100644 --- a/Spectacle/Stat_desc.ipynb +++ b/Spectacle/Stat_desc.ipynb @@ -80,6 +80,8 @@ "metadata": {}, "outputs": [], "source": [ + "# test avec company 10\n", + "\n", "dic_base=['campaigns_information','customerplus_cleaned','products_purchased_reduced','target_information']\n", "for nom_base in dic_base:\n", " FILE_PATH_S3_fanta = 'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n", @@ -964,7 +966,9 @@ " customerplus_clean_spectacle=df_customerplus_clean\n", " campaigns_information_spectacle=df_campaigns_kpi\n", " products_purchased_reduced_spectacle=df_tickets_kpi\n", - " target_information_spectacle=df_target_information" + " target_information_spectacle=df_target_information\n", + "\n", + " print(f\"Tables imported for tenant {directory_path}\")" ] }, { @@ -1295,9 +1299,160 @@ } ], "source": [ - "outlier_detection(directory_path=\"10\")" + "# outlier à enlever (dépend des stats desc !)\n", + "outlier_detection(directory_path=\"10\") # mettre 2 si on veut le 1er client non anonyme" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f08c082e-f76f-41f3-9530-3e6700eb74d9", + "metadata": {}, + "outputs": [], + "source": [ + "# boucle pour identifier les outliers de chaque compagnie (et le client principal non anonyme)\n", + "\n", + "# nb_compagnie=['10','11','12','13','14']\n", + "for company_number in nb_compagnie :\n", + " print(f\"outlier for tenant {company_number}\")\n", + " outlier_detection(directory_path=company_number, coupure = 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbe1af6a-79e9-45c7-a810-c6df3bf647f7", + "metadata": {}, + "outputs": [], + "source": [ + "# print(products_purchased_reduced_spectacle.loc[products_purchased_reduced_spectacle[\"number_compagny\"]==10][\"total_amount\"].describe())\n", + "\n", + "products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==10) & \n", + "(products_purchased_reduced_spectacle[\"customer_id\"]==19521)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20e2b8a2-f31c-42a4-8ea5-7ad67ab66915", + "metadata": {}, + "outputs": [], + "source": [ + "# company 11 \n", + "# etrange, pas de vente sur internet, et un seul supplier. Plus de 9k achats\n", + "products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==11) & \n", + "(products_purchased_reduced_spectacle[\"customer_id\"]==36)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5dbce57c-d091-4ce2-92f9-1201deb2462e", + "metadata": {}, + "outputs": [], + "source": [ + "# company 12\n", + "products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==12) & \n", + "(products_purchased_reduced_spectacle[\"customer_id\"]==1706757)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a243b57-19da-4e29-a53d-bb8d03e2ab77", + "metadata": {}, + "outputs": [], + "source": [ + "# company 13\n", + "products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==13) & \n", + "(products_purchased_reduced_spectacle[\"customer_id\"]==8422)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d9b01bc-9584-4882-bd06-7de8acb8a88f", + "metadata": {}, + "outputs": [], + "source": [ + "# company 14\n", + "# a-t-on vrmt un outlier ? A acheté quasi 3k tickets, pr 96 achats\n", + "products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==14) & \n", + "(products_purchased_reduced_spectacle[\"customer_id\"]==6354)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "033c1e00-52bd-4651-b893-57bda531760e", + "metadata": {}, + "outputs": [], + "source": [ + "# verifs dans les tables customerplus (outlier incertain pr 11 et 14)\n", + "\n", + "customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==36) &\n", + "(customerplus_clean_spectacle[\"number_compagny\"]==11)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28ac8cda-32fa-4fb7-a75b-e1cc24871c39", + "metadata": {}, + "outputs": [], + "source": [ + "customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==6354) &\n", + "(customerplus_clean_spectacle[\"number_compagny\"]==14)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3faea297-2cc5-4704-af85-77d95f600cc1", + "metadata": {}, + "outputs": [], + "source": [ + "customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==8422) &\n", + "(customerplus_clean_spectacle[\"number_compagny\"]==13)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b165ea79-347b-46fb-8217-635d9e888c65", + "metadata": {}, + "outputs": [], + "source": [ + "customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==19521) &\n", + "(customerplus_clean_spectacle[\"number_compagny\"]==10)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "282b0a96-5e78-48aa-9c2c-7d00d3907add", + "metadata": {}, + "outputs": [], + "source": [ + "customerplus_clean_spectacle.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4918db6e-249b-412e-b646-9a6686989b79", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e866edce-f4bc-4627-89d3-3ec7d9ef26e3", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "id": "42f8171c-e80d-4faa-b278-21fcbe3b242c",