From da1f16d8ec237f78c04dbdb0d38addc663c3e0ab Mon Sep 17 00:00:00 2001 From: ajoubrel-ensae Date: Tue, 5 Mar 2024 10:57:40 +0000 Subject: [PATCH] Ajout de statistiques sur les tags --- 1_Descriptive_Statistics_Museum.ipynb | 228 +++++++++++++++++++++++++- Exploration_billet_AJ.ipynb | 4 +- 2 files changed, 226 insertions(+), 6 deletions(-) diff --git a/1_Descriptive_Statistics_Museum.ipynb b/1_Descriptive_Statistics_Museum.ipynb index d21b7b4..1023f39 100644 --- a/1_Descriptive_Statistics_Museum.ipynb +++ b/1_Descriptive_Statistics_Museum.ipynb @@ -91,6 +91,18 @@ " return df" ] }, + { + "cell_type": "code", + "execution_count": 56, + "id": "09daec01-9927-45c7-a6d4-9b9d0340ee02", + "metadata": {}, + "outputs": [], + "source": [ + "companies = {'musee' : ['1', '2', '3', '4', '101'],\n", + " 'sport': ['5', '6', '7', '8', '9'],\n", + " 'musique' : ['10', '11', '12', '13', '14']}" + ] + }, { "cell_type": "markdown", "id": "ae3c0c33-55a7-4a28-9a62-3ce13496917a", @@ -3767,13 +3779,223 @@ "plt.show()" ] }, + { + "cell_type": "markdown", + "id": "d679204b-f3e8-4502-8de9-3bf4180da3bd", + "metadata": {}, + "source": [ + "# 2 - Autres informations sur client " + ] + }, { "cell_type": "code", - "execution_count": null, - "id": "dc071992-cf4d-4b9f-9c3b-3f0e98e20eff", + "execution_count": 57, + "id": "1df2a145-f47f-4511-aa76-0df7531dd2ec", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def tags_information(tenant_id, first_tags = 20):\n", + "\n", + " customersplus = load_dataset_2(tenant_id, \"customersplus\")[['id', 'structure_id']]\n", + " customersplus.rename(columns = {'id' : 'customer_id'}, inplace = True)\n", + " tags = load_dataset_2(tenant_id, \"tags\")[['id', 'name']]\n", + " tags.rename(columns = {'id' : 'tag_id', 'name' : 'tag_name'}, inplace = True)\n", + " structure_tag_mappings = load_dataset_2(tenant_id, \"structure_tag_mappings\")[['structure_id', 'tag_id']]\n", + " \n", + " customer_tags = pd.merge(customersplus, structure_tag_mappings, on = 'structure_id', how = 'left')\n", + " customer_tags = pd.merge(customer_tags, tags, on = 'tag_id', how = 'inner')\n", + " \n", + " nb_customers_with_tag = customer_tags['customer_id'].nunique()\n", + " \n", + " # print('Nombre de client avec tag : ', nb_customers_with_tag)\n", + " # print('Proportion de clients avec tags : ', nb_customers_with_tag/len(customersplus))\n", + " # print('Moyenne de tags par client : ', len(customer_tags)/nb_customers_with_tag)\n", + " \n", + " # info = customer_tags.groupby(['tag_id', 'tag_name'])['customer_id'].count().reset_index().sort_values('customer_id', ascending = False).head(first_tags)\n", + "\n", + " tags_informations = pd.DataFrame({'company_number' : tenant_id,\n", + " 'nb_customers_with_tags' : [nb_customers_with_tag],\n", + " 'prop_customers_with_tags' : [nb_customers_with_tag/len(customersplus)],\n", + " 'mean_tags_per_customers' : [len(customer_tags)/nb_customers_with_tag]})\n", + " \n", + " return tags_informations" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "c4ecbb15-0f55-46dc-a3df-6e8c4ae44ebd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nombre de client avec tag : 13320\n", + "Proportion de clients avec tags : 0.0877089012682233\n", + "Moyenne de tags par client : 2.1725975975975977\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_467/1769900082.py:8: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(file_in, sep=\",\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nombre de client avec tag : 5953\n", + "Proportion de clients avec tags : 0.021598421025897787\n", + "Moyenne de tags par client : 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_467/1769900082.py:8: DtypeWarning: Columns (19,20) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(file_in, sep=\",\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nombre de client avec tag : 23659\n", + "Proportion de clients avec tags : 0.09207484608139978\n", + "Moyenne de tags par client : 3.0620482691576143\n", + "Nombre de client avec tag : 10495\n", + "Proportion de clients avec tags : 0.03271416949025744\n", + "Moyenne de tags par client : 5.298427822772749\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_467/1769900082.py:8: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(file_in, sep=\",\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nombre de client avec tag : 532342\n", + "Proportion de clients avec tags : 0.18660686931118298\n", + "Moyenne de tags par client : 24.114082676174338\n" + ] + } + ], + "source": [ + "tags_comparaison = pd.DataFrame()\n", + "\n", + "for tenant_id in companies['musee'] : \n", + " \n", + " tags_comparaison = pd.concat([tags_comparaison, tags_information(tenant_id)])" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "bd2dd513-3375-4073-a12a-fa0e9f20571e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
company_numbernb_customers_with_tagsprop_customers_with_tagsmean_tags_per_customers
01133200.0877092.172598
0259530.0215981.000000
03236590.0920753.062048
04104950.0327145.298428
01015323420.18660724.114083
\n", + "
" + ], + "text/plain": [ + " company_number nb_customers_with_tags prop_customers_with_tags \\\n", + "0 1 13320 0.087709 \n", + "0 2 5953 0.021598 \n", + "0 3 23659 0.092075 \n", + "0 4 10495 0.032714 \n", + "0 101 532342 0.186607 \n", + "\n", + " mean_tags_per_customers \n", + "0 2.172598 \n", + "0 1.000000 \n", + "0 3.062048 \n", + "0 5.298428 \n", + "0 24.114083 " + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tags_comparaison" + ] } ], "metadata": { diff --git a/Exploration_billet_AJ.ipynb b/Exploration_billet_AJ.ipynb index a152880..310bca5 100644 --- a/Exploration_billet_AJ.ipynb +++ b/Exploration_billet_AJ.ipynb @@ -473,9 +473,7 @@ { "cell_type": "markdown", "id": "605cced5-052f-4a99-ac26-020c5d2ab633", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, + "metadata": {}, "source": [ "## KPI sur tags" ]