Ajout de statistiques sur les tags

This commit is contained in:
Antoine JOUBREL 2024-03-05 10:57:40 +00:00
parent 1667f99a83
commit da1f16d8ec
2 changed files with 226 additions and 6 deletions

View File

@ -91,6 +91,18 @@
" return df" " return df"
] ]
}, },
{
"cell_type": "code",
"execution_count": 56,
"id": "09daec01-9927-45c7-a6d4-9b9d0340ee02",
"metadata": {},
"outputs": [],
"source": [
"companies = {'musee' : ['1', '2', '3', '4', '101'],\n",
" 'sport': ['5', '6', '7', '8', '9'],\n",
" 'musique' : ['10', '11', '12', '13', '14']}"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "ae3c0c33-55a7-4a28-9a62-3ce13496917a", "id": "ae3c0c33-55a7-4a28-9a62-3ce13496917a",
@ -3767,13 +3779,223 @@
"plt.show()" "plt.show()"
] ]
}, },
{
"cell_type": "markdown",
"id": "d679204b-f3e8-4502-8de9-3bf4180da3bd",
"metadata": {},
"source": [
"# 2 - Autres informations sur client "
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 57,
"id": "dc071992-cf4d-4b9f-9c3b-3f0e98e20eff", "id": "1df2a145-f47f-4511-aa76-0df7531dd2ec",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": [
"def tags_information(tenant_id, first_tags = 20):\n",
"\n",
" customersplus = load_dataset_2(tenant_id, \"customersplus\")[['id', 'structure_id']]\n",
" customersplus.rename(columns = {'id' : 'customer_id'}, inplace = True)\n",
" tags = load_dataset_2(tenant_id, \"tags\")[['id', 'name']]\n",
" tags.rename(columns = {'id' : 'tag_id', 'name' : 'tag_name'}, inplace = True)\n",
" structure_tag_mappings = load_dataset_2(tenant_id, \"structure_tag_mappings\")[['structure_id', 'tag_id']]\n",
" \n",
" customer_tags = pd.merge(customersplus, structure_tag_mappings, on = 'structure_id', how = 'left')\n",
" customer_tags = pd.merge(customer_tags, tags, on = 'tag_id', how = 'inner')\n",
" \n",
" nb_customers_with_tag = customer_tags['customer_id'].nunique()\n",
" \n",
" # print('Nombre de client avec tag : ', nb_customers_with_tag)\n",
" # print('Proportion de clients avec tags : ', nb_customers_with_tag/len(customersplus))\n",
" # print('Moyenne de tags par client : ', len(customer_tags)/nb_customers_with_tag)\n",
" \n",
" # info = customer_tags.groupby(['tag_id', 'tag_name'])['customer_id'].count().reset_index().sort_values('customer_id', ascending = False).head(first_tags)\n",
"\n",
" tags_informations = pd.DataFrame({'company_number' : tenant_id,\n",
" 'nb_customers_with_tags' : [nb_customers_with_tag],\n",
" 'prop_customers_with_tags' : [nb_customers_with_tag/len(customersplus)],\n",
" 'mean_tags_per_customers' : [len(customer_tags)/nb_customers_with_tag]})\n",
" \n",
" return tags_informations"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "c4ecbb15-0f55-46dc-a3df-6e8c4ae44ebd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de client avec tag : 13320\n",
"Proportion de clients avec tags : 0.0877089012682233\n",
"Moyenne de tags par client : 2.1725975975975977\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_467/1769900082.py:8: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in, sep=\",\")\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de client avec tag : 5953\n",
"Proportion de clients avec tags : 0.021598421025897787\n",
"Moyenne de tags par client : 1.0\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_467/1769900082.py:8: DtypeWarning: Columns (19,20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in, sep=\",\")\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de client avec tag : 23659\n",
"Proportion de clients avec tags : 0.09207484608139978\n",
"Moyenne de tags par client : 3.0620482691576143\n",
"Nombre de client avec tag : 10495\n",
"Proportion de clients avec tags : 0.03271416949025744\n",
"Moyenne de tags par client : 5.298427822772749\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_467/1769900082.py:8: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in, sep=\",\")\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de client avec tag : 532342\n",
"Proportion de clients avec tags : 0.18660686931118298\n",
"Moyenne de tags par client : 24.114082676174338\n"
]
}
],
"source": [
"tags_comparaison = pd.DataFrame()\n",
"\n",
"for tenant_id in companies['musee'] : \n",
" \n",
" tags_comparaison = pd.concat([tags_comparaison, tags_information(tenant_id)])"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "bd2dd513-3375-4073-a12a-fa0e9f20571e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>company_number</th>\n",
" <th>nb_customers_with_tags</th>\n",
" <th>prop_customers_with_tags</th>\n",
" <th>mean_tags_per_customers</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>13320</td>\n",
" <td>0.087709</td>\n",
" <td>2.172598</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>5953</td>\n",
" <td>0.021598</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3</td>\n",
" <td>23659</td>\n",
" <td>0.092075</td>\n",
" <td>3.062048</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4</td>\n",
" <td>10495</td>\n",
" <td>0.032714</td>\n",
" <td>5.298428</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>101</td>\n",
" <td>532342</td>\n",
" <td>0.186607</td>\n",
" <td>24.114083</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" company_number nb_customers_with_tags prop_customers_with_tags \\\n",
"0 1 13320 0.087709 \n",
"0 2 5953 0.021598 \n",
"0 3 23659 0.092075 \n",
"0 4 10495 0.032714 \n",
"0 101 532342 0.186607 \n",
"\n",
" mean_tags_per_customers \n",
"0 2.172598 \n",
"0 1.000000 \n",
"0 3.062048 \n",
"0 5.298428 \n",
"0 24.114083 "
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tags_comparaison"
]
} }
], ],
"metadata": { "metadata": {

View File

@ -473,9 +473,7 @@
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "605cced5-052f-4a99-ac26-020c5d2ab633", "id": "605cced5-052f-4a99-ac26-020c5d2ab633",
"metadata": { "metadata": {},
"jp-MarkdownHeadingCollapsed": true
},
"source": [ "source": [
"## KPI sur tags" "## KPI sur tags"
] ]