Ajout indicatrice canal vente en ligne

This commit is contained in:
Antoine JOUBREL 2024-02-10 12:23:44 +00:00
parent 5d4dde93f3
commit 89b05d9366

View File

@ -69,54 +69,6 @@
"liste_database = fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "aaf64d60-bf92-470c-8210-d09abd6a653e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['bdc2324-data/1/1campaign_stats.csv',\n",
" 'bdc2324-data/1/1campaigns.csv',\n",
" 'bdc2324-data/1/1categories.csv',\n",
" 'bdc2324-data/1/1countries.csv',\n",
" 'bdc2324-data/1/1currencies.csv',\n",
" 'bdc2324-data/1/1customer_target_mappings.csv',\n",
" 'bdc2324-data/1/1customersplus.csv',\n",
" 'bdc2324-data/1/1event_types.csv',\n",
" 'bdc2324-data/1/1events.csv',\n",
" 'bdc2324-data/1/1facilities.csv',\n",
" 'bdc2324-data/1/1link_stats.csv',\n",
" 'bdc2324-data/1/1pricing_formulas.csv',\n",
" 'bdc2324-data/1/1product_packs.csv',\n",
" 'bdc2324-data/1/1products.csv',\n",
" 'bdc2324-data/1/1products_groups.csv',\n",
" 'bdc2324-data/1/1purchases.csv',\n",
" 'bdc2324-data/1/1representation_category_capacities.csv',\n",
" 'bdc2324-data/1/1representations.csv',\n",
" 'bdc2324-data/1/1seasons.csv',\n",
" 'bdc2324-data/1/1structure_tag_mappings.csv',\n",
" 'bdc2324-data/1/1suppliers.csv',\n",
" 'bdc2324-data/1/1tags.csv',\n",
" 'bdc2324-data/1/1target_types.csv',\n",
" 'bdc2324-data/1/1targets.csv',\n",
" 'bdc2324-data/1/1tickets.csv',\n",
" 'bdc2324-data/1/1type_of_categories.csv',\n",
" 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n",
" 'bdc2324-data/1/1type_ofs.csv']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"liste_database"
]
},
{
"cell_type": "code",
"execution_count": 5,
@ -143,7 +95,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 6,
"id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed",
"metadata": {},
"outputs": [
@ -151,7 +103,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_683/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
"/tmp/ipykernel_445/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in)\n"
]
}
@ -175,7 +127,9 @@
{
"cell_type": "markdown",
"id": "f01e4530-1a61-49cb-a6b0-aa188cf1c0e0",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## customersplus.csv"
]
@ -1252,7 +1206,9 @@
{
"cell_type": "markdown",
"id": "78453f3c-4f89-44ed-a6c6-2a7443b72b52",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## suppliers.csv"
]
@ -2197,7 +2153,9 @@
{
"cell_type": "markdown",
"id": "53227600-c1c5-48aa-9f5d-db5a23a8a22a",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Fusion de l'ensemble des données billettiques"
]
@ -2443,7 +2401,9 @@
{
"cell_type": "markdown",
"id": "ad2d0059-76d3-44b9-b0eb-0b0ca4d4ba75",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"# Utilisation de fonctions"
]
@ -2724,7 +2684,9 @@
{
"cell_type": "markdown",
"id": "a2132ee2-3f22-45fd-b65b-72689c8b672c",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Target area"
]
@ -3132,7 +3094,9 @@
{
"cell_type": "markdown",
"id": "2f665824-a026-4acd-8358-b408a61854b4",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Campaign area"
]
@ -3902,16 +3866,14 @@
{
"cell_type": "markdown",
"id": "96ea2523-38dc-47ef-a49e-2c2d9ad0b1c6",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"metadata": {},
"source": [
"## Exploration variables"
]
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 7,
"id": "aaa41688-ea7e-4dba-851c-1f0b0ec43c71",
"metadata": {},
"outputs": [],
@ -3934,7 +3896,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 8,
"id": "2fecc2e1-113f-46ed-9065-0b9ee416166e",
"metadata": {},
"outputs": [],
@ -3944,7 +3906,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 9,
"id": "55f6170a-36fb-4efb-9810-f982883660cf",
"metadata": {},
"outputs": [
@ -3992,7 +3954,7 @@
"0 9 100.0 100.0 100.0"
]
},
"execution_count": 30,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@ -4003,7 +3965,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 10,
"id": "0030fd02-09e3-42f5-9c83-290458a38c29",
"metadata": {},
"outputs": [],
@ -4018,7 +3980,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 11,
"id": "6b1736d1-8fd7-4fcc-9431-b8bf0c7b4f2b",
"metadata": {},
"outputs": [
@ -4042,7 +4004,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 32,
"id": "226b694b-0b00-4167-b69f-3178902254eb",
"metadata": {},
"outputs": [],
@ -4051,19 +4013,103 @@
"def database_loading(database_name = None):\n",
"    \"\"\"Read a single CSV file through `fs` and return it with its client number.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    database_name : str\n",
"        Path of the CSV file, e.g. 'bdc2324-data/1/1suppliers.csv'. The second\n",
"        '/'-separated component of the path is taken as the client number.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        (df, client_number): the DataFrame read from the file and the client\n",
"        number as a string.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If `database_name` is not a string containing at least one '/'.\n",
"    \"\"\"\n",
"    # Fail early with a clear message instead of an AttributeError/IndexError below.\n",
"    if not isinstance(database_name, str) or database_name.count(\"/\") < 1:\n",
"        raise ValueError(\"database_name must be a path like '<bucket>/<client>/<file>.csv'\")\n",
"\n",
"    # The client number is the path component right after the first one.\n",
"    client_number = database_name.split(\"/\")[1]\n",
"\n",
"    with fs.open(database_name, mode=\"rb\") as file_in:\n",
"        df = pd.read_csv(file_in)\n",
"\n",
"    return df, client_number"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f51d8836-6eef-47d5-873d-4327e12a3245",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 45,
"id": "90b94363-a562-4633-ba27-622422e2368c",
"metadata": {},
"outputs": [],
"source": [
"# Load every supplier file, tag its rows with the tenant id, then concatenate once:\n",
"# calling pd.concat inside the loop re-copies the accumulated frame at every step.\n",
"supplier_frames = []\n",
"\n",
"for link in liste_suppliers:\n",
"    df_supplier, tenant_id = database_loading(link)\n",
"    df_supplier['tenant_id'] = int(tenant_id)\n",
"    supplier_frames.append(df_supplier)\n",
"\n",
"# pd.concat raises on an empty list, so fall back to an empty frame in that case.\n",
"df_all = pd.concat(supplier_frames, axis = 0) if supplier_frames else pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "fedbfbd2-698b-4846-9618-84a3c8d087c7",
"metadata": {},
"outputs": [],
"source": [
"# df_all[df_all['tenant_id'] == 101]['name'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "8d365bb5-2ddc-4f68-b415-e21f960c2c0f",
"metadata": {},
"outputs": [],
"source": [
"# Keywords that flag an online sales channel in a supplier name\n",
"# ('vad' is the French abbreviation for distance selling, \"vente à distance\").\n",
"liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online']\n",
"motif_internet = '|'.join(liste_mots)\n",
"\n",
"# 1 when the name contains any keyword (case-insensitive substring match), else 0.\n",
"# na=False counts a missing name as \"no match\" without overwriting the NaN in `name`.\n",
"df_all['canal_vente_internet'] = (\n",
"    df_all['name'].str.contains(motif_internet, case=False, na=False).astype(int)\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "fc37348d-b282-42ad-b768-c882148d8f66",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tenant_id\n",
"1 1\n",
"2 1\n",
"3 1\n",
"4 1\n",
"5 1\n",
"6 1\n",
"7 1\n",
"8 1\n",
"9 1\n",
"10 1\n",
"11 1\n",
"12 1\n",
"13 1\n",
"14 1\n",
"101 1\n",
"Name: canal_vente_internet, dtype: int64"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per tenant: 1 if at least one supplier row has canal_vente_internet == 1, else 0.\n",
"df_all.groupby('tenant_id')['canal_vente_internet'].agg('max')"
]
}
],
"metadata": {