BDC-team-1/Descriptive_statistics/debug.ipynb

104 lines
3.3 KiB
Plaintext
Raw Normal View History

2024-03-14 22:14:40 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "718d4e6d-b90a-4955-90ee-c1518246c07c",
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"Choisissez le type de compagnie : sport ? musique ? musee ? sport\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_5/customerplus_cleaned.csv\n",
"File path : projet-bdc2324-team1/0_Input/Company_5/campaigns_information.csv\n",
"File path : projet-bdc2324-team1/0_Input/Company_5/products_purchased_reduced.csv\n",
"File path : projet-bdc2324-team1/0_Input/Company_5/target_information.csv\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import s3fs\n",
"import re\n",
"import warnings\n",
"\n",
"# Ignore warning\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"exec(open('../0_KPI_functions.py').read())\n",
"exec(open('plot.py').read())\n",
"\n",
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"companies = {'musee' : ['1', '2', '3', '4'], # , '101'\n",
" 'sport': ['5', '6'],\n",
" 'musique' : ['10', '11', '12', '13', '14']}\n",
"\n",
"\n",
"type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?')\n",
"list_of_comp = companies[type_of_activity] \n",
"\n",
"# Load files\n",
"customer, campaigns_kpi, campaigns_brut, tickets, products = load_files(list_of_comp)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b967f70a-e3ae-423e-9fb0-edfc00ddf826",
"metadata": {},
"outputs": [],
"source": [
"# Identify anonymous customer for each company and remove them from our datasets\n",
"outlier_list = outlier_detection(tickets, list_of_comp)\n",
"\n",
"# Identify valid customer (customer who bought tickets after starting date or received mails after starting date)\n",
"customer_valid_list = valid_customer_detection(products, campaigns_brut)\n",
"\n",
"# Identify customer who bought during the period of y\n",
"consumer_target_period = identify_purchase_during_target_periode(products)\n",
"\n",
"databases = [customer, campaigns_kpi, campaigns_brut, tickets, products]\n",
"\n",
"for dataset in databases:\n",
" dataset['customer_id'] = dataset['customer_id'].apply(lambda x: remove_elements(x, outlier_list))# remove outlier\n",
" dataset['customer_id'] = dataset['customer_id'].isin(customer_valid_list) # keep only valid customer\n",
" dataset['has_purchased_target_period'] = np.where(dataset['customer_id'].isin(customer_valid_list), 1, 0)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}