events_theme #2

Merged
ajoubrel-ensae merged 9 commits from events_theme into main 2024-02-05 19:29:12 +01:00
11 changed files with 9416 additions and 5042 deletions
Showing only changes of commit e6694a278d - Show all commits

188
.gitignore vendored
View File

@ -1 +1,187 @@
.ipynb_checkpoints/Clean-Notebook-checkpoint.ipynb
# Created by https://www.toptal.com/developers/gitignore/api/jupyternotebooks,python
# Edit at https://www.toptal.com/developers/gitignore?templates=jupyternotebooks,python
### JupyterNotebooks ###
# gitignore template for Jupyter Notebooks
# website: http://jupyter.org/
.ipynb_checkpoints
*/.ipynb_checkpoints/*
# IPython
profile_default/
ipython_config.py
# Remove previous ipynb_checkpoints
# git rm -r .ipynb_checkpoints/
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
# IPython
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml
# ruff
.ruff_cache/
# LSP config files
pyrightconfig.json
# End of https://www.toptal.com/developers/gitignore/api/jupyternotebooks,python

View File

@ -1,187 +0,0 @@
# Created by https://www.toptal.com/developers/gitignore/api/jupyternotebooks,python
# Edit at https://www.toptal.com/developers/gitignore?templates=jupyternotebooks,python
### JupyterNotebooks ###
# gitignore template for Jupyter Notebooks
# website: http://jupyter.org/
.ipynb_checkpoints
*/.ipynb_checkpoints/*
# IPython
profile_default/
ipython_config.py
# Remove previous ipynb_checkpoints
# git rm -r .ipynb_checkpoints/
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
# IPython
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml
# ruff
.ruff_cache/
# LSP config files
pyrightconfig.json
# End of https://www.toptal.com/developers/gitignore/api/jupyternotebooks,python

View File

@ -1,76 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3ba1f385-2a2f-4b0c-be79-66f618469a9f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['bdc2324-data/1',\n",
" 'bdc2324-data/10',\n",
" 'bdc2324-data/101',\n",
" 'bdc2324-data/11',\n",
" 'bdc2324-data/12',\n",
" 'bdc2324-data/13',\n",
" 'bdc2324-data/14',\n",
" 'bdc2324-data/2',\n",
" 'bdc2324-data/3',\n",
" 'bdc2324-data/4',\n",
" 'bdc2324-data/5',\n",
" 'bdc2324-data/6',\n",
" 'bdc2324-data/7',\n",
" 'bdc2324-data/8',\n",
" 'bdc2324-data/9']"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"import s3fs\n",
"\n",
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"bdc2324-data\"\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "023bfa2b-97c2-4d53-80fb-e2290c73b92f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

1465
0_Cleaning_and_merge.ipynb Normal file

File diff suppressed because it is too large Load Diff

695
Brouillon_AJ.ipynb Normal file
View File

@ -0,0 +1,695 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "8c8e008c-9b92-41f1-88c1-8ec462e4ecab",
"metadata": {},
"source": [
"# Business Data Challenge - Team 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88af2795-8bf9-4df0-a059-be7c28fb4289",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"id": "e05cd2c9-3f76-48e3-b4a6-5055445af2e4",
"metadata": {},
"source": [
"Configuration de l'accès aux données"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ba1f385-2a2f-4b0c-be79-66f618469a9f",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import s3fs\n",
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"bdc2324-data\"\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ba9d04ad-6cc1-4bac-b1a0-44bedfb09763",
"metadata": {},
"outputs": [],
"source": [
"# Chargement des fichiers campaign_stats.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/1/1campaign_stats.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" campaign_stats_1 = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"FILE_PATH_S3 = 'bdc2324-data/2/2campaign_stats.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" campaign_stats_2 = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"FILE_PATH_S3 = 'bdc2324-data/3/3campaign_stats.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" campaign_stats_3 = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cacaecc1-4d8a-4e20-8cd3-b452cf17db56",
"metadata": {},
"outputs": [],
"source": [
"# Conversion des dates 'sent_at'\n",
"campaign_stats_1['sent_at'] = pd.to_datetime(campaign_stats_1['sent_at'], format = 'ISO8601', utc = True)\n",
"campaign_stats_2['sent_at'] = pd.to_datetime(campaign_stats_2['sent_at'], format = 'ISO8601', utc = True)\n",
"campaign_stats_3['sent_at'] = pd.to_datetime(campaign_stats_3['sent_at'], format = 'ISO8601', utc = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ec4b583-dc64-43e9-b3ae-6bbaee0bc135",
"metadata": {},
"outputs": [],
"source": [
"# Chaque unité correspond-elle à une période ? --> Non, les dossiers ont juste pour but de réduire la taille des fichiers\n",
"print(campaign_stats_1['sent_at'].max())\n",
"print(campaign_stats_1['sent_at'].min())\n",
"\n",
"print(campaign_stats_2['sent_at'].max())\n",
"print(campaign_stats_2['sent_at'].min())\n",
"\n",
"print(campaign_stats_3['sent_at'].max())\n",
"print(campaign_stats_3['sent_at'].min())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "77894273-b3e5-4f29-bd63-9f4df8082b9b",
"metadata": {},
"outputs": [],
"source": [
"campaign_stats_1['sent_at']"
]
},
{
"cell_type": "markdown",
"id": "31f2edbf-5661-4516-9835-06d4da615c13",
"metadata": {},
"source": [
"### Customersplus.csv"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4223c873-cbd3-46d1-ac96-c9a3b9e97092",
"metadata": {},
"outputs": [],
"source": [
"FILE_PATH_S3 = 'bdc2324-data/1/1customersplus.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" customers_plus_1 = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"FILE_PATH_S3 = 'bdc2324-data/2/2customersplus.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" customers_plus_2 = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "460f853a-68c0-42a7-9877-b83d3aaec813",
"metadata": {},
"outputs": [],
"source": [
"customers_plus_1.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5a9398f-72fc-4548-9f53-b20b372144b2",
"metadata": {},
"outputs": [],
"source": [
"customers_plus_1.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7467ddbe-0bd4-44cc-8a16-84aa41853638",
"metadata": {},
"outputs": [],
"source": [
"customers_plus_1['id'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e15f05f8-3a89-4fc3-84a9-dae70e168440",
"metadata": {},
"outputs": [],
"source": [
"customers_plus_2['id'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b40a653e-013f-48d0-8b57-0284587b36c5",
"metadata": {},
"outputs": [],
"source": [
"common_id = set(customers_plus_2['id']).intersection(customers_plus_1['id'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "32fa2215-3c79-40b5-8643-755865959fc7",
"metadata": {},
"outputs": [],
"source": [
"common_id = set(customers_plus_2['id']).intersection(customers_plus_1['id'])\n",
"# Exemple id commun = caractéristiques communes\n",
"print(customers_plus_2[customers_plus_2['id'] == list(common_id)[0]])\n",
"\n",
"print(customers_plus_1[customers_plus_1['id'] == list(common_id)[0]])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0eb345e4-69f5-4e16-ac57-e33674c6c43d",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"customers_plus_1.isna().mean()*100"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f6ce60d-0912-497d-9108-330acccef394",
"metadata": {},
"outputs": [],
"source": [
"# Chargement de toutes les données\n",
"liste_base = ['customer_target_mappings', 'customersplus', 'target_types', 'tags', 'events', 'tickets', 'representations', 'purchases', 'products']\n",
"\n",
"for nom_base in liste_base:\n",
" FILE_PATH_S3 = 'bdc2324-data/11/11' + nom_base + '.csv'\n",
" with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa8ee17d-5092-40ac-8a0a-3790b016dd4e",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Jointure\n",
"merge_1 = pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')[['id_x', 'customer_id','product_id', 'purchase_date', 'type_of', 'is_from_subscription']]\n",
"merge_2 = pd.merge(products, merge_1, left_on='id', right_on='product_id', how='inner')[['id_x', 'customer_id', 'representation_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price']]\n",
"merge_3 = pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')[['id_x', 'customer_id', 'event_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time']]\n",
"merge_4 = pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')[['id_x', 'customer_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time', 'name']]\n",
"merge_4 = merge_4.rename(columns={'name': 'event_name'})\n",
"df_customer_event = pd.merge(customersplus, merge_4, left_on = 'id', right_on = 'customer_id', how = 'inner')[['id_x', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time', 'event_name']]\n",
"df_customer_event"
]
},
{
"cell_type": "markdown",
"id": "f1d4aeb8-ec74-4d49-989a-9116e01afe2f",
"metadata": {},
"source": [
"# Fusion et exploration"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22bfad2b-d52a-4077-9b39-bee35004e01c",
"metadata": {},
"outputs": [],
"source": [
"# Jointure\n",
"var_choosed = ['id_x', 'customer_id','product_id', 'purchase_date', 'type_of', 'is_from_subscription']\n",
"merge_1 = pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')[var_choosed]\n",
"\n",
"var_choosed.extend(['amount', 'is_full_price', 'representation_id'])\n",
"merge_2 = pd.merge(products, merge_1, left_on='id', right_on='product_id', how='inner')[var_choosed]\n",
"\n",
"var_choosed.remove('representation_id')\n",
"var_choosed.extend(['start_date_time', 'event_id'])\n",
"merge_3 = pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')[var_choosed]\n",
"\n",
"var_choosed.remove('event_id')\n",
"var_choosed.extend(['name', 'customer_id'])\n",
"merge_4 = pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')[var_choosed]\n",
"\n",
"# Changement de nom\n",
"merge_4 = merge_4.rename(columns={'name': 'event_name'})\n",
"var_choosed[var_choosed.index('name')] = \"event_name\"\n",
"\n",
"# Base finale\n",
"var_choosed.extend(['age', 'gender', 'country', 'fidelity', 'profession'])\n",
"df_customer_event = pd.merge(customersplus, merge_4, left_on = 'id', right_on = 'customer_id', how = 'inner')[var_choosed]\n",
"df_customer_event"
]
},
{
"cell_type": "markdown",
"id": "4cb08d7a-ff04-4951-863d-20aaf33f0b31",
"metadata": {},
"source": [
"## Type de client au global"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f47ba14a-8601-4b91-9712-223a5ed8a1d1",
"metadata": {},
"outputs": [],
"source": [
"# Client\n",
"print(customer_target_mappings.columns)\n",
"print(customer_target_mappings.shape)\n",
"customer_target_mappings.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f11f829e-66b1-4fd0-a46f-5ae7cb78073f",
"metadata": {},
"outputs": [],
"source": [
"customer_target_mappings['extra_field'].unique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c240ab80-c746-4a64-ac6a-be8382c4f0ec",
"metadata": {},
"outputs": [],
"source": [
"customer_target_mappings['name'].unique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c03c0597-3f21-4673-8a0f-24d7d9bc5ce4",
"metadata": {},
"outputs": [],
"source": [
"# Segmentation existante\n",
"print(target_types.columns)\n",
"print(target_types.shape)\n",
"target_types.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5adb1773-648d-4683-bc08-d1f2298c1283",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"target_types"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d65f74e-47fc-4296-b493-a1ebefb91cde",
"metadata": {},
"outputs": [],
"source": [
"# Tags = clients\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11tags.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" tags = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(tags.columns)\n",
"print(tags.shape)\n",
"tags.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a689a63-165b-4c4e-bbb0-695b661048d9",
"metadata": {},
"outputs": [],
"source": [
"tags"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "69e38c52-0570-4531-aebb-9deb6db8c40b",
"metadata": {},
"outputs": [],
"source": [
"# Structure = clients\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11structure_tag_mappings.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" structure_tag_mappings = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(structure_tag_mappings.columns)\n",
"print(structure_tag_mappings.shape)\n",
"structure_tag_mappings.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "74dc34ad-375b-48df-a900-40d92c5fff13",
"metadata": {},
"outputs": [],
"source": [
"structure_tag_mappings"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a479ceeb-0135-4899-9cbc-90ed7bf941fe",
"metadata": {},
"outputs": [],
"source": [
"# Customersplus = clients\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11customersplus.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" customersplus = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(customersplus.columns)\n",
"print(customersplus.shape)\n",
"customersplus.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "383e892c-606a-45ce-bdd6-b503b3e0be33",
"metadata": {},
"outputs": [],
"source": [
"customersplus"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70324d06-b855-4386-a7de-eef1eb13dfdf",
"metadata": {},
"outputs": [],
"source": [
"# But : lier les caractéristiques socio-demo et les comportements d'achat\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4bbd743d-51fe-4786-8ad3-5a4a4d09439c",
"metadata": {},
"outputs": [],
"source": [
"# tickets\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11tickets.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" tickets = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(tickets.columns)\n",
"print(tickets.shape)\n",
"tickets.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea83ea5c-3d47-4a66-a523-04b69b149a20",
"metadata": {},
"outputs": [],
"source": [
"tickets"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ba15708e-eb84-4b5d-a86c-05ebed188cf6",
"metadata": {},
"outputs": [],
"source": [
"tickets['type_of'].unique()"
]
},
{
"cell_type": "markdown",
"id": "bc192b08-30a5-486a-8bea-93e765dbfce6",
"metadata": {},
"source": [
"## Types d'événement et client"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e14dcf62-2def-4ed5-834b-cf21abbc2894",
"metadata": {},
"outputs": [],
"source": [
"# Événement = events.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11events.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" events = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(events.columns)\n",
"print(events.shape)\n",
"events.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1a1d63c-d7de-4b63-93a8-1c734eb5b316",
"metadata": {},
"outputs": [],
"source": [
"events"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "af80eee8-f717-4159-a0fd-09d47ec96621",
"metadata": {},
"outputs": [],
"source": [
"events['name'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6afc6f3d-4292-4a92-a4d6-14f1edc25df2",
"metadata": {},
"outputs": [],
"source": [
"# Représentation des événements = representations.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11representations.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" representations = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(representations.columns)\n",
"print(representations.shape)\n",
"representations.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1487402a-a49b-4737-b7d7-40c764d2f0b4",
"metadata": {},
"outputs": [],
"source": [
"representations"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99b27418-2c15-4a6e-bcf5-d329ca492085",
"metadata": {},
"outputs": [],
"source": [
"# Produits vendus = products.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11products.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" products = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(products.columns)\n",
"print(products.shape)\n",
"products.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c49bcd47-672f-4e0f-aee9-a7475151b97f",
"metadata": {},
"outputs": [],
"source": [
"products"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4aec5ce-d0c9-4625-bb29-9ac154818621",
"metadata": {},
"outputs": [],
"source": [
"# Lieu = facilities.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11facilities.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" facilities = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(facilities.columns)\n",
"print(facilities.shape)\n",
"facilities.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3642483-2879-442a-ad69-efcd2331a200",
"metadata": {},
"outputs": [],
"source": [
"facilities"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da1e9807-2a8d-4be7-a785-55cffd734f36",
"metadata": {},
"outputs": [],
"source": [
"# Saisons = seasons.csv période sur deux années consécutives\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11seasons.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" seasons = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(seasons.columns)\n",
"print(seasons.shape)\n",
"seasons.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec8a37b5-2d78-4b1c-aa47-bd923fdc2ba9",
"metadata": {},
"outputs": [],
"source": [
"seasons['name'].unique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abb3aa20-774b-4761-983a-df5eb2bc51c6",
"metadata": {},
"outputs": [],
"source": [
"# Achats = purchases.csv \n",
"FILE_PATH_S3 = 'bdc2324-data/11/11purchases.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" purchases = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(purchases.columns)\n",
"print(purchases.shape)\n",
"purchases.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "30e204ab-4f63-430c-a818-5c8035b6e17b",
"metadata": {},
"outputs": [],
"source": [
"purchases"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

3406
Exploration_billet_AJ.ipynb Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,823 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "8c8e008c-9b92-41f1-88c1-8ec462e4ecab",
"metadata": {},
"source": [
"# Business Data Challenge - Team 1"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "88af2795-8bf9-4df0-a059-be7c28fb4289",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"id": "e05cd2c9-3f76-48e3-b4a6-5055445af2e4",
"metadata": {},
"source": [
"Configuration de l'accès aux données"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3ba1f385-2a2f-4b0c-be79-66f618469a9f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['bdc2324-data/1',\n",
" 'bdc2324-data/10',\n",
" 'bdc2324-data/101',\n",
" 'bdc2324-data/11',\n",
" 'bdc2324-data/12',\n",
" 'bdc2324-data/13',\n",
" 'bdc2324-data/14',\n",
" 'bdc2324-data/2',\n",
" 'bdc2324-data/3',\n",
" 'bdc2324-data/4',\n",
" 'bdc2324-data/5',\n",
" 'bdc2324-data/6',\n",
" 'bdc2324-data/7',\n",
" 'bdc2324-data/8',\n",
" 'bdc2324-data/9']"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"import s3fs\n",
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"bdc2324-data\"\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ba9d04ad-6cc1-4bac-b1a0-44bedfb09763",
"metadata": {},
"outputs": [],
"source": [
"# Chargement des fichiers campaign_stats.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/1/1campaign_stats.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" campaign_stats_1 = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"FILE_PATH_S3 = 'bdc2324-data/2/2campaign_stats.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" campaign_stats_2 = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"FILE_PATH_S3 = 'bdc2324-data/3/3campaign_stats.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" campaign_stats_3 = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "cacaecc1-4d8a-4e20-8cd3-b452cf17db56",
"metadata": {},
"outputs": [],
"source": [
"# Conversion des dates 'sent_at'\n",
"campaign_stats_1['sent_at'] = pd.to_datetime(campaign_stats_1['sent_at'], format = 'ISO8601', utc = True)\n",
"campaign_stats_2['sent_at'] = pd.to_datetime(campaign_stats_2['sent_at'], format = 'ISO8601', utc = True)\n",
"campaign_stats_3['sent_at'] = pd.to_datetime(campaign_stats_3['sent_at'], format = 'ISO8601', utc = True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "2ec4b583-dc64-43e9-b3ae-6bbaee0bc135",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2023-11-09 18:10:45+00:00\n",
"2020-06-02 08:24:08+00:00\n",
"2023-10-12 01:39:48+00:00\n",
"2023-10-10 17:06:29+00:00\n",
"2023-11-01 09:20:48+00:00\n",
"2021-03-31 14:59:02+00:00\n"
]
}
],
"source": [
"# Chaque unité correspond-elle à une période ? --> Non, les dossiers ont juste pour but de réduire la taille des fichiers\n",
"print(campaign_stats_1['sent_at'].max())\n",
"print(campaign_stats_1['sent_at'].min())\n",
"\n",
"print(campaign_stats_2['sent_at'].max())\n",
"print(campaign_stats_2['sent_at'].min())\n",
"\n",
"print(campaign_stats_3['sent_at'].max())\n",
"print(campaign_stats_3['sent_at'].min())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "77894273-b3e5-4f29-bd63-9f4df8082b9b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 2021-03-28 16:01:09+00:00\n",
"1 2021-03-28 16:01:09+00:00\n",
"2 2021-03-28 16:00:59+00:00\n",
"3 2021-03-28 16:00:59+00:00\n",
"4 2021-03-28 16:01:06+00:00\n",
" ... \n",
"6214803 2023-10-23 09:32:33+00:00\n",
"6214804 2023-10-23 09:32:49+00:00\n",
"6214805 2023-10-23 09:33:28+00:00\n",
"6214806 2023-10-23 09:31:53+00:00\n",
"6214807 2023-10-23 09:33:54+00:00\n",
"Name: sent_at, Length: 6214808, dtype: datetime64[ns, UTC]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"campaign_stats_1['sent_at']"
]
},
{
"cell_type": "markdown",
"id": "31f2edbf-5661-4516-9835-06d4da615c13",
"metadata": {},
"source": [
"### Customersplus.csv"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "4223c873-cbd3-46d1-ac96-c9a3b9e97092",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1362/4118060109.py:9: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" customers_plus_2 = pd.read_csv(file_in, sep=\",\")\n"
]
}
],
"source": [
"FILE_PATH_S3 = 'bdc2324-data/1/1customersplus.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" customers_plus_1 = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"FILE_PATH_S3 = 'bdc2324-data/2/2customersplus.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" customers_plus_2 = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "460f853a-68c0-42a7-9877-b83d3aaec813",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['id', 'lastname', 'firstname', 'birthdate', 'email', 'street_id',\n",
" 'created_at', 'updated_at', 'civility', 'is_partner', 'extra',\n",
" 'deleted_at', 'reference', 'gender', 'is_email_true', 'extra_field',\n",
" 'identifier', 'opt_in', 'structure_id', 'note', 'profession',\n",
" 'language', 'mcp_contact_id', 'need_reload', 'last_buying_date',\n",
" 'max_price', 'ticket_sum', 'average_price', 'fidelity',\n",
" 'average_purchase_delay', 'average_price_basket',\n",
" 'average_ticket_basket', 'total_price', 'preferred_category',\n",
" 'preferred_supplier', 'preferred_formula', 'purchase_count',\n",
" 'first_buying_date', 'last_visiting_date', 'zipcode', 'country', 'age',\n",
" 'tenant_id'],\n",
" dtype='object')"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customers_plus_1.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5a9398f-72fc-4548-9f53-b20b372144b2",
"metadata": {},
"outputs": [],
"source": [
"customers_plus_1.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7467ddbe-0bd4-44cc-8a16-84aa41853638",
"metadata": {},
"outputs": [],
"source": [
"customers_plus_1['id'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e15f05f8-3a89-4fc3-84a9-dae70e168440",
"metadata": {},
"outputs": [],
"source": [
"customers_plus_2['id'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b40a653e-013f-48d0-8b57-0284587b36c5",
"metadata": {},
"outputs": [],
"source": [
"common_id = set(customers_plus_2['id']).intersection(customers_plus_1['id'])"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "32fa2215-3c79-40b5-8643-755865959fc7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"common_id = set(customers_plus_2['id']).intersection(customers_plus_1['id'])\n",
"# Exemple id commun = caractéristiques communes\n",
"print(customers_plus_2[customers_plus_2['id'] == list(common_id)[0]])\n",
"\n",
"print(customers_plus_1[customers_plus_1['id'] == list(common_id)[0]])"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "0eb345e4-69f5-4e16-ac57-e33674c6c43d",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id 0.000000\n",
"lastname 43.461341\n",
"firstname 44.995588\n",
"birthdate 96.419870\n",
"email 8.622075\n",
"street_id 0.000000\n",
"created_at 0.000000\n",
"updated_at 0.000000\n",
"civility 100.000000\n",
"is_partner 0.000000\n",
"extra 100.000000\n",
"deleted_at 100.000000\n",
"reference 100.000000\n",
"gender 0.000000\n",
"is_email_true 0.000000\n",
"extra_field 100.000000\n",
"identifier 0.000000\n",
"opt_in 0.000000\n",
"structure_id 88.072380\n",
"note 99.403421\n",
"profession 95.913503\n",
"language 99.280945\n",
"mcp_contact_id 34.876141\n",
"need_reload 0.000000\n",
"last_buying_date 51.653431\n",
"max_price 51.653431\n",
"ticket_sum 0.000000\n",
"average_price 8.639195\n",
"fidelity 0.000000\n",
"average_purchase_delay 51.653431\n",
"average_price_basket 51.653431\n",
"average_ticket_basket 51.653431\n",
"total_price 43.014236\n",
"preferred_category 100.000000\n",
"preferred_supplier 100.000000\n",
"preferred_formula 100.000000\n",
"purchase_count 0.000000\n",
"first_buying_date 51.653431\n",
"last_visiting_date 100.000000\n",
"zipcode 71.176564\n",
"country 5.459418\n",
"age 96.419870\n",
"tenant_id 0.000000\n",
"dtype: float64\n"
]
}
],
"source": [
"pd.DataFrame(customers_plus_1.isna().mean()*100)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "6f6ce60d-0912-497d-9108-330acccef394",
"metadata": {},
"outputs": [],
"source": [
"# Chargement de toutes les données\n",
"liste_base = ['customer_target_mappings', 'customersplus', 'target_types', 'tags', 'events', 'tickets', 'representations', 'purchases', 'products']\n",
"\n",
"for nom_base in liste_base:\n",
" FILE_PATH_S3 = 'bdc2324-data/11/11' + nom_base + '.csv'\n",
" with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "fa8ee17d-5092-40ac-8a0a-3790b016dd4e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>lastname</th>\n",
" <th>firstname</th>\n",
" <th>birthdate</th>\n",
" <th>email</th>\n",
" <th>street_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>civility</th>\n",
" <th>is_partner</th>\n",
" <th>...</th>\n",
" <th>tenant_id</th>\n",
" <th>id_x</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_date</th>\n",
" <th>type_of</th>\n",
" <th>is_from_subscription</th>\n",
" <th>amount</th>\n",
" <th>is_full_price</th>\n",
" <th>start_date_time</th>\n",
" <th>event_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>405082</td>\n",
" <td>lastname405082</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6</td>\n",
" <td>2023-01-12 06:30:31.197484+01:00</td>\n",
" <td>2023-01-12 06:30:31.197484+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1556</td>\n",
" <td>992423</td>\n",
" <td>405082</td>\n",
" <td>2023-01-11 17:08:41+01:00</td>\n",
" <td>3</td>\n",
" <td>False</td>\n",
" <td>13.0</td>\n",
" <td>False</td>\n",
" <td>2023-02-06 20:00:00+01:00</td>\n",
" <td>zaide</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>405082</td>\n",
" <td>lastname405082</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6</td>\n",
" <td>2023-01-12 06:30:31.197484+01:00</td>\n",
" <td>2023-01-12 06:30:31.197484+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1556</td>\n",
" <td>992423</td>\n",
" <td>405082</td>\n",
" <td>2023-01-11 17:08:41+01:00</td>\n",
" <td>3</td>\n",
" <td>False</td>\n",
" <td>13.0</td>\n",
" <td>False</td>\n",
" <td>2023-02-06 20:00:00+01:00</td>\n",
" <td>zaide</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>411168</td>\n",
" <td>lastname411168</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6</td>\n",
" <td>2023-03-17 06:30:35.431967+01:00</td>\n",
" <td>2023-03-17 06:30:35.431967+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1556</td>\n",
" <td>1053934</td>\n",
" <td>411168</td>\n",
" <td>2023-03-16 16:23:10+01:00</td>\n",
" <td>3</td>\n",
" <td>False</td>\n",
" <td>62.0</td>\n",
" <td>False</td>\n",
" <td>2023-03-19 16:00:00+01:00</td>\n",
" <td>luisa miller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>411168</td>\n",
" <td>lastname411168</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6</td>\n",
" <td>2023-03-17 06:30:35.431967+01:00</td>\n",
" <td>2023-03-17 06:30:35.431967+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1556</td>\n",
" <td>1053934</td>\n",
" <td>411168</td>\n",
" <td>2023-03-16 16:23:10+01:00</td>\n",
" <td>3</td>\n",
" <td>False</td>\n",
" <td>62.0</td>\n",
" <td>False</td>\n",
" <td>2023-03-19 16:00:00+01:00</td>\n",
" <td>luisa miller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4380</td>\n",
" <td>lastname4380</td>\n",
" <td>firstname4380</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>2021-04-22 14:51:55.432952+02:00</td>\n",
" <td>2022-04-14 11:41:33.738500+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1556</td>\n",
" <td>1189141</td>\n",
" <td>4380</td>\n",
" <td>2020-11-26 13:12:53+01:00</td>\n",
" <td>3</td>\n",
" <td>False</td>\n",
" <td>51.3</td>\n",
" <td>False</td>\n",
" <td>2020-12-01 20:00:00+01:00</td>\n",
" <td>iphigenie en tauride</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>318964</th>\n",
" <td>19095</td>\n",
" <td>lastname19095</td>\n",
" <td>firstname19095</td>\n",
" <td>1979-07-16</td>\n",
" <td>email19095</td>\n",
" <td>6</td>\n",
" <td>2021-04-22 15:06:30.120537+02:00</td>\n",
" <td>2023-09-12 18:27:36.904104+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1556</td>\n",
" <td>1090839</td>\n",
" <td>19095</td>\n",
" <td>2019-05-19 21:18:36+02:00</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" <td>4.5</td>\n",
" <td>False</td>\n",
" <td>2019-05-27 20:00:00+02:00</td>\n",
" <td>entre femmes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>318965</th>\n",
" <td>19095</td>\n",
" <td>lastname19095</td>\n",
" <td>firstname19095</td>\n",
" <td>1979-07-16</td>\n",
" <td>email19095</td>\n",
" <td>6</td>\n",
" <td>2021-04-22 15:06:30.120537+02:00</td>\n",
" <td>2023-09-12 18:27:36.904104+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1556</td>\n",
" <td>1090839</td>\n",
" <td>19095</td>\n",
" <td>2019-05-19 21:18:36+02:00</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" <td>4.5</td>\n",
" <td>False</td>\n",
" <td>2019-05-27 20:00:00+02:00</td>\n",
" <td>entre femmes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>318966</th>\n",
" <td>19095</td>\n",
" <td>lastname19095</td>\n",
" <td>firstname19095</td>\n",
" <td>1979-07-16</td>\n",
" <td>email19095</td>\n",
" <td>6</td>\n",
" <td>2021-04-22 15:06:30.120537+02:00</td>\n",
" <td>2023-09-12 18:27:36.904104+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1556</td>\n",
" <td>1090839</td>\n",
" <td>19095</td>\n",
" <td>2019-05-19 21:18:36+02:00</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" <td>4.5</td>\n",
" <td>False</td>\n",
" <td>2019-05-27 20:00:00+02:00</td>\n",
" <td>entre femmes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>318967</th>\n",
" <td>19095</td>\n",
" <td>lastname19095</td>\n",
" <td>firstname19095</td>\n",
" <td>1979-07-16</td>\n",
" <td>email19095</td>\n",
" <td>6</td>\n",
" <td>2021-04-22 15:06:30.120537+02:00</td>\n",
" <td>2023-09-12 18:27:36.904104+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1556</td>\n",
" <td>1244277</td>\n",
" <td>19095</td>\n",
" <td>2019-12-31 11:04:07+01:00</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" <td>5.5</td>\n",
" <td>False</td>\n",
" <td>2020-02-03 20:00:00+01:00</td>\n",
" <td>a boire et a manger</td>\n",
" </tr>\n",
" <tr>\n",
" <th>318968</th>\n",
" <td>19095</td>\n",
" <td>lastname19095</td>\n",
" <td>firstname19095</td>\n",
" <td>1979-07-16</td>\n",
" <td>email19095</td>\n",
" <td>6</td>\n",
" <td>2021-04-22 15:06:30.120537+02:00</td>\n",
" <td>2023-09-12 18:27:36.904104+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1556</td>\n",
" <td>1244277</td>\n",
" <td>19095</td>\n",
" <td>2019-12-31 11:04:07+01:00</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" <td>5.5</td>\n",
" <td>False</td>\n",
" <td>2020-02-03 20:00:00+01:00</td>\n",
" <td>a boire et a manger</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>318969 rows × 52 columns</p>\n",
"</div>"
],
"text/plain": [
" id lastname firstname birthdate email \\\n",
"0 405082 lastname405082 NaN NaN NaN \n",
"1 405082 lastname405082 NaN NaN NaN \n",
"2 411168 lastname411168 NaN NaN NaN \n",
"3 411168 lastname411168 NaN NaN NaN \n",
"4 4380 lastname4380 firstname4380 NaN NaN \n",
"... ... ... ... ... ... \n",
"318964 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
"318965 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
"318966 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
"318967 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
"318968 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
"\n",
" street_id created_at \\\n",
"0 6 2023-01-12 06:30:31.197484+01:00 \n",
"1 6 2023-01-12 06:30:31.197484+01:00 \n",
"2 6 2023-03-17 06:30:35.431967+01:00 \n",
"3 6 2023-03-17 06:30:35.431967+01:00 \n",
"4 1 2021-04-22 14:51:55.432952+02:00 \n",
"... ... ... \n",
"318964 6 2021-04-22 15:06:30.120537+02:00 \n",
"318965 6 2021-04-22 15:06:30.120537+02:00 \n",
"318966 6 2021-04-22 15:06:30.120537+02:00 \n",
"318967 6 2021-04-22 15:06:30.120537+02:00 \n",
"318968 6 2021-04-22 15:06:30.120537+02:00 \n",
"\n",
" updated_at civility is_partner ... \\\n",
"0 2023-01-12 06:30:31.197484+01:00 NaN False ... \n",
"1 2023-01-12 06:30:31.197484+01:00 NaN False ... \n",
"2 2023-03-17 06:30:35.431967+01:00 NaN False ... \n",
"3 2023-03-17 06:30:35.431967+01:00 NaN False ... \n",
"4 2022-04-14 11:41:33.738500+02:00 NaN False ... \n",
"... ... ... ... ... \n",
"318964 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
"318965 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
"318966 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
"318967 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
"318968 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
"\n",
" tenant_id id_x customer_id purchase_date type_of \\\n",
"0 1556 992423 405082 2023-01-11 17:08:41+01:00 3 \n",
"1 1556 992423 405082 2023-01-11 17:08:41+01:00 3 \n",
"2 1556 1053934 411168 2023-03-16 16:23:10+01:00 3 \n",
"3 1556 1053934 411168 2023-03-16 16:23:10+01:00 3 \n",
"4 1556 1189141 4380 2020-11-26 13:12:53+01:00 3 \n",
"... ... ... ... ... ... \n",
"318964 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n",
"318965 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n",
"318966 1556 1090839 19095 2019-05-19 21:18:36+02:00 1 \n",
"318967 1556 1244277 19095 2019-12-31 11:04:07+01:00 1 \n",
"318968 1556 1244277 19095 2019-12-31 11:04:07+01:00 1 \n",
"\n",
" is_from_subscription amount is_full_price start_date_time \\\n",
"0 False 13.0 False 2023-02-06 20:00:00+01:00 \n",
"1 False 13.0 False 2023-02-06 20:00:00+01:00 \n",
"2 False 62.0 False 2023-03-19 16:00:00+01:00 \n",
"3 False 62.0 False 2023-03-19 16:00:00+01:00 \n",
"4 False 51.3 False 2020-12-01 20:00:00+01:00 \n",
"... ... ... ... ... \n",
"318964 False 4.5 False 2019-05-27 20:00:00+02:00 \n",
"318965 False 4.5 False 2019-05-27 20:00:00+02:00 \n",
"318966 False 4.5 False 2019-05-27 20:00:00+02:00 \n",
"318967 False 5.5 False 2020-02-03 20:00:00+01:00 \n",
"318968 False 5.5 False 2020-02-03 20:00:00+01:00 \n",
"\n",
" event_name \n",
"0 zaide \n",
"1 zaide \n",
"2 luisa miller \n",
"3 luisa miller \n",
"4 iphigenie en tauride \n",
"... ... \n",
"318964 entre femmes \n",
"318965 entre femmes \n",
"318966 entre femmes \n",
"318967 a boire et a manger \n",
"318968 a boire et a manger \n",
"\n",
"[318969 rows x 52 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Jointure\n",
"merge_1 = pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')[['id_x', 'customer_id','product_id', 'purchase_date', 'type_of', 'is_from_subscription']]\n",
"merge_2 = pd.merge(products, merge_1, left_on='id', right_on='product_id', how='inner')[['id_x', 'customer_id', 'representation_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price']]\n",
"merge_3 = pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')[['id_x', 'customer_id', 'event_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time']]\n",
"merge_4 = pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')[['id_x', 'customer_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time', 'name']]\n",
"merge_4 = merge_4.rename(columns={'name': 'event_name'})\n",
"df_customer_event = pd.merge(customersplus, merge_4, left_on = 'id', right_on = 'customer_id', how = 'inner')[['id_x', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time', 'event_name']]\n",
"df_customer_event"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -6103,6 +6103,403 @@
"representation_theme.head()"
]
},
{
"cell_type": "markdown",
"id": "e274e3cc-1b41-43e0-8412-1563166060cb",
"metadata": {},
"source": [
"## Price Table"
]
},
{
"cell_type": "code",
"execution_count": 112,
"id": "c52621e7-01de-48dc-b572-2974542a8be5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : bdc2324-data/1/1product_packs.csv\n",
"Shape : (1, 6)\n",
"Number of columns : 4\n",
"Columns : Index(['id', 'identifier', 'name', 'type_of'], dtype='object')\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>type_of</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id name type_of\n",
"0 1 NaN 0"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"product_packs = load_dataset(\"1product_packs.csv\")\n",
"product_packs.head()"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "9e4f60ab-9a2c-4090-b0c4-f9a1530b2d39",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : bdc2324-data/1/1pricing_formulas.csv\n",
"Shape : (556, 6)\n",
"Number of columns : 4\n",
"Columns : Index(['id', 'identifier', 'name', 'extra_field'], dtype='object')\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>extra_field</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>41909</td>\n",
" <td>visite mécènes 1h30</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>502</td>\n",
" <td>entree mucem tp( expo picasso)</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>504</td>\n",
" <td>nombre de personnes cinema</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>117</td>\n",
" <td>spectacle tarif e famille tr</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1496</td>\n",
" <td>billet nb famille mecene 1a</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id name extra_field\n",
"0 41909 visite mécènes 1h30 NaN\n",
"1 502 entree mucem tp( expo picasso) NaN\n",
"2 504 nombre de personnes cinema NaN\n",
"3 117 spectacle tarif e famille tr NaN\n",
"4 1496 billet nb famille mecene 1a NaN"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pricing_formula = load_dataset(\"1pricing_formulas.csv\")\n",
"pricing_formula.head()"
]
},
{
"cell_type": "code",
"execution_count": 115,
"id": "247b5c45-a18a-4cfd-86b4-d3453e157bcd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : bdc2324-data/1/1type_of_pricing_formulas.csv\n",
"Shape : (568, 6)\n",
"Number of columns : 4\n",
"Columns : Index(['id', 'type_of_id', 'pricing_formula_id', 'identifier'], dtype='object')\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>type_of_id</th>\n",
" <th>pricing_formula_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>127</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2937</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id type_of_id pricing_formula_id\n",
"0 1 1 127\n",
"1 2 1 2425\n",
"2 3 1 2937\n",
"3 4 1 48\n",
"4 5 1 7"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type_pricing_formula = load_dataset(\"1type_of_pricing_formulas.csv\")\n",
"type_pricing_formula.head()"
]
},
{
"cell_type": "code",
"execution_count": 117,
"id": "4b48f7b3-0f06-4ef6-9355-5016af82f49c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : bdc2324-data/1/1products_groups.csv\n",
"Shape : (92973, 9)\n",
"Number of columns : 7\n",
"Columns : Index(['id', 'category_id', 'pricing_formula_id', 'representation_id',\n",
" 'percent_price', 'max_price', 'min_price'],\n",
" dtype='object')\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>category_id</th>\n",
" <th>pricing_formula_id</th>\n",
" <th>representation_id</th>\n",
" <th>percent_price</th>\n",
" <th>max_price</th>\n",
" <th>min_price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2735</td>\n",
" <td>8</td>\n",
" <td>97</td>\n",
" <td>1534</td>\n",
" <td>100.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>156773</td>\n",
" <td>5</td>\n",
" <td>9</td>\n",
" <td>82519</td>\n",
" <td>100.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>14387</td>\n",
" <td>16</td>\n",
" <td>79</td>\n",
" <td>8046</td>\n",
" <td>100.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2770</td>\n",
" <td>2</td>\n",
" <td>37</td>\n",
" <td>1563</td>\n",
" <td>100.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>27179</td>\n",
" <td>13</td>\n",
" <td>119</td>\n",
" <td>14192</td>\n",
" <td>100.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id category_id pricing_formula_id representation_id percent_price \\\n",
"0 2735 8 97 1534 100.0 \n",
"1 156773 5 9 82519 100.0 \n",
"2 14387 16 79 8046 100.0 \n",
"3 2770 2 37 1563 100.0 \n",
"4 27179 13 119 14192 100.0 \n",
"\n",
" max_price min_price \n",
"0 0.0 0.0 \n",
"1 0.0 0.0 \n",
"2 0.0 0.0 \n",
"3 0.0 0.0 \n",
"4 0.0 0.0 "
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"product_groups = load_dataset(\"1products_groups.csv\")\n",
"product_groups.head()"
]
},
{
"cell_type": "markdown",
"id": "71c26a38-6818-42df-8aee-0135681a5563",
@ -6741,6 +7138,9 @@
"outputs": [],
"source": [
"def uniform_product_df():\n",
" \"\"\"\n",
" This function returns the uniform product dataset\n",
" \"\"\"\n",
" print(\"Products theme columns : \", products_theme.columns)\n",
" print(\"\\n Representation theme columns : \", representation_theme.columns)\n",
" print(\"\\n Events theme columns : \", events_theme.columns)\n",

1760
TP_merge_tables_clean.ipynb Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff