Exploration suppliers.csv
This commit is contained in:
parent
ce65bf37ff
commit
d508eb0173
|
@ -10,13 +10,16 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 12,
|
||||
"id": "15103481-8d74-404c-aa09-7601fe7730da",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n"
|
||||
"import numpy as np\n",
|
||||
"import os\n",
|
||||
"import s3fs\n",
|
||||
"import re"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -29,16 +32,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 13,
|
||||
"id": "5d83bb1a-d341-446e-91f6-1c428607f6d4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import s3fs\n",
|
||||
"# Create filesystem object\n",
|
||||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n"
|
||||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -46,69 +47,500 @@
|
|||
"id": "f99da24f-0d93-4618-92bc-3ba81dc0445c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Exemple sur bdc2324-data/11"
|
||||
"# Exemple sur Company 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9d74b68f-ba07-4a15-9a27-dae931762d70",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chargement données"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 14,
|
||||
"id": "699664b9-eee4-4f8d-a207-e524526560c5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"BUCKET = \"bdc2324-data/1\"\n",
|
||||
"liste_database = fs.ls(BUCKET)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "0cb92854-903b-4efd-ac1b-197e29f044b4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['bdc2324-data/1/1campaign_stats.csv', 'bdc2324-data/1/1campaigns.csv', 'bdc2324-data/1/1customer_target_mappings.csv', 'bdc2324-data/1/1customersplus.csv', 'bdc2324-data/1/1event_types.csv', 'bdc2324-data/1/1events.csv', 'bdc2324-data/1/1product_packs.csv', 'bdc2324-data/1/1products.csv', 'bdc2324-data/1/1products_groups.csv', 'bdc2324-data/1/1purchases.csv', 'bdc2324-data/1/1suppliers.csv', 'bdc2324-data/1/1target_types.csv', 'bdc2324-data/1/1targets.csv', 'bdc2324-data/1/1tickets.csv']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"liste_database_select = ['suppliers', 'ticket', 'purchase', 'consumption', 'customer', 'event', 'target', 'prod', 'campa']\n",
|
||||
"\n",
|
||||
"# Filtrer la liste pour les éléments contenant au moins un élément de la liste à tester\n",
|
||||
"liste_database_filtered = [element for element in liste_database if any(element_part in element for element_part in liste_database_select)]\n",
|
||||
"\n",
|
||||
"# Afficher le résultat\n",
|
||||
"print(liste_database_filtered)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_4561/4135596479.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||||
" df = pd.read_csv(file_in)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# loop to create dataframes from file 2\n",
|
||||
"files_path = liste_database_filtered\n",
|
||||
"\n",
|
||||
"client_number = files_path[0].split(\"/\")[1]\n",
|
||||
"df_prefix = \"df\" + str(client_number) + \"_\"\n",
|
||||
"\n",
|
||||
"for i in range(len(files_path)) :\n",
|
||||
" current_path = files_path[i]\n",
|
||||
" with fs.open(current_path, mode=\"rb\") as file_in:\n",
|
||||
" df = pd.read_csv(file_in)\n",
|
||||
" # the pattern of the name is df1xxx\n",
|
||||
" nom_dataframe = df_prefix + re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', current_path).group(3)\n",
|
||||
" globals()[nom_dataframe] = df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "7d1da9df-f423-4a9f-a2a6-6d8ceeab1c34",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"_\n",
|
||||
"__\n",
|
||||
"___\n",
|
||||
"df\n",
|
||||
"df1_purchases\n",
|
||||
"df1_suppliers\n",
|
||||
"df1_tickets\n",
|
||||
"dataframe\n",
|
||||
"_7\n",
|
||||
"_10\n",
|
||||
"_11\n",
|
||||
"_18\n",
|
||||
"_20\n",
|
||||
"df1_customer_target_mappings\n",
|
||||
"df1_customersplus\n",
|
||||
"df1_event_types\n",
|
||||
"df1_events\n",
|
||||
"df1_target_types\n",
|
||||
"df1_targets\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Obtenir toutes les variables globales\n",
|
||||
"variables_globales = globals()\n",
|
||||
"\n",
|
||||
"# Filtrer les variables pour obtenir uniquement les DataFrames\n",
|
||||
"dataframes = {nom: variable for nom, variable in variables_globales.items() if isinstance(variable, pd.DataFrame)}\n",
|
||||
"\n",
|
||||
"# Afficher les noms et les DataFrames\n",
|
||||
"for nom, dataframe in dataframes.items():\n",
|
||||
" print(f\"{nom}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "78453f3c-4f89-44ed-a6c6-2a7443b72b52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## suppliers.csv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "2e0dada0-9457-484c-aa55-77e44613ecca",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>id</th>\n",
|
||||
" <th>name</th>\n",
|
||||
" <th>manually_added</th>\n",
|
||||
" <th>label</th>\n",
|
||||
" <th>itr</th>\n",
|
||||
" <th>updated_at</th>\n",
|
||||
" <th>created_at</th>\n",
|
||||
" <th>commission</th>\n",
|
||||
" <th>identifier</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1617</td>\n",
|
||||
" <td>j4 administration</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2021-07-29 09:21:37.325772+02:00</td>\n",
|
||||
" <td>2021-07-29 09:21:37.325772+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>5958b2a060ac3e31678b438892a1bd2e</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>8</td>\n",
|
||||
" <td>non défini</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2020-09-03 13:16:35.329062+02:00</td>\n",
|
||||
" <td>2020-09-03 13:16:35.329062+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>52ff3466787b4d538407372e5f7afe0f</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>vad</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2020-09-03 13:11:23.896992+02:00</td>\n",
|
||||
" <td>2020-09-03 13:11:23.896992+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1225483c97b36018cab2bea14ab78ea6</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>fort saint jean</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2020-09-03 13:11:23.833073+02:00</td>\n",
|
||||
" <td>2020-09-03 13:11:23.833073+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>001b9b4a524fe407150b8235b304d4ec</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>j4</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2020-09-03 13:11:23.888993+02:00</td>\n",
|
||||
" <td>2020-09-03 13:11:23.888993+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>6a0cf6edf20060344b465706b61719aa</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>revendeur</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2020-09-03 13:11:23.900987+02:00</td>\n",
|
||||
" <td>2020-09-03 13:11:23.900987+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>931239d4acb6214d7e5c98edecfb4916</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6</th>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>vente en ligne</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2020-09-03 13:11:23.893097+02:00</td>\n",
|
||||
" <td>2020-09-03 13:11:23.893097+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>bde8f2ccff510df8572d3214d86b837d</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>7</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>ccr</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2020-09-03 13:11:23.904974+02:00</td>\n",
|
||||
" <td>2020-09-03 13:11:23.904974+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>b48ec279411f7dbbb68393c61a9724d9</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>8</th>\n",
|
||||
" <td>7</td>\n",
|
||||
" <td>dab</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2020-09-03 13:11:23.908970+02:00</td>\n",
|
||||
" <td>2020-09-03 13:11:23.908970+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>11c6d471fa4e354e62e684d293694202</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"['bdc2324-data/11/11campaign_stats.csv',\n",
|
||||
" 'bdc2324-data/11/11campaigns.csv',\n",
|
||||
" 'bdc2324-data/11/11categories.csv',\n",
|
||||
" 'bdc2324-data/11/11countries.csv',\n",
|
||||
" 'bdc2324-data/11/11currencies.csv',\n",
|
||||
" 'bdc2324-data/11/11customer_target_mappings.csv',\n",
|
||||
" 'bdc2324-data/11/11customersplus.csv',\n",
|
||||
" 'bdc2324-data/11/11event_types.csv',\n",
|
||||
" 'bdc2324-data/11/11events.csv',\n",
|
||||
" 'bdc2324-data/11/11facilities.csv',\n",
|
||||
" 'bdc2324-data/11/11link_stats.csv',\n",
|
||||
" 'bdc2324-data/11/11pricing_formulas.csv',\n",
|
||||
" 'bdc2324-data/11/11product_packs.csv',\n",
|
||||
" 'bdc2324-data/11/11products.csv',\n",
|
||||
" 'bdc2324-data/11/11products_groups.csv',\n",
|
||||
" 'bdc2324-data/11/11purchases.csv',\n",
|
||||
" 'bdc2324-data/11/11representation_category_capacities.csv',\n",
|
||||
" 'bdc2324-data/11/11representations.csv',\n",
|
||||
" 'bdc2324-data/11/11seasons.csv',\n",
|
||||
" 'bdc2324-data/11/11structure_tag_mappings.csv',\n",
|
||||
" 'bdc2324-data/11/11suppliers.csv',\n",
|
||||
" 'bdc2324-data/11/11tags.csv',\n",
|
||||
" 'bdc2324-data/11/11target_types.csv',\n",
|
||||
" 'bdc2324-data/11/11targets.csv',\n",
|
||||
" 'bdc2324-data/11/11tickets.csv']"
|
||||
" id name manually_added label itr \\\n",
|
||||
"0 1617 j4 administration False NaN NaN \n",
|
||||
"1 8 non défini False NaN NaN \n",
|
||||
"2 4 vad False NaN NaN \n",
|
||||
"3 1 fort saint jean False NaN NaN \n",
|
||||
"4 2 j4 False NaN NaN \n",
|
||||
"5 5 revendeur False NaN NaN \n",
|
||||
"6 3 vente en ligne False NaN NaN \n",
|
||||
"7 6 ccr False NaN NaN \n",
|
||||
"8 7 dab False NaN NaN \n",
|
||||
"\n",
|
||||
" updated_at created_at \\\n",
|
||||
"0 2021-07-29 09:21:37.325772+02:00 2021-07-29 09:21:37.325772+02:00 \n",
|
||||
"1 2020-09-03 13:16:35.329062+02:00 2020-09-03 13:16:35.329062+02:00 \n",
|
||||
"2 2020-09-03 13:11:23.896992+02:00 2020-09-03 13:11:23.896992+02:00 \n",
|
||||
"3 2020-09-03 13:11:23.833073+02:00 2020-09-03 13:11:23.833073+02:00 \n",
|
||||
"4 2020-09-03 13:11:23.888993+02:00 2020-09-03 13:11:23.888993+02:00 \n",
|
||||
"5 2020-09-03 13:11:23.900987+02:00 2020-09-03 13:11:23.900987+02:00 \n",
|
||||
"6 2020-09-03 13:11:23.893097+02:00 2020-09-03 13:11:23.893097+02:00 \n",
|
||||
"7 2020-09-03 13:11:23.904974+02:00 2020-09-03 13:11:23.904974+02:00 \n",
|
||||
"8 2020-09-03 13:11:23.908970+02:00 2020-09-03 13:11:23.908970+02:00 \n",
|
||||
"\n",
|
||||
" commission identifier \n",
|
||||
"0 NaN 5958b2a060ac3e31678b438892a1bd2e \n",
|
||||
"1 NaN 52ff3466787b4d538407372e5f7afe0f \n",
|
||||
"2 NaN 1225483c97b36018cab2bea14ab78ea6 \n",
|
||||
"3 NaN 001b9b4a524fe407150b8235b304d4ec \n",
|
||||
"4 NaN 6a0cf6edf20060344b465706b61719aa \n",
|
||||
"5 NaN 931239d4acb6214d7e5c98edecfb4916 \n",
|
||||
"6 NaN bde8f2ccff510df8572d3214d86b837d \n",
|
||||
"7 NaN b48ec279411f7dbbb68393c61a9724d9 \n",
|
||||
"8 NaN 11c6d471fa4e354e62e684d293694202 "
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"BUCKET = \"bdc2324-data/11\"\n",
|
||||
"fs.ls(BUCKET)"
|
||||
"# Restriction aux DataFrame : ticket, purchase, consumption, suppliers\n",
|
||||
"df1_suppliers"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "6d6201cd-a00b-4984-bcd8-72838717ad13",
|
||||
"execution_count": 19,
|
||||
"id": "b583be02-ab60-4e14-9325-0204f203a1af",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 9 entries, 0 to 8\n",
|
||||
"Data columns (total 9 columns):\n",
|
||||
" # Column Non-Null Count Dtype \n",
|
||||
"--- ------ -------------- ----- \n",
|
||||
" 0 id 9 non-null int64 \n",
|
||||
" 1 name 9 non-null object \n",
|
||||
" 2 manually_added 9 non-null bool \n",
|
||||
" 3 label 0 non-null float64\n",
|
||||
" 4 itr 0 non-null float64\n",
|
||||
" 5 updated_at 9 non-null object \n",
|
||||
" 6 created_at 9 non-null object \n",
|
||||
" 7 commission 0 non-null float64\n",
|
||||
" 8 identifier 9 non-null object \n",
|
||||
"dtypes: bool(1), float64(3), int64(1), object(4)\n",
|
||||
"memory usage: 713.0+ bytes\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df1_suppliers.info()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "6d7f338e-e4d3-422b-9cdc-dec967c0b28e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>0</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>id</th>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>name</th>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>manually_added</th>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>label</th>\n",
|
||||
" <td>100.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>itr</th>\n",
|
||||
" <td>100.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>updated_at</th>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>created_at</th>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>commission</th>\n",
|
||||
" <td>100.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>identifier</th>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" 0\n",
|
||||
"id 0.0\n",
|
||||
"name 0.0\n",
|
||||
"manually_added 0.0\n",
|
||||
"label 100.0\n",
|
||||
"itr 100.0\n",
|
||||
"updated_at 0.0\n",
|
||||
"created_at 0.0\n",
|
||||
"commission 100.0\n",
|
||||
"identifier 0.0"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pd.DataFrame(df1_suppliers.isna().mean()*100)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "676a9869-9a8b-4cd2-8b1c-0644b5229c72",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## purchases.csv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "14f4158e-c9c0-4beb-826a-5e0f949434a4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "79c9eb43-002e-460d-acb2-206ebb2ab6dd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## tickets.csv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f3c35394-b586-4ae4-b5ab-b03bb01bb618",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Chargement de toutes les données\n",
|
||||
"liste_base = ['customer_target_mappings', 'customersplus', 'target_types', 'tags', 'events', 'tickets', 'representations', 'purchases', 'products']\n",
|
||||
"\n",
|
||||
"for nom_base in liste_base:\n",
|
||||
" FILE_PATH_S3 = 'bdc2324-data/11/11' + nom_base + '.csv'\n",
|
||||
" with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||||
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
|
||||
"df1_purchases\n",
|
||||
"df1_tickets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "355f5489-7904-4161-a85b-6eb70b3a4c89",
|
||||
"metadata": {
|
||||
"jp-MarkdownHeadingCollapsed": true
|
||||
},
|
||||
"source": [
|
||||
"# Fusion et exploration"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue
Block a user