This commit is contained in:
Antoine JOUBREL 2024-01-02 18:06:00 +00:00
parent 91b4195078
commit 363d03c0b8

View File

@ -29,7 +29,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 2,
"id": "5d83bb1a-d341-446e-91f6-1c428607f6d4", "id": "5d83bb1a-d341-446e-91f6-1c428607f6d4",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -51,7 +51,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23, "execution_count": 3,
"id": "699664b9-eee4-4f8d-a207-e524526560c5", "id": "699664b9-eee4-4f8d-a207-e524526560c5",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -87,7 +87,7 @@
" 'bdc2324-data/2/2tickets.csv']" " 'bdc2324-data/2/2tickets.csv']"
] ]
}, },
"execution_count": 23, "execution_count": 3,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -107,7 +107,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 4,
"id": "d22aa131-5069-43d4-a42e-24f38cc7240d", "id": "d22aa131-5069-43d4-a42e-24f38cc7240d",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -1551,6 +1551,472 @@
"source": [ "source": [
"# But : lier les caractéristiques socio-demo et les comportements d'achat\n" "# But : lier les caractéristiques socio-demo et les comportements d'achat\n"
] ]
},
{
"cell_type": "markdown",
"id": "b88808fe-3b4e-49ed-9885-d52910b6f211",
"metadata": {},
"source": [
"## Types d'événement et client"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ecb03a47-1418-4fb1-8c78-cd222d38b7fd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'created_at', 'updated_at', 'season_id', 'facility_id', 'name',\n",
" 'event_type_id', 'manual_added', 'is_display', 'event_type_key_id',\n",
" 'facility_key_id', 'identifier'],\n",
" dtype='object')\n",
"(403, 12)\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 403 entries, 0 to 402\n",
"Data columns (total 12 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 403 non-null int64 \n",
" 1 created_at 403 non-null object\n",
" 2 updated_at 403 non-null object\n",
" 3 season_id 403 non-null int64 \n",
" 4 facility_id 403 non-null int64 \n",
" 5 name 403 non-null object\n",
" 6 event_type_id 403 non-null int64 \n",
" 7 manual_added 403 non-null bool \n",
" 8 is_display 403 non-null bool \n",
" 9 event_type_key_id 403 non-null int64 \n",
" 10 facility_key_id 403 non-null int64 \n",
" 11 identifier 403 non-null object\n",
"dtypes: bool(2), int64(6), object(4)\n",
"memory usage: 32.4+ KB\n"
]
}
],
"source": [
"# Evenement = events.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11events.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" events = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(events.columns)\n",
"print(events.shape)\n",
"events.info()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "19706610-9e90-4e6f-8bd0-da124b87cff7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>season_id</th>\n",
" <th>facility_id</th>\n",
" <th>name</th>\n",
" <th>event_type_id</th>\n",
" <th>manual_added</th>\n",
" <th>is_display</th>\n",
" <th>event_type_key_id</th>\n",
" <th>facility_key_id</th>\n",
" <th>identifier</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20367</td>\n",
" <td>2023-09-13 03:42:45.214293+02:00</td>\n",
" <td>2023-09-13 03:54:30.086969+02:00</td>\n",
" <td>1865</td>\n",
" <td>1054</td>\n",
" <td>marelle</td>\n",
" <td>1055</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>1055</td>\n",
" <td>1054</td>\n",
" <td>26d1e9a4acad18b9cf79244334c86c93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20371</td>\n",
" <td>2023-09-13 03:42:45.218728+02:00</td>\n",
" <td>2023-09-13 03:54:30.103943+02:00</td>\n",
" <td>1865</td>\n",
" <td>1054</td>\n",
" <td>dialogues</td>\n",
" <td>1055</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>1055</td>\n",
" <td>1054</td>\n",
" <td>60356fc5e8ed6c9c1be9c5ec67e77766</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>20570</td>\n",
" <td>2023-10-05 04:48:29.374504+02:00</td>\n",
" <td>2023-10-05 04:48:36.562528+02:00</td>\n",
" <td>1865</td>\n",
" <td>1054</td>\n",
" <td>les grandes epopees</td>\n",
" <td>1055</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>1055</td>\n",
" <td>1054</td>\n",
" <td>f8ab088e06252bf34e1b12ad2ce1a403</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>20757</td>\n",
" <td>2023-11-01 03:55:20.846196+01:00</td>\n",
" <td>2023-11-01 03:55:28.412457+01:00</td>\n",
" <td>1865</td>\n",
" <td>1054</td>\n",
" <td>scolaire marelle</td>\n",
" <td>1055</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>1055</td>\n",
" <td>1054</td>\n",
" <td>447fa80f9a793b7587bb85ebbda6442c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20364</td>\n",
" <td>2023-09-13 03:42:45.196791+02:00</td>\n",
" <td>2023-09-13 03:54:30.075456+02:00</td>\n",
" <td>1865</td>\n",
" <td>1054</td>\n",
" <td>le couronnement de poppee</td>\n",
" <td>1055</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>1055</td>\n",
" <td>1054</td>\n",
" <td>3b37f5d2cd354cbc422868621ac7ebc2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>398</th>\n",
" <td>15603</td>\n",
" <td>2023-09-12 17:42:25.327618+02:00</td>\n",
" <td>2023-09-12 19:00:00.893400+02:00</td>\n",
" <td>1706</td>\n",
" <td>1054</td>\n",
" <td>marelle</td>\n",
" <td>1055</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>1055</td>\n",
" <td>1054</td>\n",
" <td>fde88b72fb82b1fe42fbbfbfc3d6b4d3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>399</th>\n",
" <td>15621</td>\n",
" <td>2023-09-12 17:42:25.335792+02:00</td>\n",
" <td>2023-09-12 19:00:00.899622+02:00</td>\n",
" <td>1708</td>\n",
" <td>1054</td>\n",
" <td>cartes d'adhesion</td>\n",
" <td>1055</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>1055</td>\n",
" <td>1054</td>\n",
" <td>051b96aad2b720bad4450a59ed7dfbf6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>400</th>\n",
" <td>15740</td>\n",
" <td>2023-09-12 17:47:05.112101+02:00</td>\n",
" <td>2023-09-12 19:00:00.906123+02:00</td>\n",
" <td>1711</td>\n",
" <td>1054</td>\n",
" <td>repetition le medecin malgre lui</td>\n",
" <td>1055</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>1055</td>\n",
" <td>1054</td>\n",
" <td>addd6885bea5ddf60ec3539dfc3e79e8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>401</th>\n",
" <td>15520</td>\n",
" <td>2023-09-12 17:42:25.290280+02:00</td>\n",
" <td>2023-09-12 19:00:00.835625+02:00</td>\n",
" <td>1708</td>\n",
" <td>1054</td>\n",
" <td>opera au village</td>\n",
" <td>1055</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>1055</td>\n",
" <td>1054</td>\n",
" <td>94f250d10d4a56358ceab23b384439ff</td>\n",
" </tr>\n",
" <tr>\n",
" <th>402</th>\n",
" <td>15439</td>\n",
" <td>2023-09-12 17:42:25.252747+02:00</td>\n",
" <td>2023-09-12 19:00:00.735990+02:00</td>\n",
" <td>1708</td>\n",
" <td>1054</td>\n",
" <td>florilege</td>\n",
" <td>1055</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>1055</td>\n",
" <td>1054</td>\n",
" <td>4f015946bcbd856aa573cadb7ac42b9f</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>403 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" id created_at \\\n",
"0 20367 2023-09-13 03:42:45.214293+02:00 \n",
"1 20371 2023-09-13 03:42:45.218728+02:00 \n",
"2 20570 2023-10-05 04:48:29.374504+02:00 \n",
"3 20757 2023-11-01 03:55:20.846196+01:00 \n",
"4 20364 2023-09-13 03:42:45.196791+02:00 \n",
".. ... ... \n",
"398 15603 2023-09-12 17:42:25.327618+02:00 \n",
"399 15621 2023-09-12 17:42:25.335792+02:00 \n",
"400 15740 2023-09-12 17:47:05.112101+02:00 \n",
"401 15520 2023-09-12 17:42:25.290280+02:00 \n",
"402 15439 2023-09-12 17:42:25.252747+02:00 \n",
"\n",
" updated_at season_id facility_id \\\n",
"0 2023-09-13 03:54:30.086969+02:00 1865 1054 \n",
"1 2023-09-13 03:54:30.103943+02:00 1865 1054 \n",
"2 2023-10-05 04:48:36.562528+02:00 1865 1054 \n",
"3 2023-11-01 03:55:28.412457+01:00 1865 1054 \n",
"4 2023-09-13 03:54:30.075456+02:00 1865 1054 \n",
".. ... ... ... \n",
"398 2023-09-12 19:00:00.893400+02:00 1706 1054 \n",
"399 2023-09-12 19:00:00.899622+02:00 1708 1054 \n",
"400 2023-09-12 19:00:00.906123+02:00 1711 1054 \n",
"401 2023-09-12 19:00:00.835625+02:00 1708 1054 \n",
"402 2023-09-12 19:00:00.735990+02:00 1708 1054 \n",
"\n",
" name event_type_id manual_added \\\n",
"0 marelle 1055 False \n",
"1 dialogues 1055 False \n",
"2 les grandes epopees 1055 False \n",
"3 scolaire marelle 1055 False \n",
"4 le couronnement de poppee 1055 False \n",
".. ... ... ... \n",
"398 marelle 1055 False \n",
"399 cartes d'adhesion 1055 False \n",
"400 repetition le medecin malgre lui 1055 False \n",
"401 opera au village 1055 False \n",
"402 florilege 1055 False \n",
"\n",
" is_display event_type_key_id facility_key_id \\\n",
"0 True 1055 1054 \n",
"1 True 1055 1054 \n",
"2 True 1055 1054 \n",
"3 True 1055 1054 \n",
"4 True 1055 1054 \n",
".. ... ... ... \n",
"398 True 1055 1054 \n",
"399 True 1055 1054 \n",
"400 True 1055 1054 \n",
"401 True 1055 1054 \n",
"402 True 1055 1054 \n",
"\n",
" identifier \n",
"0 26d1e9a4acad18b9cf79244334c86c93 \n",
"1 60356fc5e8ed6c9c1be9c5ec67e77766 \n",
"2 f8ab088e06252bf34e1b12ad2ce1a403 \n",
"3 447fa80f9a793b7587bb85ebbda6442c \n",
"4 3b37f5d2cd354cbc422868621ac7ebc2 \n",
".. ... \n",
"398 fde88b72fb82b1fe42fbbfbfc3d6b4d3 \n",
"399 051b96aad2b720bad4450a59ed7dfbf6 \n",
"400 addd6885bea5ddf60ec3539dfc3e79e8 \n",
"401 94f250d10d4a56358ceab23b384439ff \n",
"402 4f015946bcbd856aa573cadb7ac42b9f \n",
"\n",
"[403 rows x 12 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Render the events DataFrame as this cell's result (the stored output is a truncated view of 403 rows x 12 columns).\n",
"events"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a52b0973-be86-4661-86f3-f433d0987f00",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c10297e8-a8f9-45f9-8553-17e3fdb6f8c1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'name', 'created_at', 'updated_at', 'fidelity_delay',\n",
" 'identifier'],\n",
" dtype='object')\n",
"(1, 6)\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 1 entries, 0 to 0\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 1 non-null int64 \n",
" 1 name 0 non-null float64\n",
" 2 created_at 1 non-null object \n",
" 3 updated_at 1 non-null object \n",
" 4 fidelity_delay 1 non-null int64 \n",
" 5 identifier 1 non-null object \n",
"dtypes: float64(1), int64(2), object(3)\n",
"memory usage: 176.0+ bytes\n"
]
}
],
"source": [
"# Type d'évenement = representation_types.csv\n",
"FILE_PATH_S3 = 'bdc2324-data/11/11representation_types.csv'\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" representation_types = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(representation_types.columns)\n",
"print(representation_types.shape)\n",
"representation_types.info()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "41ef6a1b-e99e-4c73-a2ae-ba7d438d90c2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>fidelity_delay</th>\n",
" <th>identifier</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1055</td>\n",
" <td>NaN</td>\n",
" <td>2023-09-12 17:42:25.216901+02:00</td>\n",
" <td>2023-09-12 17:42:25.216901+02:00</td>\n",
" <td>36</td>\n",
" <td>d41d8cd98f00b204e9800998ecf8427e</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id name created_at \\\n",
"0 1055 NaN 2023-09-12 17:42:25.216901+02:00 \n",
"\n",
" updated_at fidelity_delay \\\n",
"0 2023-09-12 17:42:25.216901+02:00 36 \n",
"\n",
" identifier \n",
"0 d41d8cd98f00b204e9800998ecf8427e "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"event_types"
]
} }
], ],
"metadata": { "metadata": {