diff --git a/Clean-Notebook.ipynb b/Clean-Notebook.ipynb index ad9d465..ef5984f 100644 --- a/Clean-Notebook.ipynb +++ b/Clean-Notebook.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "id": "5d83bb1a-d341-446e-91f6-1c428607f6d4", "metadata": {}, "outputs": [], @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 3, "id": "699664b9-eee4-4f8d-a207-e524526560c5", "metadata": {}, "outputs": [ @@ -87,7 +87,7 @@ " 'bdc2324-data/2/2tickets.csv']" ] }, - "execution_count": 23, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -107,7 +107,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 4, "id": "d22aa131-5069-43d4-a42e-24f38cc7240d", "metadata": {}, "outputs": [ @@ -1551,6 +1551,472 @@ "source": [ "# But : lier les caractéristiques socio-demo et les comportements d'achat\n" ] + }, + { + "cell_type": "markdown", + "id": "b88808fe-3b4e-49ed-9885-d52910b6f211", + "metadata": {}, + "source": [ + "## Types d'évenement et client" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ecb03a47-1418-4fb1-8c78-cd222d38b7fd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['id', 'created_at', 'updated_at', 'season_id', 'facility_id', 'name',\n", + " 'event_type_id', 'manual_added', 'is_display', 'event_type_key_id',\n", + " 'facility_key_id', 'identifier'],\n", + " dtype='object')\n", + "(403, 12)\n", + "\n", + "RangeIndex: 403 entries, 0 to 402\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 403 non-null int64 \n", + " 1 created_at 403 non-null object\n", + " 2 updated_at 403 non-null object\n", + " 3 season_id 403 non-null int64 \n", + " 4 facility_id 403 non-null int64 \n", + " 5 name 403 non-null object\n", + " 6 event_type_id 403 non-null int64 \n", + " 7 manual_added 403 non-null bool \n", + " 8 is_display 403 non-null bool \n", + " 9 event_type_key_id 403 non-null int64 \n", + " 10 facility_key_id 403 non-null int64 \n", + " 11 identifier 403 non-null object\n", + "dtypes: bool(2), int64(6), object(4)\n", + "memory usage: 32.4+ KB\n" + ] + } + ], + "source": [ + "# Evenement = events.csv\n", + "FILE_PATH_S3 = 'bdc2324-data/11/11events.csv'\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " events = pd.read_csv(file_in, sep=\",\")\n", + "\n", + "print(events.columns)\n", + "print(events.shape)\n", + "events.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "19706610-9e90-4e6f-8bd0-da124b87cff7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcreated_atupdated_atseason_idfacility_idnameevent_type_idmanual_addedis_displayevent_type_key_idfacility_key_ididentifier
0203672023-09-13 03:42:45.214293+02:002023-09-13 03:54:30.086969+02:0018651054marelle1055FalseTrue1055105426d1e9a4acad18b9cf79244334c86c93
1203712023-09-13 03:42:45.218728+02:002023-09-13 03:54:30.103943+02:0018651054dialogues1055FalseTrue1055105460356fc5e8ed6c9c1be9c5ec67e77766
2205702023-10-05 04:48:29.374504+02:002023-10-05 04:48:36.562528+02:0018651054les grandes epopees1055FalseTrue10551054f8ab088e06252bf34e1b12ad2ce1a403
3207572023-11-01 03:55:20.846196+01:002023-11-01 03:55:28.412457+01:0018651054scolaire marelle1055FalseTrue10551054447fa80f9a793b7587bb85ebbda6442c
4203642023-09-13 03:42:45.196791+02:002023-09-13 03:54:30.075456+02:0018651054le couronnement de poppee1055FalseTrue105510543b37f5d2cd354cbc422868621ac7ebc2
.......................................
398156032023-09-12 17:42:25.327618+02:002023-09-12 19:00:00.893400+02:0017061054marelle1055FalseTrue10551054fde88b72fb82b1fe42fbbfbfc3d6b4d3
399156212023-09-12 17:42:25.335792+02:002023-09-12 19:00:00.899622+02:0017081054cartes d'adhesion1055FalseTrue10551054051b96aad2b720bad4450a59ed7dfbf6
400157402023-09-12 17:47:05.112101+02:002023-09-12 19:00:00.906123+02:0017111054repetition le medecin malgre lui1055FalseTrue10551054addd6885bea5ddf60ec3539dfc3e79e8
401155202023-09-12 17:42:25.290280+02:002023-09-12 19:00:00.835625+02:0017081054opera au village1055FalseTrue1055105494f250d10d4a56358ceab23b384439ff
402154392023-09-12 17:42:25.252747+02:002023-09-12 19:00:00.735990+02:0017081054florilege1055FalseTrue105510544f015946bcbd856aa573cadb7ac42b9f
\n", + "

403 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " id created_at \\\n", + "0 20367 2023-09-13 03:42:45.214293+02:00 \n", + "1 20371 2023-09-13 03:42:45.218728+02:00 \n", + "2 20570 2023-10-05 04:48:29.374504+02:00 \n", + "3 20757 2023-11-01 03:55:20.846196+01:00 \n", + "4 20364 2023-09-13 03:42:45.196791+02:00 \n", + ".. ... ... \n", + "398 15603 2023-09-12 17:42:25.327618+02:00 \n", + "399 15621 2023-09-12 17:42:25.335792+02:00 \n", + "400 15740 2023-09-12 17:47:05.112101+02:00 \n", + "401 15520 2023-09-12 17:42:25.290280+02:00 \n", + "402 15439 2023-09-12 17:42:25.252747+02:00 \n", + "\n", + " updated_at season_id facility_id \\\n", + "0 2023-09-13 03:54:30.086969+02:00 1865 1054 \n", + "1 2023-09-13 03:54:30.103943+02:00 1865 1054 \n", + "2 2023-10-05 04:48:36.562528+02:00 1865 1054 \n", + "3 2023-11-01 03:55:28.412457+01:00 1865 1054 \n", + "4 2023-09-13 03:54:30.075456+02:00 1865 1054 \n", + ".. ... ... ... \n", + "398 2023-09-12 19:00:00.893400+02:00 1706 1054 \n", + "399 2023-09-12 19:00:00.899622+02:00 1708 1054 \n", + "400 2023-09-12 19:00:00.906123+02:00 1711 1054 \n", + "401 2023-09-12 19:00:00.835625+02:00 1708 1054 \n", + "402 2023-09-12 19:00:00.735990+02:00 1708 1054 \n", + "\n", + " name event_type_id manual_added \\\n", + "0 marelle 1055 False \n", + "1 dialogues 1055 False \n", + "2 les grandes epopees 1055 False \n", + "3 scolaire marelle 1055 False \n", + "4 le couronnement de poppee 1055 False \n", + ".. ... ... ... \n", + "398 marelle 1055 False \n", + "399 cartes d'adhesion 1055 False \n", + "400 repetition le medecin malgre lui 1055 False \n", + "401 opera au village 1055 False \n", + "402 florilege 1055 False \n", + "\n", + " is_display event_type_key_id facility_key_id \\\n", + "0 True 1055 1054 \n", + "1 True 1055 1054 \n", + "2 True 1055 1054 \n", + "3 True 1055 1054 \n", + "4 True 1055 1054 \n", + ".. ... ... ... \n", + "398 True 1055 1054 \n", + "399 True 1055 1054 \n", + "400 True 1055 1054 \n", + "401 True 1055 1054 \n", + "402 True 1055 1054 \n", + "\n", + " identifier \n", + "0 26d1e9a4acad18b9cf79244334c86c93 \n", + "1 60356fc5e8ed6c9c1be9c5ec67e77766 \n", + "2 f8ab088e06252bf34e1b12ad2ce1a403 \n", + "3 447fa80f9a793b7587bb85ebbda6442c \n", + "4 3b37f5d2cd354cbc422868621ac7ebc2 \n", + ".. ... \n", + "398 fde88b72fb82b1fe42fbbfbfc3d6b4d3 \n", + "399 051b96aad2b720bad4450a59ed7dfbf6 \n", + "400 addd6885bea5ddf60ec3539dfc3e79e8 \n", + "401 94f250d10d4a56358ceab23b384439ff \n", + "402 4f015946bcbd856aa573cadb7ac42b9f \n", + "\n", + "[403 rows x 12 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "events" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a52b0973-be86-4661-86f3-f433d0987f00", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c10297e8-a8f9-45f9-8553-17e3fdb6f8c1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['id', 'name', 'created_at', 'updated_at', 'fidelity_delay',\n", + " 'identifier'],\n", + " dtype='object')\n", + "(1, 6)\n", + "\n", + "RangeIndex: 1 entries, 0 to 0\n", + "Data columns (total 6 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 1 non-null int64 \n", + " 1 name 0 non-null float64\n", + " 2 created_at 1 non-null object \n", + " 3 updated_at 1 non-null object \n", + " 4 fidelity_delay 1 non-null int64 \n", + " 5 identifier 1 non-null object \n", + "dtypes: float64(1), int64(2), object(3)\n", + "memory usage: 176.0+ bytes\n" + ] + } + ], + "source": [ + "# Type d'évenement = representation_types.csv\n", + "FILE_PATH_S3 = 'bdc2324-data/11/11representation_types.csv'\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " representation_types = pd.read_csv(file_in, sep=\",\")\n", + "\n", + "print(representation_types.columns)\n", + "print(representation_types.shape)\n", + "representation_types.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "41ef6a1b-e99e-4c73-a2ae-ba7d438d90c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamecreated_atupdated_atfidelity_delayidentifier
01055NaN2023-09-12 17:42:25.216901+02:002023-09-12 17:42:25.216901+02:0036d41d8cd98f00b204e9800998ecf8427e
\n", + "
" + ], + "text/plain": [ + " id name created_at \\\n", + "0 1055 NaN 2023-09-12 17:42:25.216901+02:00 \n", + "\n", + " updated_at fidelity_delay \\\n", + "0 2023-09-12 17:42:25.216901+02:00 36 \n", + "\n", + " identifier \n", + "0 d41d8cd98f00b204e9800998ecf8427e " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "event_types" + ] } ], "metadata": {