diff --git a/Clean-Notebook.ipynb b/Clean-Notebook.ipynb index ef5984f..be9a507 100644 --- a/Clean-Notebook.ipynb +++ b/Clean-Notebook.ipynb @@ -105,6 +105,49 @@ "## Type de client au globale" ] }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7c89d25f-ee42-4478-9ff0-ee64b781d5c8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['id', 'customer_id', 'target_id', 'created_at', 'updated_at', 'name',\n", + " 'extra_field'],\n", + " dtype='object')\n", + "(124302, 7)\n", + "\n", + "RangeIndex: 124302 entries, 0 to 124301\n", + "Data columns (total 7 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 124302 non-null int64 \n", + " 1 customer_id 124302 non-null int64 \n", + " 2 target_id 124302 non-null int64 \n", + " 3 created_at 124296 non-null object \n", + " 4 updated_at 124296 non-null object \n", + " 5 name 0 non-null float64\n", + " 6 extra_field 0 non-null float64\n", + "dtypes: float64(2), int64(3), object(2)\n", + "memory usage: 6.6+ MB\n" + ] + } + ], + "source": [ + "# Client\n", + "FILE_PATH_S3 = 'bdc2324-data/11/11customer_target_mappings.csv'\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " customer_target_mappings = pd.read_csv(file_in, sep=\",\")\n", + "\n", + "print(customer_target_mappings.columns)\n", + "print(customer_target_mappings.shape)\n", + "customer_target_mappings.info()" + ] + }, { "cell_type": "code", "execution_count": 4, @@ -1552,6 +1595,340 @@ "# But : lier les caractéristiques socio-demo et les comportements d'achat\n" ] }, + { + "cell_type": "code", + "execution_count": 29, + "id": "8259ae6c-353f-43a6-add3-f974fac6e5d4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['id', 'number', 'created_at', 'updated_at', 'purchase_id', 'product_id',\n", + " 'is_from_subscription', 'type_of', 'supplier_id', 'barcode',\n", + " 'identifier'],\n", + " dtype='object')\n", + "(318969, 11)\n", + "\n", + "RangeIndex: 318969 entries, 0 to 318968\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 318969 non-null int64 \n", + " 1 number 318969 non-null object \n", + " 2 created_at 318969 non-null object \n", + " 3 updated_at 318969 non-null object \n", + " 4 purchase_id 318969 non-null int64 \n", + " 5 product_id 318969 non-null int64 \n", + " 6 is_from_subscription 318969 non-null bool \n", + " 7 type_of 318969 non-null int64 \n", + " 8 supplier_id 318969 non-null int64 \n", + " 9 barcode 0 non-null float64\n", + " 10 identifier 318969 non-null object \n", + "dtypes: bool(1), float64(1), int64(5), object(4)\n", + "memory usage: 24.6+ MB\n" + ] + } + ], + "source": [ + "# tickets\n", + "FILE_PATH_S3 = 'bdc2324-data/11/11tickets.csv'\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " tickets = pd.read_csv(file_in, sep=\",\")\n", + "\n", + "print(tickets.columns)\n", + "print(tickets.shape)\n", + "tickets.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "f54830cb-1f95-4f71-9b04-358c745fb454", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnumbercreated_atupdated_atpurchase_idproduct_idis_from_subscriptiontype_ofsupplier_idbarcodeidentifier
021190811433_136_212_683562023-09-12 17:42:45.396336+02:002023-09-12 17:42:45.396336+02:00861764209879False11702NaNf694c255855ce5643c6fcc7fed5e9237
121190821433_136_194_683562023-09-12 17:42:45.409056+02:002023-09-12 17:42:45.409056+02:00861763209879False11702NaN838d6101db2fc8bc80536d8b91b49859
2211908333158_158_343_683572023-09-12 17:42:45.409824+02:002023-09-12 17:42:45.409824+02:00861769209880False11702NaN8a8d938d66a4dc57bcb44c2773c6fdfa
3211908433158_158_297_683572023-09-12 17:42:45.410447+02:002023-09-12 17:42:45.410447+02:00861767209880False11702NaNb7a3dd0794c0957c942d45b8913e5b96
4211908533158_158_318_683572023-09-12 17:42:45.411059+02:002023-09-12 17:42:45.411059+02:00861768209880False11702NaNd7ea7e443581ebe520dd13f6cad31af7
....................................
318964256402144247_204_239_892782023-09-12 18:59:48.750953+02:002023-09-12 18:59:48.750953+02:001244281210158False11702NaN82c9af8b2167f7ac34a5e834242b0239
318965256402244247_204_299_892782023-09-12 18:59:48.751441+02:002023-09-12 18:59:48.751441+02:001244284210158False11702NaN235e8e608f066cb72949bbd397d0a76f
318966256402344247_204_259_892782023-09-12 18:59:48.751924+02:002023-09-12 18:59:48.751924+02:001244282210158False11702NaNec22fa828931f030f7e79a4cc5478c4b
318967256402444247_204_279_892782023-09-12 18:59:48.752425+02:002023-09-12 18:59:48.752425+02:001244283210158False11702NaN31ec4deaf718e04caf193e1ff8d621ef
31896825131564854_178_2847_891702023-09-12 18:52:20.331807+02:002023-09-12 18:59:48.752904+02:001244285261922False31702NaN48aef9efab29bfb1537656908863bcc1
\n", + "

318969 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " id number created_at \\\n", + "0 2119081 1433_136_212_68356 2023-09-12 17:42:45.396336+02:00 \n", + "1 2119082 1433_136_194_68356 2023-09-12 17:42:45.409056+02:00 \n", + "2 2119083 33158_158_343_68357 2023-09-12 17:42:45.409824+02:00 \n", + "3 2119084 33158_158_297_68357 2023-09-12 17:42:45.410447+02:00 \n", + "4 2119085 33158_158_318_68357 2023-09-12 17:42:45.411059+02:00 \n", + "... ... ... ... \n", + "318964 2564021 44247_204_239_89278 2023-09-12 18:59:48.750953+02:00 \n", + "318965 2564022 44247_204_299_89278 2023-09-12 18:59:48.751441+02:00 \n", + "318966 2564023 44247_204_259_89278 2023-09-12 18:59:48.751924+02:00 \n", + "318967 2564024 44247_204_279_89278 2023-09-12 18:59:48.752425+02:00 \n", + "318968 2513156 4854_178_2847_89170 2023-09-12 18:52:20.331807+02:00 \n", + "\n", + " updated_at purchase_id product_id \\\n", + "0 2023-09-12 17:42:45.396336+02:00 861764 209879 \n", + "1 2023-09-12 17:42:45.409056+02:00 861763 209879 \n", + "2 2023-09-12 17:42:45.409824+02:00 861769 209880 \n", + "3 2023-09-12 17:42:45.410447+02:00 861767 209880 \n", + "4 2023-09-12 17:42:45.411059+02:00 861768 209880 \n", + "... ... ... ... \n", + "318964 2023-09-12 18:59:48.750953+02:00 1244281 210158 \n", + "318965 2023-09-12 18:59:48.751441+02:00 1244284 210158 \n", + "318966 2023-09-12 18:59:48.751924+02:00 1244282 210158 \n", + "318967 2023-09-12 18:59:48.752425+02:00 1244283 210158 \n", + "318968 2023-09-12 18:59:48.752904+02:00 1244285 261922 \n", + "\n", + " is_from_subscription type_of supplier_id barcode \\\n", + "0 False 1 1702 NaN \n", + "1 False 1 1702 NaN \n", + "2 False 1 1702 NaN \n", + "3 False 1 1702 NaN \n", + "4 False 1 1702 NaN \n", + "... ... ... ... ... \n", + "318964 False 1 1702 NaN \n", + "318965 False 1 1702 NaN \n", + "318966 False 1 1702 NaN \n", + "318967 False 1 1702 NaN \n", + "318968 False 3 1702 NaN \n", + "\n", + " identifier \n", + "0 f694c255855ce5643c6fcc7fed5e9237 \n", + "1 838d6101db2fc8bc80536d8b91b49859 \n", + "2 8a8d938d66a4dc57bcb44c2773c6fdfa \n", + "3 b7a3dd0794c0957c942d45b8913e5b96 \n", + "4 d7ea7e443581ebe520dd13f6cad31af7 \n", + "... ... \n", + "318964 82c9af8b2167f7ac34a5e834242b0239 \n", + "318965 235e8e608f066cb72949bbd397d0a76f \n", + "318966 ec22fa828931f030f7e79a4cc5478c4b \n", + "318967 31ec4deaf718e04caf193e1ff8d621ef \n", + "318968 48aef9efab29bfb1537656908863bcc1 \n", + "\n", + "[318969 rows x 11 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tickets" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "ad743347-33d1-41f0-852d-f9e6354f82ed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 3, 0])" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tickets['type_of'].unique()" + ] + }, { "cell_type": "markdown", "id": "b88808fe-3b4e-49ed-9885-d52910b6f211", @@ -1901,15 +2278,28 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "a52b0973-be86-4661-86f3-f433d0987f00", + "execution_count": 15, + "id": "6cb04679-26e7-4ed8-bfc1-42285da96374", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "357" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "events['name'].nunique()" + ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 16, "id": "c10297e8-a8f9-45f9-8553-17e3fdb6f8c1", "metadata": {}, "outputs": [ @@ -1917,42 +2307,788 @@ "name": "stdout", "output_type": "stream", "text": [ - "Index(['id', 'name', 'created_at', 'updated_at', 'fidelity_delay',\n", + "Index(['id', 'serial', 'event_id', 'created_at', 'updated_at',\n", + " 'start_date_time', 'open', 'satisfaction', 'end_date_time', 'name',\n", + " 'is_display', 'representation_type_id', 'expected_filling',\n", + " 'max_filling', 'extra_field', 'identifier'],\n", + " dtype='object')\n", + "(996, 16)\n", + "\n", + "RangeIndex: 996 entries, 0 to 995\n", + "Data columns (total 16 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 996 non-null int64 \n", + " 1 serial 0 non-null float64\n", + " 2 event_id 996 non-null int64 \n", + " 3 created_at 996 non-null object \n", + " 4 updated_at 996 non-null object \n", + " 5 start_date_time 996 non-null object \n", + " 6 open 996 non-null bool \n", + " 7 satisfaction 0 non-null float64\n", + " 8 end_date_time 996 non-null object \n", + " 9 name 0 non-null float64\n", + " 10 is_display 996 non-null bool \n", + " 11 representation_type_id 0 non-null float64\n", + " 12 expected_filling 24 non-null float64\n", + " 13 max_filling 24 non-null float64\n", + " 14 extra_field 0 non-null float64\n", + " 15 identifier 996 non-null object \n", + "dtypes: bool(2), float64(7), int64(2), object(5)\n", + "memory usage: 111.0+ KB\n" + ] + } + ], + "source": [ + "# Représentation des évenements = representations.csv\n", + "FILE_PATH_S3 = 'bdc2324-data/11/11representations.csv'\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " representations = pd.read_csv(file_in, sep=\",\")\n", + "\n", + "print(representations.columns)\n", + "print(representations.shape)\n", + "representations.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "41ef6a1b-e99e-4c73-a2ae-ba7d438d90c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idserialevent_idcreated_atupdated_atstart_date_timeopensatisfactionend_date_timenameis_displayrepresentation_type_idexpected_fillingmax_fillingextra_fieldidentifier
044351NaN203712023-09-13 03:42:45.245879+02:002023-09-13 03:42:45.245879+02:002023-12-21 20:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaN550.0550.0NaN33520762e8cc28982e3841cbc2be8ce2
145497NaN207572023-11-01 03:55:20.875712+01:002023-11-01 03:55:20.875712+01:002023-11-28 10:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaN5c34b84e3d11276e0995d984c94cd28d
244383NaN203832023-09-13 10:41:08.964302+02:002023-09-13 10:41:08.964302+02:002023-06-04 17:00:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaNbf3c65a1dfefbd747dcc2360e6887eac
344384NaN203832023-09-13 10:41:08.972401+02:002023-09-13 10:41:08.972401+02:002023-06-03 17:30:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaNb0e69ae8b78ebab3066aac83de22d239
444385NaN203842023-09-13 10:41:08.973290+02:002023-09-13 10:41:08.973290+02:002023-06-03 16:15:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaN9fb91c8b1cf9e444111c511e212ac5c1
...................................................
99133894NaN156472023-09-12 17:42:25.564297+02:002023-09-12 17:42:25.564297+02:002022-11-08 20:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaN44bbcecfd007ceaad05805391beccabb
99233873NaN156402023-09-12 17:42:25.554863+02:002023-09-12 17:42:25.554863+02:002022-11-14 20:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaN151edbec8e0a3cd80071038e857f3493
99333610NaN155202023-09-12 17:42:25.442979+02:002023-09-12 17:42:25.442979+02:002023-06-19 18:00:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaN9e9e38d527427e1b6f67e0c3f12b82fc
99433953NaN155202023-09-12 17:42:25.590746+02:002023-09-12 17:42:25.590746+02:002023-06-19 20:00:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaN7bf0978aabb6cac1bb4cd2784afb2b6b
99533639NaN155332023-09-12 17:42:25.455708+02:002023-09-12 17:42:25.455708+02:002023-04-15 17:30:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaNfae68f1e09710ec8747957af6e22f61d
\n", + "

996 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " id serial event_id created_at \\\n", + "0 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n", + "1 45497 NaN 20757 2023-11-01 03:55:20.875712+01:00 \n", + "2 44383 NaN 20383 2023-09-13 10:41:08.964302+02:00 \n", + "3 44384 NaN 20383 2023-09-13 10:41:08.972401+02:00 \n", + "4 44385 NaN 20384 2023-09-13 10:41:08.973290+02:00 \n", + ".. ... ... ... ... \n", + "991 33894 NaN 15647 2023-09-12 17:42:25.564297+02:00 \n", + "992 33873 NaN 15640 2023-09-12 17:42:25.554863+02:00 \n", + "993 33610 NaN 15520 2023-09-12 17:42:25.442979+02:00 \n", + "994 33953 NaN 15520 2023-09-12 17:42:25.590746+02:00 \n", + "995 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n", + "\n", + " updated_at start_date_time open \\\n", + "0 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n", + "1 2023-11-01 03:55:20.875712+01:00 2023-11-28 10:00:00+01:00 True \n", + "2 2023-09-13 10:41:08.964302+02:00 2023-06-04 17:00:00+02:00 True \n", + "3 2023-09-13 10:41:08.972401+02:00 2023-06-03 17:30:00+02:00 True \n", + "4 2023-09-13 10:41:08.973290+02:00 2023-06-03 16:15:00+02:00 True \n", + ".. ... ... ... \n", + "991 2023-09-12 17:42:25.564297+02:00 2022-11-08 20:00:00+01:00 True \n", + "992 2023-09-12 17:42:25.554863+02:00 2022-11-14 20:00:00+01:00 True \n", + "993 2023-09-12 17:42:25.442979+02:00 2023-06-19 18:00:00+02:00 True \n", + "994 2023-09-12 17:42:25.590746+02:00 2023-06-19 20:00:00+02:00 True \n", + "995 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n", + "\n", + " satisfaction end_date_time name is_display \\\n", + "0 NaN 1901-01-01 00:09:21+00:09 NaN True \n", + "1 NaN 1901-01-01 00:09:21+00:09 NaN True \n", + "2 NaN 1901-01-01 00:09:21+00:09 NaN True \n", + "3 NaN 1901-01-01 00:09:21+00:09 NaN True \n", + "4 NaN 1901-01-01 00:09:21+00:09 NaN True \n", + ".. ... ... ... ... \n", + "991 NaN 1901-01-01 00:09:21+00:09 NaN True \n", + "992 NaN 1901-01-01 00:09:21+00:09 NaN True \n", + "993 NaN 1901-01-01 00:09:21+00:09 NaN True \n", + "994 NaN 1901-01-01 00:09:21+00:09 NaN True \n", + "995 NaN 1901-01-01 00:09:21+00:09 NaN True \n", + "\n", + " representation_type_id expected_filling max_filling extra_field \\\n", + "0 NaN 550.0 550.0 NaN \n", + "1 NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN \n", + ".. ... ... ... ... \n", + "991 NaN NaN NaN NaN \n", + "992 NaN NaN NaN NaN \n", + "993 NaN NaN NaN NaN \n", + "994 NaN NaN NaN NaN \n", + "995 NaN NaN NaN NaN \n", + "\n", + " identifier \n", + "0 33520762e8cc28982e3841cbc2be8ce2 \n", + "1 5c34b84e3d11276e0995d984c94cd28d \n", + "2 bf3c65a1dfefbd747dcc2360e6887eac \n", + "3 b0e69ae8b78ebab3066aac83de22d239 \n", + "4 9fb91c8b1cf9e444111c511e212ac5c1 \n", + ".. ... \n", + "991 44bbcecfd007ceaad05805391beccabb \n", + "992 151edbec8e0a3cd80071038e857f3493 \n", + "993 9e9e38d527427e1b6f67e0c3f12b82fc \n", + "994 7bf0978aabb6cac1bb4cd2784afb2b6b \n", + "995 fae68f1e09710ec8747957af6e22f61d \n", + "\n", + "[996 rows x 16 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "representations" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "ae6cdad3-2184-4ae7-928c-2f8bd7769a5b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['id', 'amount', 'is_full_price', 'representation_id',\n", + " 'pricing_formula_id', 'created_at', 'updated_at', 'category_id',\n", + " 'apply_price', 'products_group_id', 'product_pack_id', 'extra_field',\n", + " 'amount_consumption', 'identifier'],\n", + " dtype='object')\n", + "(14648, 14)\n", + "\n", + "RangeIndex: 14648 entries, 0 to 14647\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 14648 non-null int64 \n", + " 1 amount 14648 non-null float64\n", + " 2 is_full_price 14648 non-null bool \n", + " 3 representation_id 14648 non-null int64 \n", + " 4 pricing_formula_id 14648 non-null int64 \n", + " 5 created_at 14648 non-null object \n", + " 6 updated_at 14648 non-null object \n", + " 7 category_id 14648 non-null int64 \n", + " 8 apply_price 14648 non-null float64\n", + " 9 products_group_id 14648 non-null int64 \n", + " 10 product_pack_id 14648 non-null int64 \n", + " 11 extra_field 0 non-null float64\n", + " 12 amount_consumption 0 non-null float64\n", + " 13 identifier 14648 non-null object \n", + "dtypes: bool(1), float64(4), int64(6), object(3)\n", + "memory usage: 1.5+ MB\n" + ] + } + ], + "source": [ + "# Produits vendues = products.csv\n", + "FILE_PATH_S3 = 'bdc2324-data/11/11products.csv'\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " products = pd.read_csv(file_in, sep=\",\")\n", + "\n", + "print(products.columns)\n", + "print(products.shape)\n", + "products.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "34f1825d-148a-4a6e-88d6-61449fee3ee4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idamountis_full_pricerepresentation_idpricing_formula_idcreated_atupdated_atcategory_idapply_priceproducts_group_idproduct_pack_idextra_fieldamount_consumptionidentifier
026832518.0False44332204772023-09-13 03:42:45.415594+02:002023-09-13 03:42:45.415594+02:0049720.02681081NaNNaNb823bbea3ba837da2ef8efaf1287272d
127411836.8False44340205022023-10-25 03:26:57.430694+02:002023-10-25 03:26:57.430694+02:0049690.02739011NaNNaN81e8b7991f6948e3ef7cfe5011d13532
226833839.1False44340204972023-09-13 03:42:45.430942+02:002023-09-13 03:42:45.430942+02:0049690.02681211NaNNaNbe8bc0399db4d04aefa9f44afd4d5efa
32098830.0False33443204752023-09-12 17:42:27.595998+02:002023-09-12 17:42:27.595998+02:0049700.02097061NaNNaN01a9eea5f8ad53491faa864bfac44183
426832663.0False44333204772023-09-13 03:42:45.417283+02:002023-09-13 03:42:45.417283+02:0049690.02681091NaNNaN781a917ecfdabb14169701d7b143bbe4
.............................................
1464321787833.6False33919204892023-09-12 17:51:11.572882+02:002023-09-12 17:51:11.572882+02:0049710.02176951NaNNaN82bba69321466069411b3023343b44a4
1464426831510.0False33919205042023-09-12 18:59:29.995176+02:002023-09-12 18:59:29.995176+02:0049690.02680981NaNNaNeae56a8eb0a4315c5713b2053103d595
146452101485.0False33531204732023-09-12 17:42:27.733260+02:002023-09-12 17:42:27.733260+02:0049750.02099711NaNNaN449f86c1ef2b478d3389f7d0e27d0e6b
1464621205430.0False33810204732023-09-12 17:42:28.724681+02:002023-09-12 17:42:28.724681+02:0049720.02118761NaNNaN2090203e2c0b58ea8f505089faee6d62
1464726192221.0False33766204882023-09-12 18:52:00.519838+02:002023-09-12 18:52:00.519838+02:0049720.02617091NaNNaN9139ee36a92bed766ae95372cca77336
\n", + "

14648 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " id amount is_full_price representation_id pricing_formula_id \\\n", + "0 268325 18.0 False 44332 20477 \n", + "1 274118 36.8 False 44340 20502 \n", + "2 268338 39.1 False 44340 20497 \n", + "3 209883 0.0 False 33443 20475 \n", + "4 268326 63.0 False 44333 20477 \n", + "... ... ... ... ... ... \n", + "14643 217878 33.6 False 33919 20489 \n", + "14644 268315 10.0 False 33919 20504 \n", + "14645 210148 5.0 False 33531 20473 \n", + "14646 212054 30.0 False 33810 20473 \n", + "14647 261922 21.0 False 33766 20488 \n", + "\n", + " created_at updated_at \\\n", + "0 2023-09-13 03:42:45.415594+02:00 2023-09-13 03:42:45.415594+02:00 \n", + "1 2023-10-25 03:26:57.430694+02:00 2023-10-25 03:26:57.430694+02:00 \n", + "2 2023-09-13 03:42:45.430942+02:00 2023-09-13 03:42:45.430942+02:00 \n", + "3 2023-09-12 17:42:27.595998+02:00 2023-09-12 17:42:27.595998+02:00 \n", + "4 2023-09-13 03:42:45.417283+02:00 2023-09-13 03:42:45.417283+02:00 \n", + "... ... ... \n", + "14643 2023-09-12 17:51:11.572882+02:00 2023-09-12 17:51:11.572882+02:00 \n", + "14644 2023-09-12 18:59:29.995176+02:00 2023-09-12 18:59:29.995176+02:00 \n", + "14645 2023-09-12 17:42:27.733260+02:00 2023-09-12 17:42:27.733260+02:00 \n", + "14646 2023-09-12 17:42:28.724681+02:00 2023-09-12 17:42:28.724681+02:00 \n", + "14647 2023-09-12 18:52:00.519838+02:00 2023-09-12 18:52:00.519838+02:00 \n", + "\n", + " category_id apply_price products_group_id product_pack_id \\\n", + "0 4972 0.0 268108 1 \n", + "1 4969 0.0 273901 1 \n", + "2 4969 0.0 268121 1 \n", + "3 4970 0.0 209706 1 \n", + "4 4969 0.0 268109 1 \n", + "... ... ... ... ... \n", + "14643 4971 0.0 217695 1 \n", + "14644 4969 0.0 268098 1 \n", + "14645 4975 0.0 209971 1 \n", + "14646 4972 0.0 211876 1 \n", + "14647 4972 0.0 261709 1 \n", + "\n", + " extra_field amount_consumption identifier \n", + "0 NaN NaN b823bbea3ba837da2ef8efaf1287272d \n", + "1 NaN NaN 81e8b7991f6948e3ef7cfe5011d13532 \n", + "2 NaN NaN be8bc0399db4d04aefa9f44afd4d5efa \n", + "3 NaN NaN 01a9eea5f8ad53491faa864bfac44183 \n", + "4 NaN NaN 781a917ecfdabb14169701d7b143bbe4 \n", + "... ... ... ... \n", + "14643 NaN NaN 82bba69321466069411b3023343b44a4 \n", + "14644 NaN NaN eae56a8eb0a4315c5713b2053103d595 \n", + "14645 NaN NaN 449f86c1ef2b478d3389f7d0e27d0e6b \n", + "14646 NaN NaN 2090203e2c0b58ea8f505089faee6d62 \n", + "14647 NaN NaN 9139ee36a92bed766ae95372cca77336 \n", + "\n", + "[14648 rows x 14 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "products" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "6735b338-26b5-479d-825d-677ea533dad5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['id', 'name', 'created_at', 'updated_at', 'street_id', 'fixed_capacity',\n", " 'identifier'],\n", " dtype='object')\n", - "(1, 6)\n", + "(1, 7)\n", "\n", "RangeIndex: 1 entries, 0 to 0\n", - "Data columns (total 6 columns):\n", + "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 id 1 non-null int64 \n", " 1 name 0 non-null float64\n", " 2 created_at 1 non-null object \n", " 3 updated_at 1 non-null object \n", - " 4 fidelity_delay 1 non-null int64 \n", - " 5 identifier 1 non-null object \n", - "dtypes: float64(1), int64(2), object(3)\n", - "memory usage: 176.0+ bytes\n" + " 4 street_id 1 non-null int64 \n", + " 5 fixed_capacity 0 non-null float64\n", + " 6 identifier 1 non-null object \n", + "dtypes: float64(2), int64(2), object(3)\n", + "memory usage: 184.0+ bytes\n" ] } ], "source": [ - "# Type d'évenement = representation_types.csv\n", - "FILE_PATH_S3 = 'bdc2324-data/11/11representation_types.csv'\n", + "# Lieu = facilities.csv\n", + "FILE_PATH_S3 = 'bdc2324-data/11/11facilities.csv'\n", "\n", "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", - " representation_types = pd.read_csv(file_in, sep=\",\")\n", + " facilities = pd.read_csv(file_in, sep=\",\")\n", "\n", - "print(representation_types.columns)\n", - "print(representation_types.shape)\n", - "representation_types.info()" + "print(facilities.columns)\n", + "print(facilities.shape)\n", + "facilities.info()" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "41ef6a1b-e99e-4c73-a2ae-ba7d438d90c2", + "execution_count": 21, + "id": "428b86c2-50f4-42a5-9bbb-a17ffe820bf9", "metadata": {}, "outputs": [ { @@ -1980,18 +3116,20 @@ " name\n", " created_at\n", " updated_at\n", - " fidelity_delay\n", + " street_id\n", + " fixed_capacity\n", " identifier\n", " \n", " \n", " \n", " \n", " 0\n", - " 1055\n", + " 1054\n", + " NaN\n", + " 2023-09-12 17:42:25.223064+02:00\n", + " 2023-09-12 17:42:25.223064+02:00\n", + " 1\n", " NaN\n", - " 2023-09-12 17:42:25.216901+02:00\n", - " 2023-09-12 17:42:25.216901+02:00\n", - " 36\n", " d41d8cd98f00b204e9800998ecf8427e\n", " \n", " \n", @@ -2000,22 +3138,2167 @@ ], "text/plain": [ " id name created_at \\\n", - "0 1055 NaN 2023-09-12 17:42:25.216901+02:00 \n", + "0 1054 NaN 2023-09-12 17:42:25.223064+02:00 \n", "\n", - " updated_at fidelity_delay \\\n", - "0 2023-09-12 17:42:25.216901+02:00 36 \n", + " updated_at street_id fixed_capacity \\\n", + "0 2023-09-12 17:42:25.223064+02:00 1 NaN \n", "\n", " identifier \n", "0 d41d8cd98f00b204e9800998ecf8427e " ] }, - "execution_count": 11, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "event_types" + "facilities" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "f6b26ad5-a4cc-4219-a0b0-406d9b025458", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['id', 'name', 'created_at', 'updated_at', 'start_date_time',\n", + " 'identifier'],\n", + " dtype='object')\n", + "(9, 6)\n", + "\n", + "RangeIndex: 9 entries, 0 to 8\n", + "Data columns (total 6 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 9 non-null int64 \n", + " 1 name 9 non-null object \n", + " 2 created_at 9 non-null object \n", + " 3 updated_at 9 non-null object \n", + " 4 start_date_time 0 non-null float64\n", + " 5 identifier 9 non-null object \n", + "dtypes: float64(1), int64(1), object(4)\n", + "memory usage: 560.0+ bytes\n" + ] + } + ], + "source": [ + "# Saisons = seasons.csv période sur deux années consécutives\n", + "FILE_PATH_S3 = 'bdc2324-data/11/11seasons.csv'\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " seasons = pd.read_csv(file_in, sep=\",\")\n", + "\n", + "print(seasons.columns)\n", + "print(seasons.shape)\n", + "seasons.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "75c8c0ef-4ff5-45b1-a791-8ba2e9a4437e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['saison 2023-2024', 'saison 2021-2022', 'saison 2015-2016',\n", + " 'saison 2016-2017', 'saison 2017-2018', 'saison 2018-2019',\n", + " 'saison 2020-2021', 'saison 2019-2020', 'saison 2022-2023'],\n", + " dtype=object)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seasons['name'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "cd0d10df-10cc-4f75-8b88-35f676c91f5b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['id', 'purchase_date', 'customer_id', 'created_at', 'updated_at',\n", + " 'number', 'identifier'],\n", + " dtype='object')\n", + "(410695, 7)\n", + "\n", + "RangeIndex: 410695 entries, 0 to 410694\n", + "Data columns (total 7 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 410695 non-null int64 \n", + " 1 purchase_date 410695 non-null object \n", + " 2 customer_id 410695 non-null int64 \n", + " 3 created_at 410695 non-null object \n", + " 4 updated_at 410695 non-null object \n", + " 5 number 0 non-null float64\n", + " 6 identifier 410695 non-null object \n", + "dtypes: float64(1), int64(2), object(4)\n", + "memory usage: 21.9+ MB\n" + ] + } + ], + "source": [ + "# Achats = purchases.csv \n", + "FILE_PATH_S3 = 'bdc2324-data/11/11purchases.csv'\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " purchases = pd.read_csv(file_in, sep=\",\")\n", + "\n", + "print(purchases.columns)\n", + "print(purchases.shape)\n", + "purchases.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "8f986fdb-ca37-4cbb-b526-2a6d0ce7ca2c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idpurchase_datecustomer_idcreated_atupdated_atnumberidentifier
08617612019-03-01 16:28:49+01:0049662023-09-12 17:42:37.564150+02:002023-09-12 17:42:37.564150+02:00NaNd20eb0c3a7efec0bbe338dee40dc3378
18617622019-03-01 16:29:11+01:0049662023-09-12 17:42:37.571159+02:002023-09-12 17:42:37.571159+02:00NaNcff3abfc018517bce5ccfc58f5cacf40
28617632019-03-01 16:29:17+01:0049662023-09-12 17:42:37.571646+02:002023-09-12 17:42:37.571646+02:00NaNe1155cf26b34f792bdb23e49244d7264
38617642019-03-01 16:29:19+01:0049662023-09-12 17:42:37.572063+02:002023-09-12 17:42:37.572063+02:00NaNe8b95cc6a1a8b103ffa39755ce3bfc4d
48617652019-03-01 16:32:08+01:004059942023-09-12 17:42:37.572470+02:002023-09-12 17:42:37.572470+02:00NaN1b763278914f1309e357abe5033a3f0f
........................
41069012859642023-10-21 21:46:41+02:005173092023-10-23 03:43:16.457501+02:002023-10-23 03:43:16.457501+02:00NaN72c4e90c2b151dcffc87b19ea8a0c4f1
41069112859652023-10-21 21:47:07+02:005173092023-10-23 03:43:16.458458+02:002023-10-23 03:43:16.458458+02:00NaNee65532087132145daa6154fbae050ea
41069212859662023-10-21 21:47:20+02:005173092023-10-23 03:43:16.458811+02:002023-10-23 03:43:16.458811+02:00NaN7e825dd352bc6a11ab81cb8068e325e6
41069312859672023-10-21 23:07:06+02:003999692023-10-23 03:43:16.459738+02:002023-10-23 03:43:16.459738+02:00NaNfdb92627a48d6ba8fa817d60a83dbea8
41069412859682023-10-21 23:07:39+02:003999692023-10-23 03:43:16.462409+02:002023-10-23 03:43:16.462409+02:00NaNe9dbaff4f7037a5b0efa11263584dfad
\n", + "

410695 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " id purchase_date customer_id \\\n", + "0 861761 2019-03-01 16:28:49+01:00 4966 \n", + "1 861762 2019-03-01 16:29:11+01:00 4966 \n", + "2 861763 2019-03-01 16:29:17+01:00 4966 \n", + "3 861764 2019-03-01 16:29:19+01:00 4966 \n", + "4 861765 2019-03-01 16:32:08+01:00 405994 \n", + "... ... ... ... \n", + "410690 1285964 2023-10-21 21:46:41+02:00 517309 \n", + "410691 1285965 2023-10-21 21:47:07+02:00 517309 \n", + "410692 1285966 2023-10-21 21:47:20+02:00 517309 \n", + "410693 1285967 2023-10-21 23:07:06+02:00 399969 \n", + "410694 1285968 2023-10-21 23:07:39+02:00 399969 \n", + "\n", + " created_at updated_at \\\n", + "0 2023-09-12 17:42:37.564150+02:00 2023-09-12 17:42:37.564150+02:00 \n", + "1 2023-09-12 17:42:37.571159+02:00 2023-09-12 17:42:37.571159+02:00 \n", + "2 2023-09-12 17:42:37.571646+02:00 2023-09-12 17:42:37.571646+02:00 \n", + "3 2023-09-12 17:42:37.572063+02:00 2023-09-12 17:42:37.572063+02:00 \n", + "4 2023-09-12 17:42:37.572470+02:00 2023-09-12 17:42:37.572470+02:00 \n", + "... ... ... \n", + "410690 2023-10-23 03:43:16.457501+02:00 2023-10-23 03:43:16.457501+02:00 \n", + "410691 2023-10-23 03:43:16.458458+02:00 2023-10-23 03:43:16.458458+02:00 \n", + "410692 2023-10-23 03:43:16.458811+02:00 2023-10-23 03:43:16.458811+02:00 \n", + "410693 2023-10-23 03:43:16.459738+02:00 2023-10-23 03:43:16.459738+02:00 \n", + "410694 2023-10-23 03:43:16.462409+02:00 2023-10-23 03:43:16.462409+02:00 \n", + "\n", + " number identifier \n", + "0 NaN d20eb0c3a7efec0bbe338dee40dc3378 \n", + "1 NaN cff3abfc018517bce5ccfc58f5cacf40 \n", + "2 NaN e1155cf26b34f792bdb23e49244d7264 \n", + "3 NaN e8b95cc6a1a8b103ffa39755ce3bfc4d \n", + "4 NaN 1b763278914f1309e357abe5033a3f0f \n", + "... ... ... \n", + "410690 NaN 72c4e90c2b151dcffc87b19ea8a0c4f1 \n", + "410691 NaN ee65532087132145daa6154fbae050ea \n", + "410692 NaN 7e825dd352bc6a11ab81cb8068e325e6 \n", + "410693 NaN fdb92627a48d6ba8fa817d60a83dbea8 \n", + "410694 NaN e9dbaff4f7037a5b0efa11263584dfad \n", + "\n", + "[410695 rows x 7 columns]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "purchases" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "281c48da-e1a0-4298-b2e6-81f9fc6461aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
id_xpurchase_datecustomer_idcreated_at_xupdated_at_xnumber_xidentifier_xid_ynumber_ycreated_at_yupdated_at_ypurchase_idproduct_idis_from_subscriptiontype_ofsupplier_idbarcodeidentifier_y
08617632019-03-01 16:29:17+01:0049662023-09-12 17:42:37.571646+02:002023-09-12 17:42:37.571646+02:00NaNe1155cf26b34f792bdb23e49244d726421190821433_136_194_683562023-09-12 17:42:45.409056+02:002023-09-12 17:42:45.409056+02:00861763209879False11702NaN838d6101db2fc8bc80536d8b91b49859
18617642019-03-01 16:29:19+01:0049662023-09-12 17:42:37.572063+02:002023-09-12 17:42:37.572063+02:00NaNe8b95cc6a1a8b103ffa39755ce3bfc4d21190811433_136_212_683562023-09-12 17:42:45.396336+02:002023-09-12 17:42:45.396336+02:00861764209879False11702NaNf694c255855ce5643c6fcc7fed5e9237
28617672019-03-01 16:33:01+01:004059942023-09-12 17:42:37.573280+02:002023-09-12 17:42:37.573280+02:00NaN6edb259b88fc6f6ae82ede82defaef92211908433158_158_297_683572023-09-12 17:42:45.410447+02:002023-09-12 17:42:45.410447+02:00861767209880False11702NaNb7a3dd0794c0957c942d45b8913e5b96
38617682019-03-01 16:33:03+01:004059942023-09-12 17:42:37.573646+02:002023-09-12 17:42:37.573646+02:00NaN5d3fcb50784bada3731a967ddc9fbba8211908533158_158_318_683572023-09-12 17:42:45.411059+02:002023-09-12 17:42:45.411059+02:00861768209880False11702NaNd7ea7e443581ebe520dd13f6cad31af7
48617692019-03-01 16:33:06+01:004059942023-09-12 17:42:37.574034+02:002023-09-12 17:42:37.574034+02:00NaN5516d19b2331db9ad0b11f7e70299575211908333158_158_343_683572023-09-12 17:42:45.409824+02:002023-09-12 17:42:45.409824+02:00861769209880False11702NaN8a8d938d66a4dc57bcb44c2773c6fdfa
.........................................................
31896412852062023-10-19 22:14:55+02:003542332023-10-21 04:50:44.397308+02:002023-10-21 04:50:44.397308+02:00NaN819dd5c8b312ee583335f32f481d782a259756470649_398_403_1686522023-10-21 04:50:44.991960+02:002023-10-21 04:50:44.991960+02:001285206270350False11702NaN56c452c39089f658ed74a06c96b78725
31896512852092023-10-19 22:59:26+02:005170012023-10-21 04:50:44.399870+02:002023-10-21 04:50:44.399870+02:00NaNef79fbeb3b80de3529df9c65cb4d4ca2259756589203_398_1187_1686562023-10-21 04:50:44.993354+02:002023-10-21 04:50:44.993354+02:001285209268450False11702NaN5ef9912e7b533b8a1b2685db538df7d3
31896612852092023-10-19 22:59:26+02:005170012023-10-21 04:50:44.399870+02:002023-10-21 04:50:44.399870+02:00NaNef79fbeb3b80de3529df9c65cb4d4ca2259756689203_398_1232_1686552023-10-21 04:50:44.994301+02:002023-10-21 04:50:44.994301+02:001285209272403False11702NaN9742a56e9ffbdfb0a31a541dc5ccb889
31896712852092023-10-19 22:59:26+02:005170012023-10-21 04:50:44.399870+02:002023-10-21 04:50:44.399870+02:00NaNef79fbeb3b80de3529df9c65cb4d4ca2259756789203_398_1211_1686552023-10-21 04:50:44.995318+02:002023-10-21 04:50:44.995318+02:001285209272403False11702NaN56a9e032281d7a9c004da644818839cc
31896812859662023-10-21 21:47:20+02:005173092023-10-23 03:43:16.458811+02:002023-10-23 03:43:16.458811+02:00NaN7e825dd352bc6a11ab81cb8068e325e6259826089257_401_2652_1687932023-10-23 03:43:16.856244+02:002023-10-23 03:43:16.856244+02:001285966268428False11702NaN86d6c0c2720435206078ac4bbf4f74f1
\n", + "

318969 rows × 18 columns

\n", + "
" + ], + "text/plain": [ + " id_x purchase_date customer_id \\\n", + "0 861763 2019-03-01 16:29:17+01:00 4966 \n", + "1 861764 2019-03-01 16:29:19+01:00 4966 \n", + "2 861767 2019-03-01 16:33:01+01:00 405994 \n", + "3 861768 2019-03-01 16:33:03+01:00 405994 \n", + "4 861769 2019-03-01 16:33:06+01:00 405994 \n", + "... ... ... ... \n", + "318964 1285206 2023-10-19 22:14:55+02:00 354233 \n", + "318965 1285209 2023-10-19 22:59:26+02:00 517001 \n", + "318966 1285209 2023-10-19 22:59:26+02:00 517001 \n", + "318967 1285209 2023-10-19 22:59:26+02:00 517001 \n", + "318968 1285966 2023-10-21 21:47:20+02:00 517309 \n", + "\n", + " created_at_x updated_at_x \\\n", + "0 2023-09-12 17:42:37.571646+02:00 2023-09-12 17:42:37.571646+02:00 \n", + "1 2023-09-12 17:42:37.572063+02:00 2023-09-12 17:42:37.572063+02:00 \n", + "2 2023-09-12 17:42:37.573280+02:00 2023-09-12 17:42:37.573280+02:00 \n", + "3 2023-09-12 17:42:37.573646+02:00 2023-09-12 17:42:37.573646+02:00 \n", + "4 2023-09-12 17:42:37.574034+02:00 2023-09-12 17:42:37.574034+02:00 \n", + "... ... ... \n", + "318964 2023-10-21 04:50:44.397308+02:00 2023-10-21 04:50:44.397308+02:00 \n", + "318965 2023-10-21 04:50:44.399870+02:00 2023-10-21 04:50:44.399870+02:00 \n", + "318966 2023-10-21 04:50:44.399870+02:00 2023-10-21 04:50:44.399870+02:00 \n", + "318967 2023-10-21 04:50:44.399870+02:00 2023-10-21 04:50:44.399870+02:00 \n", + "318968 2023-10-23 03:43:16.458811+02:00 2023-10-23 03:43:16.458811+02:00 \n", + "\n", + " number_x identifier_x id_y \\\n", + "0 NaN e1155cf26b34f792bdb23e49244d7264 2119082 \n", + "1 NaN e8b95cc6a1a8b103ffa39755ce3bfc4d 2119081 \n", + "2 NaN 6edb259b88fc6f6ae82ede82defaef92 2119084 \n", + "3 NaN 5d3fcb50784bada3731a967ddc9fbba8 2119085 \n", + "4 NaN 5516d19b2331db9ad0b11f7e70299575 2119083 \n", + "... ... ... ... \n", + "318964 NaN 819dd5c8b312ee583335f32f481d782a 2597564 \n", + "318965 NaN ef79fbeb3b80de3529df9c65cb4d4ca2 2597565 \n", + "318966 NaN ef79fbeb3b80de3529df9c65cb4d4ca2 2597566 \n", + "318967 NaN ef79fbeb3b80de3529df9c65cb4d4ca2 2597567 \n", + "318968 NaN 7e825dd352bc6a11ab81cb8068e325e6 2598260 \n", + "\n", + " number_y created_at_y \\\n", + "0 1433_136_194_68356 2023-09-12 17:42:45.409056+02:00 \n", + "1 1433_136_212_68356 2023-09-12 17:42:45.396336+02:00 \n", + "2 33158_158_297_68357 2023-09-12 17:42:45.410447+02:00 \n", + "3 33158_158_318_68357 2023-09-12 17:42:45.411059+02:00 \n", + "4 33158_158_343_68357 2023-09-12 17:42:45.409824+02:00 \n", + "... ... ... \n", + "318964 70649_398_403_168652 2023-10-21 04:50:44.991960+02:00 \n", + "318965 89203_398_1187_168656 2023-10-21 04:50:44.993354+02:00 \n", + "318966 89203_398_1232_168655 2023-10-21 04:50:44.994301+02:00 \n", + "318967 89203_398_1211_168655 2023-10-21 04:50:44.995318+02:00 \n", + "318968 89257_401_2652_168793 2023-10-23 03:43:16.856244+02:00 \n", + "\n", + " updated_at_y purchase_id product_id \\\n", + "0 2023-09-12 17:42:45.409056+02:00 861763 209879 \n", + "1 2023-09-12 17:42:45.396336+02:00 861764 209879 \n", + "2 2023-09-12 17:42:45.410447+02:00 861767 209880 \n", + "3 2023-09-12 17:42:45.411059+02:00 861768 209880 \n", + "4 2023-09-12 17:42:45.409824+02:00 861769 209880 \n", + "... ... ... ... \n", + "318964 2023-10-21 04:50:44.991960+02:00 1285206 270350 \n", + "318965 2023-10-21 04:50:44.993354+02:00 1285209 268450 \n", + "318966 2023-10-21 04:50:44.994301+02:00 1285209 272403 \n", + "318967 2023-10-21 04:50:44.995318+02:00 1285209 272403 \n", + "318968 2023-10-23 03:43:16.856244+02:00 1285966 268428 \n", + "\n", + " is_from_subscription type_of supplier_id barcode \\\n", + "0 False 1 1702 NaN \n", + "1 False 1 1702 NaN \n", + "2 False 1 1702 NaN \n", + "3 False 1 1702 NaN \n", + "4 False 1 1702 NaN \n", + "... ... ... ... ... \n", + "318964 False 1 1702 NaN \n", + "318965 False 1 1702 NaN \n", + "318966 False 1 1702 NaN \n", + "318967 False 1 1702 NaN \n", + "318968 False 1 1702 NaN \n", + "\n", + " identifier_y \n", + "0 838d6101db2fc8bc80536d8b91b49859 \n", + "1 f694c255855ce5643c6fcc7fed5e9237 \n", + "2 b7a3dd0794c0957c942d45b8913e5b96 \n", + "3 d7ea7e443581ebe520dd13f6cad31af7 \n", + "4 8a8d938d66a4dc57bcb44c2773c6fdfa \n", + "... ... \n", + "318964 56c452c39089f658ed74a06c96b78725 \n", + "318965 5ef9912e7b533b8a1b2685db538df7d3 \n", + "318966 9742a56e9ffbdfb0a31a541dc5ccb889 \n", + "318967 56a9e032281d7a9c004da644818839cc \n", + "318968 86d6c0c2720435206078ac4bbf4f74f1 \n", + "\n", + "[318969 rows x 18 columns]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "e8f340b3-7519-47e7-a8bb-c8d1b68ca683", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
id_xcustomer_idproduct_idpurchase_datetype_ofis_from_subscription
086176349662098792019-03-01 16:29:17+01:001False
186176449662098792019-03-01 16:29:19+01:001False
28617674059942098802019-03-01 16:33:01+01:001False
38617684059942098802019-03-01 16:33:03+01:001False
48617694059942098802019-03-01 16:33:06+01:001False
.....................
31896412852063542332703502023-10-19 22:14:55+02:001False
31896512852095170012684502023-10-19 22:59:26+02:001False
31896612852095170012724032023-10-19 22:59:26+02:001False
31896712852095170012724032023-10-19 22:59:26+02:001False
31896812859665173092684282023-10-21 21:47:20+02:001False
\n", + "

318969 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " id_x customer_id product_id purchase_date type_of \\\n", + "0 861763 4966 209879 2019-03-01 16:29:17+01:00 1 \n", + "1 861764 4966 209879 2019-03-01 16:29:19+01:00 1 \n", + "2 861767 405994 209880 2019-03-01 16:33:01+01:00 1 \n", + "3 861768 405994 209880 2019-03-01 16:33:03+01:00 1 \n", + "4 861769 405994 209880 2019-03-01 16:33:06+01:00 1 \n", + "... ... ... ... ... ... \n", + "318964 1285206 354233 270350 2023-10-19 22:14:55+02:00 1 \n", + "318965 1285209 517001 268450 2023-10-19 22:59:26+02:00 1 \n", + "318966 1285209 517001 272403 2023-10-19 22:59:26+02:00 1 \n", + "318967 1285209 517001 272403 2023-10-19 22:59:26+02:00 1 \n", + "318968 1285966 517309 268428 2023-10-21 21:47:20+02:00 1 \n", + "\n", + " is_from_subscription \n", + "0 False \n", + "1 False \n", + "2 False \n", + "3 False \n", + "4 False \n", + "... ... \n", + "318964 False \n", + "318965 False \n", + "318966 False \n", + "318967 False \n", + "318968 False \n", + "\n", + "[318969 rows x 6 columns]" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Jonction client et évenement\n", + "merge_1 = pd.merge(purchases, tickets, left_on='id', right_on='purchase_id', how='inner')[['id_x', 'customer_id','product_id', 'purchase_date', 'type_of', 'is_from_subscription']]\n", + "merge_1" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "a598b86c-4128-4e5c-ae38-52689f755fd5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
id_xcustomer_idrepresentation_idpurchase_datetype_ofis_from_subscriptionamountis_full_price
012498477634443322023-09-03 18:43:56+02:001False18.0False
112498477634443322023-09-03 18:43:56+02:001False18.0False
21252665426962443322023-07-06 12:13:08+02:001False18.0False
31252665426962443322023-07-06 12:13:08+02:001False18.0False
41252671426731443322023-07-06 13:10:07+02:003False18.0False
...........................
318964121279711092338102018-11-28 13:44:22+01:001False30.0False
318965121347625851338102018-12-28 16:53:36+01:001False30.0False
318966122603926314338102018-12-29 16:30:47+01:001False30.0False
31896712442763104338102018-12-31 19:54:09+01:001False30.0False
318968124428586337662019-12-31 13:02:47+01:003False21.0False
\n", + "

318969 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " id_x customer_id representation_id purchase_date \\\n", + "0 1249847 7634 44332 2023-09-03 18:43:56+02:00 \n", + "1 1249847 7634 44332 2023-09-03 18:43:56+02:00 \n", + "2 1252665 426962 44332 2023-07-06 12:13:08+02:00 \n", + "3 1252665 426962 44332 2023-07-06 12:13:08+02:00 \n", + "4 1252671 426731 44332 2023-07-06 13:10:07+02:00 \n", + "... ... ... ... ... \n", + "318964 1212797 11092 33810 2018-11-28 13:44:22+01:00 \n", + "318965 1213476 25851 33810 2018-12-28 16:53:36+01:00 \n", + "318966 1226039 26314 33810 2018-12-29 16:30:47+01:00 \n", + "318967 1244276 3104 33810 2018-12-31 19:54:09+01:00 \n", + "318968 1244285 86 33766 2019-12-31 13:02:47+01:00 \n", + "\n", + " type_of is_from_subscription amount is_full_price \n", + "0 1 False 18.0 False \n", + "1 1 False 18.0 False \n", + "2 1 False 18.0 False \n", + "3 1 False 18.0 False \n", + "4 3 False 18.0 False \n", + "... ... ... ... ... \n", + "318964 1 False 30.0 False \n", + "318965 1 False 30.0 False \n", + "318966 1 False 30.0 False \n", + "318967 1 False 30.0 False \n", + "318968 3 False 21.0 False \n", + "\n", + "[318969 rows x 8 columns]" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merge_2 = pd.merge(products, merge_1, left_on='id', right_on='product_id', how='inner')[['id_x', 'customer_id', 'representation_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price']]\n", + "merge_2" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "9d394f79-2615-448e-8ebd-074e225f1584", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idserialevent_idcreated_atupdated_atstart_date_timeopensatisfactionend_date_timename...extra_fieldidentifierid_xcustomer_idrepresentation_idpurchase_datetype_ofis_from_subscriptionamountis_full_price
044351NaN203712023-09-13 03:42:45.245879+02:002023-09-13 03:42:45.245879+02:002023-12-21 20:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaN33520762e8cc28982e3841cbc2be8ce21293590627443512023-11-08 12:25:21+01:000False22.4False
144351NaN203712023-09-13 03:42:45.245879+02:002023-09-13 03:42:45.245879+02:002023-12-21 20:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaN33520762e8cc28982e3841cbc2be8ce21293590627443512023-11-08 12:25:21+01:000False22.4False
244351NaN203712023-09-13 03:42:45.245879+02:002023-09-13 03:42:45.245879+02:002023-12-21 20:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaN33520762e8cc28982e3841cbc2be8ce21293590627443512023-11-08 12:25:21+01:000False22.4False
344351NaN203712023-09-13 03:42:45.245879+02:002023-09-13 03:42:45.245879+02:002023-12-21 20:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaN33520762e8cc28982e3841cbc2be8ce21293590627443512023-11-08 12:25:21+01:000False22.4False
444351NaN203712023-09-13 03:42:45.245879+02:002023-09-13 03:42:45.245879+02:002023-12-21 20:00:00+01:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaN33520762e8cc28982e3841cbc2be8ce21293590627443512023-11-08 12:25:21+01:000False22.4False
..................................................................
31896433639NaN155332023-09-12 17:42:25.455708+02:002023-09-12 17:42:25.455708+02:002023-04-15 17:30:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaNfae68f1e09710ec8747957af6e22f61d118302615258336392023-03-26 16:09:31+02:001False0.0False
31896533639NaN155332023-09-12 17:42:25.455708+02:002023-09-12 17:42:25.455708+02:002023-04-15 17:30:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaNfae68f1e09710ec8747957af6e22f61d118302615258336392023-03-26 16:09:31+02:001False0.0False
31896633639NaN155332023-09-12 17:42:25.455708+02:002023-09-12 17:42:25.455708+02:002023-04-15 17:30:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaNfae68f1e09710ec8747957af6e22f61d118302615258336392023-03-26 16:09:31+02:001False0.0False
31896733639NaN155332023-09-12 17:42:25.455708+02:002023-09-12 17:42:25.455708+02:002023-04-15 17:30:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaNfae68f1e09710ec8747957af6e22f61d1194433412831336392023-03-27 17:38:59+02:001False0.0False
31896833639NaN155332023-09-12 17:42:25.455708+02:002023-09-12 17:42:25.455708+02:002023-04-15 17:30:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaN...NaNfae68f1e09710ec8747957af6e22f61d1194433412831336392023-03-27 17:38:59+02:001False0.0False
\n", + "

318969 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " id serial event_id created_at \\\n", + "0 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n", + "1 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n", + "2 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n", + "3 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n", + "4 44351 NaN 20371 2023-09-13 03:42:45.245879+02:00 \n", + "... ... ... ... ... \n", + "318964 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n", + "318965 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n", + "318966 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n", + "318967 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n", + "318968 33639 NaN 15533 2023-09-12 17:42:25.455708+02:00 \n", + "\n", + " updated_at start_date_time open \\\n", + "0 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n", + "1 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n", + "2 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n", + "3 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n", + "4 2023-09-13 03:42:45.245879+02:00 2023-12-21 20:00:00+01:00 True \n", + "... ... ... ... \n", + "318964 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n", + "318965 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n", + "318966 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n", + "318967 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n", + "318968 2023-09-12 17:42:25.455708+02:00 2023-04-15 17:30:00+02:00 True \n", + "\n", + " satisfaction end_date_time name ... extra_field \\\n", + "0 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", + "1 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", + "2 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", + "3 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", + "4 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", + "... ... ... ... ... ... \n", + "318964 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", + "318965 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", + "318966 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", + "318967 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", + "318968 NaN 1901-01-01 00:09:21+00:09 NaN ... NaN \n", + "\n", + " identifier id_x customer_id \\\n", + "0 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n", + "1 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n", + "2 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n", + "3 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n", + "4 33520762e8cc28982e3841cbc2be8ce2 1293590 627 \n", + "... ... ... ... \n", + "318964 fae68f1e09710ec8747957af6e22f61d 1183026 15258 \n", + "318965 fae68f1e09710ec8747957af6e22f61d 1183026 15258 \n", + "318966 fae68f1e09710ec8747957af6e22f61d 1183026 15258 \n", + "318967 fae68f1e09710ec8747957af6e22f61d 1194433 412831 \n", + "318968 fae68f1e09710ec8747957af6e22f61d 1194433 412831 \n", + "\n", + " representation_id purchase_date type_of \\\n", + "0 44351 2023-11-08 12:25:21+01:00 0 \n", + "1 44351 2023-11-08 12:25:21+01:00 0 \n", + "2 44351 2023-11-08 12:25:21+01:00 0 \n", + "3 44351 2023-11-08 12:25:21+01:00 0 \n", + "4 44351 2023-11-08 12:25:21+01:00 0 \n", + "... ... ... ... \n", + "318964 33639 2023-03-26 16:09:31+02:00 1 \n", + "318965 33639 2023-03-26 16:09:31+02:00 1 \n", + "318966 33639 2023-03-26 16:09:31+02:00 1 \n", + "318967 33639 2023-03-27 17:38:59+02:00 1 \n", + "318968 33639 2023-03-27 17:38:59+02:00 1 \n", + "\n", + " is_from_subscription amount is_full_price \n", + "0 False 22.4 False \n", + "1 False 22.4 False \n", + "2 False 22.4 False \n", + "3 False 22.4 False \n", + "4 False 22.4 False \n", + "... ... ... ... \n", + "318964 False 0.0 False \n", + "318965 False 0.0 False \n", + "318966 False 0.0 False \n", + "318967 False 0.0 False \n", + "318968 False 0.0 False \n", + "\n", + "[318969 rows x 24 columns]" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "63bcbfad-fa20-425a-881f-ca9aa212c419", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
id_xcustomer_idevent_idpurchase_datetype_ofis_from_subscriptionamountis_full_pricestart_date_time
01293590627203712023-11-08 12:25:21+01:000False22.4False2023-12-21 20:00:00+01:00
11293590627203712023-11-08 12:25:21+01:000False22.4False2023-12-21 20:00:00+01:00
21293590627203712023-11-08 12:25:21+01:000False22.4False2023-12-21 20:00:00+01:00
31293590627203712023-11-08 12:25:21+01:000False22.4False2023-12-21 20:00:00+01:00
41293590627203712023-11-08 12:25:21+01:000False22.4False2023-12-21 20:00:00+01:00
..............................
318964118302615258155332023-03-26 16:09:31+02:001False0.0False2023-04-15 17:30:00+02:00
318965118302615258155332023-03-26 16:09:31+02:001False0.0False2023-04-15 17:30:00+02:00
318966118302615258155332023-03-26 16:09:31+02:001False0.0False2023-04-15 17:30:00+02:00
3189671194433412831155332023-03-27 17:38:59+02:001False0.0False2023-04-15 17:30:00+02:00
3189681194433412831155332023-03-27 17:38:59+02:001False0.0False2023-04-15 17:30:00+02:00
\n", + "

318969 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " id_x customer_id event_id purchase_date type_of \\\n", + "0 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n", + "1 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n", + "2 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n", + "3 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n", + "4 1293590 627 20371 2023-11-08 12:25:21+01:00 0 \n", + "... ... ... ... ... ... \n", + "318964 1183026 15258 15533 2023-03-26 16:09:31+02:00 1 \n", + "318965 1183026 15258 15533 2023-03-26 16:09:31+02:00 1 \n", + "318966 1183026 15258 15533 2023-03-26 16:09:31+02:00 1 \n", + "318967 1194433 412831 15533 2023-03-27 17:38:59+02:00 1 \n", + "318968 1194433 412831 15533 2023-03-27 17:38:59+02:00 1 \n", + "\n", + " is_from_subscription amount is_full_price start_date_time \n", + "0 False 22.4 False 2023-12-21 20:00:00+01:00 \n", + "1 False 22.4 False 2023-12-21 20:00:00+01:00 \n", + "2 False 22.4 False 2023-12-21 20:00:00+01:00 \n", + "3 False 22.4 False 2023-12-21 20:00:00+01:00 \n", + "4 False 22.4 False 2023-12-21 20:00:00+01:00 \n", + "... ... ... ... ... \n", + "318964 False 0.0 False 2023-04-15 17:30:00+02:00 \n", + "318965 False 0.0 False 2023-04-15 17:30:00+02:00 \n", + "318966 False 0.0 False 2023-04-15 17:30:00+02:00 \n", + "318967 False 0.0 False 2023-04-15 17:30:00+02:00 \n", + "318968 False 0.0 False 2023-04-15 17:30:00+02:00 \n", + "\n", + "[318969 rows x 9 columns]" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merge_3 = pd.merge(representations, merge_2, left_on='id', right_on='representation_id', how='inner')[['id_x', 'customer_id', 'event_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time']]\n", + "merge_3" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "db52559b-6562-439b-b16e-f5d8dc9bc891", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcreated_atupdated_atseason_idfacility_idnameevent_type_idmanual_addedis_displayevent_type_key_id...identifierid_xcustomer_idevent_idpurchase_datetype_ofis_from_subscriptionamountis_full_pricestart_date_time
0203672023-09-13 03:42:45.214293+02:002023-09-13 03:54:30.086969+02:0018651054marelle1055FalseTrue1055...26d1e9a4acad18b9cf79244334c86c931253614432123203672023-09-07 18:02:58+02:003False2.0False2023-11-29 14:30:00+01:00
1203672023-09-13 03:42:45.214293+02:002023-09-13 03:54:30.086969+02:0018651054marelle1055FalseTrue1055...26d1e9a4acad18b9cf79244334c86c931253614432123203672023-09-07 18:02:58+02:003False2.0False2023-11-29 14:30:00+01:00
2203672023-09-13 03:42:45.214293+02:002023-09-13 03:54:30.086969+02:0018651054marelle1055FalseTrue1055...26d1e9a4acad18b9cf79244334c86c931252930431824203672023-09-06 16:06:40+02:001False5.0False2023-11-29 14:30:00+01:00
3203672023-09-13 03:42:45.214293+02:002023-09-13 03:54:30.086969+02:0018651054marelle1055FalseTrue1055...26d1e9a4acad18b9cf79244334c86c931252931431824203672023-09-06 16:06:42+02:001False5.0False2023-11-29 14:30:00+01:00
4203672023-09-13 03:42:45.214293+02:002023-09-13 03:54:30.086969+02:0018651054marelle1055FalseTrue1055...26d1e9a4acad18b9cf79244334c86c931252932431824203672023-09-06 16:06:44+02:001False5.0False2023-11-29 14:30:00+01:00
..................................................................
318964154392023-09-12 17:42:25.252747+02:002023-09-12 19:00:00.735990+02:0017081054florilege1055FalseTrue1055...4f015946bcbd856aa573cadb7ac42b9f1206691358863154392023-03-28 17:53:40+02:003False4.0False2023-03-29 20:00:00+02:00
318965154392023-09-12 17:42:25.252747+02:002023-09-12 19:00:00.735990+02:0017081054florilege1055FalseTrue1055...4f015946bcbd856aa573cadb7ac42b9f1218071413015154392023-03-29 17:01:01+02:001False4.0False2023-03-29 20:00:00+02:00
318966154392023-09-12 17:42:25.252747+02:002023-09-12 19:00:00.735990+02:0017081054florilege1055FalseTrue1055...4f015946bcbd856aa573cadb7ac42b9f1218125344045154392023-03-29 18:20:05+02:001False4.0False2023-03-29 20:00:00+02:00
318967154392023-09-12 17:42:25.252747+02:002023-09-12 19:00:00.735990+02:0017081054florilege1055FalseTrue1055...4f015946bcbd856aa573cadb7ac42b9f1218185381006154392023-03-29 19:50:18+02:001False4.0False2023-03-29 20:00:00+02:00
318968154392023-09-12 17:42:25.252747+02:002023-09-12 19:00:00.735990+02:0017081054florilege1055FalseTrue1055...4f015946bcbd856aa573cadb7ac42b9f12390744512154392023-01-31 16:14:27+01:001False4.0False2023-03-29 20:00:00+02:00
\n", + "

318969 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " id created_at \\\n", + "0 20367 2023-09-13 03:42:45.214293+02:00 \n", + "1 20367 2023-09-13 03:42:45.214293+02:00 \n", + "2 20367 2023-09-13 03:42:45.214293+02:00 \n", + "3 20367 2023-09-13 03:42:45.214293+02:00 \n", + "4 20367 2023-09-13 03:42:45.214293+02:00 \n", + "... ... ... \n", + "318964 15439 2023-09-12 17:42:25.252747+02:00 \n", + "318965 15439 2023-09-12 17:42:25.252747+02:00 \n", + "318966 15439 2023-09-12 17:42:25.252747+02:00 \n", + "318967 15439 2023-09-12 17:42:25.252747+02:00 \n", + "318968 15439 2023-09-12 17:42:25.252747+02:00 \n", + "\n", + " updated_at season_id facility_id name \\\n", + "0 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n", + "1 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n", + "2 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n", + "3 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n", + "4 2023-09-13 03:54:30.086969+02:00 1865 1054 marelle \n", + "... ... ... ... ... \n", + "318964 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n", + "318965 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n", + "318966 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n", + "318967 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n", + "318968 2023-09-12 19:00:00.735990+02:00 1708 1054 florilege \n", + "\n", + " event_type_id manual_added is_display event_type_key_id ... \\\n", + "0 1055 False True 1055 ... \n", + "1 1055 False True 1055 ... \n", + "2 1055 False True 1055 ... \n", + "3 1055 False True 1055 ... \n", + "4 1055 False True 1055 ... \n", + "... ... ... ... ... ... \n", + "318964 1055 False True 1055 ... \n", + "318965 1055 False True 1055 ... \n", + "318966 1055 False True 1055 ... \n", + "318967 1055 False True 1055 ... \n", + "318968 1055 False True 1055 ... \n", + "\n", + " identifier id_x customer_id event_id \\\n", + "0 26d1e9a4acad18b9cf79244334c86c93 1253614 432123 20367 \n", + "1 26d1e9a4acad18b9cf79244334c86c93 1253614 432123 20367 \n", + "2 26d1e9a4acad18b9cf79244334c86c93 1252930 431824 20367 \n", + "3 26d1e9a4acad18b9cf79244334c86c93 1252931 431824 20367 \n", + "4 26d1e9a4acad18b9cf79244334c86c93 1252932 431824 20367 \n", + "... ... ... ... ... \n", + "318964 4f015946bcbd856aa573cadb7ac42b9f 1206691 358863 15439 \n", + "318965 4f015946bcbd856aa573cadb7ac42b9f 1218071 413015 15439 \n", + "318966 4f015946bcbd856aa573cadb7ac42b9f 1218125 344045 15439 \n", + "318967 4f015946bcbd856aa573cadb7ac42b9f 1218185 381006 15439 \n", + "318968 4f015946bcbd856aa573cadb7ac42b9f 1239074 4512 15439 \n", + "\n", + " purchase_date type_of is_from_subscription amount \\\n", + "0 2023-09-07 18:02:58+02:00 3 False 2.0 \n", + "1 2023-09-07 18:02:58+02:00 3 False 2.0 \n", + "2 2023-09-06 16:06:40+02:00 1 False 5.0 \n", + "3 2023-09-06 16:06:42+02:00 1 False 5.0 \n", + "4 2023-09-06 16:06:44+02:00 1 False 5.0 \n", + "... ... ... ... ... \n", + "318964 2023-03-28 17:53:40+02:00 3 False 4.0 \n", + "318965 2023-03-29 17:01:01+02:00 1 False 4.0 \n", + "318966 2023-03-29 18:20:05+02:00 1 False 4.0 \n", + "318967 2023-03-29 19:50:18+02:00 1 False 4.0 \n", + "318968 2023-01-31 16:14:27+01:00 1 False 4.0 \n", + "\n", + " is_full_price start_date_time \n", + "0 False 2023-11-29 14:30:00+01:00 \n", + "1 False 2023-11-29 14:30:00+01:00 \n", + "2 False 2023-11-29 14:30:00+01:00 \n", + "3 False 2023-11-29 14:30:00+01:00 \n", + "4 False 2023-11-29 14:30:00+01:00 \n", + "... ... ... \n", + "318964 False 2023-03-29 20:00:00+02:00 \n", + "318965 False 2023-03-29 20:00:00+02:00 \n", + "318966 False 2023-03-29 20:00:00+02:00 \n", + "318967 False 2023-03-29 20:00:00+02:00 \n", + "318968 False 2023-03-29 20:00:00+02:00 \n", + "\n", + "[318969 rows x 21 columns]" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "d8ab2477-c199-4815-88d9-c5683e466772", + "metadata": {}, + "outputs": [], + "source": [ + "merge_4 = pd.merge(events, merge_3, left_on='id', right_on='event_id', how='inner')[['id_x', 'customer_id', 'purchase_date', 'type_of', 'is_from_subscription', 'amount', 'is_full_price', 'start_date_time', 'name']]\n", + "merge_4 = merge_4.rename(columns={'name': 'event_name'})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0917b77-6a73-4ae3-a58a-0bb7964f1406", + "metadata": {}, + "outputs": [], + "source": [ + "merge_5 = pd.merge(customersplus, merge_4, left_on = 'id', right_on = 'customer_id', how = " ] } ], diff --git a/Notebook_AJ.ipynb b/Notebook_AJ.ipynb index f61e733..c59dff1 100644 --- a/Notebook_AJ.ipynb +++ b/Notebook_AJ.ipynb @@ -358,7 +358,9 @@ "cell_type": "code", "execution_count": 49, "id": "0eb345e4-69f5-4e16-ac57-e33674c6c43d", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout",