diff --git a/Notebook_AR.ipynb b/Notebook_AR.ipynb index 29b3a21..b1d753b 100644 --- a/Notebook_AR.ipynb +++ b/Notebook_AR.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 196, + "execution_count": null, "id": "20eeb149-6618-4ef2-9cfd-ff062950f36c", "metadata": {}, "outputs": [], @@ -22,35 +22,10 @@ }, { "cell_type": "code", - "execution_count": 197, + "execution_count": null, "id": "30494c5e-9649-4fff-8708-617544188b20", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['bdc2324-data/1',\n", - " 'bdc2324-data/10',\n", - " 'bdc2324-data/101',\n", - " 'bdc2324-data/11',\n", - " 'bdc2324-data/12',\n", - " 'bdc2324-data/13',\n", - " 'bdc2324-data/14',\n", - " 'bdc2324-data/2',\n", - " 'bdc2324-data/3',\n", - " 'bdc2324-data/4',\n", - " 'bdc2324-data/5',\n", - " 'bdc2324-data/6',\n", - " 'bdc2324-data/7',\n", - " 'bdc2324-data/8',\n", - " 'bdc2324-data/9']" - ] - }, - "execution_count": 197, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Create filesystem object\n", "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", @@ -78,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 198, + "execution_count": null, "id": "f1cce705-46e1-42de-8e93-2ee15312d288", "metadata": {}, "outputs": [], @@ -88,43 +63,10 @@ }, { "cell_type": "code", - "execution_count": 199, + "execution_count": null, "id": "82d4db0e-0cd5-49af-a4d3-f17f54b1c03c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bdc2324-data/8/8campaign_stats.csv\n", - "bdc2324-data/8/8campaigns.csv\n", - "bdc2324-data/8/8categories.csv\n", - "bdc2324-data/8/8countries.csv\n", - "bdc2324-data/8/8currencies.csv\n", - "bdc2324-data/8/8customer_target_mappings.csv\n", - "bdc2324-data/8/8customersplus.csv\n", - "bdc2324-data/8/8event_types.csv\n", - "bdc2324-data/8/8events.csv\n", - "bdc2324-data/8/8facilities.csv\n", - "bdc2324-data/8/8link_stats.csv\n", - 
"bdc2324-data/8/8pricing_formulas.csv\n", - "bdc2324-data/8/8product_packs.csv\n", - "bdc2324-data/8/8products.csv\n", - "bdc2324-data/8/8products_groups.csv\n", - "bdc2324-data/8/8purchases.csv\n", - "bdc2324-data/8/8representation_category_capacities.csv\n", - "bdc2324-data/8/8representations.csv\n", - "bdc2324-data/8/8seasons.csv\n", - "bdc2324-data/8/8suppliers.csv\n", - "bdc2324-data/8/8target_types.csv\n", - "bdc2324-data/8/8targets.csv\n", - "bdc2324-data/8/8tickets.csv\n", - "bdc2324-data/8/8type_of_categories.csv\n", - "bdc2324-data/8/8type_of_pricing_formulas.csv\n", - "bdc2324-data/8/8type_ofs.csv\n" - ] - } - ], + "outputs": [], "source": [ "# check the files in the directory\n", "\n", @@ -136,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 200, + "execution_count": null, "id": "65cb38ad-52ae-4266-85d8-c47d81b00283", "metadata": {}, "outputs": [], @@ -165,162 +107,10 @@ }, { "cell_type": "code", - "execution_count": 201, + "execution_count": null, "id": "0214d30d-5f83-498f-867f-e67b5793b731", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8campaigns.csv\n", - "Shape : (1689, 11)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnameservice_idcreated_atupdated_atprocess_idreport_urlcategoryto_be_syncedidentifiersent_at
01#LOUSFP RELANCE P'TITS LOU14362022-02-01 15:22:53.564432+01:002022-02-01 15:22:53.564432+01:00NaNNaN0Falseeaa32c96f620053cf442ad32258076b92022-01-31 00:00:00+01:00
12#LOUSFP BRASSERIE ACHETEURS14352022-02-01 15:22:53.572592+01:002022-02-01 15:22:53.572592+01:00NaNNaN0False1f3202d820180a39f736f20fce790de82022-01-31 00:00:00+01:00
23PRESSE. LOU/SF Paris - RDV et protocole14332022-02-01 15:22:53.578426+01:002022-02-01 15:22:53.578426+01:00NaNNaN0Falseb069b3415151fa7217e870017374de7c2022-01-31 00:00:00+01:00
34#LOUSFP ÉTUDIANTS14322022-02-01 15:22:53.584235+01:002022-02-01 15:22:53.584235+01:00NaNNaN0False56468d5607a5aaf1604ff5e15593b0032022-01-27 00:00:00+01:00
45#LOUSFP P'TITS LOU14312022-02-01 15:22:53.590187+01:002022-02-01 15:22:53.590187+01:00NaNNaN0Falsee11943a6031a0e6114ae69c2576179802022-01-27 00:00:00+01:00
\n", - "
" - ], - "text/plain": [ - " id name service_id \\\n", - "0 1 #LOUSFP RELANCE P'TITS LOU 1436 \n", - "1 2 #LOUSFP BRASSERIE ACHETEURS 1435 \n", - "2 3 PRESSE. LOU/SF Paris - RDV et protocole 1433 \n", - "3 4 #LOUSFP ÉTUDIANTS 1432 \n", - "4 5 #LOUSFP P'TITS LOU 1431 \n", - "\n", - " created_at updated_at \\\n", - "0 2022-02-01 15:22:53.564432+01:00 2022-02-01 15:22:53.564432+01:00 \n", - "1 2022-02-01 15:22:53.572592+01:00 2022-02-01 15:22:53.572592+01:00 \n", - "2 2022-02-01 15:22:53.578426+01:00 2022-02-01 15:22:53.578426+01:00 \n", - "3 2022-02-01 15:22:53.584235+01:00 2022-02-01 15:22:53.584235+01:00 \n", - "4 2022-02-01 15:22:53.590187+01:00 2022-02-01 15:22:53.590187+01:00 \n", - "\n", - " process_id report_url category to_be_synced \\\n", - "0 NaN NaN 0 False \n", - "1 NaN NaN 0 False \n", - "2 NaN NaN 0 False \n", - "3 NaN NaN 0 False \n", - "4 NaN NaN 0 False \n", - "\n", - " identifier sent_at \n", - "0 eaa32c96f620053cf442ad32258076b9 2022-01-31 00:00:00+01:00 \n", - "1 1f3202d820180a39f736f20fce790de8 2022-01-31 00:00:00+01:00 \n", - "2 b069b3415151fa7217e870017374de7c 2022-01-31 00:00:00+01:00 \n", - "3 56468d5607a5aaf1604ff5e15593b003 2022-01-27 00:00:00+01:00 \n", - "4 e11943a6031a0e6114ae69c257617980 2022-01-27 00:00:00+01:00 " - ] - }, - "execution_count": 201, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "campaigns = display_databases(\"8campaigns.csv\")\n", "campaigns.head()" @@ -328,137 +118,10 @@ }, { "cell_type": "code", - "execution_count": 202, + "execution_count": null, "id": "e7982be4-2c42-4a91-be5a-329a999644cc", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8campaign_stats.csv\n", - "Shape : (2527083, 8)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcreated_atupdated_at
0151614102022-02-02 18:16:07+01:00NaNNaN2022-02-02 17:16:08.616899+01:002022-02-02 17:16:08.623098+01:00
121542282022-02-02 18:18:11+01:00NaNNaN2022-02-02 17:18:12.030260+01:002022-02-02 17:18:12.036606+01:00
2361207942022-02-02 18:18:58+01:00NaNNaN2022-02-02 17:19:00.129697+01:002022-02-02 17:19:00.134704+01:00
3434670252022-02-02 18:19:33+01:00NaNNaN2022-02-02 17:19:34.023492+01:002022-02-02 17:19:34.027570+01:00
4521421062022-02-02 18:19:35+01:00NaNNaN2022-02-02 17:19:36.553321+01:002022-02-02 17:19:36.557473+01:00
\n", - "
" - ], - "text/plain": [ - " id campaign_id customer_id opened_at sent_at \\\n", - "0 1 5 161410 2022-02-02 18:16:07+01:00 NaN \n", - "1 2 1 54228 2022-02-02 18:18:11+01:00 NaN \n", - "2 3 6 120794 2022-02-02 18:18:58+01:00 NaN \n", - "3 4 3 467025 2022-02-02 18:19:33+01:00 NaN \n", - "4 5 2 142106 2022-02-02 18:19:35+01:00 NaN \n", - "\n", - " delivered_at created_at \\\n", - "0 NaN 2022-02-02 17:16:08.616899+01:00 \n", - "1 NaN 2022-02-02 17:18:12.030260+01:00 \n", - "2 NaN 2022-02-02 17:19:00.129697+01:00 \n", - "3 NaN 2022-02-02 17:19:34.023492+01:00 \n", - "4 NaN 2022-02-02 17:19:36.553321+01:00 \n", - "\n", - " updated_at \n", - "0 2022-02-02 17:16:08.623098+01:00 \n", - "1 2022-02-02 17:18:12.036606+01:00 \n", - "2 2022-02-02 17:19:00.134704+01:00 \n", - "3 2022-02-02 17:19:34.027570+01:00 \n", - "4 2022-02-02 17:19:36.557473+01:00 " - ] - }, - "execution_count": 202, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "campaign_stats = display_databases(\"8campaign_stats.csv\")\n", "campaign_stats.head()" @@ -482,118 +145,10 @@ }, { "cell_type": "code", - "execution_count": 203, + "execution_count": null, "id": "e973575b-4ed6-4b23-8024-f383ac82e87c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8link_stats.csv\n", - "Shape : (108461, 6)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idclicked_atlink_idcustomer_idcreated_atupdated_at
012022-02-02 18:33:17+01:001621372022-02-02 17:33:19.237759+01:002022-02-02 17:33:19.237759+01:00
122022-02-02 18:33:26+01:0015560482022-02-02 17:33:28.101943+01:002022-02-02 17:33:28.101943+01:00
232022-02-02 18:33:49+01:0021944562022-02-02 17:33:50.595125+01:002022-02-02 17:33:50.595125+01:00
342022-02-02 18:34:19+01:0011944562022-02-02 17:34:20.493986+01:002022-02-02 17:34:20.493986+01:00
452022-02-02 18:34:21+01:002215712022-02-02 17:34:22.300427+01:002022-02-02 17:34:22.300427+01:00
\n", - "
" - ], - "text/plain": [ - " id clicked_at link_id customer_id \\\n", - "0 1 2022-02-02 18:33:17+01:00 1 62137 \n", - "1 2 2022-02-02 18:33:26+01:00 1 556048 \n", - "2 3 2022-02-02 18:33:49+01:00 2 194456 \n", - "3 4 2022-02-02 18:34:19+01:00 1 194456 \n", - "4 5 2022-02-02 18:34:21+01:00 2 21571 \n", - "\n", - " created_at updated_at \n", - "0 2022-02-02 17:33:19.237759+01:00 2022-02-02 17:33:19.237759+01:00 \n", - "1 2022-02-02 17:33:28.101943+01:00 2022-02-02 17:33:28.101943+01:00 \n", - "2 2022-02-02 17:33:50.595125+01:00 2022-02-02 17:33:50.595125+01:00 \n", - "3 2022-02-02 17:34:20.493986+01:00 2022-02-02 17:34:20.493986+01:00 \n", - "4 2022-02-02 17:34:22.300427+01:00 2022-02-02 17:34:22.300427+01:00 " - ] - }, - "execution_count": 203, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "links_stats = display_databases(\"8link_stats.csv\")\n", "links_stats.head()" @@ -609,239 +164,10 @@ }, { "cell_type": "code", - "execution_count": 204, + "execution_count": null, "id": "3b523575-c779-451c-a12e-a36fb4ad232c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bdc2324-data/8/8customersplus.csv\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_456/2210053343.py:5: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " customersplus = pd.read_csv(file_in, sep=\",\")\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idlastnamefirstnamebirthdateemailstreet_idcreated_atupdated_atcivilityis_partner...preferred_categorypreferred_supplierpreferred_formulapurchase_countfirst_buying_datelast_visiting_datezipcodecountryagetenant_id
01411166NaNNaNNaNemail141116612022-12-19 15:03:39.419371+01:002022-12-19 15:03:39.419371+01:00NaNFalse...NaNNaNNaN0NaNNaNNaNfrNaN1594
1478498lastname478498firstname478498NaNemail4784983391672021-09-17 18:58:30.259053+02:002023-06-28 15:25:24.146689+02:00NaNFalse...NaNNaNNaN0NaNNaNNaNNaNNaN1594
2473678NaNNaNNaNemail4736783391672021-09-17 18:44:04.119713+02:002021-09-17 18:44:04.124204+02:00NaNFalse...NaNNaNNaN0NaNNaNNaNNaNNaN1594
3475026NaNNaNNaNemail4750263391672021-09-17 18:47:28.789618+02:002021-09-17 18:47:28.793958+02:00NaNFalse...NaNNaNNaN0NaNNaNNaNNaNNaN1594
4487146NaNNaNNaNemail4871463391672021-09-17 19:10:24.070460+02:002021-09-17 19:10:24.076033+02:00NaNFalse...NaNNaNNaN0NaNNaNNaNNaNNaN1594
\n", - "

5 rows × 43 columns

\n", - "
" - ], - "text/plain": [ - " id lastname firstname birthdate email \\\n", - "0 1411166 NaN NaN NaN email1411166 \n", - "1 478498 lastname478498 firstname478498 NaN email478498 \n", - "2 473678 NaN NaN NaN email473678 \n", - "3 475026 NaN NaN NaN email475026 \n", - "4 487146 NaN NaN NaN email487146 \n", - "\n", - " street_id created_at \\\n", - "0 1 2022-12-19 15:03:39.419371+01:00 \n", - "1 339167 2021-09-17 18:58:30.259053+02:00 \n", - "2 339167 2021-09-17 18:44:04.119713+02:00 \n", - "3 339167 2021-09-17 18:47:28.789618+02:00 \n", - "4 339167 2021-09-17 19:10:24.070460+02:00 \n", - "\n", - " updated_at civility is_partner ... \\\n", - "0 2022-12-19 15:03:39.419371+01:00 NaN False ... \n", - "1 2023-06-28 15:25:24.146689+02:00 NaN False ... \n", - "2 2021-09-17 18:44:04.124204+02:00 NaN False ... \n", - "3 2021-09-17 18:47:28.793958+02:00 NaN False ... \n", - "4 2021-09-17 19:10:24.076033+02:00 NaN False ... \n", - "\n", - " preferred_category preferred_supplier preferred_formula purchase_count \\\n", - "0 NaN NaN NaN 0 \n", - "1 NaN NaN NaN 0 \n", - "2 NaN NaN NaN 0 \n", - "3 NaN NaN NaN 0 \n", - "4 NaN NaN NaN 0 \n", - "\n", - " first_buying_date last_visiting_date zipcode country age tenant_id \n", - "0 NaN NaN NaN fr NaN 1594 \n", - "1 NaN NaN NaN NaN NaN 1594 \n", - "2 NaN NaN NaN NaN NaN 1594 \n", - "3 NaN NaN NaN NaN NaN 1594 \n", - "4 NaN NaN NaN NaN NaN 1594 \n", - "\n", - "[5 rows x 43 columns]" - ] - }, - "execution_count": 204, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "file_name = \"8customersplus.csv\"\n", "file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n", @@ -862,19 +188,10 @@ }, { "cell_type": "code", - "execution_count": 205, + "execution_count": null, "id": "87d801fc-d19a-4c45-9b21-9b6d7a8451fd", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bdc2324-data/8/8structures.csv\n", - "No structures database\n" - ] - } - ], + "outputs": 
[], "source": [ "file_name = \"8structures.csv\"\n", "file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n", @@ -904,124 +221,10 @@ }, { "cell_type": "code", - "execution_count": 206, + "execution_count": null, "id": "b6e4c3ea-5ccf-4aec-bd2d-79a5a1194178", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bdc2324-data/8/8customer_target_mappings.csv\n", - "Shape : (1449147, 7)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idtarget_idcreated_atupdated_atnameextra_field
01460062682021-09-17 20:20:24.562734+02:002021-09-17 20:20:24.562734+02:00NaNNaN
12460056682021-09-17 20:20:24.610139+02:002021-09-17 20:20:24.610139+02:00NaNNaN
23460051652021-09-17 20:20:24.641381+02:002021-09-17 20:20:24.641381+02:00NaNNaN
34460051662021-09-17 20:20:24.672238+02:002021-09-17 20:20:24.672238+02:00NaNNaN
45460049712021-09-17 20:20:24.703110+02:002021-09-17 20:20:24.703110+02:00NaNNaN
\n", - "
" - ], - "text/plain": [ - " id customer_id target_id created_at \\\n", - "0 1 460062 68 2021-09-17 20:20:24.562734+02:00 \n", - "1 2 460056 68 2021-09-17 20:20:24.610139+02:00 \n", - "2 3 460051 65 2021-09-17 20:20:24.641381+02:00 \n", - "3 4 460051 66 2021-09-17 20:20:24.672238+02:00 \n", - "4 5 460049 71 2021-09-17 20:20:24.703110+02:00 \n", - "\n", - " updated_at name extra_field \n", - "0 2021-09-17 20:20:24.562734+02:00 NaN NaN \n", - "1 2021-09-17 20:20:24.610139+02:00 NaN NaN \n", - "2 2021-09-17 20:20:24.641381+02:00 NaN NaN \n", - "3 2021-09-17 20:20:24.672238+02:00 NaN NaN \n", - "4 2021-09-17 20:20:24.703110+02:00 NaN NaN " - ] - }, - "execution_count": 206, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "file_name = \"8customer_target_mappings.csv\"\n", "file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n", @@ -1039,112 +242,10 @@ }, { "cell_type": "code", - "execution_count": 207, + "execution_count": null, "id": "6e81a35c-3c6f-403d-9ebd-e8399ecd4263", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bdc2324-data/8/8targets.csv\n", - "Shape : (331, 5)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtarget_type_idnamecreated_atupdated_at
011ÉTUDIANTS (OPÉ PANIERS) 21-222021-09-17 18:10:40.879995+02:002021-09-17 18:10:40.879995+02:00
121EFFECTIF + STAFF 21-222021-09-17 18:10:40.894758+02:002021-09-17 18:10:40.894758+02:00
231Acheteurs LOU / USAP2021-09-17 18:10:40.911969+02:002021-09-17 18:10:40.911969+02:00
341Liste Compensation 21-222021-09-17 18:10:40.928796+02:002021-09-17 18:10:40.928796+02:00
451Partenaires 21-222021-09-17 18:10:40.945476+02:002021-09-17 18:10:40.945476+02:00
\n", - "
" - ], - "text/plain": [ - " id target_type_id name \\\n", - "0 1 1 ÉTUDIANTS (OPÉ PANIERS) 21-22 \n", - "1 2 1 EFFECTIF + STAFF 21-22 \n", - "2 3 1 Acheteurs LOU / USAP \n", - "3 4 1 Liste Compensation 21-22 \n", - "4 5 1 Partenaires 21-22 \n", - "\n", - " created_at updated_at \n", - "0 2021-09-17 18:10:40.879995+02:00 2021-09-17 18:10:40.879995+02:00 \n", - "1 2021-09-17 18:10:40.894758+02:00 2021-09-17 18:10:40.894758+02:00 \n", - "2 2021-09-17 18:10:40.911969+02:00 2021-09-17 18:10:40.911969+02:00 \n", - "3 2021-09-17 18:10:40.928796+02:00 2021-09-17 18:10:40.928796+02:00 \n", - "4 2021-09-17 18:10:40.945476+02:00 2021-09-17 18:10:40.945476+02:00 " - ] - }, - "execution_count": 207, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "file_name = \"8targets.csv\"\n", "file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n", @@ -1162,107 +263,10 @@ }, { "cell_type": "code", - "execution_count": 208, + "execution_count": null, "id": "85696d74-3b2f-4368-9045-44db5322b60d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bdc2324-data/8/8target_types.csv\n", - "Shape : (4, 6)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idis_importnamecreated_atupdated_atidentifier
01NaNmanual_static_filter2021-09-17 18:10:40.864320+02:002021-09-17 18:10:40.864320+02:00e34e3aa838a6eb4c41df6ed4444b796a
12Falsemanual_dynamic_filter2022-03-09 14:41:45.695407+01:002022-03-09 14:41:45.695407+01:00e0f4b8693184850fefd6d2a38f10584e
23Falsemanual_static_filter2022-04-01 17:02:49.588910+02:002022-04-01 17:02:49.588910+02:00fb27e81baa4debc6a4e1a8639c20e808
34Truemanual_import2022-05-06 14:26:01.923160+02:002022-05-06 14:26:01.923160+02:0012213df2ce68a624e4c0070521437bac
\n", - "
" - ], - "text/plain": [ - " id is_import name created_at \\\n", - "0 1 NaN manual_static_filter 2021-09-17 18:10:40.864320+02:00 \n", - "1 2 False manual_dynamic_filter 2022-03-09 14:41:45.695407+01:00 \n", - "2 3 False manual_static_filter 2022-04-01 17:02:49.588910+02:00 \n", - "3 4 True manual_import 2022-05-06 14:26:01.923160+02:00 \n", - "\n", - " updated_at identifier \n", - "0 2021-09-17 18:10:40.864320+02:00 e34e3aa838a6eb4c41df6ed4444b796a \n", - "1 2022-03-09 14:41:45.695407+01:00 e0f4b8693184850fefd6d2a38f10584e \n", - "2 2022-04-01 17:02:49.588910+02:00 fb27e81baa4debc6a4e1a8639c20e808 \n", - "3 2022-05-06 14:26:01.923160+02:00 12213df2ce68a624e4c0070521437bac " - ] - }, - "execution_count": 208, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "file_name = \"8target_types.csv\"\n", "file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n", @@ -1298,131 +302,10 @@ }, { "cell_type": "code", - "execution_count": 209, + "execution_count": null, "id": "7c57529b-2ffb-4039-9795-b27c6fbd54a4", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8purchases.csv\n", - "Shape : (975703, 7)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idpurchase_datecustomer_idcreated_atupdated_atnumberidentifier
01196092017-09-09 15:39:45.913000+02:0011492021-06-29 21:52:21.816195+02:002021-06-29 21:52:21.816195+02:00193416f2956e2d53321317e7c15c1cb992156c
11196102017-09-09 15:39:46.033000+02:0011492021-06-29 21:52:21.817846+02:002021-06-29 21:52:21.817846+02:00193416faabab441b2668a85bb484490b2166c3
254642017-07-24 19:44:11.923000+02:0012512021-06-29 21:33:45.604224+02:002021-06-29 21:33:45.604224+02:00184354f63c69fa585ce4f91681f0d9ebeb770f
31196132017-09-10 11:25:45.820000+02:00125582021-06-29 21:52:21.822033+02:002021-06-29 21:52:21.822033+02:00193462ffce5fd8d2348eb6885d0ee9c7bd017c
414228602018-10-08 10:30:42.980000+02:00179352021-07-16 04:20:55.347369+02:002021-07-16 04:20:55.347369+02:00247459193e41eae8ee078537107a569c0426ef
\n", - "
" - ], - "text/plain": [ - " id purchase_date customer_id \\\n", - "0 119609 2017-09-09 15:39:45.913000+02:00 1149 \n", - "1 119610 2017-09-09 15:39:46.033000+02:00 1149 \n", - "2 5464 2017-07-24 19:44:11.923000+02:00 1251 \n", - "3 119613 2017-09-10 11:25:45.820000+02:00 12558 \n", - "4 1422860 2018-10-08 10:30:42.980000+02:00 17935 \n", - "\n", - " created_at updated_at number \\\n", - "0 2021-06-29 21:52:21.816195+02:00 2021-06-29 21:52:21.816195+02:00 193416 \n", - "1 2021-06-29 21:52:21.817846+02:00 2021-06-29 21:52:21.817846+02:00 193416 \n", - "2 2021-06-29 21:33:45.604224+02:00 2021-06-29 21:33:45.604224+02:00 184354 \n", - "3 2021-06-29 21:52:21.822033+02:00 2021-06-29 21:52:21.822033+02:00 193462 \n", - "4 2021-07-16 04:20:55.347369+02:00 2021-07-16 04:20:55.347369+02:00 247459 \n", - "\n", - " identifier \n", - "0 f2956e2d53321317e7c15c1cb992156c \n", - "1 faabab441b2668a85bb484490b2166c3 \n", - "2 f63c69fa585ce4f91681f0d9ebeb770f \n", - "3 ffce5fd8d2348eb6885d0ee9c7bd017c \n", - "4 193e41eae8ee078537107a569c0426ef " - ] - }, - "execution_count": 209, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "purchases = display_databases(\"8purchases.csv\")\n", "purchases.head()" @@ -1430,162 +313,10 @@ }, { "cell_type": "code", - "execution_count": 210, + "execution_count": null, "id": "903321fb-99f8-475d-b4a6-c70ec2efe190", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8tickets.csv\n", - "Shape : (2370152, 11)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnumbercreated_atupdated_atpurchase_idproduct_idis_from_subscriptiontype_ofsupplier_idbarcodeidentifier
0254164193416_763837_650_688_3262122021-06-29 21:53:14.951871+02:002021-06-29 21:53:14.951871+02:001196093334False12NaN9ec3b5617fc54512acf131aa5fa26870
1254165193416_763838_650_688_3262362021-06-29 21:53:14.953717+02:002021-06-29 21:53:14.953717+02:001196103334False12NaNb227c664e2574a919672683f5cc4c98e
2254168193462_763921_649_687_3056762021-06-29 21:53:14.958207+02:002021-06-29 21:53:14.958207+02:001196133432False12NaN28ac507ad84a30993bdfc0996fd2476b
3254169193462_763922_649_687_3056532021-06-29 21:53:14.959681+02:002021-06-29 21:53:14.959681+02:001196143268False12NaN131dbaeef23f5ac2271bf0266ce35476
4254170193462_763923_649_687_3056302021-06-29 21:53:14.961157+02:002021-06-29 21:53:14.961157+02:001196153268False12NaN1a6342ad2c213b626aa55e5374cd661a
\n", - "
" - ], - "text/plain": [ - " id number created_at \\\n", - "0 254164 193416_763837_650_688_326212 2021-06-29 21:53:14.951871+02:00 \n", - "1 254165 193416_763838_650_688_326236 2021-06-29 21:53:14.953717+02:00 \n", - "2 254168 193462_763921_649_687_305676 2021-06-29 21:53:14.958207+02:00 \n", - "3 254169 193462_763922_649_687_305653 2021-06-29 21:53:14.959681+02:00 \n", - "4 254170 193462_763923_649_687_305630 2021-06-29 21:53:14.961157+02:00 \n", - "\n", - " updated_at purchase_id product_id \\\n", - "0 2021-06-29 21:53:14.951871+02:00 119609 3334 \n", - "1 2021-06-29 21:53:14.953717+02:00 119610 3334 \n", - "2 2021-06-29 21:53:14.958207+02:00 119613 3432 \n", - "3 2021-06-29 21:53:14.959681+02:00 119614 3268 \n", - "4 2021-06-29 21:53:14.961157+02:00 119615 3268 \n", - "\n", - " is_from_subscription type_of supplier_id barcode \\\n", - "0 False 1 2 NaN \n", - "1 False 1 2 NaN \n", - "2 False 1 2 NaN \n", - "3 False 1 2 NaN \n", - "4 False 1 2 NaN \n", - "\n", - " identifier \n", - "0 9ec3b5617fc54512acf131aa5fa26870 \n", - "1 b227c664e2574a919672683f5cc4c98e \n", - "2 28ac507ad84a30993bdfc0996fd2476b \n", - "3 131dbaeef23f5ac2271bf0266ce35476 \n", - "4 1a6342ad2c213b626aa55e5374cd661a " - ] - }, - "execution_count": 210, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "tickets = display_databases(\"8tickets.csv\")\n", "tickets.head()" @@ -1593,143 +324,10 @@ }, { "cell_type": "code", - "execution_count": 211, + "execution_count": null, "id": "243e6942-0233-4cd5-b32b-e005457131d2", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8suppliers.csv\n", - "Shape : (16, 9)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamemanually_addedlabelitrupdated_atcreated_atcommissionidentifier
0152plateformecewebFalseNaNNaN2021-07-16 00:02:17.805193+02:002021-07-16 00:02:17.805193+02:00NaN0fc934f49bfa9f1f4e6ab7e2593b6839
16accreditation annuelleFalseNaNNaN2021-06-29 21:33:14.138349+02:002021-06-29 21:33:14.138349+02:00NaNfe13238540e0ff293ec8aad29aeae6c3
268abonnement parkingFalseNaNNaN2021-06-29 22:10:31.167367+02:002021-06-29 22:10:31.167367+02:00NaN0f7defc52a97cdca533af74f4e6e5b1e
39accreditation matchFalseNaNNaN2021-06-29 21:33:14.142084+02:002021-06-29 21:33:14.142084+02:00NaN40e19a7c4824eaad298e0107ed7e3691
4154web lnr-louFalseNaNNaN2021-07-16 00:02:17.806521+02:002021-07-16 00:02:17.806521+02:00NaNb144dd617807b02e0d9002fac6c61768
\n", - "
" - ], - "text/plain": [ - " id name manually_added label itr \\\n", - "0 152 plateformeceweb False NaN NaN \n", - "1 6 accreditation annuelle False NaN NaN \n", - "2 68 abonnement parking False NaN NaN \n", - "3 9 accreditation match False NaN NaN \n", - "4 154 web lnr-lou False NaN NaN \n", - "\n", - " updated_at created_at \\\n", - "0 2021-07-16 00:02:17.805193+02:00 2021-07-16 00:02:17.805193+02:00 \n", - "1 2021-06-29 21:33:14.138349+02:00 2021-06-29 21:33:14.138349+02:00 \n", - "2 2021-06-29 22:10:31.167367+02:00 2021-06-29 22:10:31.167367+02:00 \n", - "3 2021-06-29 21:33:14.142084+02:00 2021-06-29 21:33:14.142084+02:00 \n", - "4 2021-07-16 00:02:17.806521+02:00 2021-07-16 00:02:17.806521+02:00 \n", - "\n", - " commission identifier \n", - "0 NaN 0fc934f49bfa9f1f4e6ab7e2593b6839 \n", - "1 NaN fe13238540e0ff293ec8aad29aeae6c3 \n", - "2 NaN 0f7defc52a97cdca533af74f4e6e5b1e \n", - "3 NaN 40e19a7c4824eaad298e0107ed7e3691 \n", - "4 NaN b144dd617807b02e0d9002fac6c61768 " - ] - }, - "execution_count": 211, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "suppliers = display_databases(\"8suppliers.csv\")\n", "suppliers.head()" @@ -1745,180 +343,10 @@ }, { "cell_type": "code", - "execution_count": 212, + "execution_count": null, "id": "6b82efce-1dee-4d89-8585-28c4ad477eef", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8products.csv\n", - "Shape : (45411, 14)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idamountis_full_pricerepresentation_idpricing_formula_idcreated_atupdated_atcategory_idapply_priceproducts_group_idproduct_pack_idextra_fieldamount_consumptionidentifier
0900130.0False19619122021-07-16 04:56:05.797551+02:002021-07-16 04:56:05.797551+02:00340.0879171NaNNaN476e111175b1660688b7c13dade2b57e
16620.0False11292021-06-29 21:33:17.389201+02:002021-06-29 21:33:17.389201+02:00160.06401NaNNaN2c765698e9bedd48e8a3fd27dc8dbc97
26460.0False46102021-06-29 21:33:17.366742+02:002021-06-29 21:33:17.366742+02:00150.06241NaNNaN4e719148651fd7f175e3fb51bdb5d31b
357035.0False71882021-06-29 21:52:09.374365+02:002021-06-29 21:52:09.374365+02:0040.055401NaNNaNe4d7beeb0a631e2e51e61951623ba9b1
46480.0False49102021-06-29 21:33:17.369471+02:002021-06-29 21:33:17.369471+02:00150.06261NaNNaN07a5dd9e125345b9458651ab73605255
\n", - "
" - ], - "text/plain": [ - " id amount is_full_price representation_id pricing_formula_id \\\n", - "0 90013 0.0 False 1961 912 \n", - "1 662 0.0 False 11 29 \n", - "2 646 0.0 False 46 10 \n", - "3 5703 5.0 False 7 188 \n", - "4 648 0.0 False 49 10 \n", - "\n", - " created_at updated_at \\\n", - "0 2021-07-16 04:56:05.797551+02:00 2021-07-16 04:56:05.797551+02:00 \n", - "1 2021-06-29 21:33:17.389201+02:00 2021-06-29 21:33:17.389201+02:00 \n", - "2 2021-06-29 21:33:17.366742+02:00 2021-06-29 21:33:17.366742+02:00 \n", - "3 2021-06-29 21:52:09.374365+02:00 2021-06-29 21:52:09.374365+02:00 \n", - "4 2021-06-29 21:33:17.369471+02:00 2021-06-29 21:33:17.369471+02:00 \n", - "\n", - " category_id apply_price products_group_id product_pack_id extra_field \\\n", - "0 34 0.0 87917 1 NaN \n", - "1 16 0.0 640 1 NaN \n", - "2 15 0.0 624 1 NaN \n", - "3 4 0.0 5540 1 NaN \n", - "4 15 0.0 626 1 NaN \n", - "\n", - " amount_consumption identifier \n", - "0 NaN 476e111175b1660688b7c13dade2b57e \n", - "1 NaN 2c765698e9bedd48e8a3fd27dc8dbc97 \n", - "2 NaN 4e719148651fd7f175e3fb51bdb5d31b \n", - "3 NaN e4d7beeb0a631e2e51e61951623ba9b1 \n", - "4 NaN 07a5dd9e125345b9458651ab73605255 " - ] - }, - "execution_count": 212, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "products = display_databases(\"8products.csv\")\n", "products.head()" @@ -1942,125 +370,10 @@ }, { "cell_type": "code", - "execution_count": 213, + "execution_count": null, "id": "daf37bff-a26d-4ff5-ad50-c90f917164bd", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8pricing_formulas.csv\n", - "Shape : (516, 6)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamecreated_atupdated_atextra_fieldidentifier
07visite stade enfant2021-06-29 21:33:14.160728+02:002021-06-29 21:33:14.160728+02:00NaNbbc80e5761a0ea325f6f6a5411752659
13229tarif bloc etudiants2021-07-16 04:20:46.684601+02:002021-09-03 16:44:46.096785+02:00NaN205122cc7e96d559330972b0ec0cf35a
242invitation eiffage2021-06-29 21:33:14.204483+02:002021-06-29 21:33:14.204483+02:00NaNe4e6365c02e2a7b01ebe2ce8ace624f2
34379invitation offre speciale2021-07-16 05:21:44.984893+02:002021-07-16 05:21:44.984893+02:00NaN307817b6205535a35915a64027ee161e
42641prevente reabo enfant2021-07-16 03:47:40.896805+02:002021-09-03 16:08:35.304298+02:00NaN478eb63c71ba35d8d3d64c8637dafdee
\n", - "
" - ], - "text/plain": [ - " id name created_at \\\n", - "0 7 visite stade enfant 2021-06-29 21:33:14.160728+02:00 \n", - "1 3229 tarif bloc etudiants 2021-07-16 04:20:46.684601+02:00 \n", - "2 42 invitation eiffage 2021-06-29 21:33:14.204483+02:00 \n", - "3 4379 invitation offre speciale 2021-07-16 05:21:44.984893+02:00 \n", - "4 2641 prevente reabo enfant 2021-07-16 03:47:40.896805+02:00 \n", - "\n", - " updated_at extra_field \\\n", - "0 2021-06-29 21:33:14.160728+02:00 NaN \n", - "1 2021-09-03 16:44:46.096785+02:00 NaN \n", - "2 2021-06-29 21:33:14.204483+02:00 NaN \n", - "3 2021-07-16 05:21:44.984893+02:00 NaN \n", - "4 2021-09-03 16:08:35.304298+02:00 NaN \n", - "\n", - " identifier \n", - "0 bbc80e5761a0ea325f6f6a5411752659 \n", - "1 205122cc7e96d559330972b0ec0cf35a \n", - "2 e4e6365c02e2a7b01ebe2ce8ace624f2 \n", - "3 307817b6205535a35915a64027ee161e \n", - "4 478eb63c71ba35d8d3d64c8637dafdee " - ] - }, - "execution_count": 213, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pricing_formulas = display_databases(\"8pricing_formulas.csv\")\n", "pricing_formulas.head()" @@ -2068,118 +381,10 @@ }, { "cell_type": "code", - "execution_count": 214, + "execution_count": null, "id": "cdb14488-b093-4b39-84fa-1c2b4576208f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8type_of_pricing_formulas.csv\n", - "Shape : (103, 6)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtype_of_idpricing_formula_idcreated_atupdated_atidentifier
01710212021-09-03 14:17:19.816110+02:002021-09-03 14:17:19.816110+02:0041047fbeb7cd3e1cb2713c608d2f786d
12743052021-09-03 14:17:19.848088+02:002021-09-03 14:17:19.848088+02:00a62a4dad7d62738129244bbb5ede0747
23743062021-09-03 14:17:19.864067+02:002021-09-03 14:17:19.864067+02:00c3770373e09f55412068c447736d9da3
347292021-09-03 14:17:19.880078+02:002021-09-03 14:17:19.880078+02:007b7b1242ae7a8c9eb66d35d8a4348ccd
458102021-09-03 14:18:03.616081+02:002021-09-03 14:18:03.616081+02:000a2b941c46b31258c03b316aa064e86a
\n", - "
" - ], - "text/plain": [ - " id type_of_id pricing_formula_id created_at \\\n", - "0 1 7 1021 2021-09-03 14:17:19.816110+02:00 \n", - "1 2 7 4305 2021-09-03 14:17:19.848088+02:00 \n", - "2 3 7 4306 2021-09-03 14:17:19.864067+02:00 \n", - "3 4 7 29 2021-09-03 14:17:19.880078+02:00 \n", - "4 5 8 10 2021-09-03 14:18:03.616081+02:00 \n", - "\n", - " updated_at identifier \n", - "0 2021-09-03 14:17:19.816110+02:00 41047fbeb7cd3e1cb2713c608d2f786d \n", - "1 2021-09-03 14:17:19.848088+02:00 a62a4dad7d62738129244bbb5ede0747 \n", - "2 2021-09-03 14:17:19.864067+02:00 c3770373e09f55412068c447736d9da3 \n", - "3 2021-09-03 14:17:19.880078+02:00 7b7b1242ae7a8c9eb66d35d8a4348ccd \n", - "4 2021-09-03 14:18:03.616081+02:00 0a2b941c46b31258c03b316aa064e86a " - ] - }, - "execution_count": 214, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "type_pricing_formulas = display_databases(\"8type_of_pricing_formulas.csv\")\n", "type_pricing_formulas.head()" @@ -2203,131 +408,10 @@ }, { "cell_type": "code", - "execution_count": 215, + "execution_count": null, "id": "6582694d-5339-4f33-a943-c73033121a90", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8categories.csv\n", - "Shape : (148, 7)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamecreated_atupdated_atextra_fieldquotaidentifier
0653acces village implid2021-07-16 00:04:37.181331+02:002021-07-16 00:04:37.181331+02:00NaNNaNc447d053646a6503d3cd84d4798bf5b7
1805parking organisation2021-07-16 01:54:15.822407+02:002021-07-16 01:54:15.822407+02:00NaNNaN02bf9871964345f505ad305080daec36
2809rose rouge orange2021-07-16 01:54:15.825345+02:002021-07-16 01:54:15.825345+02:00NaNNaN31fb5b57bc1a2bcd5c155fb0d9e7c0dd
321832eme catégorie j.b. centrale2021-07-16 04:37:25.446835+02:002021-07-16 04:37:25.446835+02:00NaNNaNc9eb6651caaed42b809b3f4407a847c9
4621acces brasserie2021-07-16 00:02:17.249701+02:002021-07-16 00:02:17.249701+02:00NaNNaN349e6a59585d78d80d46acbc6a520c50
\n", - "
" - ], - "text/plain": [ - " id name created_at \\\n", - "0 653 acces village implid 2021-07-16 00:04:37.181331+02:00 \n", - "1 805 parking organisation 2021-07-16 01:54:15.822407+02:00 \n", - "2 809 rose rouge orange 2021-07-16 01:54:15.825345+02:00 \n", - "3 2183 2eme catégorie j.b. centrale 2021-07-16 04:37:25.446835+02:00 \n", - "4 621 acces brasserie 2021-07-16 00:02:17.249701+02:00 \n", - "\n", - " updated_at extra_field quota \\\n", - "0 2021-07-16 00:04:37.181331+02:00 NaN NaN \n", - "1 2021-07-16 01:54:15.822407+02:00 NaN NaN \n", - "2 2021-07-16 01:54:15.825345+02:00 NaN NaN \n", - "3 2021-07-16 04:37:25.446835+02:00 NaN NaN \n", - "4 2021-07-16 00:02:17.249701+02:00 NaN NaN \n", - "\n", - " identifier \n", - "0 c447d053646a6503d3cd84d4798bf5b7 \n", - "1 02bf9871964345f505ad305080daec36 \n", - "2 31fb5b57bc1a2bcd5c155fb0d9e7c0dd \n", - "3 c9eb6651caaed42b809b3f4407a847c9 \n", - "4 349e6a59585d78d80d46acbc6a520c50 " - ] - }, - "execution_count": 215, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "categories = display_databases(\"8categories.csv\")\n", "categories.head()" @@ -2335,118 +419,10 @@ }, { "cell_type": "code", - "execution_count": 216, + "execution_count": null, "id": "589076df-1958-42de-9941-1aff9fa8536f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8type_of_categories.csv\n", - "Shape : (6, 6)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtype_of_idcategory_idcreated_atupdated_atidentifier
01122021-08-20 15:22:05.558209+02:002021-08-20 15:22:05.558209+02:00af8fa6d57f6b19a7600a69e7771c7c3a
12212021-09-02 17:29:32.582002+02:002021-09-02 17:29:32.582002+02:0063718e7ad306912427758ddf988ad34f
23332021-09-02 17:32:38.299733+02:002021-09-02 17:32:38.299733+02:005e147d4d90888df14c4584f5c6887c96
34442021-09-02 17:35:04.748993+02:002021-09-02 17:35:04.748993+02:00a9dfdc3f40b41e3018933c6167fc38a5
455172021-09-02 17:35:37.396740+02:002021-09-02 17:35:37.396740+02:00c05b0061d2a875adbc35d3dfa6a50a12
\n", - "
" - ], - "text/plain": [ - " id type_of_id category_id created_at \\\n", - "0 1 1 2 2021-08-20 15:22:05.558209+02:00 \n", - "1 2 2 1 2021-09-02 17:29:32.582002+02:00 \n", - "2 3 3 3 2021-09-02 17:32:38.299733+02:00 \n", - "3 4 4 4 2021-09-02 17:35:04.748993+02:00 \n", - "4 5 5 17 2021-09-02 17:35:37.396740+02:00 \n", - "\n", - " updated_at identifier \n", - "0 2021-08-20 15:22:05.558209+02:00 af8fa6d57f6b19a7600a69e7771c7c3a \n", - "1 2021-09-02 17:29:32.582002+02:00 63718e7ad306912427758ddf988ad34f \n", - "2 2021-09-02 17:32:38.299733+02:00 5e147d4d90888df14c4584f5c6887c96 \n", - "3 2021-09-02 17:35:04.748993+02:00 a9dfdc3f40b41e3018933c6167fc38a5 \n", - "4 2021-09-02 17:35:37.396740+02:00 c05b0061d2a875adbc35d3dfa6a50a12 " - ] - }, - "execution_count": 216, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "type_categories = display_databases(\"8type_of_categories.csv\")\n", "type_categories.head()" @@ -2472,124 +448,10 @@ }, { "cell_type": "code", - "execution_count": 217, + "execution_count": null, "id": "6f06d72a-5725-4eee-8e4c-e9ef5820f346", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8representation_category_capacities.csv\n", - "Shape : (7378, 7)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcreated_atupdated_atrepresentation_idcategory_idexpected_fillingmax_filling
05612021-06-29 21:33:14.096827+02:002021-06-29 21:33:14.096827+02:001737NaNNaN
15712021-06-29 21:33:14.110047+02:002021-06-29 21:33:14.110047+02:001439NaNNaN
296652021-07-16 00:02:17.736387+02:002021-07-16 00:02:17.736387+02:0018878NaNNaN
33839062023-03-04 02:55:01.585418+01:002023-03-04 02:55:01.585418+01:0052729476NaNNaN
43932021-06-29 21:33:13.876766+02:002021-06-29 21:33:13.876766+02:00923NaNNaN
\n", - "
" - ], - "text/plain": [ - " id created_at updated_at \\\n", - "0 561 2021-06-29 21:33:14.096827+02:00 2021-06-29 21:33:14.096827+02:00 \n", - "1 571 2021-06-29 21:33:14.110047+02:00 2021-06-29 21:33:14.110047+02:00 \n", - "2 9665 2021-07-16 00:02:17.736387+02:00 2021-07-16 00:02:17.736387+02:00 \n", - "3 383906 2023-03-04 02:55:01.585418+01:00 2023-03-04 02:55:01.585418+01:00 \n", - "4 393 2021-06-29 21:33:13.876766+02:00 2021-06-29 21:33:13.876766+02:00 \n", - "\n", - " representation_id category_id expected_filling max_filling \n", - "0 17 37 NaN NaN \n", - "1 14 39 NaN NaN \n", - "2 1887 8 NaN NaN \n", - "3 52729 476 NaN NaN \n", - "4 9 23 NaN NaN " - ] - }, - "execution_count": 217, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "representation_category_capacities = display_databases(\"8representation_category_capacities.csv\")\n", "representation_category_capacities.head()" @@ -2597,199 +459,10 @@ }, { "cell_type": "code", - "execution_count": 218, + "execution_count": null, "id": "bd405913-033d-4f15-a5b9-103d577baaff", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8representations.csv\n", - "Shape : (1015, 16)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idserialevent_idcreated_atupdated_atstart_date_timeopensatisfactionend_date_timenameis_displayrepresentation_type_idexpected_fillingmax_fillingextra_fieldidentifier
05903NaN58362021-07-16 05:16:57.419565+02:002021-07-16 05:16:57.419565+02:002019-08-24 18:00:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaN8009c34cae4e79e3781f16f3ceeab244
167133NaN656522023-09-27 02:21:36.573001+02:002023-09-27 02:21:36.573001+02:002023-10-04 10:30:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaN4e9d3fc8d1f7bf563dc586548fe6390e
21874NaN18262021-07-16 00:02:17.390274+02:002021-07-16 00:02:17.390274+02:002019-09-14 18:00:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaN19f666370c1fc781dff638c20ae04c8a
35904NaN58372021-07-16 05:16:57.420302+02:002021-07-16 05:16:57.420302+02:002019-09-01 17:05:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaN4221acd3f49179f5d0b292c15d1ab8e4
44165NaN41062021-07-16 03:53:05.929713+02:002021-07-16 03:53:05.929713+02:002018-10-14 14:00:00+02:00TrueNaN1901-01-01 00:09:21+00:09NaNTrueNaNNaNNaNNaN733104286519c0614b2d45470eb180a1
\n", - "
" - ], - "text/plain": [ - " id serial event_id created_at \\\n", - "0 5903 NaN 5836 2021-07-16 05:16:57.419565+02:00 \n", - "1 67133 NaN 65652 2023-09-27 02:21:36.573001+02:00 \n", - "2 1874 NaN 1826 2021-07-16 00:02:17.390274+02:00 \n", - "3 5904 NaN 5837 2021-07-16 05:16:57.420302+02:00 \n", - "4 4165 NaN 4106 2021-07-16 03:53:05.929713+02:00 \n", - "\n", - " updated_at start_date_time open \\\n", - "0 2021-07-16 05:16:57.419565+02:00 2019-08-24 18:00:00+02:00 True \n", - "1 2023-09-27 02:21:36.573001+02:00 2023-10-04 10:30:00+02:00 True \n", - "2 2021-07-16 00:02:17.390274+02:00 2019-09-14 18:00:00+02:00 True \n", - "3 2021-07-16 05:16:57.420302+02:00 2019-09-01 17:05:00+02:00 True \n", - "4 2021-07-16 03:53:05.929713+02:00 2018-10-14 14:00:00+02:00 True \n", - "\n", - " satisfaction end_date_time name is_display \\\n", - "0 NaN 1901-01-01 00:09:21+00:09 NaN True \n", - "1 NaN 1901-01-01 00:09:21+00:09 NaN True \n", - "2 NaN 1901-01-01 00:09:21+00:09 NaN True \n", - "3 NaN 1901-01-01 00:09:21+00:09 NaN True \n", - "4 NaN 1901-01-01 00:09:21+00:09 NaN True \n", - "\n", - " representation_type_id expected_filling max_filling extra_field \\\n", - "0 NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN \n", - "3 NaN NaN NaN NaN \n", - "4 NaN NaN NaN NaN \n", - "\n", - " identifier \n", - "0 8009c34cae4e79e3781f16f3ceeab244 \n", - "1 4e9d3fc8d1f7bf563dc586548fe6390e \n", - "2 19f666370c1fc781dff638c20ae04c8a \n", - "3 4221acd3f49179f5d0b292c15d1ab8e4 \n", - "4 733104286519c0614b2d45470eb180a1 " - ] - }, - "execution_count": 218, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "representations = display_databases(\"8representations.csv\")\n", "representations.head()" @@ -2797,7 +470,7 @@ }, { "cell_type": "code", - "execution_count": 219, + "execution_count": null, "id": "0f2c7ea3-6964-48fd-9411-17547b2c3a3f", "metadata": {}, "outputs": [], @@ -2823,168 +496,10 @@ }, { "cell_type": "code", - "execution_count": 
220, + "execution_count": null, "id": "cba22ee2-338d-4ce1-a1e8-829a11a94bcf", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8events.csv\n", - "Shape : (922, 12)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcreated_atupdated_atseason_idfacility_idnameevent_type_idmanual_addedis_displayevent_type_key_idfacility_key_ididentifier
0415422022-10-29 02:54:32.756920+02:002022-10-29 02:57:35.511792+02:00521match lou feminin - lons5588FalseTrue5588140cc5a346b1af4ee7108ac28b144fb77
1210682021-12-17 03:43:53.166446+01:002021-12-17 03:46:40.346096+01:00511repas brasserie lou-racing2310FalseTrue23101500b670b79aa592ecb06f4957800a752
2598122023-05-26 01:45:54.321665+02:002023-05-26 01:46:01.571397+02:0015012parking match 210185FalseTrue101852d5f62ed879867b8b51ed7b85f1fc3ab0
334242021-07-16 03:13:06.988358+02:002021-07-16 05:33:31.321933+02:0011rugby + hockey sur glace5FalseTrue51822b47176c355a647aa2dbdf8dfbc594
4213792021-12-23 02:37:22.948114+01:002021-12-23 02:38:20.726329+01:00511bloc des etudiants lou-racing2562FalseTrue2562117b91f19c71ff6287ffc1f44af952576
\n", - "
" - ], - "text/plain": [ - " id created_at updated_at \\\n", - "0 41542 2022-10-29 02:54:32.756920+02:00 2022-10-29 02:57:35.511792+02:00 \n", - "1 21068 2021-12-17 03:43:53.166446+01:00 2021-12-17 03:46:40.346096+01:00 \n", - "2 59812 2023-05-26 01:45:54.321665+02:00 2023-05-26 01:46:01.571397+02:00 \n", - "3 3424 2021-07-16 03:13:06.988358+02:00 2021-07-16 05:33:31.321933+02:00 \n", - "4 21379 2021-12-23 02:37:22.948114+01:00 2021-12-23 02:38:20.726329+01:00 \n", - "\n", - " season_id facility_id name event_type_id \\\n", - "0 52 1 match lou feminin - lons 5588 \n", - "1 51 1 repas brasserie lou-racing 2310 \n", - "2 1501 2 parking match 2 10185 \n", - "3 1 1 rugby + hockey sur glace 5 \n", - "4 51 1 bloc des etudiants lou-racing 2562 \n", - "\n", - " manual_added is_display event_type_key_id facility_key_id \\\n", - "0 False True 5588 1 \n", - "1 False True 2310 1 \n", - "2 False True 10185 2 \n", - "3 False True 5 1 \n", - "4 False True 2562 1 \n", - "\n", - " identifier \n", - "0 40cc5a346b1af4ee7108ac28b144fb77 \n", - "1 500b670b79aa592ecb06f4957800a752 \n", - "2 d5f62ed879867b8b51ed7b85f1fc3ab0 \n", - "3 822b47176c355a647aa2dbdf8dfbc594 \n", - "4 17b91f19c71ff6287ffc1f44af952576 " - ] - }, - "execution_count": 220, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "events = display_databases(\"8events.csv\")\n", "events.head()" @@ -2992,125 +507,10 @@ }, { "cell_type": "code", - "execution_count": 221, + "execution_count": null, "id": "3db00b9d-2187-4cb6-980d-8ac6ab9eb460", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8event_types.csv\n", - "Shape : (73, 6)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamecreated_atupdated_atfidelity_delayidentifier
01standard2021-06-29 13:52:10.434850+02:002021-06-29 13:52:10.434850+02:0036c00f0c4675b91fb8b918e4079a0b1bac
111ptit lou2021-06-29 21:33:13.000743+02:002021-06-29 21:33:13.000743+02:0036dedd3579bc13b3ed7a90277247d9944b
2274parking 19-202021-07-16 00:02:17.225410+02:002021-07-16 00:02:17.225410+02:00360d348caeec0b66f9d4987dfbe30e1e8b
3129events 2018-20192021-06-30 01:35:18.110429+02:002021-06-30 01:35:18.110429+02:003665eb39ddf8f79d28d93c2f2c53118f50
410accreditations 2017-20182021-06-29 21:33:12.999510+02:002021-06-29 21:33:12.999510+02:0036732cfdcf2065fa0005faf42793ddd76c
\n", - "
" - ], - "text/plain": [ - " id name created_at \\\n", - "0 1 standard 2021-06-29 13:52:10.434850+02:00 \n", - "1 11 ptit lou 2021-06-29 21:33:13.000743+02:00 \n", - "2 274 parking 19-20 2021-07-16 00:02:17.225410+02:00 \n", - "3 129 events 2018-2019 2021-06-30 01:35:18.110429+02:00 \n", - "4 10 accreditations 2017-2018 2021-06-29 21:33:12.999510+02:00 \n", - "\n", - " updated_at fidelity_delay \\\n", - "0 2021-06-29 13:52:10.434850+02:00 36 \n", - "1 2021-06-29 21:33:13.000743+02:00 36 \n", - "2 2021-07-16 00:02:17.225410+02:00 36 \n", - "3 2021-06-30 01:35:18.110429+02:00 36 \n", - "4 2021-06-29 21:33:12.999510+02:00 36 \n", - "\n", - " identifier \n", - "0 c00f0c4675b91fb8b918e4079a0b1bac \n", - "1 dedd3579bc13b3ed7a90277247d9944b \n", - "2 0d348caeec0b66f9d4987dfbe30e1e8b \n", - "3 65eb39ddf8f79d28d93c2f2c53118f50 \n", - "4 732cfdcf2065fa0005faf42793ddd76c " - ] - }, - "execution_count": 221, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "event_types = display_databases(\"8event_types.csv\")\n", "event_types.head()" @@ -3118,125 +518,10 @@ }, { "cell_type": "code", - "execution_count": 222, + "execution_count": null, "id": "cba0ee58-6280-45fe-99b3-0be09db5922b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8seasons.csv\n", - "Shape : (16, 6)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamecreated_atupdated_atstart_date_timeidentifier
01501saison 2023-20242022-06-25 03:07:31.209270+02:002022-06-25 03:07:31.209270+02:00NaN71f5c069ce45c5e933dcc37c22507fbf
11194saison 2049-20502022-02-17 03:24:23.942691+01:002022-02-17 03:24:23.942691+01:00NaN44e20620bbc5926db2e295d38b606afd
22saison 2016-20172021-06-29 21:33:00.702563+02:002021-06-29 21:33:00.702563+02:00NaNf9cf989d4f49300220df67ef93aa2294
347saison 2018-20192021-06-30 01:35:15.156097+02:002021-06-30 01:35:15.156097+02:00NaNeec50c35fbf8593b364ced287335d90c
4100saison 2010-20112021-07-16 00:23:27.607648+02:002021-07-16 00:23:27.607648+02:00NaN7ccc51049a85e0df9b80662e45b6ddb8
\n", - "
" - ], - "text/plain": [ - " id name created_at \\\n", - "0 1501 saison 2023-2024 2022-06-25 03:07:31.209270+02:00 \n", - "1 1194 saison 2049-2050 2022-02-17 03:24:23.942691+01:00 \n", - "2 2 saison 2016-2017 2021-06-29 21:33:00.702563+02:00 \n", - "3 47 saison 2018-2019 2021-06-30 01:35:15.156097+02:00 \n", - "4 100 saison 2010-2011 2021-07-16 00:23:27.607648+02:00 \n", - "\n", - " updated_at start_date_time \\\n", - "0 2022-06-25 03:07:31.209270+02:00 NaN \n", - "1 2022-02-17 03:24:23.942691+01:00 NaN \n", - "2 2021-06-29 21:33:00.702563+02:00 NaN \n", - "3 2021-06-30 01:35:15.156097+02:00 NaN \n", - "4 2021-07-16 00:23:27.607648+02:00 NaN \n", - "\n", - " identifier \n", - "0 71f5c069ce45c5e933dcc37c22507fbf \n", - "1 44e20620bbc5926db2e295d38b606afd \n", - "2 f9cf989d4f49300220df67ef93aa2294 \n", - "3 eec50c35fbf8593b364ced287335d90c \n", - "4 7ccc51049a85e0df9b80662e45b6ddb8 " - ] - }, - "execution_count": 222, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "seasons = display_databases(\"8seasons.csv\")\n", "seasons.head()" @@ -3244,131 +529,10 @@ }, { "cell_type": "code", - "execution_count": 223, + "execution_count": null, "id": "6fa82fd7-d6d3-4857-af24-ea573b1129d0", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/8/8facilities.csv\n", - "Shape : (5, 7)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamecreated_atupdated_atstreet_idfixed_capacityidentifier
074plan pour campagne d'abo 2011/20122021-07-16 00:23:30.337698+02:002021-07-16 00:23:30.337698+02:001NaN2e1d25d5f7e46e23c734fe0e4951390e
13accreditation2021-06-29 21:33:13.018552+02:002021-06-29 21:33:13.018552+02:001NaNda37a04e592cbd344142730ce05a6887
24organisation match exterieur2021-06-29 21:33:13.019878+02:002021-06-29 21:33:13.019878+02:001NaN8f9ee8c2e954585f7c68096d7f1cf4f1
32parking matmut stadium2021-06-29 21:33:13.017165+02:002021-06-29 21:33:13.017165+02:001NaNaeab282982ea738674dbf5c3763a0be0
41matmut stadium2021-06-29 21:33:13.004560+02:002021-06-29 21:33:13.004560+02:001NaN89feffd283ebdabdc3b81fb62ea4f6f0
\n", - "
" - ], - "text/plain": [ - " id name created_at \\\n", - "0 74 plan pour campagne d'abo 2011/2012 2021-07-16 00:23:30.337698+02:00 \n", - "1 3 accreditation 2021-06-29 21:33:13.018552+02:00 \n", - "2 4 organisation match exterieur 2021-06-29 21:33:13.019878+02:00 \n", - "3 2 parking matmut stadium 2021-06-29 21:33:13.017165+02:00 \n", - "4 1 matmut stadium 2021-06-29 21:33:13.004560+02:00 \n", - "\n", - " updated_at street_id fixed_capacity \\\n", - "0 2021-07-16 00:23:30.337698+02:00 1 NaN \n", - "1 2021-06-29 21:33:13.018552+02:00 1 NaN \n", - "2 2021-06-29 21:33:13.019878+02:00 1 NaN \n", - "3 2021-06-29 21:33:13.017165+02:00 1 NaN \n", - "4 2021-06-29 21:33:13.004560+02:00 1 NaN \n", - "\n", - " identifier \n", - "0 2e1d25d5f7e46e23c734fe0e4951390e \n", - "1 da37a04e592cbd344142730ce05a6887 \n", - "2 8f9ee8c2e954585f7c68096d7f1cf4f1 \n", - "3 aeab282982ea738674dbf5c3763a0be0 \n", - "4 89feffd283ebdabdc3b81fb62ea4f6f0 " - ] - }, - "execution_count": 223, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "facilities = display_databases(\"8facilities.csv\")\n", "facilities.head()" @@ -3408,7 +572,7 @@ }, { "cell_type": "code", - "execution_count": 224, + "execution_count": null, "id": "c240b811-48a6-4501-9e70-bc51d69e3ac4", "metadata": {}, "outputs": [], @@ -3424,18 +588,10 @@ }, { "cell_type": "code", - "execution_count": 225, + "execution_count": null, "id": "54057367-9df9-42f4-aa07-bf524bb76462", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of databases : 30\n" - ] - } - ], + "outputs": [], "source": [ "# Then we create a list of all database\n", "\n", @@ -3445,19 +601,10 @@ }, { "cell_type": "code", - "execution_count": 226, + "execution_count": null, "id": "63914e20-9efc-4088-877b-edab5f225d00", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "30\n", - "23\n" - ] - } - ], + "outputs": [], "source": [ "## We then 
create a set of database in common for all companies\n", "\n", @@ -3493,7 +640,7 @@ }, { "cell_type": "code", - "execution_count": 227, + "execution_count": null, "id": "590a132a-4f57-4ea3-a282-2ef913e4b753", "metadata": {}, "outputs": [], @@ -3503,7 +650,7 @@ }, { "cell_type": "code", - "execution_count": 228, + "execution_count": null, "id": "0fbebfb7-a827-46b1-890b-86c9def7cdbb", "metadata": {}, "outputs": [], @@ -3513,7 +660,7 @@ }, { "cell_type": "code", - "execution_count": 229, + "execution_count": null, "id": "b8aa5f8f-845e-4ee5-b80d-38b7061a94a2", "metadata": {}, "outputs": [], @@ -3528,7 +675,7 @@ }, { "cell_type": "code", - "execution_count": 230, + "execution_count": null, "id": "2c478213-09ae-44ef-8c7c-125bcb571642", "metadata": {}, "outputs": [], @@ -3546,7 +693,7 @@ }, { "cell_type": "code", - "execution_count": 231, + "execution_count": null, "id": "327e44b0-eb99-4022-b4ca-79548072f0f0", "metadata": {}, "outputs": [], @@ -3561,7 +708,7 @@ }, { "cell_type": "code", - "execution_count": 232, + "execution_count": null, "id": "10926def-267f-4e86-b2c9-72e27ff9a9df", "metadata": {}, "outputs": [], @@ -3585,181 +732,10 @@ }, { "cell_type": "code", - "execution_count": 233, + "execution_count": null, "id": "862a7658-0602-4d94-bb58-d23774c00d32", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/1/1products.csv\n", - "Shape : (94803, 14)\n", - "Number of columns : 14\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idamountis_full_pricerepresentation_idpricing_formula_idcreated_atupdated_atcategory_idapply_priceproducts_group_idproduct_pack_idextra_fieldamount_consumptionidentifier
0106829.0False9141142020-09-03 14:09:43.119798+02:002020-09-03 14:09:43.119798+02:00410.0106551NaNNaN35c88f2db8a63d7474e46eb8ca9260e7
14789.5False2731312020-09-03 13:21:22.711773+02:002020-09-03 13:21:22.711773+02:0010.04711NaNNaN8a179671ab198e570e6a104c4451379f
22087311.5False2751372020-09-03 14:46:33.589030+02:002020-09-03 14:46:33.589030+02:0010.0208251NaNNaNee83779ce29e67ad251e40234b426d6a
31571428.0False8251992022-01-28 19:29:23.525722+01:002022-01-28 19:29:23.525722+01:0050.01567731NaNNaNd865383579314b791aa4bcf3fb418f17
413418.5False9932020-09-03 13:29:30.773089+02:002020-09-03 13:29:30.773089+02:0010.011751NaNNaNf1c4689bc47dee6f60b56d74b593dd46
\n", - "
" - ], - "text/plain": [ - " id amount is_full_price representation_id pricing_formula_id \\\n", - "0 10682 9.0 False 914 114 \n", - "1 478 9.5 False 273 131 \n", - "2 20873 11.5 False 275 137 \n", - "3 157142 8.0 False 82519 9 \n", - "4 1341 8.5 False 9 93 \n", - "\n", - " created_at updated_at \\\n", - "0 2020-09-03 14:09:43.119798+02:00 2020-09-03 14:09:43.119798+02:00 \n", - "1 2020-09-03 13:21:22.711773+02:00 2020-09-03 13:21:22.711773+02:00 \n", - "2 2020-09-03 14:46:33.589030+02:00 2020-09-03 14:46:33.589030+02:00 \n", - "3 2022-01-28 19:29:23.525722+01:00 2022-01-28 19:29:23.525722+01:00 \n", - "4 2020-09-03 13:29:30.773089+02:00 2020-09-03 13:29:30.773089+02:00 \n", - "\n", - " category_id apply_price products_group_id product_pack_id extra_field \\\n", - "0 41 0.0 10655 1 NaN \n", - "1 1 0.0 471 1 NaN \n", - "2 1 0.0 20825 1 NaN \n", - "3 5 0.0 156773 1 NaN \n", - "4 1 0.0 1175 1 NaN \n", - "\n", - " amount_consumption identifier \n", - "0 NaN 35c88f2db8a63d7474e46eb8ca9260e7 \n", - "1 NaN 8a179671ab198e570e6a104c4451379f \n", - "2 NaN ee83779ce29e67ad251e40234b426d6a \n", - "3 NaN d865383579314b791aa4bcf3fb418f17 \n", - "4 NaN f1c4689bc47dee6f60b56d74b593dd46 " - ] - }, - "execution_count": 233, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "products = display_databases(\"1products.csv\")\n", "print(\"Number of columns : \", len(products.columns))\n", @@ -3768,164 +744,10 @@ }, { "cell_type": "code", - "execution_count": 234, + "execution_count": null, "id": "f0db8c51-2792-4d49-9b1a-d98ce0d9ea28", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of columns : 12\n", - "Columns : Index(['id', 'representation_id', 'pricing_formula_id', 'category_id',\n", - " 'products_group_id', 'product_pack_id', 'identifier', 'amount',\n", - " 'is_full_price', 'apply_price', 'extra_field', 'amount_consumption'],\n", - " dtype='object')\n" - ] - }, - { - "data": { - 
"text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idrepresentation_idpricing_formula_idcategory_idproducts_group_idproduct_pack_ididentifieramountis_full_priceapply_priceextra_fieldamount_consumption
0106829141144110655135c88f2db8a63d7474e46eb8ca9260e79.0False0.0NaNNaN
1478273131147118a179671ab198e570e6a104c4451379f9.5False0.0NaNNaN
2208732751371208251ee83779ce29e67ad251e40234b426d6a11.5False0.0NaNNaN
315714282519951567731d865383579314b791aa4bcf3fb418f178.0False0.0NaNNaN
41341993111751f1c4689bc47dee6f60b56d74b593dd468.5False0.0NaNNaN
\n", - "
" - ], - "text/plain": [ - " id representation_id pricing_formula_id category_id \\\n", - "0 10682 914 114 41 \n", - "1 478 273 131 1 \n", - "2 20873 275 137 1 \n", - "3 157142 82519 9 5 \n", - "4 1341 9 93 1 \n", - "\n", - " products_group_id product_pack_id identifier \\\n", - "0 10655 1 35c88f2db8a63d7474e46eb8ca9260e7 \n", - "1 471 1 8a179671ab198e570e6a104c4451379f \n", - "2 20825 1 ee83779ce29e67ad251e40234b426d6a \n", - "3 156773 1 d865383579314b791aa4bcf3fb418f17 \n", - "4 1175 1 f1c4689bc47dee6f60b56d74b593dd46 \n", - "\n", - " amount is_full_price apply_price extra_field amount_consumption \n", - "0 9.0 False 0.0 NaN NaN \n", - "1 9.5 False 0.0 NaN NaN \n", - "2 11.5 False 0.0 NaN NaN \n", - "3 8.0 False 0.0 NaN NaN \n", - "4 8.5 False 0.0 NaN NaN " - ] - }, - "execution_count": 234, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "products = remove_horodates(products)\n", "print(\"Number of columns : \", len(products.columns))\n", @@ -3936,60 +758,20 @@ }, { "cell_type": "code", - "execution_count": 235, + "execution_count": null, "id": "a383474f-7da9-422c-bb69-3f0cc0b7053f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id int64\n", - "representation_id int64\n", - "pricing_formula_id int64\n", - "category_id int64\n", - "products_group_id int64\n", - "product_pack_id int64\n", - "identifier object\n", - "amount float64\n", - "is_full_price bool\n", - "apply_price float64\n", - "extra_field float64\n", - "amount_consumption float64\n", - "dtype: object\n" - ] - } - ], + "outputs": [], "source": [ "print(products.dtypes)" ] }, { "cell_type": "code", - "execution_count": 236, + "execution_count": null, "id": "460749ac-aa26-4216-8667-518546f72f72", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id 0.0\n", - "representation_id 0.0\n", - "pricing_formula_id 0.0\n", - "category_id 0.0\n", - "products_group_id 0.0\n", - 
"product_pack_id 0.0\n", - "identifier 0.0\n", - "amount 0.0\n", - "is_full_price 0.0\n", - "apply_price 0.0\n", - "extra_field 100.0\n", - "amount_consumption 100.0\n", - "dtype: float64\n" - ] - } - ], + "outputs": [], "source": [ "percent_missing = products.isna().sum() * 100 / len(products)\n", "print(percent_missing)" @@ -4005,7 +787,7 @@ }, { "cell_type": "code", - "execution_count": 237, + "execution_count": null, "id": "3efce2b6-2d2f-4da9-98ed-1aae17da624c", "metadata": {}, "outputs": [], @@ -4015,132 +797,10 @@ }, { "cell_type": "code", - "execution_count": 238, + "execution_count": null, "id": "38aa39fd-58af-4fb8-98f2-4269dbaf35de", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/1/1categories.csv\n", - "Shape : (27, 7)\n", - "Number of columns : 7\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamecreated_atupdated_atextra_fieldquotaidentifier
030en nb entrées gr2020-09-03 13:21:20.019202+02:002020-09-03 13:21:20.019202+02:00NaNNaN849ab2791a14f5fc2bb4d87ab2b78bf6
116indiv activité enfant2020-09-03 13:11:23.306968+02:002020-09-03 13:11:23.306968+02:00NaNNaN425fd2f01984cc4ba030c1be98f42c33
239indiv activité gr2020-09-03 13:21:20.029901+02:002020-09-03 13:21:20.029901+02:00NaNNaN9244dd3738788db0d22a5d0afe687b69
31108groupe forfait adulte2020-09-19 02:06:43.145697+02:002020-09-19 02:06:43.145697+02:00NaNNaN3edda20c877a93b5ff883827238eb711
46groupe forfait entrées tr2020-09-03 13:11:23.264997+02:002020-09-03 13:11:23.264997+02:00NaNNaNff48df4b2dd5a14116bf4d280b31621e
\n", - "
" - ], - "text/plain": [ - " id name created_at \\\n", - "0 30 en nb entrées gr 2020-09-03 13:21:20.019202+02:00 \n", - "1 16 indiv activité enfant 2020-09-03 13:11:23.306968+02:00 \n", - "2 39 indiv activité gr 2020-09-03 13:21:20.029901+02:00 \n", - "3 1108 groupe forfait adulte 2020-09-19 02:06:43.145697+02:00 \n", - "4 6 groupe forfait entrées tr 2020-09-03 13:11:23.264997+02:00 \n", - "\n", - " updated_at extra_field quota \\\n", - "0 2020-09-03 13:21:20.019202+02:00 NaN NaN \n", - "1 2020-09-03 13:11:23.306968+02:00 NaN NaN \n", - "2 2020-09-03 13:21:20.029901+02:00 NaN NaN \n", - "3 2020-09-19 02:06:43.145697+02:00 NaN NaN \n", - "4 2020-09-03 13:11:23.264997+02:00 NaN NaN \n", - "\n", - " identifier \n", - "0 849ab2791a14f5fc2bb4d87ab2b78bf6 \n", - "1 425fd2f01984cc4ba030c1be98f42c33 \n", - "2 9244dd3738788db0d22a5d0afe687b69 \n", - "3 3edda20c877a93b5ff883827238eb711 \n", - "4 ff48df4b2dd5a14116bf4d280b31621e " - ] - }, - "execution_count": 238, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = display_databases(name_dataset)\n", "print(\"Number of columns : \", len(df.columns))\n", @@ -4149,118 +809,10 @@ }, { "cell_type": "code", - "execution_count": 239, + "execution_count": null, "id": "99eb6d14-8b4b-4d55-8fc7-ddf2726096f4", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of columns : 5\n", - "Columns : Index(['id', 'identifier', 'name', 'extra_field', 'quota'], dtype='object')\n", - "Percent of NA for each column : id 0.000000\n", - "identifier 0.000000\n", - "name 3.703704\n", - "extra_field 100.000000\n", - "quota 100.000000\n", - "dtype: float64\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ididentifiernameextra_fieldquota
030849ab2791a14f5fc2bb4d87ab2b78bf6en nb entrées grNaNNaN
116425fd2f01984cc4ba030c1be98f42c33indiv activité enfantNaNNaN
2399244dd3738788db0d22a5d0afe687b69indiv activité grNaNNaN
311083edda20c877a93b5ff883827238eb711groupe forfait adulteNaNNaN
46ff48df4b2dd5a14116bf4d280b31621egroupe forfait entrées trNaNNaN
\n", - "
" - ], - "text/plain": [ - " id identifier name \\\n", - "0 30 849ab2791a14f5fc2bb4d87ab2b78bf6 en nb entrées gr \n", - "1 16 425fd2f01984cc4ba030c1be98f42c33 indiv activité enfant \n", - "2 39 9244dd3738788db0d22a5d0afe687b69 indiv activité gr \n", - "3 1108 3edda20c877a93b5ff883827238eb711 groupe forfait adulte \n", - "4 6 ff48df4b2dd5a14116bf4d280b31621e groupe forfait entrées tr \n", - "\n", - " extra_field quota \n", - "0 NaN NaN \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN " - ] - }, - "execution_count": 239, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = process_df(df)\n", "df.head()" @@ -4268,26 +820,10 @@ }, { "cell_type": "code", - "execution_count": 240, + "execution_count": null, "id": "c5f39cc9-dff8-452c-9a3e-9f7df81a8a19", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id int64\n", - "identifier object\n", - "name object\n", - "extra_field float64\n", - "quota float64\n", - "dtype: object" - ] - }, - "execution_count": 240, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.dtypes" ] @@ -4326,7 +862,7 @@ }, { "cell_type": "code", - "execution_count": 241, + "execution_count": null, "id": "2d52d6da-cca5-4abd-be05-2f00fd3eca8e", "metadata": {}, "outputs": [], @@ -4336,169 +872,10 @@ }, { "cell_type": "code", - "execution_count": 242, + "execution_count": null, "id": "6cab507d-8b11-404d-9286-5cc205228af9", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/1/1events.csv\n", - "Shape : (1232, 12)\n", - "Number of columns : 12\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcreated_atupdated_atseason_idfacility_idnameevent_type_idmanual_addedis_displayevent_type_key_idfacility_key_ididentifier
01922020-09-03 13:36:42.216991+02:002021-11-02 15:06:40.663219+01:00161frontières4FalseTrue41c1cecd093146068fd57896e254e98170
1303292023-11-04 02:50:34.602462+01:002023-11-04 02:52:26.138154+01:0027671visite guidée une autre histoire du monde (1h00)5FalseTrue51f510a6710878d7aca36e71c54abab525
21612020-09-03 13:29:27.944002+02:002021-11-02 15:06:40.652026+01:00161visite contée les chercheurs d'or indiv2FalseTrue2121177fa9acad1ae2b1f595690fb853d3
359572021-07-31 11:16:42.575583+02:002021-11-02 15:06:40.663219+01:005821we dreamt of utopia and we woke up screaming.4FalseTrue41962601f1eb153d45d49437f8fe839f7f
483372021-08-17 13:40:34.111923+02:002021-11-02 15:06:40.663219+01:005821jeff koons épisodes 44FalseTrue41bfa22f5a2364a2dacfc45cca1c8d3215
\n", - "
" - ], - "text/plain": [ - " id created_at updated_at \\\n", - "0 192 2020-09-03 13:36:42.216991+02:00 2021-11-02 15:06:40.663219+01:00 \n", - "1 30329 2023-11-04 02:50:34.602462+01:00 2023-11-04 02:52:26.138154+01:00 \n", - "2 161 2020-09-03 13:29:27.944002+02:00 2021-11-02 15:06:40.652026+01:00 \n", - "3 5957 2021-07-31 11:16:42.575583+02:00 2021-11-02 15:06:40.663219+01:00 \n", - "4 8337 2021-08-17 13:40:34.111923+02:00 2021-11-02 15:06:40.663219+01:00 \n", - "\n", - " season_id facility_id name \\\n", - "0 16 1 frontières \n", - "1 2767 1 visite guidée une autre histoire du monde (1h00) \n", - "2 16 1 visite contée les chercheurs d'or indiv \n", - "3 582 1 we dreamt of utopia and we woke up screaming. \n", - "4 582 1 jeff koons épisodes 4 \n", - "\n", - " event_type_id manual_added is_display event_type_key_id \\\n", - "0 4 False True 4 \n", - "1 5 False True 5 \n", - "2 2 False True 2 \n", - "3 4 False True 4 \n", - "4 4 False True 4 \n", - "\n", - " facility_key_id identifier \n", - "0 1 c1cecd093146068fd57896e254e98170 \n", - "1 1 f510a6710878d7aca36e71c54abab525 \n", - "2 1 21177fa9acad1ae2b1f595690fb853d3 \n", - "3 1 962601f1eb153d45d49437f8fe839f7f \n", - "4 1 bfa22f5a2364a2dacfc45cca1c8d3215 " - ] - }, - "execution_count": 242, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = display_databases(name_dataset)\n", "print(\"Number of columns : \", len(df.columns))\n", @@ -4507,162 +884,10 @@ }, { "cell_type": "code", - "execution_count": 243, + "execution_count": null, "id": "9fe57873-8108-44c9-b8a5-f58d3cbb6d17", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of columns : 10\n", - "Columns : Index(['id', 'season_id', 'facility_id', 'event_type_id', 'event_type_key_id',\n", - " 'facility_key_id', 'identifier', 'name', 'manual_added', 'is_display'],\n", - " dtype='object')\n", - "Percent of NA for each column : id 0.000000\n", - "season_id 0.000000\n", - 
"facility_id 0.000000\n", - "event_type_id 0.000000\n", - "event_type_key_id 0.000000\n", - "facility_key_id 0.000000\n", - "identifier 0.000000\n", - "name 0.974026\n", - "manual_added 0.000000\n", - "is_display 0.000000\n", - "dtype: float64\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idseason_idfacility_idevent_type_idevent_type_key_idfacility_key_ididentifiernamemanual_addedis_display
0192161441c1cecd093146068fd57896e254e98170frontièresFalseTrue
13032927671551f510a6710878d7aca36e71c54abab525visite guidée une autre histoire du monde (1h00)FalseTrue
216116122121177fa9acad1ae2b1f595690fb853d3visite contée les chercheurs d'or indivFalseTrue
359575821441962601f1eb153d45d49437f8fe839f7fwe dreamt of utopia and we woke up screaming.FalseTrue
483375821441bfa22f5a2364a2dacfc45cca1c8d3215jeff koons épisodes 4FalseTrue
\n", - "
" - ], - "text/plain": [ - " id season_id facility_id event_type_id event_type_key_id \\\n", - "0 192 16 1 4 4 \n", - "1 30329 2767 1 5 5 \n", - "2 161 16 1 2 2 \n", - "3 5957 582 1 4 4 \n", - "4 8337 582 1 4 4 \n", - "\n", - " facility_key_id identifier \\\n", - "0 1 c1cecd093146068fd57896e254e98170 \n", - "1 1 f510a6710878d7aca36e71c54abab525 \n", - "2 1 21177fa9acad1ae2b1f595690fb853d3 \n", - "3 1 962601f1eb153d45d49437f8fe839f7f \n", - "4 1 bfa22f5a2364a2dacfc45cca1c8d3215 \n", - "\n", - " name manual_added is_display \n", - "0 frontières False True \n", - "1 visite guidée une autre histoire du monde (1h00) False True \n", - "2 visite contée les chercheurs d'or indiv False True \n", - "3 we dreamt of utopia and we woke up screaming. False True \n", - "4 jeff koons épisodes 4 False True " - ] - }, - "execution_count": 243, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = process_df(df)\n", "df.head()" @@ -4670,31 +895,10 @@ }, { "cell_type": "code", - "execution_count": 244, + "execution_count": null, "id": "7fd9e5bd-baac-4b3b-9ffb-5a9baa18399b", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id int64\n", - "season_id int64\n", - "facility_id int64\n", - "event_type_id int64\n", - "event_type_key_id int64\n", - "facility_key_id int64\n", - "identifier object\n", - "name object\n", - "manual_added bool\n", - "is_display bool\n", - "dtype: object" - ] - }, - "execution_count": 244, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.dtypes" ] @@ -4709,7 +913,7 @@ }, { "cell_type": "code", - "execution_count": 245, + "execution_count": null, "id": "90ab62d4-a086-4469-961c-67eefb375388", "metadata": {}, "outputs": [], @@ -4719,126 +923,10 @@ }, { "cell_type": "code", - "execution_count": 246, + "execution_count": null, "id": "58db1751-fd56-4c28-b49e-bc8235bb0dc8", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"File path : bdc2324-data/1/1event_types.csv\n", - "Shape : (9, 6)\n", - "Number of columns : 6\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamecreated_atupdated_atfidelity_delayidentifier
01standard2020-09-03 12:24:22.574262+02:002020-09-03 12:24:22.574262+02:0036c00f0c4675b91fb8b918e4079a0b1bac
166package2020-09-03 14:05:04.648137+02:002020-09-03 14:05:04.648137+02:0036efe90a8e604a7c840e88d03a67f6b7d8
283guide multimédias2020-09-03 14:15:17.252539+02:002020-09-03 14:15:17.252539+02:0036ee14c62b3b9f6c7dd5401685a18e4460
33non défini2020-09-03 13:11:23.117024+02:002020-09-03 13:11:23.117024+02:003652ff3466787b4d538407372e5f7afe0f
42723NaN2021-12-22 09:45:47.715105+01:002021-12-22 09:45:47.715105+01:0036d41d8cd98f00b204e9800998ecf8427e
\n", - "
" - ], - "text/plain": [ - " id name created_at \\\n", - "0 1 standard 2020-09-03 12:24:22.574262+02:00 \n", - "1 66 package 2020-09-03 14:05:04.648137+02:00 \n", - "2 83 guide multimédias 2020-09-03 14:15:17.252539+02:00 \n", - "3 3 non défini 2020-09-03 13:11:23.117024+02:00 \n", - "4 2723 NaN 2021-12-22 09:45:47.715105+01:00 \n", - "\n", - " updated_at fidelity_delay \\\n", - "0 2020-09-03 12:24:22.574262+02:00 36 \n", - "1 2020-09-03 14:05:04.648137+02:00 36 \n", - "2 2020-09-03 14:15:17.252539+02:00 36 \n", - "3 2020-09-03 13:11:23.117024+02:00 36 \n", - "4 2021-12-22 09:45:47.715105+01:00 36 \n", - "\n", - " identifier \n", - "0 c00f0c4675b91fb8b918e4079a0b1bac \n", - "1 efe90a8e604a7c840e88d03a67f6b7d8 \n", - "2 ee14c62b3b9f6c7dd5401685a18e4460 \n", - "3 52ff3466787b4d538407372e5f7afe0f \n", - "4 d41d8cd98f00b204e9800998ecf8427e " - ] - }, - "execution_count": 246, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = display_databases(name_dataset)\n", "print(\"Number of columns : \", len(df.columns))\n", @@ -4847,104 +935,10 @@ }, { "cell_type": "code", - "execution_count": 247, + "execution_count": null, "id": "ac93382c-0b5f-462d-8021-0dd1e7201b8c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of columns : 4\n", - "Columns : Index(['id', 'fidelity_delay', 'identifier', 'name'], dtype='object')\n", - "Percent of NA for each column : id 0.000000\n", - "fidelity_delay 0.000000\n", - "identifier 0.000000\n", - "name 11.111111\n", - "dtype: float64\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idfidelity_delayidentifiername
0136c00f0c4675b91fb8b918e4079a0b1bacstandard
16636efe90a8e604a7c840e88d03a67f6b7d8package
28336ee14c62b3b9f6c7dd5401685a18e4460guide multimédias
333652ff3466787b4d538407372e5f7afe0fnon défini
4272336d41d8cd98f00b204e9800998ecf8427eNaN
\n", - "
" - ], - "text/plain": [ - " id fidelity_delay identifier name\n", - "0 1 36 c00f0c4675b91fb8b918e4079a0b1bac standard\n", - "1 66 36 efe90a8e604a7c840e88d03a67f6b7d8 package\n", - "2 83 36 ee14c62b3b9f6c7dd5401685a18e4460 guide multimédias\n", - "3 3 36 52ff3466787b4d538407372e5f7afe0f non défini\n", - "4 2723 36 d41d8cd98f00b204e9800998ecf8427e NaN" - ] - }, - "execution_count": 247, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = process_df(df)\n", "df.head()" @@ -4952,25 +946,10 @@ }, { "cell_type": "code", - "execution_count": 248, + "execution_count": null, "id": "18cbd630-3c7d-49e1-932b-9460badf3758", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id int64\n", - "fidelity_delay int64\n", - "identifier object\n", - "name object\n", - "dtype: object" - ] - }, - "execution_count": 248, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.dtypes" ] @@ -4985,7 +964,7 @@ }, { "cell_type": "code", - "execution_count": 249, + "execution_count": null, "id": "ae544dcc-f23d-4216-bb5b-597cc1b3765e", "metadata": {}, "outputs": [], @@ -4995,126 +974,10 @@ }, { "cell_type": "code", - "execution_count": 250, + "execution_count": null, "id": "1ac97963-9208-4329-be41-d71a5797487f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/1/1seasons.csv\n", - "Shape : (13, 6)\n", - "Number of columns : 6\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamecreated_atupdated_atstart_date_timeidentifier
094320132021-07-29 08:55:33.282607+02:002021-07-29 08:55:33.282607+02:00NaN8038da89e49ac5eabb489cfc6cea9fc1
112920142020-09-03 15:13:08.105567+02:002020-09-03 15:13:08.105567+02:00NaNcee8d6b7ce52554fd70354e37bbf44a2
2320152020-09-03 13:11:19.405037+02:002020-09-03 13:11:19.405037+02:00NaN65d2ea03425887a717c435081cfc5dbb
3220162020-09-03 13:11:19.401001+02:002020-09-03 13:11:19.401001+02:00NaN95192c98732387165bf8e396c0f2dad2
4420172020-09-03 13:11:19.409005+02:002020-09-03 13:11:19.409005+02:00NaN8d8818c8e140c64c743113f563cf750f
\n", - "
" - ], - "text/plain": [ - " id name created_at \\\n", - "0 943 2013 2021-07-29 08:55:33.282607+02:00 \n", - "1 129 2014 2020-09-03 15:13:08.105567+02:00 \n", - "2 3 2015 2020-09-03 13:11:19.405037+02:00 \n", - "3 2 2016 2020-09-03 13:11:19.401001+02:00 \n", - "4 4 2017 2020-09-03 13:11:19.409005+02:00 \n", - "\n", - " updated_at start_date_time \\\n", - "0 2021-07-29 08:55:33.282607+02:00 NaN \n", - "1 2020-09-03 15:13:08.105567+02:00 NaN \n", - "2 2020-09-03 13:11:19.405037+02:00 NaN \n", - "3 2020-09-03 13:11:19.401001+02:00 NaN \n", - "4 2020-09-03 13:11:19.409005+02:00 NaN \n", - "\n", - " identifier \n", - "0 8038da89e49ac5eabb489cfc6cea9fc1 \n", - "1 cee8d6b7ce52554fd70354e37bbf44a2 \n", - "2 65d2ea03425887a717c435081cfc5dbb \n", - "3 95192c98732387165bf8e396c0f2dad2 \n", - "4 8d8818c8e140c64c743113f563cf750f " - ] - }, - "execution_count": 250, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = display_databases(name_dataset)\n", "print(\"Number of columns : \", len(df.columns))\n", @@ -5123,104 +986,10 @@ }, { "cell_type": "code", - "execution_count": 251, + "execution_count": null, "id": "b4593d46-105c-47dd-aa71-babd8e63e65b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of columns : 4\n", - "Columns : Index(['id', 'identifier', 'name', 'start_date_time'], dtype='object')\n", - "Percent of NA for each column : id 0.000000\n", - "identifier 0.000000\n", - "name 7.692308\n", - "start_date_time 100.000000\n", - "dtype: float64\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ididentifiernamestart_date_time
09438038da89e49ac5eabb489cfc6cea9fc12013NaN
1129cee8d6b7ce52554fd70354e37bbf44a22014NaN
2365d2ea03425887a717c435081cfc5dbb2015NaN
3295192c98732387165bf8e396c0f2dad22016NaN
448d8818c8e140c64c743113f563cf750f2017NaN
\n", - "
" - ], - "text/plain": [ - " id identifier name start_date_time\n", - "0 943 8038da89e49ac5eabb489cfc6cea9fc1 2013 NaN\n", - "1 129 cee8d6b7ce52554fd70354e37bbf44a2 2014 NaN\n", - "2 3 65d2ea03425887a717c435081cfc5dbb 2015 NaN\n", - "3 2 95192c98732387165bf8e396c0f2dad2 2016 NaN\n", - "4 4 8d8818c8e140c64c743113f563cf750f 2017 NaN" - ] - }, - "execution_count": 251, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = process_df(df)\n", "df.head()" @@ -5228,25 +997,10 @@ }, { "cell_type": "code", - "execution_count": 252, + "execution_count": null, "id": "5d3b096d-8e73-4514-94e5-f2dcd4d0a89c", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id int64\n", - "identifier object\n", - "name object\n", - "start_date_time float64\n", - "dtype: object" - ] - }, - "execution_count": 252, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.dtypes" ] @@ -5261,7 +1015,7 @@ }, { "cell_type": "code", - "execution_count": 253, + "execution_count": null, "id": "d95ef015-d44c-4353-8761-771b910d21c9", "metadata": {}, "outputs": [], @@ -5271,93 +1025,10 @@ }, { "cell_type": "code", - "execution_count": 254, + "execution_count": null, "id": "ef5fe794-8df7-4f27-8554-ecdc4074ac0b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/1/1facilities.csv\n", - "Shape : (2, 7)\n", - "Number of columns : 7\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamecreated_atupdated_atstreet_idfixed_capacityidentifier
02non défini2020-09-03 13:16:35.293111+02:002020-09-03 13:16:35.293111+02:002NaN52ff3466787b4d538407372e5f7afe0f
11mucem2020-09-03 13:11:23.133059+02:002020-09-03 13:11:23.133059+02:001NaN702bd76fe3dd5dbcf118a6965a946f54
\n", - "
" - ], - "text/plain": [ - " id name created_at \\\n", - "0 2 non défini 2020-09-03 13:16:35.293111+02:00 \n", - "1 1 mucem 2020-09-03 13:11:23.133059+02:00 \n", - "\n", - " updated_at street_id fixed_capacity \\\n", - "0 2020-09-03 13:16:35.293111+02:00 2 NaN \n", - "1 2020-09-03 13:11:23.133059+02:00 1 NaN \n", - "\n", - " identifier \n", - "0 52ff3466787b4d538407372e5f7afe0f \n", - "1 702bd76fe3dd5dbcf118a6965a946f54 " - ] - }, - "execution_count": 254, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = display_databases(name_dataset)\n", "print(\"Number of columns : \", len(df.columns))\n", @@ -5366,84 +1037,10 @@ }, { "cell_type": "code", - "execution_count": 255, + "execution_count": null, "id": "e3621201-fab9-49fd-95c1-0b9d5da76e50", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of columns : 5\n", - "Columns : Index(['id', 'street_id', 'identifier', 'name', 'fixed_capacity'], dtype='object')\n", - "Percent of NA for each column : id 0.0\n", - "street_id 0.0\n", - "identifier 0.0\n", - "name 0.0\n", - "fixed_capacity 100.0\n", - "dtype: float64\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idstreet_ididentifiernamefixed_capacity
02252ff3466787b4d538407372e5f7afe0fnon définiNaN
111702bd76fe3dd5dbcf118a6965a946f54mucemNaN
\n", - "
" - ], - "text/plain": [ - " id street_id identifier name fixed_capacity\n", - "0 2 2 52ff3466787b4d538407372e5f7afe0f non défini NaN\n", - "1 1 1 702bd76fe3dd5dbcf118a6965a946f54 mucem NaN" - ] - }, - "execution_count": 255, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = process_df(df)\n", "df.head()" @@ -5451,26 +1048,10 @@ }, { "cell_type": "code", - "execution_count": 256, + "execution_count": null, "id": "1b198b92-8654-4531-a0dd-8f2e01c2e6c1", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id int64\n", - "street_id int64\n", - "identifier object\n", - "name object\n", - "fixed_capacity float64\n", - "dtype: object" - ] - }, - "execution_count": 256, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.dtypes" ] @@ -5485,7 +1066,7 @@ }, { "cell_type": "code", - "execution_count": 257, + "execution_count": null, "id": "43576244-c8cf-4ca0-b056-7aea1fbf0bc7", "metadata": {}, "outputs": [], @@ -5500,7 +1081,7 @@ }, { "cell_type": "code", - "execution_count": 258, + "execution_count": null, "id": "0fad097e-474c-4af7-b1e1-7d8dda3f09ea", "metadata": {}, "outputs": [], @@ -5510,1107 +1091,71 @@ " df = process_df_2(df)\n", " # drop na :\n", " #df = df.dropna(axis=1, thresh=len(df))\n", - " df = df.drop(columns = 'identifier')\n", + " # if identifier in table : delete it\n", + " if 'identifier' in df.columns:\n", + " df = df.drop(columns = 'identifier')\n", " return df" ] }, { "cell_type": "markdown", - "id": "58ad68ff-3e20-4c4a-a122-3694670cbd28", + "id": "b60034ef-fdd6-4640-a012-cf74c17b333f", "metadata": {}, "source": [ - "Merge between products and categories is useless as the relevant columns in categories are full of NA. Is it true for all companies ?" 
def create_products_table():
    """Assemble the products table enriched with category information.

    Steps, in order:
      1. Load ``1products.csv`` and ``1categories.csv`` through ``load_dataset``
         and left-join categories onto products (``category_id`` -> ``id``).
         Columns present in both frames receive the ``_products`` /
         ``_categories`` suffixes; the remaining ``name`` column is renamed
         to ``name_categories``.
      2. Load ``1type_of_categories.csv``, discard its ``id`` column and
         left-join it on ``category_id``.
      3. Remove ``id_categories`` and pass the frame through
         ``order_columns_id``.

    Returns:
        The frame returned by ``order_columns_id`` after the joins above.
    """
    # Stage 1: products LEFT JOIN categories
    print("first merge products and categories")
    products_df = load_dataset("1products.csv")
    categories_df = load_dataset("1categories.csv")
    with_categories = products_df.merge(
        categories_df,
        how='left',
        left_on='category_id',
        right_on='id',
        suffixes=('_products', '_categories'),
    ).rename(columns={"name": "name_categories"})

    # Stage 2: attach the category type; the mapping table's own 'id' is not needed
    print("Second merge products_theme and type of categories")
    category_types = load_dataset("1type_of_categories.csv").drop(columns='id')
    with_types = with_categories.merge(
        category_types,
        how='left',
        left_on='category_id',
        right_on='category_id',
    )

    # Stage 3: 'id_categories' is the right-hand join key of stage 1
    # (presumably a duplicate of 'category_id' for matched rows), so drop it
    # before the helper arranges the columns.
    without_dup_key = with_types.drop(columns=['id_categories'])
    return order_columns_id(without_dup_key)
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
id_productsrepresentation_idpricing_formula_idcategory_idproducts_group_idproduct_pack_idamountis_full_priceapply_priceextra_field_productsamount_consumptionid_categoriesnameextra_field_categoriesquota
010682914114411065519.0False0.0NaNNaN41indiv activité trNaNNaN
1478273131147119.5False0.0NaNNaN1indiv entrées tpNaNNaN
220873275137120825111.5False0.0NaNNaN1indiv entrées tpNaNNaN
3157142825199515677318.0False0.0NaNNaN5indiv entrées trNaNNaN
413419931117518.5False0.0NaNNaN1indiv entrées tpNaNNaN
\n", - "
" - ], - "text/plain": [ - " id_products representation_id pricing_formula_id category_id \\\n", - "0 10682 914 114 41 \n", - "1 478 273 131 1 \n", - "2 20873 275 137 1 \n", - "3 157142 82519 9 5 \n", - "4 1341 9 93 1 \n", - "\n", - " products_group_id product_pack_id amount is_full_price apply_price \\\n", - "0 10655 1 9.0 False 0.0 \n", - "1 471 1 9.5 False 0.0 \n", - "2 20825 1 11.5 False 0.0 \n", - "3 156773 1 8.0 False 0.0 \n", - "4 1175 1 8.5 False 0.0 \n", - "\n", - " extra_field_products amount_consumption id_categories name \\\n", - "0 NaN NaN 41 indiv activité tr \n", - "1 NaN NaN 1 indiv entrées tp \n", - "2 NaN NaN 1 indiv entrées tp \n", - "3 NaN NaN 5 indiv entrées tr \n", - "4 NaN NaN 1 indiv entrées tp \n", - "\n", - " extra_field_categories quota \n", - "0 NaN NaN \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN " - ] - }, - "execution_count": 260, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ + "products_theme = create_products_table()\n", "products_theme.head()" ] }, { - "cell_type": "code", - "execution_count": 261, - "id": "4e60911d-6aad-4350-b210-f007a85b8638", + "cell_type": "markdown", + "id": "8bd7b7ab-fd04-48d2-898b-48c5815457f3", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/1/1type_of_categories.csv\n", - "Shape : (5, 6)\n", - "Number of columns : 4\n", - "Columns : Index(['id', 'type_of_id', 'category_id', 'identifier'], dtype='object')\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
type_of_idcategory_id
01226
11215
21228
3121
4123
\n", - "
" - ], - "text/plain": [ - " type_of_id category_id\n", - "0 12 26\n", - "1 12 15\n", - "2 12 28\n", - "3 12 1\n", - "4 12 3" - ] - }, - "execution_count": 261, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "# products_theme + type_of_categories\n", - "\n", - "type_of_categories = load_dataset(\"1type_of_categories.csv\")\n", - "type_of_categories = type_of_categories.drop(columns = 'id')\n", - "type_of_categories" + "### Events Table" ] }, { "cell_type": "code", - "execution_count": 262, - "id": "af9bca6c-3616-4b6d-8471-3dcdc62fc9bf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
id_productsrepresentation_idpricing_formula_idcategory_idproducts_group_idproduct_pack_idamountis_full_priceapply_priceextra_field_productsamount_consumptionid_categoriesnameextra_field_categoriesquotatype_of_id
010682914114411065519.0False0.0NaNNaN41indiv activité trNaNNaNNaN
1478273131147119.5False0.0NaNNaN1indiv entrées tpNaNNaN12.0
220873275137120825111.5False0.0NaNNaN1indiv entrées tpNaNNaN12.0
3157142825199515677318.0False0.0NaNNaN5indiv entrées trNaNNaNNaN
413419931117518.5False0.0NaNNaN1indiv entrées tpNaNNaN12.0
\n", - "
" - ], - "text/plain": [ - " id_products representation_id pricing_formula_id category_id \\\n", - "0 10682 914 114 41 \n", - "1 478 273 131 1 \n", - "2 20873 275 137 1 \n", - "3 157142 82519 9 5 \n", - "4 1341 9 93 1 \n", - "\n", - " products_group_id product_pack_id amount is_full_price apply_price \\\n", - "0 10655 1 9.0 False 0.0 \n", - "1 471 1 9.5 False 0.0 \n", - "2 20825 1 11.5 False 0.0 \n", - "3 156773 1 8.0 False 0.0 \n", - "4 1175 1 8.5 False 0.0 \n", - "\n", - " extra_field_products amount_consumption id_categories name \\\n", - "0 NaN NaN 41 indiv activité tr \n", - "1 NaN NaN 1 indiv entrées tp \n", - "2 NaN NaN 1 indiv entrées tp \n", - "3 NaN NaN 5 indiv entrées tr \n", - "4 NaN NaN 1 indiv entrées tp \n", - "\n", - " extra_field_categories quota type_of_id \n", - "0 NaN NaN NaN \n", - "1 NaN NaN 12.0 \n", - "2 NaN NaN 12.0 \n", - "3 NaN NaN NaN \n", - "4 NaN NaN 12.0 " - ] - }, - "execution_count": 262, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "products_theme = products_theme.merge(type_of_categories, how = 'left', left_on = 'category_id', right_on = 'category_id' )\n", - "products_theme.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 268, - "id": "c631a8ce-f38c-433d-ab0e-17c10cc5894c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/1/1events.csv\n", - "Shape : (1232, 12)\n", - "Number of columns : 10\n", - "Columns : Index(['id', 'season_id', 'facility_id', 'event_type_id', 'event_type_key_id',\n", - " 'facility_key_id', 'identifier', 'name', 'manual_added', 'is_display'],\n", - " dtype='object')\n", - "File path : bdc2324-data/1/1seasons.csv\n", - "Shape : (13, 6)\n", - "Number of columns : 4\n", - "Columns : Index(['id', 'identifier', 'name', 'start_date_time'], dtype='object')\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
id_eventsseason_idfacility_idevent_type_idevent_type_key_idfacility_key_idname_eventsmanual_addedis_displayid_seasonsname_seasonsstart_date_time
0192161441frontièresFalseTrue162018NaN
13032927671551visite guidée une autre histoire du monde (1h00)FalseTrue27672023NaN
2161161221visite contée les chercheurs d'or indivFalseTrue162018NaN
359575821441we dreamt of utopia and we woke up screaming.FalseTrue5822021NaN
483375821441jeff koons épisodes 4FalseTrue5822021NaN
\n", - "
" - ], - "text/plain": [ - " id_events season_id facility_id event_type_id event_type_key_id \\\n", - "0 192 16 1 4 4 \n", - "1 30329 2767 1 5 5 \n", - "2 161 16 1 2 2 \n", - "3 5957 582 1 4 4 \n", - "4 8337 582 1 4 4 \n", - "\n", - " facility_key_id name_events \\\n", - "0 1 frontières \n", - "1 1 visite guidée une autre histoire du monde (1h00) \n", - "2 1 visite contée les chercheurs d'or indiv \n", - "3 1 we dreamt of utopia and we woke up screaming. \n", - "4 1 jeff koons épisodes 4 \n", - "\n", - " manual_added is_display id_seasons name_seasons start_date_time \n", - "0 False True 16 2018 NaN \n", - "1 False True 2767 2023 NaN \n", - "2 False True 16 2018 NaN \n", - "3 False True 582 2021 NaN \n", - "4 False True 582 2021 NaN " - ] - }, - "execution_count": 268, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# events + seasons\n", - "\n", - "events = load_dataset(\"1events.csv\")\n", - "seasons = load_dataset(\"1seasons.csv\")\n", - "\n", - "events_theme = events.merge(seasons, how = 'left', left_on = 'season_id', right_on = 'id', suffixes=('_events', '_seasons'))\n", - "\n", - "events_theme.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 270, - "id": "83a166c4-f2e1-4af3-9e60-74fbac829bf3", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/1/1event_types.csv\n", - "Shape : (9, 6)\n", - "Number of columns : 4\n", - "Columns : Index(['id', 'fidelity_delay', 'identifier', 'name'], dtype='object')\n", - "Columns events_theme : Index(['id_events', 'season_id', 'facility_id', 'event_type_id',\n", - " 'event_type_key_id', 'facility_key_id', 'name_events', 'manual_added',\n", - " 'is_display', 'id_seasons', 'name_seasons', 'start_date_time', 'id',\n", - " 'fidelity_delay', 'name_event_types'],\n", - " dtype='object')\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
id_eventsseason_idfacility_idevent_type_idevent_type_key_idfacility_key_idname_eventsmanual_addedis_displayid_seasonsname_seasonsstart_date_timefidelity_delayname_event_types
0192161441frontièresFalseTrue162018NaN36spectacle vivant
13032927671551visite guidée une autre histoire du monde (1h00)FalseTrue27672023NaN36offre muséale groupe
2161161221visite contée les chercheurs d'or indivFalseTrue162018NaN36offre muséale individuel
359575821441we dreamt of utopia and we woke up screaming.FalseTrue5822021NaN36spectacle vivant
483375821441jeff koons épisodes 4FalseTrue5822021NaN36spectacle vivant
\n", - "
" - ], - "text/plain": [ - " id_events season_id facility_id event_type_id event_type_key_id \\\n", - "0 192 16 1 4 4 \n", - "1 30329 2767 1 5 5 \n", - "2 161 16 1 2 2 \n", - "3 5957 582 1 4 4 \n", - "4 8337 582 1 4 4 \n", - "\n", - " facility_key_id name_events \\\n", - "0 1 frontières \n", - "1 1 visite guidée une autre histoire du monde (1h00) \n", - "2 1 visite contée les chercheurs d'or indiv \n", - "3 1 we dreamt of utopia and we woke up screaming. \n", - "4 1 jeff koons épisodes 4 \n", - "\n", - " manual_added is_display id_seasons name_seasons start_date_time \\\n", - "0 False True 16 2018 NaN \n", - "1 False True 2767 2023 NaN \n", - "2 False True 16 2018 NaN \n", - "3 False True 582 2021 NaN \n", - "4 False True 582 2021 NaN \n", - "\n", - " fidelity_delay name_event_types \n", - "0 36 spectacle vivant \n", - "1 36 offre muséale groupe \n", - "2 36 offre muséale individuel \n", - "3 36 spectacle vivant \n", - "4 36 spectacle vivant " - ] - }, - "execution_count": 270, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# events_theme + event_types\n", - "\n", - "event_types = load_dataset(\"1event_types.csv\")\n", - "\n", - "events_theme = events_theme.merge(event_types, how = 'left', left_on = 'event_type_id', right_on = 'id', suffixes=('_events', '_event_type'))\n", - "events_theme = events_theme.rename(columns = {\"name\" : \"name_event_types\"})\n", - "print(\"Columns events_theme : \", events_theme.columns)\n", - "events_theme = events_theme.drop(columns = 'id')\n", - "events_theme.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 271, - "id": "c1734e4b-ba23-4921-b80d-471057373f43", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : bdc2324-data/1/1facilities.csv\n", - "Shape : (2, 7)\n", - "Number of columns : 5\n", - "Columns : Index(['id', 'street_id', 'identifier', 'name', 'fixed_capacity'], dtype='object')\n" - ] - } - ], - "source": [ - 
"facilities = load_dataset(\"1facilities.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 272, - "id": "fd930794-e3ad-46f8-aa55-3b1fc3cea64d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Columns events_theme : Index(['id_events', 'season_id', 'facility_id', 'event_type_id',\n", - " 'event_type_key_id', 'facility_key_id', 'name_events', 'manual_added',\n", - " 'is_display', 'id_seasons', 'name_seasons', 'start_date_time',\n", - " 'fidelity_delay', 'name_event_types', 'id', 'street_id', 'name',\n", - " 'fixed_capacity'],\n", - " dtype='object')\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
id_eventsseason_idfacility_idevent_type_idevent_type_key_idfacility_key_idname_eventsmanual_addedis_displayid_seasonsname_seasonsstart_date_timefidelity_delayname_event_typesstreet_idnamefixed_capacity
0192161441frontièresFalseTrue162018NaN36spectacle vivant1mucemNaN
13032927671551visite guidée une autre histoire du monde (1h00)FalseTrue27672023NaN36offre muséale groupe1mucemNaN
2161161221visite contée les chercheurs d'or indivFalseTrue162018NaN36offre muséale individuel1mucemNaN
359575821441we dreamt of utopia and we woke up screaming.FalseTrue5822021NaN36spectacle vivant1mucemNaN
483375821441jeff koons épisodes 4FalseTrue5822021NaN36spectacle vivant1mucemNaN
\n", - "
" - ], - "text/plain": [ - " id_events season_id facility_id event_type_id event_type_key_id \\\n", - "0 192 16 1 4 4 \n", - "1 30329 2767 1 5 5 \n", - "2 161 16 1 2 2 \n", - "3 5957 582 1 4 4 \n", - "4 8337 582 1 4 4 \n", - "\n", - " facility_key_id name_events \\\n", - "0 1 frontières \n", - "1 1 visite guidée une autre histoire du monde (1h00) \n", - "2 1 visite contée les chercheurs d'or indiv \n", - "3 1 we dreamt of utopia and we woke up screaming. \n", - "4 1 jeff koons épisodes 4 \n", - "\n", - " manual_added is_display id_seasons name_seasons start_date_time \\\n", - "0 False True 16 2018 NaN \n", - "1 False True 2767 2023 NaN \n", - "2 False True 16 2018 NaN \n", - "3 False True 582 2021 NaN \n", - "4 False True 582 2021 NaN \n", - "\n", - " fidelity_delay name_event_types street_id name fixed_capacity \n", - "0 36 spectacle vivant 1 mucem NaN \n", - "1 36 offre muséale groupe 1 mucem NaN \n", - "2 36 offre muséale individuel 1 mucem NaN \n", - "3 36 spectacle vivant 1 mucem NaN \n", - "4 36 spectacle vivant 1 mucem NaN " - ] - }, - "execution_count": 272, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "events_theme = events_theme.merge(facilities, how = 'left', left_on = 'facility_id', right_on = 'id', suffixes=('_events', '_facility'))\n", - "print(\"Columns events_theme : \", events_theme.columns)\n", - "events_theme = events_theme.drop(columns = 'id')\n", - "events_theme.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 275, + "execution_count": null, "id": "6ed0ad20-8315-4112-9a85-10e5f04ef852", "metadata": {}, "outputs": [], @@ -6636,234 +1181,70 @@ " events_theme = events_theme.merge(facilities, how = 'left', left_on = 'facility_id', right_on = 'id', suffixes=('_events', '_facility'))\n", " events_theme = events_theme.rename(columns = {\"name\" : \"name_facilties\"})\n", " events_theme = events_theme.drop(columns = 'id')\n", - " \n", + "\n", + " # Index cleaning\n", + " events_theme = 
events_theme.drop(columns = ['id_seasons'])\n", + " events_theme = order_columns_id(events_theme)\n", " return events_theme" ] }, { "cell_type": "code", - "execution_count": 276, + "execution_count": null, "id": "98ef0636-8c45-4a23-a62a-1fbe1544f8ce", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "first merge events and seasons : \n", - "File path : bdc2324-data/1/1events.csv\n", - "Shape : (1232, 12)\n", - "Number of columns : 10\n", - "Columns : Index(['id', 'season_id', 'facility_id', 'event_type_id', 'event_type_key_id',\n", - " 'facility_key_id', 'identifier', 'name', 'manual_added', 'is_display'],\n", - " dtype='object')\n", - "File path : bdc2324-data/1/1seasons.csv\n", - "Shape : (13, 6)\n", - "Number of columns : 4\n", - "Columns : Index(['id', 'identifier', 'name', 'start_date_time'], dtype='object')\n", - "Secondly merge events_theme and event_types : \n", - "File path : bdc2324-data/1/1event_types.csv\n", - "Shape : (9, 6)\n", - "Number of columns : 4\n", - "Columns : Index(['id', 'fidelity_delay', 'identifier', 'name'], dtype='object')\n", - "thirdly merge events_theme and facilities : \n", - "File path : bdc2324-data/1/1facilities.csv\n", - "Shape : (2, 7)\n", - "Number of columns : 5\n", - "Columns : Index(['id', 'street_id', 'identifier', 'name', 'fixed_capacity'], dtype='object')\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
id_eventsseason_idfacility_idevent_type_idevent_type_key_idfacility_key_idname_eventsmanual_addedis_displayid_seasonsname_seasonsstart_date_timefidelity_delayname_event_typesstreet_idname_faciltiesfixed_capacity
0192161441frontièresFalseTrue162018NaN36spectacle vivant1mucemNaN
13032927671551visite guidée une autre histoire du monde (1h00)FalseTrue27672023NaN36offre muséale groupe1mucemNaN
2161161221visite contée les chercheurs d'or indivFalseTrue162018NaN36offre muséale individuel1mucemNaN
359575821441we dreamt of utopia and we woke up screaming.FalseTrue5822021NaN36spectacle vivant1mucemNaN
483375821441jeff koons épisodes 4FalseTrue5822021NaN36spectacle vivant1mucemNaN
\n", - "
" - ], - "text/plain": [ - " id_events season_id facility_id event_type_id event_type_key_id \\\n", - "0 192 16 1 4 4 \n", - "1 30329 2767 1 5 5 \n", - "2 161 16 1 2 2 \n", - "3 5957 582 1 4 4 \n", - "4 8337 582 1 4 4 \n", - "\n", - " facility_key_id name_events \\\n", - "0 1 frontières \n", - "1 1 visite guidée une autre histoire du monde (1h00) \n", - "2 1 visite contée les chercheurs d'or indiv \n", - "3 1 we dreamt of utopia and we woke up screaming. \n", - "4 1 jeff koons épisodes 4 \n", - "\n", - " manual_added is_display id_seasons name_seasons start_date_time \\\n", - "0 False True 16 2018 NaN \n", - "1 False True 2767 2023 NaN \n", - "2 False True 16 2018 NaN \n", - "3 False True 582 2021 NaN \n", - "4 False True 582 2021 NaN \n", - "\n", - " fidelity_delay name_event_types street_id name_facilties \\\n", - "0 36 spectacle vivant 1 mucem \n", - "1 36 offre muséale groupe 1 mucem \n", - "2 36 offre muséale individuel 1 mucem \n", - "3 36 spectacle vivant 1 mucem \n", - "4 36 spectacle vivant 1 mucem \n", - "\n", - " fixed_capacity \n", - "0 NaN \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN " - ] - }, - "execution_count": 276, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "df = create_events_table()\n", - "df.head()" + "events_theme= create_events_table()\n", + "events_theme.head()" ] + }, + { + "cell_type": "markdown", + "id": "4ad5b680-bb27-4f86-a5f3-7ff4fd1be96a", + "metadata": {}, + "source": [ + "## Representations_Table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "481dddd6-80a8-4b9e-a05e-ed06fa3ed7a6", + "metadata": {}, + "outputs": [], + "source": [ + "def create_representations_table():\n", + " representations = load_dataset(\"1representations.csv\")\n", + " representations_capacity = load_dataset(\"1representation_category_capacities.csv\")\n", + "\n", + " representations_theme = representations.merge(representations_capacity, how='left',\n", + " left_on='id', 
right_on='representation_id',\n", + " suffixes=('_representation', '_representation_cap'))\n", + " # index cleaning\n", + " representations_theme = representations_theme.drop(columns = [\"representation_id\"])\n", + " representations_theme = order_columns_id(representations_theme)\n", + " return representations_theme" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "677f4ed8-ef58-45f2-9056-ede0898c6a64", + "metadata": {}, + "outputs": [], + "source": [ + "rep = create_representations_table()\n", + "rep.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b26f4e7e-134d-4e32-a615-4b0e6bb80b25", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {