{ "cells": [ { "cell_type": "markdown", "id": "3f41343f-7205-41d9-89dd-88039e301413", "metadata": {}, "source": [ "# Statistiques descriptives" ] }, { "cell_type": "code", "execution_count": 2, "id": "abfaf341-7b35-4407-9133-d21336c04027", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import os\n", "import s3fs\n", "import re" ] }, { "cell_type": "code", "execution_count": 3, "id": "7fb72fa3-7940-496f-ac78-c2837f65eefa", "metadata": {}, "outputs": [], "source": [ "# Access Key to Minio\n", "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': 'https://'+'minio-simple.lab.groupe-genes.fr'},key ='WKTGH4YGUBAT3TR0OSUR', secret = 'g8ozi6ZUrBy8DzaAip4F7zOizbr4DKf4RgYNseqU', token = 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3NLZXkiOiJXS1RHSDRZR1VCQVQzVFIwT1NVUiIsImFjciI6IjAiLCJhbGxvd2VkLW9yaWdpbnMiOlsiaHR0cHM6Ly9vbnl4aWEubGFiLmdyb3VwZS1nZW5lcy5mciJdLCJhdWQiOlsibWluaW8iLCJhY2NvdW50Il0sImF1dGhfdGltZSI6MTcwNzU4NjUwMCwiYXpwIjoib255eGlhLW1pbmlvIiwiZW1haWwiOiJhbnRvaW5lLmpvdWJyZWxAZW5zYWUuZnIiLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiZXhwIjoxNzA3NjczMDQ3LCJmYW1pbHlfbmFtZSI6IkpPVUJSRUwiLCJnaXZlbl9uYW1lIjoiQW50b2luZSIsImdyb3VwcyI6WyJiZGMyMzI0LXRlYW0xIl0sImlhdCI6MTcwNzU4NjY0NywiaXNzIjoiaHR0cHM6Ly9hdXRoLmdyb3VwZS1nZW5lcy5mci9yZWFsbXMvZ2VuZXMiLCJqdGkiOiI1MjQ2MDZmMS1lYWM3LTQxZDgtYTEzMy04MGZjMDk0MGVlNzEiLCJuYW1lIjoiQW50b2luZSBKT1VCUkVMIiwicG9saWN5Ijoic3Rzb25seSIsInByZWZlcnJlZF91c2VybmFtZSI6ImFqb3VicmVsLWVuc2FlIiwicmVhbG1fYWNjZXNzIjp7InJvbGVzIjpbIm9mZmxpbmVfYWNjZXNzIiwiZGVmYXVsdC1yb2xlcy1nZW5lcyIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJvcGVuaWQgcHJvZmlsZSBlbWFpbCIsInNlc3Npb25fc3RhdGUiOiI1OTk2MWNkYy0xNmFiLTQ4MTAtYWE4Zi1iZGUyMjkwNjhiNzUiLCJzaWQiOiI1OTk2MWNkYy0xNmFiLTQ4MTAtYWE4Zi1iZGUyMjkwNjhiNzUiLCJzdWIiOiIwNWYwZDk3Mi1jNWM4LTQyNmYtODAwZC00NmQ0OGU4NjkwMzUiLCJ0eXAiOiJCZWFyZXIifQ.-imw-N4bk1uCcQGobkxhsRoeBAqxC9rT7PifElbC7ODOStnwIulc7HRR2fmtiqI2PdyrfnVvzfmIPK1g056HbA')" ] }, { "cell_type": "markdown", "id": "45d5261f-4d46-49cb-8582-dd2121122b05", "metadata": {}, "source": [ "# 1 - Comportement d'achat" ] }, { "cell_type": "code", "execution_count": 11, "id": "9376af51-4320-44b6-8f30-1e1234371556", "metadata": {}, "outputs": [], "source": [ "# Chargement des données temporaires\n", "BUCKET = \"projet-bdc2324-team1\"\n", "FILE_KEY_S3 = \"0_Temp/Company 1 - Purchasing behaviour.csv\"\n", "FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n", "\n", "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", " tickets_kpi = pd.read_csv(file_in, sep=\",\")" ] }, { "cell_type": "code", "execution_count": 12, "id": "1855dcca-cfce-4c54-90ae-55d9a1ab5d45", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Unnamed: 0 | \n", "customer_id | \n", "event_type_id | \n", "nb_tickets | \n", "total_amount | \n", "nb_suppliers | \n", "vente_internet_max | \n", "purchase_date_min | \n", "purchase_date_max | \n", "time_between_purchase | \n", "nb_tickets_internet | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "1 | \n", "2 | \n", "384226 | \n", "2686540.5 | \n", "7 | \n", "1 | \n", "2014-12-03 14:55:37+00:00 | \n", "2023-11-04 15:12:16+00:00 | \n", "3258 days 00:16:39 | \n", "51.0 | \n", "
| 1 | \n", "1 | \n", "1 | \n", "4 | \n", "453242 | \n", "3248965.5 | \n", "6 | \n", "1 | \n", "2013-09-23 14:45:01+00:00 | \n", "2023-11-03 14:11:01+00:00 | \n", "3692 days 23:26:00 | \n", "2988.0 | \n", "
| 2 | \n", "2 | \n", "1 | \n", "5 | \n", "201750 | \n", "1459190.0 | \n", "6 | \n", "1 | \n", "2013-06-10 10:37:58+00:00 | \n", "2023-11-08 15:59:45+00:00 | \n", "3803 days 05:21:47 | \n", "9.0 | \n", "
| 3 | \n", "3 | \n", "1 | \n", "6 | \n", "217356 | \n", "1435871.5 | \n", "5 | \n", "1 | \n", "2017-01-01 02:20:08+00:00 | \n", "2019-12-31 02:20:06+00:00 | \n", "1093 days 23:59:58 | \n", "5.0 | \n", "
| 4 | \n", "4 | \n", "2 | \n", "2 | \n", "143 | \n", "0.0 | \n", "1 | \n", "0 | \n", "2018-04-07 12:55:07+00:00 | \n", "2020-03-08 12:06:43+00:00 | \n", "700 days 23:11:36 | \n", "0.0 | \n", "