# Business Data Challenge - Team 1
"""Build the generalization train/test datasets for one company type.

For the company type entered by the user, every CSV file stored under
``projet-bdc2324-team1/Generalization/<type>/Test_set`` and
``.../Train_set`` is read and stacked, and the two resulting tables are
written back as ``Test_set.csv`` and ``Train_set.csv`` in
``projet-bdc2324-team1/Generalization/<type>/``.

Requires the ``AWS_S3_ENDPOINT`` environment variable to be set.
"""

import pandas as pd
import numpy as np
import os
import s3fs
import re
import warnings
from datetime import date, timedelta, datetime

# Create filesystem object
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})

# Import KPI construction functions
# NOTE(review): exec() runs the helper script in this module's namespace.
# The file name starts with a digit, so it cannot be loaded with a plain
# `import` statement; only run this next to a trusted 0_KPI_functions.py.
with open('0_KPI_functions.py') as kpi_source:
    exec(kpi_source.read())

# Ignore warning
warnings.filterwarnings('ignore')


# functions
def _load_split(split_name, type_of_comp="sport"):
    """Read and stack every CSV file of one split folder.

    Args:
        split_name: Name of the folder to read, ``"Test_set"`` or
            ``"Train_set"``.
        type_of_comp: Company type used to build the folder path.

    Returns:
        A DataFrame with the rows of all files, re-indexed from 0. An empty
        DataFrame is returned when the folder lists no file.
    """
    folder = f"projet-bdc2324-team1/Generalization/{type_of_comp}/{split_name}"

    frames = []
    for file in fs.ls(folder):
        print(file)
        with fs.open(file, mode="rb") as file_in:
            frames.append(pd.read_csv(file_in, sep=","))

    # pd.concat raises on an empty list, so keep the "no file" case explicit.
    if not frames:
        return pd.DataFrame()

    # One concat at the end avoids re-copying the accumulated rows on every
    # iteration.
    return pd.concat(frames, ignore_index=True)


def generate_test_set(type_of_comp="sport"):
    """Return the concatenation of all CSV files of the test folder.

    Args:
        type_of_comp: Company type whose ``Test_set`` folder is read.
            Defaults to ``"sport"``, the folder previously hard-coded here.
    """
    return _load_split("Test_set", type_of_comp)


def generate_train_set(type_of_comp="sport"):
    """Return the concatenation of all CSV files of the train folder.

    Args:
        type_of_comp: Company type whose ``Train_set`` folder is read.
            Defaults to ``"sport"``, the folder previously hard-coded here.
    """
    # The previous implementation concatenated each file onto the global
    # ``test_set`` instead of the running train set, so the result was the
    # test data plus the last train file only.
    return _load_split("Train_set", type_of_comp)


type_of_comp = input('Choisissez le type de compagnie : sport ? musique ? musee ?')
BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{type_of_comp}/'

# create test and train datasets
# Read from the same company type as the output bucket so that data from one
# type is never exported into another type's folder.
test_set = generate_test_set(type_of_comp)
train_set = generate_train_set(type_of_comp)

# Exportation test set
FILE_KEY_OUT_S3 = "Test_set.csv"
FILE_PATH_OUT_S3 = BUCKET_OUT + FILE_KEY_OUT_S3

with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
    test_set.to_csv(file_out, index=False)

print("Exportation dataset test : SUCCESS")

# Exportation train set
FILE_KEY_OUT_S3 = "Train_set.csv"
FILE_PATH_OUT_S3 = BUCKET_OUT + FILE_KEY_OUT_S3

with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
    train_set.to_csv(file_out, index=False)

print("Exportation dataset train : SUCCESS")