init full modelization
This commit is contained in:
parent
83a3c039ec
commit
14423b1d34
219
Sport/Modelization/3_full_modelization_sport.ipynb
Normal file
219
Sport/Modelization/3_full_modelization_sport.ipynb
Normal file
|
@ -0,0 +1,219 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ff8cc602-e733-4a31-bf46-a31087511fe0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Predict sales - sports companies"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "035dacc7-85db-47c9-b350-5ce54a2b5cdd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6666666666666666\n",
|
||||
"0.5\n",
|
||||
"0.6666666666666666 0.5714285714285715\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n",
|
||||
"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n",
|
||||
"\n",
|
||||
"y_test, y_pred = [1,1,0], [1,0,0]\n",
|
||||
"print(accuracy_score(y_test, y_pred))\n",
|
||||
"print(recall_score(y_test, y_pred))\n",
|
||||
"print(f1_score(y_test, y_pred), 2 * accuracy_score(y_test, y_pred) * recall_score(y_test, y_pred)/(accuracy_score(y_test, y_pred) + recall_score(y_test, y_pred)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "415e466a-1a71-4150-bff7-2f8904766df4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Importations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "b5aaf421-850a-4a86-8e99-2c1f0723bd6c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import os\n",
|
||||
"import s3fs\n",
|
||||
"import re\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||||
"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n",
|
||||
"from sklearn.utils import class_weight\n",
|
||||
"from sklearn.neighbors import KNeighborsClassifier\n",
|
||||
"from sklearn.pipeline import Pipeline\n",
|
||||
"from sklearn.compose import ColumnTransformer\n",
|
||||
"from sklearn.preprocessing import OneHotEncoder\n",
|
||||
"from sklearn.impute import SimpleImputer\n",
|
||||
"from sklearn.model_selection import GridSearchCV\n",
|
||||
"from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n",
|
||||
"from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n",
|
||||
"import seaborn as sns\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n",
|
||||
"from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n",
|
||||
"\n",
|
||||
"import pickle\n",
|
||||
"import warnings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c2f44070-451e-4109-9a08-3b80011d610f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load data "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "b5f8135f-b6e7-4d6d-b8e1-da185b944aff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create filesystem object\n",
|
||||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "2668a243-4ff8-40c6-9de2-5c9c07bcf714",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def load_train_test():\n",
|
||||
" BUCKET = \"projet-bdc2324-team1/Generalization/sport\"\n",
|
||||
" File_path_train = BUCKET + \"/Train_set.csv\"\n",
|
||||
" File_path_test = BUCKET + \"/Test_set.csv\"\n",
|
||||
" \n",
|
||||
" with fs.open( File_path_train, mode=\"rb\") as file_in:\n",
|
||||
" dataset_train = pd.read_csv(file_in, sep=\",\")\n",
|
||||
" # dataset_train['y_has_purchased'] = dataset_train['y_has_purchased'].fillna(0)\n",
|
||||
"\n",
|
||||
" with fs.open(File_path_test, mode=\"rb\") as file_in:\n",
|
||||
" dataset_test = pd.read_csv(file_in, sep=\",\")\n",
|
||||
" # dataset_test['y_has_purchased'] = dataset_test['y_has_purchased'].fillna(0)\n",
|
||||
" \n",
|
||||
" return dataset_train, dataset_test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "13eba3e1-3ea5-435b-8b05-6d7d5744cbe2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "PermissionError",
|
||||
"evalue": "Forbidden",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:529\u001b[0m, in \u001b[0;36mS3FileSystem.info\u001b[0;34m(self, path, version_id, refresh)\u001b[0m\n\u001b[1;32m 528\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 529\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_s3\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43ms3\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhead_object\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mBucket\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 530\u001b[0m \u001b[43m \u001b[49m\u001b[43mKey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mversion_id_kw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mversion_id\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreq_kw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 531\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {\n\u001b[1;32m 532\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mETag\u001b[39m\u001b[38;5;124m'\u001b[39m: out[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mETag\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 533\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mKey\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin([bucket, key]),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 540\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVersionId\u001b[39m\u001b[38;5;124m'\u001b[39m: out\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVersionId\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 541\u001b[0m }\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:200\u001b[0m, in \u001b[0;36mS3FileSystem._call_s3\u001b[0;34m(self, method, *akwarglist, **kwargs)\u001b[0m\n\u001b[1;32m 198\u001b[0m additional_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_s3_method_kwargs(method, \u001b[38;5;241m*\u001b[39makwarglist,\n\u001b[1;32m 199\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 200\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43madditional_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/botocore/client.py:553\u001b[0m, in \u001b[0;36mClientCreator._create_api_method.<locals>._api_call\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;66;03m# The \"self\" in this scope is referring to the BaseClient.\u001b[39;00m\n\u001b[0;32m--> 553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_api_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43moperation_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/botocore/client.py:1009\u001b[0m, in \u001b[0;36mBaseClient._make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 1008\u001b[0m error_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mfrom_code(error_code)\n\u001b[0;32m-> 1009\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m error_class(parsed_response, operation_name)\n\u001b[1;32m 1010\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
|
||||
"\u001b[0;31mClientError\u001b[0m: An error occurred (403) when calling the HeadObject operation: Forbidden",
|
||||
"\nDuring handling of the above exception, another exception occurred:\n",
|
||||
"\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[19], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m dataset_train, dataset_test \u001b[38;5;241m=\u001b[39m \u001b[43mload_train_test\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m dataset_train\u001b[38;5;241m.\u001b[39misna()\u001b[38;5;241m.\u001b[39msum()\n",
|
||||
"Cell \u001b[0;32mIn[18], line 6\u001b[0m, in \u001b[0;36mload_train_test\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m File_path_train \u001b[38;5;241m=\u001b[39m BUCKET \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/Train_set.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 4\u001b[0m File_path_test \u001b[38;5;241m=\u001b[39m BUCKET \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/Test_set.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[43mFile_path_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m file_in:\n\u001b[1;32m 7\u001b[0m dataset_train \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(file_in, sep\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m,\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# dataset_train['y_has_purchased'] = dataset_train['y_has_purchased'].fillna(0)\u001b[39;00m\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py:1295\u001b[0m, in \u001b[0;36mAbstractFileSystem.open\u001b[0;34m(self, path, mode, block_size, cache_options, compression, **kwargs)\u001b[0m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1294\u001b[0m ac \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mautocommit\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_intrans)\n\u001b[0;32m-> 1295\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_open\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1296\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1297\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1298\u001b[0m \u001b[43m \u001b[49m\u001b[43mblock_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1299\u001b[0m \u001b[43m \u001b[49m\u001b[43mautocommit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mac\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1300\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1301\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1302\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1303\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m compression \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1304\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfsspec\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcompression\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m compr\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:375\u001b[0m, in \u001b[0;36mS3FileSystem._open\u001b[0;34m(self, path, mode, block_size, acl, version_id, fill_cache, cache_type, autocommit, requester_pays, **kwargs)\u001b[0m\n\u001b[1;32m 372\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cache_type \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 373\u001b[0m cache_type \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_cache_type\n\u001b[0;32m--> 375\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mS3File\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43macl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43macl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 376\u001b[0m \u001b[43m \u001b[49m\u001b[43mversion_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 377\u001b[0m \u001b[43m \u001b[49m\u001b[43ms3_additional_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcache_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 378\u001b[0m \u001b[43m \u001b[49m\u001b[43mautocommit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mautocommit\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrequester_pays\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequester_pays\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:1096\u001b[0m, in \u001b[0;36mS3File.__init__\u001b[0;34m(self, s3, path, mode, block_size, acl, version_id, fill_cache, s3_additional_kwargs, autocommit, cache_type, requester_pays)\u001b[0m\n\u001b[1;32m 1094\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39ms3_additional_kwargs \u001b[38;5;241m=\u001b[39m s3_additional_kwargs \u001b[38;5;129;01mor\u001b[39;00m {}\n\u001b[1;32m 1095\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreq_kw \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mRequestPayer\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrequester\u001b[39m\u001b[38;5;124m'\u001b[39m} \u001b[38;5;28;01mif\u001b[39;00m requester_pays \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[0;32m-> 1096\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43ms3\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mautocommit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mautocommit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_type\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39ms3 \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfs \u001b[38;5;66;03m# compatibility\u001b[39;00m\n\u001b[1;32m 1099\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mwritable():\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py:1651\u001b[0m, in \u001b[0;36mAbstractBufferedFile.__init__\u001b[0;34m(self, fs, path, mode, block_size, autocommit, cache_type, cache_options, size, **kwargs)\u001b[0m\n\u001b[1;32m 1649\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msize \u001b[38;5;241m=\u001b[39m size\n\u001b[1;32m 1650\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1651\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msize \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdetails\u001b[49m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msize\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1652\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache \u001b[38;5;241m=\u001b[39m caches[cache_type](\n\u001b[1;32m 1653\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblocksize, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fetch_range, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msize, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcache_options\n\u001b[1;32m 1654\u001b[0m )\n\u001b[1;32m 1655\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py:1664\u001b[0m, in \u001b[0;36mAbstractBufferedFile.details\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1661\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 1662\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdetails\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 1663\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_details \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1664\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_details \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minfo\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1665\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_details\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:548\u001b[0m, in \u001b[0;36mS3FileSystem.info\u001b[0;34m(self, path, version_id, refresh)\u001b[0m\n\u001b[1;32m 546\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m(S3FileSystem, \u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39minfo(path)\n\u001b[1;32m 547\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 548\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ee\n\u001b[1;32m 549\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ParamValidationError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 550\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFailed to head path \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m (path, e))\n",
|
||||
"\u001b[0;31mPermissionError\u001b[0m: Forbidden"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataset_train, dataset_test = load_train_test()\n",
|
||||
"dataset_train.isna().sum()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e46622e7-0fc1-43f8-a7e7-34a5e90068b2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def features_target_split(dataset_train, dataset_test):\n",
|
||||
" features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n",
|
||||
" 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n",
|
||||
" 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n",
|
||||
" X_train = dataset_train[features_l]\n",
|
||||
" y_train = dataset_train[['y_has_purchased']]\n",
|
||||
"\n",
|
||||
" X_test = dataset_test[features_l]\n",
|
||||
" y_test = dataset_test[['y_has_purchased']]\n",
|
||||
" return X_train, X_test, y_train, y_test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cec4f386-e643-4bd8-b8cd-8917d2c1b3d0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_train, X_test, y_train, y_test = features_target_split(dataset_train, dataset_test)\n",
|
||||
"print(\"Shape train : \", X_train.shape)\n",
|
||||
"print(\"Shape test : \", X_test.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5978909b-eaad-4442-9bee-c98d603f0e80",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Reference in New Issue
Block a user