diff --git a/Sport/exploration_sport.ipynb b/Sport/exploration_sport.ipynb
index b60be94..bf66eaf 100644
--- a/Sport/exploration_sport.ipynb
+++ b/Sport/exploration_sport.ipynb
@@ -30,7 +30,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 31,
"id": "f62b996c-4e17-40ea-83ba-f0cb60be7671",
"metadata": {},
"outputs": [
@@ -54,7 +54,7 @@
" 'bdc2324-data/9']"
]
},
- "execution_count": 3,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@@ -831,23 +831,130 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 13,
"id": "970302f5-4de2-46b4-a1ce-a5396f5330ab",
"metadata": {},
+ "outputs": [],
+ "source": [
+ "def display_databases(directory_path, file_name):\n",
+ " \"\"\"\n",
+ " This function returns the file from s3 storage \n",
+ " \"\"\"\n",
+ " file_path = \"projet-bdc2324-team1\" + \"/Generalization/\" + directory_path + \"/\" + file_name + \".csv\"\n",
+ " print(\"File path : \", file_path)\n",
+ " with fs.open(file_path, mode=\"rb\") as file_in:\n",
+ " df = pd.read_csv(file_in, sep=\",\") \n",
+ " return df "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "f5bfae82-04aa-44e1-9869-3f4fd5736b41",
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "File path : projet-bdc2324-team1/Generalization/sport/Train_set.csv\n"
+ ]
+ },
{
"data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " c | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
"text/plain": [
- ""
+ "Empty DataFrame\n",
+ "Columns: [c]\n",
+ "Index: []"
]
},
- "execution_count": 5,
+ "execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "fs("
+ "train_sport = display_databases('sport', 'Train_set')\n",
+ "train_sport.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "56d5b12e-45e8-4312-869d-bde4d24900b6",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "KeyError",
+ "evalue": "'y_has_purchased'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/pandas/core/indexes/base.py:3802\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3801\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3802\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3803\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
+ "File \u001b[0;32mindex.pyx:153\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mindex.pyx:182\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7081\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7089\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mKeyError\u001b[0m: 'y_has_purchased'",
+ "\nThe above exception was the direct cause of the following exception:\n",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[51], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrain_sport\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43my_has_purchased\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39munique()\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/pandas/core/frame.py:4090\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 4088\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 4089\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[0;32m-> 4090\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4091\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[1;32m 4092\u001b[0m indexer \u001b[38;5;241m=\u001b[39m [indexer]\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/pandas/core/indexes/base.py:3809\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m 3805\u001b[0m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[1;32m 3807\u001b[0m ):\n\u001b[1;32m 3808\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3809\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3810\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3811\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3812\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3813\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3814\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
+ "\u001b[0;31mKeyError\u001b[0m: 'y_has_purchased'"
+ ]
+ }
+ ],
+ "source": [
+ "train_sport['y_has_purchased'].unique()"
+ ]
+ },
+ {
+ "cell_type": "raw",
+ "id": "bd8019ae-8d7b-4dfe-be93-abf80a497e13",
+ "metadata": {},
+ "source": [
+ "projet-bdc2324-team1/Generalization/sport/Train_set/dataset_train5.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d056c7b3-0e8c-485c-b2f3-4681077f1c2e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fs.ls('projet-bdc2324-team1/Generalization/sport')"
]
}
],