Labe/.ipynb_checkpoints/titi-checkpoint.ipynb

1391 lines
68 KiB
Plaintext
Raw Permalink Normal View History

2023-05-15 17:02:31 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 19,
"id": "2a7ae8ac-8304-4a26-8eac-63eedff991e2",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting matplotlib\n",
" Downloading matplotlib-3.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.6 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.6/11.6 MB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hCollecting pyparsing>=2.3.1\n",
" Downloading pyparsing-3.0.9-py3-none-any.whl (98 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.3/98.3 KB\u001b[0m \u001b[31m39.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting contourpy>=1.0.1\n",
" Downloading contourpy-1.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (300 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m300.3/300.3 KB\u001b[0m \u001b[31m56.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting fonttools>=4.22.0\n",
" Downloading fonttools-4.39.3-py3-none-any.whl (1.0 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m36.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting cycler>=0.10\n",
" Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)\n",
"Requirement already satisfied: numpy>=1.20 in /opt/jupyterhub/lib/python3.10/site-packages (from matplotlib) (1.24.3)\n",
"Collecting pillow>=6.2.0\n",
" Downloading Pillow-9.5.0-cp310-cp310-manylinux_2_28_x86_64.whl (3.4 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m37.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: python-dateutil>=2.7 in /opt/jupyterhub/lib/python3.10/site-packages (from matplotlib) (2.8.2)\n",
"Requirement already satisfied: packaging>=20.0 in /opt/jupyterhub/lib/python3.10/site-packages (from matplotlib) (23.1)\n",
"Collecting kiwisolver>=1.0.1\n",
" Downloading kiwisolver-1.4.4-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.6 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m50.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: six>=1.5 in /opt/jupyterhub/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n",
"Installing collected packages: pyparsing, pillow, kiwisolver, fonttools, cycler, contourpy, matplotlib\n",
"Successfully installed contourpy-1.0.7 cycler-0.11.0 fonttools-4.39.3 kiwisolver-1.4.4 matplotlib-3.7.1 pillow-9.5.0 pyparsing-3.0.9\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Use the %pip magic so the install targets the running kernel's environment;\n",
"# the version is pinned to the release this notebook was last run against.\n",
"%pip install matplotlib==3.7.1"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "d686eef9-708c-437d-b4ff-9eb69fe207dd",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting pandas\n",
" Downloading pandas-2.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hCollecting numpy>=1.21.0\n",
" Downloading numpy-1.24.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m45.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.2 in /opt/jupyterhub/lib/python3.10/site-packages (from pandas) (2.8.2)\n",
"Collecting pytz>=2020.1\n",
" Downloading pytz-2023.3-py2.py3-none-any.whl (502 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m502.3/502.3 KB\u001b[0m \u001b[31m92.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting tzdata>=2022.1\n",
" Downloading tzdata-2023.3-py2.py3-none-any.whl (341 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.8/341.8 KB\u001b[0m \u001b[31m73.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: six>=1.5 in /opt/jupyterhub/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
"Installing collected packages: pytz, tzdata, numpy, pandas\n",
"Successfully installed numpy-1.24.3 pandas-2.0.1 pytz-2023.3 tzdata-2023.3\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Use the %pip magic so the install targets the running kernel's environment;\n",
"# the version is pinned to the release this notebook was last run against.\n",
"%pip install pandas==2.0.1"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "142ecefe-9ba4-4963-94cc-227cffeb675a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"url = \"https://pixees.fr/informatiquelycee/n_site/asset/titanic.csv\""
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "66cf541f-6352-42ea-8ea9-983da560943b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Read the Titanic CSV straight from the URL defined in the previous cell,\n",
"# using the top-level pandas reader.\n",
"data = pd.read_csv(url)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "c235943b-283e-4e95-97fc-e9d4bf2cbf03",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>A/5 21171</td>\n",
" <td>7.2500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>PC 17599</td>\n",
" <td>71.2833</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>STON/O2. 3101282</td>\n",
" <td>7.9250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>113803</td>\n",
" <td>53.1000</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>373450</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass \n",
"0 1 0 3 \\\n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
" Name Sex Age SibSp \n",
"0 Braund, Mr. Owen Harris male 22.0 1 \\\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S "
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "b4d51fb4-57e2-4a0a-a7db-8fd6a7a48b35",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 891 entries, 0 to 890\n",
"Data columns (total 12 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 PassengerId 891 non-null int64 \n",
" 1 Survived 891 non-null int64 \n",
" 2 Pclass 891 non-null int64 \n",
" 3 Name 891 non-null object \n",
" 4 Sex 891 non-null object \n",
" 5 Age 714 non-null float64\n",
" 6 SibSp 891 non-null int64 \n",
" 7 Parch 891 non-null int64 \n",
" 8 Ticket 891 non-null object \n",
" 9 Fare 891 non-null float64\n",
" 10 Cabin 204 non-null object \n",
" 11 Embarked 889 non-null object \n",
"dtypes: float64(2), int64(5), object(5)\n",
"memory usage: 83.7+ KB\n"
]
}
],
"source": [
"data.info()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "1cb2c436-40f9-406d-96fa-bacd4961c39e",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',\n",
" 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],\n",
" dtype='object')"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.columns"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "7f8c19fa-ebf1-4138-aad7-922cda8b9c8f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Keep only the target and the three features used below. Selecting columns\n",
"# (instead of dropping the others) lets this cell be re-run without a KeyError.\n",
"data = data[['Survived', 'Pclass', 'Sex', 'Age']].copy()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "7d0eb444-da4e-40f4-887e-5e85f91713e5",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>886</th>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>male</td>\n",
" <td>27.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>887</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>19.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>888</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>female</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>889</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>male</td>\n",
" <td>26.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>890</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>32.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Sex Age\n",
"886 0 2 male 27.0\n",
"887 1 1 female 19.0\n",
"888 0 3 female NaN\n",
"889 1 1 male 26.0\n",
"890 0 3 male 32.0"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.tail()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "d111faf3-636f-473f-aeb4-594db31b0058",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Sex Age\n",
"0 0 3 male 22.0\n",
"1 1 1 female 38.0\n",
"2 1 3 female 26.0\n",
"3 1 1 female 35.0\n",
"4 0 3 male 35.0"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "6e4a366a-fecd-4882-a47e-f0c1dec09a3a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Discard every row that still contains at least one missing value.\n",
"data = data.dropna(axis='index', how='any')"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "84b2e1f7-158b-4ccd-af2c-6512b0d69fd7",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Survived Pclass Sex Age\n",
"0 0 3 male 22.0\n",
"1 1 1 female 38.0\n",
"2 1 3 female 26.0\n",
"3 1 1 female 35.0\n",
"4 0 3 male 35.0\n",
".. ... ... ... ...\n",
"885 0 3 female 39.0\n",
"886 0 2 male 27.0\n",
"887 1 1 female 19.0\n",
"889 1 1 male 26.0\n",
"890 0 3 male 32.0\n",
"\n",
"[714 rows x 4 columns]\n"
]
}
],
"source": [
"print(data)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "7b6d896e-6181-4b5c-95f8-5cdd5d57eb4f",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>714.000000</td>\n",
" <td>714.000000</td>\n",
" <td>714.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>0.406162</td>\n",
" <td>2.236695</td>\n",
" <td>29.699118</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.491460</td>\n",
" <td>0.838250</td>\n",
" <td>14.526497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.420000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>20.125000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" <td>28.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.000000</td>\n",
" <td>3.000000</td>\n",
" <td>38.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1.000000</td>\n",
" <td>3.000000</td>\n",
" <td>80.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Age\n",
"count 714.000000 714.000000 714.000000\n",
"mean 0.406162 2.236695 29.699118\n",
"std 0.491460 0.838250 14.526497\n",
"min 0.000000 1.000000 0.420000\n",
"25% 0.000000 1.000000 20.125000\n",
"50% 0.000000 2.000000 28.000000\n",
"75% 1.000000 3.000000 38.000000\n",
"max 1.000000 3.000000 80.000000"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.describe()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "0851d3d0-9cb4-424f-b9da-32482e58f4ed",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Pclass\n",
"3 355\n",
"1 186\n",
"2 173\n",
"Name: count, dtype: int64"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Pclass'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "6f6bbbb4-ab72-4d49-9a4d-4aedfaa596bd",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# `plt` is the conventional alias for the pyplot interface, not for the\n",
"# top-level matplotlib package (which has no show()/subplots()/figure()).\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "100db300-eb44-45c6-9e13-01b515d7408f",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: xlabel='Pclass'>"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGrCAYAAADqwWxuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAkB0lEQVR4nO3de3BTZeL/8U9aaKDQpFugTTu0eEGBSotYEKKIKJVSKqLWXS+sgDIwYHEHqsh2B1FBLcs6iheEcWcXdJeK4oqXLvcCZV3qrW4FQVBY3NYpaV2QBioEaPP7w+H8vllBTUnJ0/b9mjkzzTlPTp4zRvP25CSx+f1+vwAAAAwSEe4JAAAA/C8CBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGaRfuCTRFY2OjqqurFRMTI5vNFu7pAACAn8Hv9+vIkSNKSkpSRMSPnyNpkYFSXV2t5OTkcE8DAAA0QVVVlbp37/6jY1pkoMTExEj6/gAdDkeYZwMAAH4Or9er5ORk63X8x7TIQDn9to7D4SBQAABoYX7O5RlcJAsAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDjtwj2B1uyC3/493FNoNb6anxPuKQAAzqOgzqAsXrxY6enpcjgccjgccrvdWrNmjbV92LBhstlsAcuUKVMC9lFZWamcnBxFR0crPj5eM2fO1KlTp0JzNAAAoFUI6gxK9+7dNX/+fF1yySXy+/16+eWXNWbMGP3rX//SZZddJkmaNGmS5s6da90nOjra+ruhoUE5OTlyuVzatm2bDhw4oHHjxql9+/Z68sknQ3RIAACgpQsqUEaPHh1w+4knntDixYv1/vvvW4ESHR0tl8t1xvuvX79eu3bt0saNG5WQkKDLL79c8+bN06xZs/Too48qKiqqiYcBAABakyZfJNvQ0KAVK1aovr5ebrfbWr98+XJ17dpVffv2VUFBgb777jtrW1lZmdLS0pSQkGCty8rKktfr1c6dO8/6WD6fT16vN2ABAACtV9AXye7YsUNut1vHjx9X586dtWrVKqWmpkqS7rrrLvXo0UNJSUnavn27Zs2apT179ujNN9+UJHk8noA4kWTd9ng8Z33MwsJCPfbYY8FOFQAAtFBBB0qvXr1UUVGhuro6vfHGGxo/frxKS0uVmpqqyZMnW+PS0tKUmJio4cOHa9++fbr44oubPMmCggLl5+dbt71er5KTk5u8PwAAYLag3+KJiopSz549lZGRocLCQvXr10/PPvvsGccOGjRIkrR3715JksvlUk1NTcCY07fPdt2KJNntduuTQ6cXAADQep3zF7U1NjbK5/OdcVtFRYUkKTExUZLkdru1Y8cO1dbWWmM2bNggh8NhvU0EAAAQ1Fs8BQUFys7OVkpKio4cOaKioiJt2bJF69at0759+1RUVKRRo0apS5cu2r59u2bMmKGhQ4cqPT1dkjRixAilpqbq7rvv1oIFC+TxeDR79mzl5eXJbrc3ywECAICWJ6hAqa2t1bhx43TgwAE5nU6lp6dr3bp1uuGGG1RVVaWNGzdq4cKFqq+vV3JysnJzczV79mzr/pGRkSouLtbUqVPldrvVqVMnjR8/PuB7UwAAAGx+v98f7kkEy+v1yul0qq6uzujrUfiq+9Dhq+4BoOUL5vWbHwsEAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGI
dAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGCcoAJl8eLFSk9Pl8PhkMPhkNvt1po1a6ztx48fV15enrp06aLOnTsrNzdXNTU1AfuorKxUTk6OoqOjFR8fr5kzZ+rUqVOhORoAANAqBBUo3bt31/z581VeXq6PP/5Y119/vcaMGaOdO3dKkmbMmKF3331XK1euVGlpqaqrq3Xrrbda929oaFBOTo5OnDihbdu26eWXX9ayZcs0Z86c0B4VAABo0Wx+v99/LjuIi4vTH/7wB912223q1q2bioqKdNttt0mSdu/erT59+qisrEyDBw/WmjVrdOONN6q6uloJCQmSpCVLlmjWrFn65ptvFBUVdcbH8Pl88vl81m2v16vk5GTV1dXJ4XCcy/Sb1QW//Xu4p9BqfDU/J9xTAACcI6/XK6fT+bNev5t8DUpDQ4NWrFih+vp6ud1ulZeX6+TJk8rMzLTG9O7dWykpKSorK5MklZWVKS0tzYoTScrKypLX67XOwpxJYWGhnE6ntSQnJzd12gAAoAUIOlB27Nihzp07y263a8qUKVq1apVSU1Pl8XgUFRWl2NjYgPEJCQnyeDySJI/HExAnp7ef3nY2BQUFqqurs5aqqqpgpw0AAFqQdsHeoVevXqqoqFBdXZ3eeOMNjR8/XqWlpc0xN4vdbpfdbm/WxwAAAOYIOlCioqLUs2dPSVJGRoY++ugjPfvss7r99tt14sQJHT58OOAsSk1NjVwulyTJ5XLpww8/DNjf6U/5nB4DAABwzt+D0tjYKJ/Pp4yMDLVv314lJSXWtj179qiyslJut1uS5Ha7tWPHDtXW1lpjNmzYIIfDodTU1HOdCgAAaCWCOoNSUFCg7OxspaSk6MiRIyoqKtKWLVu0bt06OZ1OTZw4Ufn5+YqLi5PD4dD9998vt9utwYMHS5JGjBih1NRU3X333VqwYIE8Ho9mz56tvLw83sIBAACWoAKltrZW48aN04EDB+R0OpWenq5169bphhtukCQ988wzioiIUG5urnw+n7KysvTiiy9a94+MjFRxcbGmTp0qt9utTp06afz48Zo7d25ojwoAALRo5/w9KOEQzOeow4nvQQkdvgcFAFq+8/I9KAAAAM2FQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYJKlAKCws1cOBAxcTEKD4+XjfffLP27NkTMGbYsGGy2WwBy5QpUwLGVFZWKicnR9HR0YqPj9fMmTN16tSpcz8aAADQKrQLZnBpaany8vI0cOBAnTp1Sr/73e80YsQI7dq1S506dbLGTZo0SXPnzrVuR0dHW383NDQoJydHLpdL27Zt04EDBzRu3Di1b99eTz75ZAgOCQAAtHRBBcratWsDbi9btkzx8fEqLy/X0KFDrfXR0dFyuVxn3Mf69eu1a9cubdy4UQkJCbr88ss1b948zZo1S48++qiioqKacBgAAKA1OadrUOrq6iRJcXFxAeuXL1+url27qm/fviooKNB3331nbSsrK1NaWpoSEhKsdVlZWfJ6vdq5c+cZH8fn88nr9QYsAACg9QrqDMr/1djYqOnTp+vqq69W3759rfV33XWXevTooaSkJG3fvl2zZs3Snj179Oabb0qSPB5PQJxIsm57PJ4zPlZhYaEee+yxpk4VAAC0ME0OlLy8PH322Wd67733AtZPnjzZ+jstLU2JiYkaPny49u3bp4svvrhJj1VQUKD8/HzrttfrVXJyct
MmDgAAjNekt3imTZum4uJibd68Wd27d//RsYMGDZIk7d27V5LkcrlUU1MTMOb07bNdt2K32+VwOAIWAADQegUVKH6
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data['Pclass'].value_counts().plot.bar()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "b77929c9-cafb-4ead-b6f3-1d84084fc6dd",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: >"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAApyUlEQVR4nO3de3SU5YHH8d8kmQykEEKIIUlNINIKKoJcmjTVtVASMHCwWLYrBncjulBdcCXZVcCKJFALi13WyrJy3BXoHkmp7gFUXMFwr2tALqYU10VCEVQSKHJISFKHMfPsHx6mjomQy4zzzOv3c86c+F7yzPObdzL8fOfmMsYYAQAAWCQm0hMAAAD4IgoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6cZGeQGf4/X6dOnVKPXv2lMvlivR0AABAOxhjdOHCBWVkZCgm5vLnSKKyoJw6dUqZmZmRngYAAOiEDz74QFdfffVl94nKgtKzZ09JnwVMTEzs8ng+n0+vv/66xo4dK7fb3eXxbETG6Of0fBIZncDp+SQydkVDQ4MyMzMD/45fTlQWlEtP6yQmJoasoCQkJCgxMdHRdzYyRjen55PI6AROzyeRMRTa8/IMXiQLAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUoKAAAwDoUFAAAYJ24jv7C7t279eSTT+rAgQOqra3Vhg0bNGnSpMD2L/sK5aVLl+rhhx+WJPXv318nTpwI2r548WLNnTu3o9NBFOs/99WQjueJNVqaIw0u2yJvy5W/yrsz3l8yISzjAgCCdfgMSlNTk4YOHaoVK1a0ub22tjbosmrVKrlcLk2ePDlov4ULFwbt9+CDD3YuAQAAcJwOn0EpLCxUYWHhl25PS0sLWn7ppZc0evRoXXPNNUHre/bs2WpfAAAAqRMFpSNOnz6tV199Vb/61a9abVuyZIkWLVqkrKwsFRUVqaSkRHFxbU/H6/XK6/UGlhsaGiRJPp9PPp+vy/O8NEYoxrKVjRk9sSa048WYoJ/hEMnbz8ZjGGpkjH5OzyeRMRTjtofLGNPpR3OXy9XqNSift3TpUi1ZskSnTp1St27dAuuXLVum4cOHKzk5WW+++abmzZunadOmadmyZW2OU1ZWpvLy8lbrKyoqlJCQ0NnpAwCAr1Bzc7OKiopUX1+vxMTEy+4b1oIyaNAgFRQUaPny5ZcdZ9WqVfrJT36ixsZGeTyeVtvbOoOSmZmps2fPXjFge/h8PlVWVqqgoEBut7vL49nIxoyDy7aEdDxPjNGikX7N3x8jrz88L5I9XDYuLOO2h43HMNTIGP2cnk8iY1c0NDQoJSWlXQUlbE/x/Pa3v9WRI0f0m9/85or75ubm6tNPP9X777+vgQMHttru8XjaLC5utzukN1yox7ORTRnD9U4br98VtrFtuO1sOobhQsbo5/R8Ehk7O157he1zUJ577jmNGDFCQ4cOveK+1dXViomJUWpqarimAwAAokiHz6A0NjaqpqYmsHz8+HFVV1crOTlZWVlZkj47hfPiiy/qn//5n1v9flVVlfbu3avRo0erZ8+eqqqqUklJie6++2717t27C1EAAIBTdLig7N+/X6NHjw4sl5aWSpKKi4u1Zs0aSdK6detkjNFdd93V6vc9Ho/WrVunsrIyeb1eZWdnq6SkJDAOAABAhwvKqFGjdKXX1c6YMUMzZsxoc9vw4cO1Z8+ejl4tAAD4GuG7eAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWCesXxYIOE3/ua9G7Lo9sUZLcz77ioCOfFLu+0smhHFWABAenEEBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUoKAAAwDoUFAAAYB
0KCgAAsA4FBQAAWIeCAgAArENBAQAA1qGgAAAA61BQAACAdSgoAADAOhQUAABgHQoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANbpcEHZvXu3Jk6cqIyMDLlcLm3cuDFo+z333COXyxV0ue2224L2OXfunKZOnarExEQlJSXpvvvuU2NjY5eCAAAA5+hwQWlqatLQoUO1YsWKL93ntttuU21tbeDy61//Omj71KlT9c4776iyslKbNm3S7t27NWPGjI7PHgAAOFJcR3+hsLBQhYWFl93H4/EoLS2tzW3vvvuuNm/erH379mnkyJGSpOXLl2v8+PH6xS9+oYyMjI5OCQAAOEyHC0p77Ny5U6mpqerdu7d+8IMf6Gc/+5n69OkjSaqqqlJSUlKgnEhSfn6+YmJitHfvXt1xxx2txvN6vfJ6vYHlhoYGSZLP55PP5+vyfC+NEYqxbGVjRk+sCe14MSbop9N0Np9Nx/xKbLyfhprTMzo9n0TGUIzbHi5jTKcfzV0ulzZs2KBJkyYF1q1bt04JCQnKzs7WsWPH9Oijj6pHjx6qqqpSbGysfv7zn+tXv/qVjhw5EjRWamqqysvL9cADD7S6nrKyMpWXl7daX1FRoYSEhM5OHwAAfIWam5tVVFSk+vp6JSYmXnbfkJ9BmTJlSuC/b7zxRg0ZMkQDBgzQzp07NWbMmE6NOW/ePJWWlgaWGxoalJmZqbFjx14xYHv4fD5VVlaqoKBAbre7y+PZyMaMg8u2hHQ8T4zRopF+zd8fI6/fFdKxbdDZfIfLxoVxVqFl4/001Jye0en5JDJ2xaVnQNojLE/xfN4111yjlJQU1dTUaMyYMUpLS9OZM2eC9vn000917ty5L33disfjkcfjabXe7XaH9IYL9Xg2simjtyU8JcLrd4VtbBt0NJ8tx7sjbLqfhovTMzo9n0TGzo7XXmH/HJQPP/xQH3/8sdLT0yVJeXl5On/+vA4cOBDYZ/v27fL7/crNzQ33dAAAQBTo8BmUxsZG1dTUBJaPHz+u6upqJScnKzk5WeXl5Zo8ebLS0tJ07NgxPfLII/rWt76lceM+O8183XXX6bbbbtP06dO1cuVK+Xw+zZo1S1OmTOEdPAAAQFInzqDs379fw4YN07BhwyRJpaWlGjZsmB5//HHFxsbq0KFDuv3223Xttdfqvvvu04gRI/Tb3/426CmatWvXatCgQRozZozGjx+vW265Rc8++2zoUgEAgKjW4TMoo0aN0uXe+LNly5Vf+JicnKyKioqOXjUAAPia4Lt4AACAdSgoAADAOhQUAABgHQoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWIeCAgAArENBAQAA1qGgAAAA61BQAACAdSgoAADAOhQUAABgHQoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANbpcEHZvXu3Jk6cqIyMDLlcLm3cuDGwzefzac6cObrxxhv1jW98QxkZGfqbv/kbnTp1KmiM/v37y+VyBV2WLFnS5TAAAMAZOlxQmpqaNHToUK1YsaLVtubmZh08eFDz58/XwYMHtX79eh05ckS33357q30XLlyo2trawOXBBx/sXAIAAOA4cR39hcLCQhUWFra5rVevXqqsrAxa96//+q/KycnRyZMnlZWVFVjfs2dPpaWldfTqAQDA10CHC0pH1dfXy+VyKSkpKWj9kiVLtGjRImVlZamoqEglJSWKi2t7Ol6vV16vN7Dc0NAg6bOnlHw+X5fneGmMUIxlKxszemJNaMeLMUE/naaz+W
w65ldi4/001Jye0en5JDKGYtz2cBljOv1o7nK5tGHDBk2aNKnN7Z988oluvvlmDRo0SGvXrg2sX7ZsmYYPH67k5GS
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data['Age'].hist()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "d2f480dd-d7bc-4669-ac68-70871b9fdf83",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Age</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Sex</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>female</th>\n",
" <td>0.754789</td>\n",
" <td>2.065134</td>\n",
" <td>27.915709</td>\n",
" </tr>\n",
" <tr>\n",
" <th>male</th>\n",
" <td>0.205298</td>\n",
" <td>2.335541</td>\n",
" <td>30.726645</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Age\n",
"Sex \n",
"female 0.754789 2.065134 27.915709\n",
"male 0.205298 2.335541 30.726645"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['Sex']).mean()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "84bef046-7ee8-461c-b166-87ff1271fcb9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>22.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>38.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>26.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>35.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>35.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Sex Age\n",
"0 0 3 0 22.0\n",
"1 1 1 1 38.0\n",
"2 1 3 1 26.0\n",
"3 1 1 1 35.0\n",
"4 0 3 0 35.0"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Encode Sex numerically (male -> 0, female -> 1). Assigning the result back avoids\n",
"# calling replace(..., inplace=True) on a column selection, which does not reliably\n",
"# update the parent DataFrame (and never does under pandas Copy-on-Write).\n",
"# Re-running the cell is safe: values that are already 0/1 are left untouched.\n",
"data['Sex'] = data['Sex'].replace({'male': 0, 'female': 1})\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "32d318f6-b55f-453e-ad18-373300eafcbf",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting sklearn\n",
" Downloading sklearn-0.0.post5.tar.gz (3.7 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25hBuilding wheels for collected packages: sklearn\n",
" Building wheel for sklearn (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for sklearn: filename=sklearn-0.0.post5-py3-none-any.whl size=2359 sha256=27bbea85f6603a3901da4612f859fed8f5ab0b3ab0ea81e2b102edf3f244042c\n",
" Stored in directory: /home/sbah/.cache/pip/wheels/38/1f/8d/4f812c590e074c1e928f5cec67bf5053b71f38e2648739403a\n",
"Successfully built sklearn\n",
"Installing collected packages: sklearn\n",
"Successfully installed sklearn-0.0.post5\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# The importable module is named sklearn, but the PyPI distribution is scikit-learn;\n",
"# the PyPI project called sklearn is only a deprecated placeholder (0.0.post5).\n",
"%pip install scikit-learn==1.2.2"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "4f796b2c-23c1-4871-8cc8-2717d5cbd9b5",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting scikit-learn\n",
" Downloading scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.6 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.6/9.6 MB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: numpy>=1.17.3 in /opt/jupyterhub/lib/python3.10/site-packages (from scikit-learn) (1.24.3)\n",
"Collecting scipy>=1.3.2\n",
" Downloading scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m34.4/34.4 MB\u001b[0m \u001b[31m38.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hCollecting threadpoolctl>=2.0.0\n",
" Downloading threadpoolctl-3.1.0-py3-none-any.whl (14 kB)\n",
"Collecting joblib>=1.1.1\n",
" Downloading joblib-1.2.0-py3-none-any.whl (297 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.0/298.0 KB\u001b[0m \u001b[31m64.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hInstalling collected packages: threadpoolctl, scipy, joblib, scikit-learn\n",
"Successfully installed joblib-1.2.0 scikit-learn-1.2.2 scipy-1.10.1 threadpoolctl-3.1.0\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Use the %pip magic so the install targets the running kernel's environment;\n",
"# the version is pinned to the release this notebook was last run against.\n",
"%pip install scikit-learn==1.2.2"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "794fd998-9c17-4394-b5cb-733f3a7579fa",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from sklearn.neighbors import KNeighborsClassifier"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "0b731dc4-79e2-4192-a999-01c16462e941",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"model = KNeighborsClassifier()"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "9b32b8c9-49a4-4d67-b34a-0634bba52a12",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Target vector: the Survived column (0 or 1 for each passenger).\n",
"Y = data['Survived']"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "2266c002-40b8-4d33-806b-9eea57f89ce1",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Feature matrix: every column of data except the Survived target.\n",
"X = data.drop(columns='Survived')"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "d9cc1284-1df6-4453-9f85-c7144ea86ad8",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"0 0\n",
"1 1\n",
"2 1\n",
"3 1\n",
"4 0\n",
" ..\n",
"885 0\n",
"886 0\n",
"887 1\n",
"889 1\n",
"890 0\n",
"Name: Survived, Length: 714, dtype: int64"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Y"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "1c3d641d-f9f5-4308-897d-670e870f3ba7",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>22.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>38.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>26.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>35.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>35.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>885</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>39.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>886</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>27.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>887</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>19.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>889</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>26.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>890</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>32.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>714 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Pclass Sex Age\n",
"0 3 0 22.0\n",
"1 1 1 38.0\n",
"2 3 1 26.0\n",
"3 1 1 35.0\n",
"4 3 0 35.0\n",
".. ... ... ...\n",
"885 3 1 39.0\n",
"886 2 0 27.0\n",
"887 1 1 19.0\n",
"889 1 0 26.0\n",
"890 3 0 32.0\n",
"\n",
"[714 rows x 3 columns]"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "13e2a59c-fa67-4eda-a476-901f6b16c7c8",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"0.8305322128851541"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# fit() hands back the estimator, so the score can be chained onto it.\n",
"# Scored on the very rows used for fitting, so this is training accuracy.\n",
"model.fit(X, Y).score(X, Y)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "83d889f6-bf26-45f6-bd7d-487cec9a5907",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,\n",
" 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1,\n",
" 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,\n",
" 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n",
" 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,\n",
" 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,\n",
" 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1,\n",
" 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1,\n",
" 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1,\n",
" 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1,\n",
" 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,\n",
" 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1,\n",
" 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,\n",
" 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0,\n",
" 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0,\n",
" 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1,\n",
" 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0,\n",
" 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0,\n",
" 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,\n",
" 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0,\n",
" 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0,\n",
" 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,\n",
" 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,\n",
" 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,\n",
" 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n",
" 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,\n",
" 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1,\n",
" 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,\n",
" 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,\n",
" 1, 0, 1, 0, 0, 0, 0, 1, 1, 0])"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.predict(X)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "c09f26a4-c768-4e88-88b1-c3182e68269d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def survie(model, pclass=3, Sex=0, Age=26):\n",
"    \"\"\"Print the class probabilities `model` predicts for one passenger.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    model : fitted classifier exposing ``predict_proba``.\n",
"    pclass, Sex, Age : feature values of the passenger, handed to the\n",
"        model in that order as a single row.\n",
"\n",
"    The result of ``predict_proba`` is printed; nothing is returned.\n",
"    \"\"\"\n",
"    # Local import: the definition then works whatever the cell order is.\n",
"    import pandas as pd\n",
"\n",
"    # Reuse the column names recorded at fit time (when there are any) so\n",
"    # scikit-learn does not warn that X lacks valid feature names.\n",
"    names = getattr(model, 'feature_names_in_', None)\n",
"    X = pd.DataFrame([[pclass, Sex, Age]], columns=names)\n",
"    if names is None:\n",
"        # Model was fitted on unnamed data: give it a bare (1, 3) array.\n",
"        X = X.to_numpy()\n",
"    print(model.predict_proba(X))"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "bf549553-c0c8-4637-8183-24df40bef4f4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "a666e946-670b-4681-8f82-ef06fb47924c",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.6 0.4]]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/jupyterhub/lib/python3.10/site-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"survie(model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1637e7dd-54a3-4676-a8c9-f76a226d669d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}