You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
11_Ensemble/앙상블과제_2022254026김홍열.ipynb

920 lines
325 KiB
Plaintext

1 year ago
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### [실습] 배깅(bagging, bootstrap aggregating)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.datasets import load_iris\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.ensemble import BaggingClassifier"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"iris = load_iris()\n",
"X, y = iris.data[:, [0,2]], iris.target\n",
"\n",
"model1 = DecisionTreeClassifier(max_depth =10, random_state=0).fit(X, y)\n",
"model2 = BaggingClassifier(DecisionTreeClassifier(max_depth=4), n_estimators=50, random_state=0).fit(X, y)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAD5S0lEQVR4nOzdd1hUR9vA4d/ZpXcQELDQxAYq9t5770ajRpOYnvglMT1v3ugbE9NjqjFNjcbEqInR2HvvHUEpCiiCCEgvy+7O98cBZKliRZz7urwMZ2f3zK7h2eecmXlGEUIIJEmSJEmSpPue5l53QJIkSZIkSbo9ZGInSZIkSZJUQ8jETpIkSZIkqYaQiZ0kSZIkSVINIRM7SZIkSZKkGkImdpIkSZIkSTWETOwkSZIkSZJqCJnYSZIkSZIk1RAysZMkSZIkSaohZGIn3RbR0dEoisLChQur9LwePXrQo0ePO9InSZLuHz4+PkydOvVed6NCer2e1157jXr16qHRaBgxYkSVX+NOv8+bef2ZM2eiKMqd6ZB018nEroZYuHAhiqIU/bGyssLLy4v+/fvz1VdfkZGRca+7WK2EhoYyc+ZMoqOj73VXJOmOKRkXFEXB3d2dnj17sn79+nvdvfvOL7/8wieffMKYMWNYtGgRL7300r3u0n0tOzubmTNnsmPHjnvdlRrF7F53QLq9/ve//+Hr60t+fj4JCQns2LGDF198kc8//5zVq1fTvHnzO3Jeb29vcnJyMDc3r9LzNm3adEf6U5nQ0FBmzZpFjx498PHxuSd9kKS7pTAuCCG4cuUKCxcuZNCgQaxZs4YhQ4bc6+4BcO7cOTSa6n2vYdu2bdSpU4cvvvjiXnelXDfzOf7nP//hjTfeuEM9Kl92djazZs0CkCM3t5FM7GqYgQMH0qZNm6Kf33zzTbZt28aQIUMYNmwYYWFhWFtb3/bzFt4lrCoLC4vb3pfbTQhBbm7uHfncJOluKBkXHn/8cWrXrs3vv/9ebRI7S0vLe92FSiUmJuLk5HSvu1Ghm/kczczMMDOr/ulAVlYWtra297ob1V71vjySbotevXrxzjvvEBMTw5IlS0weO3v2LGPGjMHFxQUrKyvatGnD6tWrS71GamoqL730Ej4+PlhaWlK3bl0eeeQRkpKSgLLn2CUkJPDoo49St25dLC0t8fT0ZPjw4SbDn2XNsUtMTCz64rGysqJFixYsWrTIpE3h+T799FN++OEH/P39sbS0pG3bthw+fLjCz2PhwoWMHTsWgJ49exYNURUOB/j4+DBkyBA2btxImzZtsLa2Zv78+UWfw4svvki9evWwtLSkQYMGfPTRRxiNRpNzGI1G5s6dS2BgIFZWVtSuXZunnnqKa9euVdg3SbobnJycsLa2LvVl/umnn9KpUydq1aqFtbU1rVu3ZsWKFaWen5OTw/Tp03F1dcXe3p5hw4YRFxeHoijMnDnTpO2OHTto06YNVlZW+Pv7M3/+/DLndJWcG1Y4jLx3715efvll3NzcsLW1ZeTIkVy9etXkuUajkZkzZ+Ll5YWNjQ09e/YkNDT0huebZWVlMWPGjKLf60aNGvHpp58ihACux5vt27dz5syZUjGjLEIIZs+eTd26dYv6dObMmTLbViWufPnllzRr1gwrKyvc3NwYMGAAR44cKfdzzM/PZ9asWQQEBGBlZUWtWrXo0qULmzdvLmpT1r+HXq/nvffeK4qtPj4+vPXWW+Tl5Zm0K4yXe/bsoV27dlhZWeHn58evv/5a4WceHR2Nm5sbALNmzSr6TAv//5k6dSp2dnZERUUxaNAg7O3tmThxYtHncKPxdf369XTt2hVbW1vs7e0ZPHhwqX+HG/muup9U/xRdui0mT57MW2+9xaZNm3jiiScAOHPmDJ07d6ZOnTq88cYb2Nra8ueffzJixAhWrlzJyJEjAcjMzKRr166EhYXx2GOP0apVK5KSkli9ejWXLl3C1dW1zHOOHj2aM2
fO8MILL+Dj40NiYiKbN28mNja23OHPnJwcevToQWRkJM8//zy+vr4sX76cqVOnkpqayv/93/+ZtF+6dCkZGRk89dRTKIrCxx9/zKhRozh//ny5w8LdunVj+vTpfPXVV7z11ls0adIEoOhvUIczJkyYwFNPPcUTTzxBo0aNyM7Opnv37sTFxfHUU09Rv3599u3bx5tvvkl8fDxz584tev5TTz3FwoULefTRR5k+fToXLlzgm2++4fjx4+zdu7fKQ9aSdCvS0tJISkpCCEFiYiJff/01mZmZTJo0yaTdl19+ybBhw5g4cSI6nY4//viDsWPH8u+//zJ48OCidlOnTuXPP/9k8uTJdOjQgZ07d5o8Xuj48eMMGDAAT09PZs2ahcFg4H//+1/RF/qNeOGFF3B2dubdd98lOjqauXPn8vzzz7Ns2bKiNm+++SYff/wxQ4cOpX///pw8eZL+/fuTm5tb6esLIRg2bBjbt2/n8ccfJzg4mI0bN/Lqq68SFxfHF198gZubG4sXL+b9998nMzOTOXPmAKYxo6T//ve/zJ49m0GDBjFo0CCOHTtGv3790Ol0Ju2qElcef/xxFi5cyMCBA5k2bRp6vZ7du3dz4MABkzuyxc2cOZM5c+Ywbdo02rVrR3p6OkeOHOHYsWP07du33P5PmzaNRYsWMWbMGGbMmMHBgweZM2cOYWFh/P333yZtIyMjGTNmDI8//jhTpkzhl19+YerUqbRu3ZrAwMAyX9/NzY158+bxzDPPMHLkSEaNGgVgMl1Ir9fTv39/unTpwqeffoqNjQ1w4/F18eLFTJkyhf79+/PRRx+RnZ3NvHnz6NKlC8ePHy/6HrqZ76pqTUg1woIFCwQgDh8+XG4bR0dH0bJly6Kfe/fuLZo1ayZyc3OLjhmNRtGpUycREBBQdOy///2vAMRff/1V6jWNRqMQQogLFy4IQCxYsEAIIcS1a9cEID755JMK+929e3fRvXv3op/nzp0rALFkyZKiYzqdTnTs2FHY2dmJ9PR0k/PVqlVLpKSkFLX9559/BCDWrFlT4XmXL18uALF9+/ZSj3l7ewtAbNiwweT4e++9J2xtbUV4eLjJ8TfeeENotVoRGxsrhBBi9+7dAhC//fabSbsNGzaUeVyS7pTCuFDyj6WlpVi4cGGp9tnZ2SY/63Q6ERQUJHr16lV07OjRowIQL774oknbqVOnCkC8++67RceGDh0qbGxsRFxcXNGxiIgIYWZmJkp+/Xh7e4spU6aU6nufPn2K4owQQrz00ktCq9WK1NRUIYQQCQkJwszMTIwYMcLk9WbOnCkAk9csy6pVqwQgZs+ebXJ8zJgxQlEUERkZWXSse/fuIjAwsMLXE0KIxMREYWFhIQYPHmzS97feeqtUn240rmzbtk0AYvr06aXOV/wcJT/HFi1aiMGDB1fY33fffdfk3+PEiRMCENOmTTNp98orrwhAbNu2zeR8gNi1a5fJ+7e0tBQzZsyo8LxXr14t9f9MoSlTpghAvPHGGybHbzS+ZmRkCCcnJ/HEE0+YtEtISBCOjo5Fx2/0u+p+IodiHyB2dnZFq2NTUlLYtm0b48aNIyMjg6SkJJKSkkhOTqZ///5EREQQFxcHwMqVK2nRokXRHbziylsib21tjYWFBTt27KjS8OO6devw8PBgwoQJRcfMzc2ZPn06mZmZ7Ny506T9Qw89hLOzc9HPXbt2BeD8+fM3fM6y+Pr60r9/f5Njy5cvp2vXrjg7Oxd9XklJSfTp0weDwcCuXbuK2jk6OtK3b1+Tdq1bt8bOzo7t27ffUt8kqaq+/fZbNm/ezObNm1myZAk9e/Zk2rRp/PXXXybtis8jvXbtGmlpaXTt2pVjx44VHd+wYQMAzz77rMlzX3jhBZOfDQYDW7ZsYcSIEXh5eRUdb9CgAQMHDrzhvj/55JMmcaZr164YDAZiYmIA2Lp1K3q9vtL+lGfdunVotVqmT59ucnzGjBkIIW5q9fCWLVvQ6XS88M
ILJn1/8cUXS7W90biycuVKFEXh3XffLfUaFZUqcXJy4syZM0RERNxw/9etWwfAyy+/bHJ8xowZAKxdu9bkeNOmTYt
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"x_min, x_max = X[:,0].min() - 1, X[:,0].max() + 1\n",
"y_min, y_max = X[:,1].min() - 1, X[:,1].max() + 1\n",
"xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))\n",
"\n",
"plt.subplot(121)\n",
"Z1 = model1.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)\n",
"plt.contourf(xx, yy, Z1, alpha=0.6, cmap=mpl.cm.jet)\n",
"plt.scatter(X[:,0], X[:,1], c=y, alpha=1, s=50, cmap=mpl.cm.jet, edgecolors=\"k\")\n",
"plt.title(\"Decision tree\")\n",
"plt.subplot(122)\n",
"\n",
"Z2 = model2.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)\n",
"plt.contourf(xx, yy, Z2, alpha=0.6, cmap=mpl.cm.jet)\n",
"plt.scatter(X[:,0], X[:,1],c=y,alpha=1,s=50,cmap=mpl.cm.jet,edgecolors=\"k\")\n",
"plt.title(\"Bagging of decision trees\")\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### [실습] 랜덤 포리스트 (random forest)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn import datasets\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn import metrics"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Class names : ['setosa' 'versicolor' 'virginica']\n",
"target : [0:setosa, 1:versicolor, 2:virginical]\n",
"No. of Data : 150\n",
"Featrue names : ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']\n",
" sepal length sepal width petal length petal width species\n",
"0 5.1 3.5 1.4 0.2 0\n",
"1 4.9 3.0 1.4 0.2 0\n",
"2 4.7 3.2 1.3 0.2 0\n",
"3 4.6 3.1 1.5 0.2 0\n",
"4 5.0 3.6 1.4 0.2 0\n",
"No. of traing data: 105\n",
"No. of test data: 45\n",
"Accuracy : 0.9333333333333333\n"
]
}
],
"source": [
"iris = datasets.load_iris()\n",
"print('Class names :', iris.target_names)\n",
"print('target : [0:setosa, 1:versicolor, 2:virginical]')\n",
"print('No. of Data :', len(iris.data))\n",
"print('Featrue names :', iris.feature_names)\n",
"\n",
"data = pd.DataFrame({\n",
" 'sepal length': iris.data[:,0], 'sepal width': iris.data[:,1], 'petal length': iris.data[:,2],\n",
" 'petal width':iris.data[:,3], 'species':iris.target\n",
"})\n",
"print(data.head()) # 일부 데이터 출력\n",
"\n",
"x = data[['sepal length', 'sepal width', 'petal length', 'petal width']] # 입력\n",
"y = data['species'] # 출력\n",
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3) # 테스트 데이터 30%\n",
"print('No. of traing data: ', len(x_train))\n",
"print('No. of test data:', len(y_test))\n",
"\n",
"forest = RandomForestClassifier(n_estimators=100) # 모델 생성\n",
"forest.fit(x_train, y_train)\n",
"\n",
"y_pred = forest.predict(x_test) # 추론 (예측)\n",
"print('Accuracy :', metrics.accuracy_score(y_test, y_pred))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### [실습] 배깅 회귀 (Bagging Regression)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.datasets import load_boston # scikit-leanr < 1.2\n",
"# from sklearn.datasets import fetch_california_housing # replace dataset\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.ensemble import BaggingRegressor\n",
"from sklearn.tree import DecisionTreeRegressor\n",
"import matplotlib.pyplot as plt\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \n",
"0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \\\n",
"1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n",
"2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n",
"3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n",
"4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n",
"\n",
" PTRATIO B LSTAT PRICE \n",
"0 15.3 396.90 4.98 24.0 \n",
"1 17.8 396.90 9.14 21.6 \n",
"2 17.8 392.83 4.03 34.7 \n",
"3 18.7 394.63 2.94 33.4 \n",
"4 18.7 396.90 5.33 36.2 \n",
"RMSE: 4.594919\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\pinb\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\sklearn\\utils\\deprecation.py:87: FutureWarning: Function load_boston is deprecated; `load_boston` is deprecated in 1.0 and will be removed in 1.2.\n",
"\n",
" The Boston housing prices dataset has an ethical problem. You can refer to\n",
" the documentation of this function for further details.\n",
"\n",
" The scikit-learn maintainers therefore strongly discourage the use of this\n",
" dataset unless the purpose of the code is to study and educate about\n",
" ethical issues in data science and machine learning.\n",
"\n",
" In this special case, you can fetch the dataset from the original\n",
" source::\n",
"\n",
" import pandas as pd\n",
" import numpy as np\n",
"\n",
" data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n",
" raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n",
" data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n",
" target = raw_df.values[1::2, 2]\n",
"\n",
" Alternative datasets include the California housing dataset (i.e.\n",
" :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing\n",
" dataset. You can load the datasets as follows::\n",
"\n",
" from sklearn.datasets import fetch_california_housing\n",
" housing = fetch_california_housing()\n",
"\n",
" for the California housing dataset and::\n",
"\n",
" from sklearn.datasets import fetch_openml\n",
" housing = fetch_openml(name=\"house_prices\", as_frame=True)\n",
"\n",
" for the Ames housing dataset.\n",
" warnings.warn(msg, category=FutureWarning)\n"
]
}
],
"source": [
"boston = load_boston() # < 1.2\n",
"data = pd.DataFrame(boston.data)\n",
"data.columns = boston.feature_names\n",
"data['PRICE'] = boston.target\n",
"print(data.head())\n",
"\n",
"# replace dataset\n",
"# california = fetch_california_housing()\n",
"# data = pd.DataFrame(california.data)\n",
"# data.columns = california.feature_names\n",
"# data['PRICE'] = california.target\n",
"# print(data.head())\n",
"\n",
"X, y = data.iloc[:,:-1],data.iloc[:,-1]\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)\n",
"bag = BaggingRegressor(base_estimator = DecisionTreeRegressor( ), n_estimators = 10,\n",
"max_features=1.0, bootstrap_features=False, random_state=0)\n",
"bag.fit(X_train,y_train)\n",
"preds = bag.predict(X_test)\n",
"rmse = np.sqrt(mean_squared_error(y_test, preds))\n",
"print(\"RMSE: %f\" % (rmse))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### [실습] AdaBoost - 회귀"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.tree import DecisionTreeRegressor\n",
"from sklearn.ensemble import AdaBoostRegressor"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAADAIUlEQVR4nOydd3gU1d6A382mdwIhCSQkCKFD6E0jiaAURTAgCCiCBb2KgGK9VwThu1dFpdguolfxKkWEgF4FFTARRAREAkiTklCTUAIJgdTNfH/MzuxsspsCmy3JeZ9nn5w5c2bO2c3uzG9+VSdJkoRAIBAIBAJBPcTN0QsQCAQCgUAgcBRCEBIIBAKBQFBvEYKQQCAQCASCeosQhAQCgUAgENRbhCAkEAgEAoGg3iIEIYFAIBAIBPUWIQgJBAKBQCCotwhBSCAQCAQCQb1FCEICgUAgEAjqLUIQEgjqCUuWLEGn05GRkeHopQichAkTJhATE+PoZQgEDkUIQgKBi/HBBx+g0+no1auX3eacMGECOp1Ofbm7uxMVFcV9993HgQMH7LYOaxw4cIBZs2ZVW8ibNWuW2fvx8PAgJiaGKVOmcPny5Vpdq0AgcC7cHb0AgUBQM5YuXUpMTAw7duzg6NGjtGzZ0i7zenl58fHHHwNQWlrKsWPHWLRoEd9//z0HDhygSZMmdlmHJQ4cOMCrr75KQkJCjTQc//73v/H39+fq1ats2rSJd999lz/++INffvml9hbrRHz00UeUlZU5ehkCgUMRgpBA4EKkp6fz66+/kpyczGOPPcbSpUuZOXOmXeZ2d3fn/vvvN+vr3bs3d911F9999x2PPvqoXdZhS0aOHEmjRo0AeOyxx7jvvvv48ssv2bFjBz179rTbOsrKyiguLsbb29tucwJ4eHjYdT6BwBkRpjGBwIVYunQpDRo04M4772TkyJEsXbrU4rj9+/dz22234ePjQ2RkJP/3f/9n8cn/66+/5s4776RJkyZ4eXnRokUL5syZg8FgqNZ6wsPDAVlI0nL8+HHuvfdeQkJC8PX1pXfv3nz33XcVjj937hwPP/wwYWFheHt7ExcXx2effVZh3IoVK+jWrRsBAQEEBgbSsWNHFi5cCMi+T/feey8AiYmJqrkrNTW1Wu9BS3x8PADHjh0z69++fTuDBg0iKCgIX19f+vXrx9atWyscn5qaSvfu3fH29qZFixZ8+OGHqhlOi06nY/LkySxdupT27dvj5eXF999/D8CZM2d46KGHCAsLw8vLi/bt2/PJJ59UmOvdd9+lffv2+Pr60qBBA7p3786yZcvU/VeuXGHatGnExMTg5eVF48aNuf322/njjz/UMZZ8hK5evcr06dOJiorCy8uL1q1b89ZbbyFJksX3sHbtWjp06KCuVXkfAoGrIDRCAoELsXTpUpKSkvD09GTMmDH8+9//ZufOnfTo0UMdk5WVRWJiIqWlpbz44ov4+fmxePFifHx8KpxvyZIl+Pv788wzz+Dv789PP/3EK6+8Ql5eHm+++WaF8RcuXADAYDBw/PhxXnjhBRo2bMhdd92ljsnOzqZv375cu3aNKVOm0LBhQz777DPuvvtuVq1axT333ANAQUEBCQkJHD16lMmTJ9O8eXO++uorJkyYwOXLl5k6dSoAGzZsYMyYMfTv35833ngDgIMHD7J161amTp3KrbfeypQpU3jnnXf4+9//Ttu2bQHUvzVB8TFq0KCB2vfTTz8xePBgunXrxsyZM3Fzc+PTTz/ltttuY8uWLarmaPfu3QwaNIiIiAheffVVDAYDs2fPJjQ01OJcP/30EytXrmTy5Mk0atSImJgYsrOz6d27typkhIaGsn79eh5++GHy8vKYNm0aIJu0pkyZwsiRI5k6dSqFhYXs3buX7du3M3bsWAAef/xxVq1axeTJk2nXrh0XL17kl19+4eDBg3Tt2tXimiRJ4u677yYlJYWHH36Yzp0788MPP/Dcc89x5s
wZ5s+fbzb+l19+ITk5mSeeeIKAgADeeecdRowYwcmTJ2nYsGGNP3+BwCFIAoHAJfj9998lQNqwYYMkSZJUVlYmRUZGSlOnTjUbN23aNAmQtm/frvadO3dOCgoKkgApPT1d7b927VqFeR577DHJ19dXKiwsVPsefPBBCajwatq0qbRr1y6L82/ZskXtu3LlitS8eXMpJiZGMhgMkiRJ0oIFCyRA+uKLL9RxxcXFUp8+fSR/f38pLy9PkiRJmjp1qhQYGCiVlpZa/Wy++uorCZBSUlKsjtEyc+ZMCZAOHz4snT9/XsrIyJA++eQTycfHRwoNDZWuXr0qSZL8GcfGxkoDBw6UysrKzD635s2bS7fffrvaN3ToUMnX11c6c+aM2nfkyBHJ3d1dKn+pBSQ3Nzdp//79Zv0PP/ywFBERIV24cMGs/7777pOCgoLU/9ewYcOk9u3bV/oeg4KCpCeffLLSMQ8++KAUHR2tbq9du1YCpP/7v/8zGzdy5EhJp9NJR48eNXsPnp6eZn179uyRAOndd9+tdF6BwJkQpjGBwEVYunQpYWFhJCYmArJpYvTo0axYscLMlLVu3Tp69+5t5uMSGhrKuHHjKpxTqyW6cuUKFy5cID4+nmvXrnHo0CGzsd7e3mzYsIENGzbwww8/8OGHH+Lv78+QIUP466+/zObv2bMnt9xyi9rn7+/PpEmTyMjIUKPM1q1bR3h4OGPGjFHHeXh4MGXKFPLz8/n5558BCA4O5urVq2zYsOG6PrfKaN26NaGhocTExPDQQw/RsmVL1q9fj6+vLwBpaWkcOXKEsWPHcvHiRS5cuMCFCxe4evUq/fv3Z/PmzZSVlWEwGNi4cSPDhw83cxpv2bIlgwcPtjh3v379aNeunbotSRKrV69m6NChSJKkznXhwgUGDhxIbm6uatYKDg7m9OnT7Ny50+p7Cw4OZvv27Zw9e7ban8e6devQ6/VMmTLFrH/69OlIksT69evN+gcMGECLFi3U7U6dOhEYGMjx48erPadA4GiEaUwgcAEMBgMrVqwgMTGR9PR0tb9Xr168/fbbbNq0iTvuuAOAEydOWAytb926dYW+/fv38/LLL/PTTz+Rl5dnti83N9dsW6/XM2DAALO+IUOGEBsby0svvcTq1asrnV8xVZ04cYIOHTpw4sQJYmNjcXNzszoO4IknnmDlypUMHjyYpk2bcscddzBq1CgGDRpk4ZOqGatXryYwMJDz58/zzjvvkJ6ebiYcHjlyBIAHH3zQ6jlyc3MpLCykoKDAYgSftai+5s2bm22fP3+ey5cvs3jxYhYvXmzxmHPnzgHwwgsvsHHjRnr27EnLli254447GDt2LDfffLM6du7cuTz44INERUXRrVs3hgwZwvjx47npppusvpcTJ07QpEkTAgICzPrL/08UmjVrVuEcDRo04NKlS1bnEAicDSEICQQuwE8//URmZiYrVqxgxYoVFfYvXbpUFYSqy+XLl+nXrx+BgYHMnj2bFi1a4O3tzR9//MELL7xQrbDqyMhIWrduzebNm2s0d01o3LgxaWlp/PDDD6xfv57169fz6aefMn78eIuO1TXh1ltvVaPGhg4dSseOHRk3bhy7du3Czc1N/QzefPNNOnfubPEc/v7+FBYW1nju8j5bylz333+/VcGrU6dOgCyYHD58mG+//Zbvv/+e1atX88EHH/DKK6/w6quvAjBq1Cji4+NZs2YNP/74I2+++SZvvPEGycnJVrVUNUWv11vsl8o5VgsEzowQhAQCF2Dp0qU0btyY999/v8K+5ORk1qxZw6JFi/Dx8SE6OlrVZGg5fPiw2XZqaioXL14kOTmZW2+9Ve3XapyqQ2lpKfn5+ep2dHR0hbkA1dQWHR2t/t27dy9lZWVmWqHy4wA8PT0ZOnQoQ4cOpaysjCeeeIIPP/yQGTNm0LJlywpRWdeDv78/M2fOZOLEiaxcuZL77rtPNfsEBgZW0IZpady4Md7e3hw9erTCPkt9lg
gNDSUgIACDwVDpXAp+fn6MHj2a0aNHU1xcTFJSEv/85z956aWX1DD8iIgInnjiCZ544gnOnTtH165d+ec//2lVEIq
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"rng = np.random.RandomState(1)\n",
"X = np.linspace(0, 6, 100)[:, np.newaxis]\n",
"y = np.sin(X).ravel() + np.sin(6*X).ravel() + rng.normal(0, 0.1, X.shape[0])\n",
"\n",
"regr_1 = DecisionTreeRegressor(max_depth=4)\n",
"regr_2 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4), n_estimators=100, random_state=rng)\n",
"\n",
"regr_1.fit(X, y)\n",
"regr_2.fit(X, y)\n",
"y_1 = regr_1.predict(X)\n",
"y_2 = regr_2.predict(X)\n",
"\n",
"plt.figure()\n",
"plt.scatter(X, y, c=\"k\", label=\"training samples\")\n",
"plt.plot(X, y_1, c=\"g\", label=\"n_estimators=1\", linewidth=2)\n",
"plt.plot(X, y_2, c=\"r\", label=\"n_estimators=100\", linewidth=2)\n",
"plt.xlabel(\"data\")\n",
"plt.ylabel(\"target\")\n",
"plt.title(\"AdaBoost Regression\")\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### [실습] Gradient Boosting 기반 회귀"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn import datasets\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn import ensemble\n",
"from sklearn.metrics import mean_squared_error, r2_score\n",
"from sklearn.model_selection import cross_val_predict"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(506, 13) (506,)\n",
"['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'\n",
" 'B' 'LSTAT']\n",
" CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \n",
"0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \\\n",
"1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n",
"2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n",
"3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n",
"4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n",
"\n",
" PTRATIO B LSTAT MEDV \n",
"0 15.3 396.90 4.98 24.0 \n",
"1 17.8 396.90 9.14 21.6 \n",
"2 17.8 392.83 4.03 34.7 \n",
"3 18.7 394.63 2.94 33.4 \n",
"4 18.7 396.90 5.33 36.2 \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\pinb\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\sklearn\\utils\\deprecation.py:87: FutureWarning: Function load_boston is deprecated; `load_boston` is deprecated in 1.0 and will be removed in 1.2.\n",
"\n",
" The Boston housing prices dataset has an ethical problem. You can refer to\n",
" the documentation of this function for further details.\n",
"\n",
" The scikit-learn maintainers therefore strongly discourage the use of this\n",
" dataset unless the purpose of the code is to study and educate about\n",
" ethical issues in data science and machine learning.\n",
"\n",
" In this special case, you can fetch the dataset from the original\n",
" source::\n",
"\n",
" import pandas as pd\n",
" import numpy as np\n",
"\n",
" data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n",
" raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n",
" data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n",
" target = raw_df.values[1::2, 2]\n",
"\n",
" Alternative datasets include the California housing dataset (i.e.\n",
" :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing\n",
" dataset. You can load the datasets as follows::\n",
"\n",
" from sklearn.datasets import fetch_california_housing\n",
" housing = fetch_california_housing()\n",
"\n",
" for the California housing dataset and::\n",
"\n",
" from sklearn.datasets import fetch_openml\n",
" housing = fetch_openml(name=\"house_prices\", as_frame=True)\n",
"\n",
" for the Ames housing dataset.\n",
" warnings.warn(msg, category=FutureWarning)\n"
]
}
],
"source": [
"boston = datasets.load_boston() # Boston 집값 데이터, 13개 속성, 마지막 중간값 정보\n",
"print(boston.data.shape, boston.target.shape)\n",
"print(boston.feature_names)\n",
"\n",
"data = pd.DataFrame(boston.data, columns=boston.feature_names)\n",
"data = pd.concat([data, pd.Series(boston.target, name='MEDV')], axis=1)\n",
"print(data.head())\n",
"X = data.iloc[:,:-1]\n",
"y = data.iloc[:,-1]\n",
"x_training_set, x_test_set, y_training_set, y_test_set = train_test_split(X, y, test_size=0.10, random_state=42, shuffle=True)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\pinb\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\sklearn\\ensemble\\_gb.py:294: FutureWarning: The loss 'ls' was deprecated in v1.0 and will be removed in version 1.2. Use 'squared_error' which is equivalent.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"R2 sq: 0.9800347273281852\n",
"Mean squared error: 5.88\n",
"Test Variance score: 0.91\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjIAAAHHCAYAAACle7JuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABlDElEQVR4nO3dd1yVZf8H8M9hI1OGgAooOHDvLYpimisVCFDJnWk4wPEz03Jkae7MWZlaKjFEe8pKy0RxgApiZS5IEWU7GCLrcP/+ME4eOYd5OAM+79eL1/Nw39e5zxdu7Xy87muIBEEQQERERKSBtFRdABEREVF1McgQERGRxmKQISIiIo3FIENEREQai0GGiIiINBaDDBEREWksBhkiIiLSWAwyREREpLEYZIiIiEhjMcgQ1VMikQgrV65UdRnlmjJlCoyNjVVdhtrav38/RCIR7t27Jznm5uYGNzc3ldX0Klk1EikSgwxROe7evYs5c+agVatWaNCgARo0aIC2bdvC398ff/zxh6rLq1Vubm4QiUQVftU0DOXl5WHlypWIiIhQSN3K1KxZM6nfRaNGjeDq6oqjR4+qurQq0eR7QKSj6gKI1NWPP/4IHx8f6OjoYOLEiejUqRO0tLRw8+ZNhIeHY9euXbh79y4cHR1VXWqtWLZsGWbMmCH5/vLly9i2bRvef/99tGnTRnK8Y8eONXqfvLw8rFq1CgDUqiehsjp37oyFCxcCAJKTk7Fnzx54eHhg165dmDVrltLrOXnyZJVfo+n3gOo3BhkiGRISEuDr6wtHR0ecOnUKdnZ2Uuc//fRT7Ny5E1pa5XdqPnv2DEZGRrVZaq157bXXpL43MDDAtm3b8Nprr5X7YafJP3N1NGnSBH5+fpLvJ02ahBYtWmDLli1yg0xxcTFKSkqgp6en8Hpq45pE6oyPlohkWL9+PZ49e4Z9+/aVCTEAoKOjg3nz5sHe3l5yrHQ8R0JCAkaMGAETExNMnDgRwIsP94ULF8Le3h76+vpo3bo1Nm7ciJc3n7937x5EIhH2799f5v1efYSzcuVKiEQixMfHY8qUKTA3N4eZmRmmTp2KvLw8qdcWFBQgMDAQ1tbWMDExwRtvvIEHDx7U8DckXcfff/+NCRMmoGHDhujfvz8A+WM1pkyZgmbNmkl+ZmtrawDAqlWr5D6uevjwIcaOHQtjY2NYW1tj0aJFEIvF5dY2atQoODk5yTzXp08fdO/eXfL9r7/+iv79+8Pc3BzGxsZo3bo13n///Ur+FqTZ2tqiTZs2uHv3ruRnFIlE2LhxI7Zu3QpnZ2fo6+vj77//BgDcvHkTXl5esLCwgIGBAbp3747//e9/Za57/fp1DB48GIaGhmjatCnWrFmDkpKSMu1k/d7z8/OxcuVKtGrVCgYGBrCzs4OHhwcSEhIqdQ8UXSORIrFHhkiGH3/8ES1atECvXr2q9Lri4mIMGzYM/fv3x8aNG9GgQQMIgoA33ngDp0+fxvTp09G5c2ecOHECixcvxsOHD7Fly5Zq1+nt7Y3mzZtj7dq1iI2NxVdffYVGjRrh008/lbSZMWMGDh48iAkTJqBv3774/fffMXLkyGq/pyxvvvkmWrZsiU8++UQqnFXE2toau3btwuzZszFu3Dh4eHgAkH5cJRaLMWzYMPTq1QsbN27Eb7/9hk2bNsHZ2RmzZ8+We20fHx9MmjQJly9fRo8ePSTHExMTERUVhQ0bNgB48eE7atQodOzYEatXr4a+vj7i4+Nx/vz5qv4aAABFRUVISkqCpaWl1PF9+/YhPz8fM2fOhL6+PiwsLHD9+nX069cPTZo0wXvvvQcjIyOEhIRg7NixOHLkCMaNGwcASE1NxaBBg1BcXCxp98UXX8DQ0LDCesRiMUaNGoVTp07B19cX8+fPR05ODn799Vf89ddfGDJkSLn3QBk1EtWIQERSsrKyBADC2LFjy5x78uSJkJGRIfnKy8uTnJs8ebIAQHjvvfekXn
Ps2DEBgLBmzRqp415eXoJIJBLi4+MFQRCEu3fvCgCEffv2lXlfAMKKFSsk369YsUIAIEybNk2q3bhx4wRLS0vJ93FxcQIA4d1335VqN2HChDLXrEhoaKgAQDh9+nSZOsaPH1+m/cCBA4WBAweWOT558mTB0dFR8n1GRobcWkp/p6tXr5Y63qVLF6Fbt27l1puVlSXo6+sLCxculDq+fv16QSQSCYmJiYIgCMKWLVsEAEJGRka515PF0dFRGDp0qOTPw7Vr1wRfX18BgDB37lxBEP67r6ampkJ6errU693d3YUOHToI+fn5kmMlJSVC3759hZYtW0qOBQQECACE6OhoybH09HTBzMxMACDcvXtXcvzV3/vXX38tABA2b95cpv6SkhJBEMq/B7VRI5Ei8dES0Suys7MBQOa0Xzc3N1hbW0u+duzYUabNq70EP/30E7S1tTFv3jyp4wsXLoQgCPj555+rXeurYzBcXV3x6NEjyc/w008/AUCZ9w4ICKj2e1amDkWT9XP+888/5b7G1NQUw4cPR0hIiFQvUXBwMHr37g0HBwcAgLm5OQDg+++/r9ZjkJMnT0r+PHTq1AmhoaF46623pHrFAMDT01PyCAcAHj9+jN9//x3e3t7IyclBZmYmMjMz8ejRIwwbNgx37tzBw4cPAby4j71790bPnj0lr7e2tpY8uizPkSNHYGVlhblz55Y5JxKJyn2tsmokqgkGGaJXmJiYAAByc3PLnNuzZw9+/fVXHDx4UOZrdXR00LRpU6ljiYmJaNy4seS6pUpn/iQmJla71tIP41INGzYEADx58kRybS0tLTg7O0u1a926dbXfU5bmzZsr9HovMzAwkAoAwIufs/RnLI+Pjw+SkpJw8eJFAC8GccfExMDHx0eqTb9+/TBjxgzY2NjA19cXISEhlQ41vXr1wq+//orffvsNFy5cQGZmJr755psyj1Re/R3Fx8dDEAR88MEHUuHY2toaK1asAACkp6cDeHEfW7ZsWea9K3MfExIS0Lp1a+joVH0kgbJqJKoJjpEheoWZmRns7Ozw119/lTlXOmZG3uJe+vr6Fc5kkkfev47LG9Sqra0t87hQhXEqiiBrHIRIJJJZR0WDdF8l72esjNGjR6NBgwYICQlB3759ERISAi0tLbz55puSNoaGhjh79ixOnz6N48eP45dffkFwcDAGDx6MkydPVvj+VlZWGDJkSIW1vPo7Kg1KixYtwrBhw2S+pkWLFhVetzZpQo1EDDJEMowcORJfffUVLl26JNVVXh2Ojo747bffkJOTI9Urc/PmTcl54L/elKdPn0q9viY9No6OjigpKZH8q7zUrVu3qn3NymrYsKHMxz+v/jwVPd6oCSMjI4waNQqhoaHYvHkzgoOD4erqisaNG0u109LSgru7O9zd3bF582Z88sknWLZsGU6fPl2pkFIdpTOqdHV1K3wPR0dH3Llzp8zxytxHZ2dnREdHo6ioCLq6ujLbyLsHyqqRqCb4aIlIhv/7v/9DgwYNMG3aNKSlpZU5X5UejxEjRkAsFmP79u1Sx7ds2QKRSIThw4cDeDGmw8rKCmfPnpVqt3Pnzmr8BC+UXnvbtm1Sx7du3Vrta1aWs7Mzbt68iYyMDMmxa9eulZkN1KBBAwBlA5yi+Pj4IDk5GV999RWuXbsm9VgJeDEO5FWdO3cG8GLqem1p1KgR3NzcsGfPHqSkpJQ5//LvbcSIEYiKisKlS5ekzh86dKjC9/H09ERmZmaZP3/Af3+O5d0DZdVIVBPskSGSoWXLljh8+DDGjx+P1q1bS1b2FQQBd+/exeHDh6GlpVVmPIwso0ePxqBBg7Bs2TLcu3cPnTp1wsmTJ/H9998jICBAavzKjBkzsG7dOsyYMQPdu3fH2bNncfv27Wr/HJ07d8b48eOxc+dOZGVloW/fvjh16hTi4+Orfc3KmjZtGjZv3oxhw4Zh+vTpSE9Px+7du9GuXTvJYGTgxSOXtm3bIjg4GK1atY
KFhQXat2+P9u3bK6SO0jV9Fi1aBG1tbXh6ekqdX716Nc6ePYuRI0fC0dER6enp2LlzJ5o2bSpZE6e27NixA/3790e
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"params = {'n_estimators':500, 'max_depth':4, 'min_samples_split':2, 'learning_rate':0.01, 'loss':'ls'}\n",
"model = ensemble.GradientBoostingRegressor(**params)\n",
"model.fit(x_training_set, y_training_set)\n",
"model_score = model.score(x_training_set, y_training_set)\n",
"print('R2 sq: ', model_score)\n",
"\n",
"y_predicted = model.predict(x_test_set)\n",
"print('Mean squared error: %.2f'% mean_squared_error(y_test_set, y_predicted))\n",
"print('Test Variance score: %.2f' % r2_score(y_test_set, y_predicted))\n",
"\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(y_test_set, y_predicted, edgecolors=(0,0,0))\n",
"ax.plot([y_test_set.min(), y_test_set.max()], [y_test_set.min(), y_test_set.max()], 'k--', lw=4)\n",
"ax.set_xlabel('Actual')\n",
"ax.set_ylabel('Predicted')\n",
"ax.set_title('Ground Truth vs Predicted')\n",
"plt.show()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### [실습] Gradient Boosting 기반 분류"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import make_hastie_10_2\n",
"from sklearn.ensemble import GradientBoostingClassifier\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(12000, 10) (12000,)\n",
"[[ 1.76405235 0.40015721 0.97873798 2.2408932 1.86755799 -0.97727788\n",
" 0.95008842 -0.15135721 -0.10321885 0.4105985 ]\n",
" [ 0.14404357 1.45427351 0.76103773 0.12167502 0.44386323 0.33367433\n",
" 1.49407907 -0.20515826 0.3130677 -0.85409574]\n",
" [-2.55298982 0.6536186 0.8644362 -0.74216502 2.26975462 -1.45436567\n",
" 0.04575852 -0.18718385 1.53277921 1.46935877]\n",
" [ 0.15494743 0.37816252 -0.88778575 -1.98079647 -0.34791215 0.15634897\n",
" 1.23029068 1.20237985 -0.38732682 -0.30230275]\n",
" [-1.04855297 -1.42001794 -1.70627019 1.9507754 -0.50965218 -0.4380743\n",
" -1.25279536 0.77749036 -1.61389785 -0.21274028]]\n",
"[ 1. -1. 1. -1. 1.]\n",
"Accuracy score (training): 0.879\n",
"Accuracy score (testing): 0.819\n"
]
}
],
"source": [
"X, y = make_hastie_10_2(random_state=0)\n",
"X_train, X_test = X[:2000], X[2000:]\n",
"y_train, y_test = y[:2000], y[2000:]\n",
"print(X.shape, y.shape)\n",
"print(X[0:5,:])\n",
"print(y[0:5])\n",
"\n",
"clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0)\n",
"clf.fit(X_train, y_train)\n",
"print('Accuracy score (training): {0:.3f}'.format(clf.score(X_train, y_train)))\n",
"print('Accuracy score (testing): {0:.3f}'.format(clf.score(X_test, y_test)))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### [실습] XGBoosting 기반 회귀"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.datasets import load_boston\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import train_test_split\n",
"import xgboost as xgb"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \n",
"0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \\\n",
"1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n",
"2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n",
"3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n",
"4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n",
"\n",
" PTRATIO B LSTAT PRICE \n",
"0 15.3 396.90 4.98 24.0 \n",
"1 17.8 396.90 9.14 21.6 \n",
"2 17.8 392.83 4.03 34.7 \n",
"3 18.7 394.63 2.94 33.4 \n",
"4 18.7 396.90 5.33 36.2 \n",
"RMSE: 10.423243\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\pinb\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\sklearn\\utils\\deprecation.py:87: FutureWarning: Function load_boston is deprecated; `load_boston` is deprecated in 1.0 and will be removed in 1.2.\n",
"\n",
" The Boston housing prices dataset has an ethical problem. You can refer to\n",
" the documentation of this function for further details.\n",
"\n",
" The scikit-learn maintainers therefore strongly discourage the use of this\n",
" dataset unless the purpose of the code is to study and educate about\n",
" ethical issues in data science and machine learning.\n",
"\n",
" In this special case, you can fetch the dataset from the original\n",
" source::\n",
"\n",
" import pandas as pd\n",
" import numpy as np\n",
"\n",
" data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n",
" raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n",
" data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n",
" target = raw_df.values[1::2, 2]\n",
"\n",
" Alternative datasets include the California housing dataset (i.e.\n",
" :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing\n",
" dataset. You can load the datasets as follows::\n",
"\n",
" from sklearn.datasets import fetch_california_housing\n",
" housing = fetch_california_housing()\n",
"\n",
" for the California housing dataset and::\n",
"\n",
" from sklearn.datasets import fetch_openml\n",
" housing = fetch_openml(name=\"house_prices\", as_frame=True)\n",
"\n",
" for the Ames housing dataset.\n",
" warnings.warn(msg, category=FutureWarning)\n"
]
}
],
"source": [
"boston = load_boston()\n",
"data = pd.DataFrame(boston.data)\n",
"data.columns = boston.feature_names\n",
"data['PRICE'] = boston.target\n",
"print(data.head())\n",
"X, y = data.iloc[:,:-1], data.iloc[:,-1]\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)\n",
"xg_reg = xgb.XGBRegressor(objective='reg:squarederror', colsample_bytree=0.3, learning_rate=0.1, max_depth=5, alpha=10, n_estimators=10)\n",
"xg_reg.fit(X_train, y_train)\n",
"preds = xg_reg.predict(X_test)\n",
"rmse = np.sqrt(mean_squared_error(y_test, preds))\n",
"print('RMSE: %f' % (rmse))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### [실습] LightGBM"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from lightgbm import LGBMClassifier, LGBMRegressor\n",
"from lightgbm import early_stopping, log_evaluation\n",
"from lightgbm import plot_importance, plot_metric, plot_tree\n",
"from sklearn.datasets import load_iris\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.model_selection import cross_validate"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\pinb\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
" _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
"C:\\Users\\pinb\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
" _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1]\tvalid_0's multi_logloss: 0.95847\n",
"[2]\tvalid_0's multi_logloss: 0.832184\n",
"[3]\tvalid_0's multi_logloss: 0.731164\n",
"[4]\tvalid_0's multi_logloss: 0.641056\n",
"[5]\tvalid_0's multi_logloss: 0.571726\n",
"[6]\tvalid_0's multi_logloss: 0.507286\n",
"[7]\tvalid_0's multi_logloss: 0.454933\n",
"[8]\tvalid_0's multi_logloss: 0.410205\n",
"[9]\tvalid_0's multi_logloss: 0.372194\n",
"[10]\tvalid_0's multi_logloss: 0.333919\n",
"[11]\tvalid_0's multi_logloss: 0.310212\n",
"[12]\tvalid_0's multi_logloss: 0.282326\n",
"[13]\tvalid_0's multi_logloss: 0.257165\n",
"[14]\tvalid_0's multi_logloss: 0.240836\n",
"[15]\tvalid_0's multi_logloss: 0.225383\n",
"[16]\tvalid_0's multi_logloss: 0.211583\n",
"[17]\tvalid_0's multi_logloss: 0.199289\n",
"[18]\tvalid_0's multi_logloss: 0.186269\n",
"[19]\tvalid_0's multi_logloss: 0.171556\n",
"[20]\tvalid_0's multi_logloss: 0.168245\n",
"[21]\tvalid_0's multi_logloss: 0.161065\n",
"[22]\tvalid_0's multi_logloss: 0.151371\n",
"[23]\tvalid_0's multi_logloss: 0.148081\n",
"[24]\tvalid_0's multi_logloss: 0.143843\n",
"[25]\tvalid_0's multi_logloss: 0.140169\n",
"[26]\tvalid_0's multi_logloss: 0.138303\n",
"[27]\tvalid_0's multi_logloss: 0.134058\n",
"[28]\tvalid_0's multi_logloss: 0.130884\n",
"[29]\tvalid_0's multi_logloss: 0.128082\n",
"[30]\tvalid_0's multi_logloss: 0.124975\n",
"[31]\tvalid_0's multi_logloss: 0.122225\n",
"[32]\tvalid_0's multi_logloss: 0.120298\n",
"[33]\tvalid_0's multi_logloss: 0.117257\n",
"[34]\tvalid_0's multi_logloss: 0.115021\n",
"[35]\tvalid_0's multi_logloss: 0.115037\n",
"[36]\tvalid_0's multi_logloss: 0.115831\n",
"[37]\tvalid_0's multi_logloss: 0.113318\n",
"[38]\tvalid_0's multi_logloss: 0.115651\n",
"[39]\tvalid_0's multi_logloss: 0.115772\n",
"[40]\tvalid_0's multi_logloss: 0.114953\n",
"[41]\tvalid_0's multi_logloss: 0.117056\n",
"[42]\tvalid_0's multi_logloss: 0.115412\n",
"[43]\tvalid_0's multi_logloss: 0.118359\n",
"[44]\tvalid_0's multi_logloss: 0.117129\n",
"[45]\tvalid_0's multi_logloss: 0.119174\n",
"[46]\tvalid_0's multi_logloss: 0.117789\n",
"[47]\tvalid_0's multi_logloss: 0.121333\n",
"[48]\tvalid_0's multi_logloss: 0.120375\n",
"[49]\tvalid_0's multi_logloss: 0.124128\n",
"[50]\tvalid_0's multi_logloss: 0.123394\n",
"[51]\tvalid_0's multi_logloss: 0.126631\n",
"[52]\tvalid_0's multi_logloss: 0.129833\n",
"[53]\tvalid_0's multi_logloss: 0.129069\n",
"[54]\tvalid_0's multi_logloss: 0.135166\n",
"[55]\tvalid_0's multi_logloss: 0.134996\n",
"[56]\tvalid_0's multi_logloss: 0.13912\n",
"[57]\tvalid_0's multi_logloss: 0.138818\n",
"[58]\tvalid_0's multi_logloss: 0.142758\n",
"[59]\tvalid_0's multi_logloss: 0.142228\n",
"[60]\tvalid_0's multi_logloss: 0.142928\n",
"[61]\tvalid_0's multi_logloss: 0.142513\n",
"[62]\tvalid_0's multi_logloss: 0.143485\n",
"[63]\tvalid_0's multi_logloss: 0.143408\n",
"[64]\tvalid_0's multi_logloss: 0.148199\n",
"[65]\tvalid_0's multi_logloss: 0.148074\n",
"[66]\tvalid_0's multi_logloss: 0.156199\n",
"[67]\tvalid_0's multi_logloss: 0.15898\n",
"[68]\tvalid_0's multi_logloss: 0.157612\n",
"[69]\tvalid_0's multi_logloss: 0.162526\n",
"[70]\tvalid_0's multi_logloss: 0.166269\n",
"[71]\tvalid_0's multi_logloss: 0.168114\n",
"[72]\tvalid_0's multi_logloss: 0.173203\n",
"[73]\tvalid_0's multi_logloss: 0.181871\n",
"[74]\tvalid_0's multi_logloss: 0.181307\n",
"[75]\tvalid_0's multi_logloss: 0.186251\n",
"[76]\tvalid_0's multi_logloss: 0.185765\n",
"[77]\tvalid_0's multi_logloss: 0.190847\n",
"[78]\tvalid_0's multi_logloss: 0.190228\n",
"[79]\tvalid_0's multi_logloss: 0.195371\n",
"[80]\tvalid_0's multi_logloss: 0.199459\n",
"[81]\tvalid_0's multi_logloss: 0.198517\n",
"[82]\tvalid_0's multi_logloss: 0.203972\n",
"[83]\tvalid_0's multi_logloss: 0.213262\n",
"[84]\tvalid_0's multi_logloss: 0.212185\n",
"[85]\tvalid_0's multi_logloss: 0.217603\n",
"[86]\tvalid_0's multi_logloss: 0.227068\n",
"[87]\tvalid_0's multi_logloss: 0.225914\n",
"[88]\tvalid_0's multi_logloss: 0.230099\n",
"[89]\tvalid_0's multi_logloss: 0.229018\n",
"[90]\tvalid_0's multi_logloss: 0.23464\n",
"[91]\tvalid_0's multi_logloss: 0.24434\n",
"[92]\tvalid_0's multi_logloss: 0.243782\n",
"[93]\tvalid_0's multi_logloss: 0.24814\n",
"[94]\tvalid_0's multi_logloss: 0.25793\n",
"[95]\tvalid_0's multi_logloss: 0.257366\n",
"[96]\tvalid_0's multi_logloss: 0.261762\n",
"[97]\tvalid_0's multi_logloss: 0.260774\n",
"[98]\tvalid_0's multi_logloss: 0.270632\n",
"[99]\tvalid_0's multi_logloss: 0.269316\n",
"[100]\tvalid_0's multi_logloss: 0.269535\n",
"[101]\tvalid_0's multi_logloss: 0.279374\n",
"[102]\tvalid_0's multi_logloss: 0.278105\n",
"[103]\tvalid_0's multi_logloss: 0.279826\n",
"[104]\tvalid_0's multi_logloss: 0.282811\n",
"[105]\tvalid_0's multi_logloss: 0.29269\n",
"[106]\tvalid_0's multi_logloss: 0.297696\n",
"[107]\tvalid_0's multi_logloss: 0.297028\n",
"[108]\tvalid_0's multi_logloss: 0.29694\n",
"[109]\tvalid_0's multi_logloss: 0.30682\n",
"[110]\tvalid_0's multi_logloss: 0.306206\n",
"[111]\tvalid_0's multi_logloss: 0.303895\n",
"[112]\tvalid_0's multi_logloss: 0.300907\n",
"[113]\tvalid_0's multi_logloss: 0.304274\n",
"[114]\tvalid_0's multi_logloss: 0.314218\n",
"[115]\tvalid_0's multi_logloss: 0.312988\n",
"[116]\tvalid_0's multi_logloss: 0.317589\n",
"[117]\tvalid_0's multi_logloss: 0.323073\n",
"[118]\tvalid_0's multi_logloss: 0.333026\n",
"[119]\tvalid_0's multi_logloss: 0.332652\n",
"[120]\tvalid_0's multi_logloss: 0.337212\n",
"[121]\tvalid_0's multi_logloss: 0.334481\n",
"[122]\tvalid_0's multi_logloss: 0.340022\n",
"[123]\tvalid_0's multi_logloss: 0.350061\n",
"[124]\tvalid_0's multi_logloss: 0.351676\n",
"[125]\tvalid_0's multi_logloss: 0.348515\n",
"[126]\tvalid_0's multi_logloss: 0.358595\n",
"[127]\tvalid_0's multi_logloss: 0.356737\n",
"[128]\tvalid_0's multi_logloss: 0.351512\n",
"[129]\tvalid_0's multi_logloss: 0.361591\n",
"[130]\tvalid_0's multi_logloss: 0.35978\n",
"[131]\tvalid_0's multi_logloss: 0.357317\n",
"[132]\tvalid_0's multi_logloss: 0.367439\n",
"[133]\tvalid_0's multi_logloss: 0.365665\n",
"[134]\tvalid_0's multi_logloss: 0.365745\n",
"[135]\tvalid_0's multi_logloss: 0.375832\n",
"[136]\tvalid_0's multi_logloss: 0.374115\n",
"[137]\tvalid_0's multi_logloss: 0.376748\n",
"avg fit time: 0.5514350891113281 (+/- 0.3701610138582717)\n",
"avg fit time: 0.010002517700195312 (+/- 0.009552237668971902)\n",
"avg fit time: 0.9600000000000002 (+/- 0.04898979485566355)\n"
]
},
{
"data": {
"text/plain": [
"<Axes: >"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHHCAYAAABDUnkqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABiWUlEQVR4nO3dd3hUZdoG8Hv6JJn03kgCoRMgJJSAUjSAiChWBKSpKAK7Yj7WFUGaK6iriAVBXRULKFYsNGNoIkhNqKGmAel10jPlfH+EjISEZBKSnMzM/buuXLtz5pwzzzwg3Lzve86RCIIggIiIiMhKSMUugIiIiKglMdwQERGRVWG4ISIiIqvCcENERERWheGGiIiIrArDDREREVkVhhsiIiKyKgw3REREZFUYboiIiMiqMNwQEdavXw+JRIKUlJRW+4ylS5dCIpFYzHnFlpKSAolEgvXr1zfreIlEgqVLl7ZoTUSWguGGqA3VhAiJRIJ9+/bVeV8QBAQGBkIikeCee+5p1me8//77zf4LkZpm48aNWL16tdhlENENGG6IRKBWq7Fx48Y62/fs2YMrV65ApVI1+9zNCTdTpkxBeXk5goKCmv25Ylm0aBHKy8tF+ezWDDdBQUEoLy/HlClTmnV8eXk5Fi1a1MJVEVkGhhsiEdx999349ttvodfra23fuHEjIiIi4OPj0yZ1lJaWAgBkMhnUarVFTe/U1C6Xy6FWq0WupnEVFRUwGo1m7y+RSKBWqyGTyZr1eWq1GnK5vFnHElk6hhsiEUycOBF5eXmIjY01bauqqsJ3332HSZMm1XuM0WjE6tWr0bNnT6jVanh7e+Ppp59GQUGBaZ/g4GCcPn0ae/bsMU1/DR8+HMDfU2J79uzB7Nmz4eXlhYCAgFrv3bjmZtu2bRg2bBgcHR3h5OSE/v371zvidKN9+/ahf//+UKvV6NSpEz744IM6+zS0puTG9SI162rOnDmDSZMmwdXVFbfddlut9248fu7cudi8eTN69eoFlUqFnj17Yvv27XU+a/fu3YiMjKxVqznreIYPH44tW7YgNTXV1Ovg4GDTOSUSCb7++mssWrQI/v7+sLe3h1arRX5+PubPn4+wsDBoNBo4OTlhzJgxOH78eKP9mT59OjQaDa5evYrx48dDo9HA09MT8+fPh8FgMKuHFy9exPTp0+Hi4gJnZ2fMmDEDZWVltY4tLy/HP//5T3h4eMDR0RH33nsvrl69ynU8ZDEY64lEEBwcjKioKHz11VcYM2YMgOogUVRUhEcffRTvvPNOnWOefvpprF+/HjNmzMA///lPJCcn47333kN8fDz+/PNPKBQKrF69Gv/4xz+g0WiwcOFCAIC3t3et88yePRuenp5YvHixafSjPuvXr8fjjz+Onj17YsGCBXBxcUF8fDy2b99+0wAGACdPnsSoUaPg6emJpUuXQq/XY8mSJXXqaI6HH34YnTt3xooVKyAIQoP77tu3Dz/88ANmz54NR0dHvPPOO3jwwQeRlpYGd3d3AEB8fDzuuusu+Pr6YtmyZTAYDFi+fDk8PT0brWXhwoUoKirClStX8NZbbwEANBpNrX1efvllKJVKzJ8/H5WVlVAqlThz5gw2b96Mhx9+GCEhIcjKysIHH3yAYcOG4cyZM/Dz82vwcw0GA0aPHo2BAwfijTfewO+//44333wTnTp1wjPPPNNo3Y888ghCQkKwcuVKHDt2DP/73//g5eWF1157zbTP9OnT8c0332DKlCkYNGgQ9uzZg7FjxzZ6bqJ2QyCiNvPpp58KAITDhw8L7733nuDo6CiUlZUJgiAIDz/8sDBixAhBEAQhKChIGDt2rOm4P/74QwAgbNiwodb5tm/fXmd7z549hWHDht30s2+77TZBr9fX+15ycrIgCIJQWFgoODo6CgMHDhTKy8tr7Ws0Ghv8juPHjxfUarWQmppq2nbmzBlBJpMJ1/+Rk5ycLAAQPv300zrnACAsWbLE9H
rJkiUCAGHixIl19q1578bjlUqlcPHiRdO248ePCwCEd99917Rt3Lhxgr29vXD16lXTtgsXLghyubzOOeszduxYISgoqM72Xbt2CQCEjh07mn59a1RUVAgGg6HWtuTkZEGlUgnLly+vte3G/kybNk0AUGs/QRCE8PBwISIiok4P6uvh448/Xmu/+++/X3B3dze9Pnr0qABAmDdvXq39pk+fXuecRO0Vp6WIRPLII4+gvLwcv/76K4qLi/Hrr7/edETk22+/hbOzM0aOHInc3FzTT0REBDQaDXbt2mX2586cObPRdRyxsbEoLi7GCy+8UGc9S0PTNQaDATt27MD48ePRoUMH0/bu3btj9OjRZtd4M7NmzTJ73+joaHTq1Mn0unfv3nByckJSUpKp1t9//x3jx4+vNVoSGhpqGk27VdOmTYOdnV2tbSqVClKp1FRDXl4eNBoNunbtimPHjpl13hv7cPvtt5u+V3OOzcvLg1arBQDT1N3s2bNr7fePf/zDrPMTtQecliISiaenJ6Kjo7Fx40aUlZXBYDDgoYceqnffCxcuoKioCF5eXvW+n52dbfbnhoSENLrPpUuXAAC9evUy+7wAkJOTg/LycnTu3LnOe127dsXWrVubdL4bmVN7jevDVQ1XV1fTGqXs7GyUl5cjNDS0zn71bWuO+uo1Go14++238f777yM5ObnWWpma6bKGqNXqOtNm13+vxtzYF1dXVwBAQUEBnJyckJqaCqlUWqf2luoJUVtguCES0aRJkzBz5kxkZmZizJgxcHFxqXc/o9EILy8vbNiwod73zVkjUuPGkQSx3GwE6MaFsddrSu03G50SGlmr05Lqq3fFihV46aWX8Pjjj+Pll1+Gm5sbpFIp5s2bZ9bVVM29eqqx49uyL0StjeGGSET3338/nn76afz111/YtGnTTffr1KkTfv/9dwwZMqTRv+Bb4nLumumcU6dONelf7J6enrCzs8OFCxfqvHfu3Llar2tGDAoLC2ttT01NbWK1zePl5QW1Wo2LFy/Wea++bfVpTq+/++47jBgxAh9//HGt7YWFhfDw8Gjy+VpaUFAQjEYjkpOTa43AmdsTovaAa26IRKTRaLB27VosXboU48aNu+l+jzzyCAwGA15++eU67+n1+loBwcHBoU5gaKpRo0bB0dERK1euREVFRa33GvoXvkwmw+jRo7F582akpaWZticmJmLHjh219nVycoKHhwf27t1ba/v7779/S7WbSyaTITo6Gps3b0Z6erpp+8WLF7Ft2zazzuHg4ICioqImf+6NPfz2229x9erVJp2ntdSsjbrx1+Hdd98VoxyiZuHIDZHIpk2b1ug+w4YNw9NPP42VK1ciISEBo0aNgkKhwIULF/Dtt9/i7bffNq3XiYiIwNq1a/Gf//wHoaGh8PLywh133NGkmpycnPDWW2/hySefRP/+/U33ljl+/DjKysrw2Wef3fTYZcuWYfv27bj99tsxe/Zs6PV6vPvuu+jZsydOnDhRa98nn3wSr776Kp588klERkZi7969OH/+fJNqvRVLly7Fb7/9hiFDhuCZZ56BwWDAe++9h169eiEhIaHR4yMiIrBp0ybExMSgf//+0Gg0DYZUALjnnnuwfPlyzJgxA4MHD8bJkyexYcMGdOzYsYW+1a2JiIjAgw8+iNWrVyMvL890KXjNr4sl3eiRbBfDDZGFWLduHSIiIvDBBx/gxRdfhFwuR3BwMB577DEMGTLEtN/ixYuRmpqK119/HcXFxRg2bFiTww0APPHEE/Dy8sKrr76Kl19+GQqFAt26dcNzzz3X4HG9e/fGjh07EBMTg8WLFyMgIADLli1DRkZGnXCzePFi5OTk4LvvvsM333yDMWPGYNu2bTddON3SIiIisG3bNsyfPx8vvfQSAgMDsXz5ciQmJuLs2bONHj979mwkJCTg008/xVtvvYWgoKBGw82LL76I0tJSbNy4EZs2bUK/fv2wZcsWvPDCCy31tW7Z55
9/Dh8fH3z11Vf48ccfER0djU2bNqFr164WcTdoIonAVWRERLWMHz8ep0+frnftkK1KSEhAeHg4vvzyS0yePFnscog
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA30AAAPxCAYAAABO+MQaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABeq0lEQVR4nO3de5yWc/748fc9NU0qmUpJNp1kKxWtylZsDlFytsuyUcTuIpbCD19byinEZpfUd9lvfR3XHizWIZLz+Zh1SCuKlsgiUStTc/3+8Oj+GhXTNDO3Pj2fj8c8dF/Xdd/3537PPDSvrvuQy7IsCwAAAJJUVOgFAAAAUHNEHwAAQMJEHwAAQMJEHwAAQMJEHwAAQMJEHwAAQMJEHwAAQMJEHwAAQMJEHwAAQMJEHwAU2LRp0yKXy8X8+fMLvRQAEiT6AKh1qyJnTV9nnnlmjdzn448/HmPHjo3FixfXyO1vzJYtWxZjx46NBx98sNBLAWAN6hZ6AQBsvM4999xo165dhW1du3atkft6/PHHY9y4cXHUUUdFaWlpjdxHVR155JFx2GGHRUlJSaGXUiXLli2LcePGRUTErrvuWtjFALAa0QdAwey9997Rs2fPQi9jvSxdujQaNmy4XrdRp06dqFOnTjWtqPaUl5fHF198UehlAPAtPL0TgO+su+++O3bZZZdo2LBhbLrpprHPPvvEK6+8UuGYf/zjH3HUUUdF+/bto379+tGyZcsYPnx4fPjhh/ljxo4dG6effnpERLRr1y7/VNL58+fH/PnzI5fLxbRp01a7/1wuF2PHjq1wO7lcLl599dX42c9+Fk2aNImdd945v//666+PHXfcMTbZZJNo2rRpHHbYYbFgwYJvfZxrek1f27ZtY999940HH3wwevbsGZtsskl069Yt/xTKW265Jbp16xb169ePHXfcMV544YUKt3nUUUdFo0aN4s0334yBAwdGw4YNo1WrVnHuuedGlmUVjl26dGmceuqp0bp16ygpKYnvf//7cemll652XC6XixNPPDFuuOGG2G677aKkpCSmTJkSzZs3j4iIcePG5We7am6V+f58dbZz587Nn43dbLPN4uijj45ly5atNrPrr78+evfuHQ0aNIgmTZrEj370o7j33nsrHFOZnx+AjYEzfQAUzCeffBL//ve/K2zbfPPNIyLiuuuui2HDhsXAgQPj4osvjmXLlsXkyZNj5513jhdeeCHatm0bEREzZsyIN998M44++uho2bJlvPLKK/H73/8+XnnllXjyyScjl8vFwQcfHP/85z/jpptuiokTJ+bvo3nz5vHBBx+s87oPOeSQ6NixY1x44YX5MLrgggti9OjRceihh8axxx4bH3zwQVxxxRXxox/9KF544YUqPaV07ty58bOf/Sx++ctfxhFHHBGXXnpp7LfffjFlypT4r//6rzjhhBMiImL8+PFx6KGHxpw5c6Ko6P/+PXflypUxaNCg+OEPfxiXXHJJTJ8+Pc4555xYsWJFnHvuuRERkWVZ7L///vHAAw/EMcccEzvssEPcc889cfrpp8c777wTEydOrLCm+++/P/70pz/FiSeeGJtvvnlsv/32MXny5Dj++OPjoIMOioMPPjgiIrp37x4Rlfv+fNWhhx4a7dq1i/Hjx8fzzz8f11xzTbRo0SIuvvji/DHjxo2LsWPHRt++fePcc8+NevXqxVNPPRX3339/7LXXXhFR+Z8fgI1CBgC1bOrUqVlErPEry7Ls008/zUpLS7Of//znFa733nvvZZtttlmF7cuWLVvt9m+66aYsIrKHH344v23ChAlZRGTz5s2rcOy8efOyiMimTp262u1ERHbOOefkL59zzjlZRGSHH354hePmz5+f1alTJ7vgggsqbH/ppZeyunXrrrZ9bfP46tratGmTRUT2+OOP57fdc889WURkm2yySfbWW2/lt//3f/93FhHZAw88kN82bNiwLCKyk046Kb+tvLw822
effbJ69eplH3zwQZZlWXbrrbdmEZGdf/75Fdb0k5/8JMvlctncuXMrzKOoqCh75ZVXKhz7wQcfrDarVSr7/Vk12+HDh1c49qCDDsqaNWuWv/z6669nRUVF2UEHHZStXLmywrHl5eVZlq3bzw/AxsDTOwEomEmTJsWMGTMqfEV8eXZo8eLFcfjhh8e///3v/FedOnVip512igceeCB/G5tsskn+z59//nn8+9//jh/+8IcREfH888/XyLqPO+64CpdvueWWKC8vj0MPPbTCelu2bBkdO3assN510aVLl+jTp0/+8k477RQREbvvvntsvfXWq21/8803V7uNE088Mf/nVU/P/OKLL+K+++6LiIi77ror6tSpE7/61a8qXO/UU0+NLMvi7rvvrrC9f//+0aVLl0o/hnX9/nx9trvsskt8+OGHsWTJkoiIuPXWW6O8vDzGjBlT4azmqscXsW4/PwAbA0/vBKBgevfuvcY3cnn99dcj4su4WZPGjRvn//zRRx/FuHHj4o9//GMsWrSownGffPJJNa72/3z9HUdff/31yLIsOnbsuMbji4uLq3Q/Xw27iIjNNtssIiJat269xu0ff/xxhe1FRUXRvn37Ctu23XbbiIj86wffeuutaNWqVWy66aYVjuvcuXN+/1d9/bF/m3X9/nz9MTdp0iQivnxsjRs3jjfeeCOKioq+MTzX5ecHYGMg+gD4zikvL4+IL1+X1bJly9X21637f399HXroofH444/H6aefHjvssEM0atQoysvLY9CgQfnb+SZff03ZKitXrlzrdb569mrVenO5XNx9991rfBfORo0afes61mRt7+i5tu3Z1954pSZ8/bF/m3X9/lTHY1uXnx+AjYH/6wHwndOhQ4eIiGjRokUMGDBgrcd9/PHHMXPmzBg3blyMGTMmv33VmZ6vWlvcrTqT9PUPbf/6Ga5vW2+WZdGuXbv8mbTvgvLy8njzzTcrrOmf//xnRET+jUzatGkT9913X3z66acVzva99tpr+f3fZm2zXZfvT2V16NAhysvL49VXX40ddthhrcdEfPvPD8DGwmv6APjOGThwYDRu3DguvPDCKCsrW23/qnfcXHVW6OtngS6//PLVrrPqs/S+HneNGzeOzTffPB5++OEK26+66qpKr/fggw+OOnXqxLhx41ZbS5Zlq308QW268sorK6zlyiuvjOLi4thjjz0iImLw4MGxcuXKCsdFREycODFyuVzsvffe33ofDRo0iIjVZ7su35/KOvDAA6OoqCjOPffc1c4Urrqfyv78AGwsnOkD4DuncePGMXny5DjyyCPjBz/4QRx22GHRvHnzePvtt+POO++Mfv36xZVXXhmNGzeOH/3oR3HJJZdEWVlZbLXVVnHvvffGvHnzVrvNHXfcMSIizj777DjssMOiuLg49ttvv2jYsGEce+yxcdFFF8Wxxx4bPXv2jIcffjh/RqwyOnToEOeff36cddZZMX/+/DjwwANj0003jXnz5sXf/va3+MUvfhGnnXZatc2nsurXrx/Tp0+PYcOGxU477RR333133HnnnfFf//Vf+c/W22+//WK33XaLs88+O+bPnx/bb7993HvvvXHbbbfFKaeckj9r9k022WST6NKlS9x8882x7bbbRtOmTaNr167RtWvXSn9/KmubbbaJs88+O84777zYZZdd4uCDD46SkpJ45plnolWrVjF+/PhK//wAbCxEHwDfST/72c+iVatWcdFFF8WECRNi+fLlsdVWW8Uuu+wSRx99dP64G2+8MU466aSYNGlSZFkWe+21V9x9993RqlWrCrfXq1evOO+882LKlCkxffr0KC8vj3nz5kXDhg1jzJgx8cEHH8Rf/vKX+NOf/hR777133H333dGiRYtKr/fMM8+MbbfdNiZOnBjjxo2LiC/fcGWvvfaK/fffv3qGso7q1KkT06dPj+OPPz5OP/302HTTTeOcc86p8FTLoqKiuP3222PMmDFx8803x9SpU6
Nt27YxYcKEOPXUUyt9X9dcc02cdNJJMXLkyPjiiy/inHPOia5du1b6+7Muzj333GjXrl1cccUVcfbZZ0eDBg2ie/f
"text/plain": [
"<Figure size 1000x1200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAACI4AAAH9CAYAAAB2/6WWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAACFPklEQVR4nOzde7xVdZ0//s/hIje5eAMBBQID73gJBUtR0FFLNC1uWjoVMDjlpAOFF7AScmrC6jtdIDDLaATBMmHSaoBJS0UDFS8ZJCioIIgKKoginN8f85tP67M6e7M5nCv7+fzr9X68917rszd777Mvi/WuqKysrAwAAAAAAAAAAJSdJvW9AAAAAAAAAAAA6ocDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAylSz+l4AAAAAsG948803k/qdd96JeevWrUlvy5YtMe/atSvpZS/73nvv1chadu7cWa3ttG7dOqlbtGhRre20bds25mbN0q9jWrVqVWUOIYQOHToU7LVs2bJaawEAAADIcsYRAAAAAAAAAIAy5cARAAAAAAAAAIAy5cARAAAAAAAAAIAy1Wz3FwEAAABq2yuvvBLzunXrCvZee+21pJetX3/99YK9fL1p06aCvTfffDPpZet33nkn6W3dujVQPyoqKmLu0KFD0mvdunXMbdq0SXoHHXRQzAceeGDBXjbv7noHH3xwUh9yyCExH3rooUmvW7duVa4TAAAAqB/OOAIAAAAAAAAAUKYcOAIAAAAAAAAAUKYqKisrK+t7EQAAAFDf8iNYVq1aVWUOIYTnn38+5jVr1iS99evXx/zSSy8lvWydvVwIIbz33nslrbNFixZJXepokRDS8SL50SLZul27dkmvffv2Mbds2TLpZceg5K+Xvez++++f9Nq2bRtzs2bpJN3sbazuKJP89fL3W6nyY3t27txZre288cYbBXvZcT/bt29Pelu2bIl527ZtSS972c2bNye97OP57bffTnrZsUTVHX1U7Hp7Ij9ip2vXrjEfdthhSa9Lly4xH3744UkvWx9xxBFJr1evXgW3mR33AwAAAOXKGUcAAAAAAAAAAMqUA0cAAAAAAAAAAMqUA0cAAAAAAAAAAMpURWVlZWV9LwIAAAB2J/vxdfXq1UnvqaeeivnPf/5z0lu1alXMzz33XMHeyy+/XPJaunTpEnO3bt0K9g477LCk17Vr1yovF0IIhx9+eMFetm7Tpk3J64S6smvXrqR+9dVXY37llVeS3osvvhhz/nm3bt26Ki8XQgjr168v2FuzZk3Mb7/9dsF1tmjRIql79uwZ8xFHHJH0evXqFXPv3r2T3jHHHBPz8ccfn/Q6dOhQcP8AAADQEDnjCAAAAAAAAABAmXLgCAAAAAAAAABAmTKqBgAAgFr11ltvxfzYY48lvSeffDLm7LiZfC+EEJ555pmY86MoKioqYu7Ro0fSy46fyI6e2F0vW+dHWLRq1SoADdOGDRuSOjuiqti4qmK9lStXJr3NmzcX3H927NRxxx2X9IrVJ5xwQtI76qijYm7SxP/9AgAAoPb41AkAAAAAAAAAUKYcOAIAAAAAAAAAUKYcOAIAAAAAAAAAUKYqKisrK+t7EQAAADQ8O3fuTOq//OUvMS9btizpZet879FHH415x44dSa9Dhw4xH3PMMUkvXx999NExn3zyyUmvb9++Mb
dt2zYA1KZ169bF/Oc//znpPfPMMzHnXw+LXXb79u1Jb//99485+xoXQvoa+JGPfCTpnXHGGTF36tSp6hsAAAAAGc44AgAAAAAAAABQphw4AgAAAAAAAABQpoyqAQAA2Mft2rUrqZcvXx7zokWLkt7ChQtjfuihh5LeW2+9FXObNm2SXnZswqmnnpr0+vfvH/Mpp5yS9A477LCiawfYl2XHdz355JNJ75FHHqky5+uVK1cmvexXfb169Up6Z511VsyDBw9OeoMGDYq5Y8eOu107AAAA+w5nHAEAAAAAAAAAKFMOHAEAAAAAAAAAKFMOHAEAAAAAAAAAKFMVldnBpwAAADQaL7/8csz33ntv0lu4cGHMixcvTnqbNm2K+ZBDDkl6gwYNivnMM89Mev3794/52GOPTXrNmjUrcdUA1KQ33ngjqR955JGYH3744aS3aNGiKi8XQgg7d+6M+bjjjkt6Z599dsznnHNO0jvrrLNibtGiRanLBgAAoAFxxhEAAAAAAAAAgDLlwBEAAAAAAAAAgDJlVA0AAEAD8vzzzyf1/PnzY543b17Se+ihh2Ju1apV0jvttNNizo4YyNcnnnhi0mvSxP8vACgHW7duTersWJvsuLN8/dhjjyW97N+f7LizEEIYOnRozB//+MeTXrt27fZswQAAANQa3wgCAAAAAAAAAJQpB44AAAAAAAAAAJQpB44AAAAAAAAAAJSpisrKysr6XgQAAMC+bO3atUn9k5/8JKnnzZsX8zPPPJP0Dj744JgvvPDCpHfxxRfHfPbZZye9li1bVm+xAFDEiy++mNS/+tWvYr777ruT3gMPPBBz8+bNk94555wT82WXXZb0Pv7xj8fcokWL6i4VAACAEjnjCAAAAAAAAABAmXLgCAAAAAAAAABAmTKqBgAAoJree++9mO+5556k9+Mf/zjm//7v/056hxxySFIPHz485uz4mRBCOP3002Nu2rRp9RcLAHVs06ZNMc+fPz/p/eIXv4j5t7/9bdLr0KFDzJ/61KeS3qhRo5L62GOP3dtlAgAAlD1nHAEAAAAAAAAAKFMOHAEAAAAAAAAAKFMOHAEAAAAAAAAAKFMVlZWVlfW9CAAAgIZq48aNMX/3u99NejNnzoz5jTfeSHrnnXdezJ/73OeS3gUXXJDUzZs339tlAkCj9fLLLyf17bffHvNtt92W9FatWpXUp556aszjx49PepdccknMTZr4/3MAAACF+MQEAAAAAAAAAFCmHDgCAAAAAAAAAFCmjKoBAADK3tq1a2OeOnVq0vvxj38c8/7775/0vvCFL8T82c9+Nul17dq1JpcIAGUp/9Xl73//+6T+4Q9/GPMvf/nLpNe7d++YJ0yYkPQuu+yymI2MAwAAyp0zjgAAAAAAAAAAlCkHjgAAAAAAAAAAlCkHjgAAAAAAAAAAlKmKyvygUAAAgH3Q66+/HvMNN9yQ9H784x/H3Llz56Q3fvz4mD/3uc8lvdatW9fkEgGAvfCXv/wlqb/5zW/G/J//+Z9Jr0uXLjH/27/9W9IbOXJkLawOAACg4XLGEQAAAAAAAACAMuXAEQAAAAAAAACAMmVUDQAAsM/YtWtXzLfddlvSu+6662Ju3rx50ps8eXLMl19+edLLXxYAaHzWrFmT1Nm//T/5yU+S3sCBA2P+wQ9+kPSOOuqoWlgdAABA/XLGEQAAAAAAAACAMuXAEQAAAAAAAACAMuXAEQAAAAAAAACAMlVRWVlZWd+LAAAAqI5Vq1Yl9ac+9amYly5dmvQ+//nPx3zTTTclvXbt2tXC6gCAxuCRRx5J6ux7hieffDLpXXvttTF/5StfSXpNmzathdUBAADUPmccAQAAAAAAAAAoUw4cAQAAAAAAAAAoU0bVAAAAjcp9990X82WXXZb0evToEfNPf/rTpHf88cfX5rIAgH3Ezp07Y54+fXrSGz9+fMynn3560ps9e3bMBx10UC2tDgAAoOY54wgAAAAAAAAAQJly4AgAAA
AAAAAAQJly4AgAAAAAAAAAQJmqqKysrKzvRQAAABTy9a9/PalvvPHGmD/96U8nvWnTpsXcqlWr2l0YAFB2HnvssZg
"text/plain": [
"<Figure size 2800x1400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"iris = load_iris()\n",
"X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=123)\n",
"lgbmc = LGBMClassifier(n_estimators=400)\n",
"evals = [(X_test, y_test)]\n",
"lgbmc.fit(X_train, y_train, early_stopping_rounds=100, eval_metric='logloss', eval_set=evals, verbose=True)\n",
"preds = lgbmc.predict(X_test)\n",
"\n",
"cross_val = cross_validate(\n",
" estimator=lgbmc,\n",
" X=iris.data, y=iris.target,\n",
" cv=5\n",
")\n",
"\n",
"print('avg fit time: {} (+/- {})'.format(cross_val['fit_time'].mean(), cross_val['fit_time'].std()))\n",
"print('avg fit time: {} (+/- {})'.format(cross_val['score_time'].mean(), cross_val['score_time'].std()))\n",
"print('avg fit time: {} (+/- {})'.format(cross_val['test_score'].mean(), cross_val['test_score'].std()))\n",
"\n",
"plot_metric(lgbmc)\n",
"plot_importance(lgbmc, figsize=(10,12))\n",
"plot_tree(lgbmc, figsize=(28,14))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}