You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
920 lines
325 KiB
Plaintext
920 lines
325 KiB
Plaintext
1 year ago
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### [실습] 배깅(bagging, bootstrap aggregating)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import numpy as np\n",
|
||
|
"import matplotlib as mpl\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"from sklearn.datasets import load_iris\n",
|
||
|
"from sklearn.tree import DecisionTreeClassifier\n",
|
||
|
"from sklearn.ensemble import BaggingClassifier"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"iris = load_iris()\n",
|
||
|
"X, y = iris.data[:, [0,2]], iris.target\n",
|
||
|
"\n",
|
||
|
"model1 = DecisionTreeClassifier(max_depth =10, random_state=0).fit(X, y)\n",
|
||
|
"model2 = BaggingClassifier(DecisionTreeClassifier(max_depth=4), n_estimators=50, random_state=0).fit(X, y)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAD5S0lEQVR4nOzdd1hUR9vA4d/ZpXcQELDQxAYq9t5770ajRpOYnvglMT1v3ugbE9NjqjFNjcbEqInR2HvvHUEpCiiCCEgvy+7O98cBZKliRZz7urwMZ2f3zK7h2eecmXlGEUIIJEmSJEmSpPue5l53QJIkSZIkSbo9ZGInSZIkSZJUQ8jETpIkSZIkqYaQiZ0kSZIkSVINIRM7SZIkSZKkGkImdpIkSZIkSTWETOwkSZIkSZJqCJnYSZIkSZIk1RAysZMkSZIkSaohZGIn3RbR0dEoisLChQur9LwePXrQo0ePO9InSZLuHz4+PkydOvVed6NCer2e1157jXr16qHRaBgxYkSVX+NOv8+bef2ZM2eiKMqd6ZB018nEroZYuHAhiqIU/bGyssLLy4v+/fvz1VdfkZGRca+7WK2EhoYyc+ZMoqOj73VXJOmOKRkXFEXB3d2dnj17sn79+nvdvfvOL7/8wieffMKYMWNYtGgRL7300r3u0n0tOzubmTNnsmPHjnvdlRrF7F53QLq9/ve//+Hr60t+fj4JCQns2LGDF198kc8//5zVq1fTvHnzO3Jeb29vcnJyMDc3r9LzNm3adEf6U5nQ0FBmzZpFjx498PHxuSd9kKS7pTAuCCG4cuUKCxcuZNCgQaxZs4YhQ4bc6+4BcO7cOTSa6n2vYdu2bdSpU4cvvvjiXnelXDfzOf7nP//hjTfeuEM9Kl92djazZs0CkCM3t5FM7GqYgQMH0qZNm6Kf33zzTbZt28aQIUMYNmwYYWFhWFtb3/bzFt4lrCoLC4vb3pfbTQhBbm7uHfncJOluKBkXHn/8cWrXrs3vv/9ebRI7S0vLe92FSiUmJuLk5HSvu1Ghm/kczczMMDOr/ulAVlYWtra297ob1V71vjySbotevXrxzjvvEBMTw5IlS0weO3v2LGPGjMHFxQUrKyvatGnD6tWrS71GamoqL730Ej4+PlhaWlK3bl0eeeQRkpKSgLLn2CUkJPDoo49St25dLC0t8fT0ZPjw4SbDn2XNsUtMTCz64rGysqJFixYsWrTIpE3h+T799FN++OEH/P39sbS0pG3bthw+fLjCz2PhwoWMHTsWgJ49exYNURUOB/j4+DBkyBA2btxImzZtsLa2Zv78+UWfw4svvki9evWwtLSkQYMGfPTRRxiNRpNzGI1G5s6dS2BgIFZWVtSuXZunnnqKa9euVdg3SbobnJycsLa2LvVl/umnn9KpUydq1aqFtbU1rVu3ZsWKFaWen5OTw/Tp03F1dcXe3p5hw4YRFxeHoijMnDnTpO2OHTto06YNVlZW+Pv7M3/+/DLndJWcG1Y4jLx3715efvll3NzcsLW1ZeTIkVy9etXkuUajkZkzZ+Ll5YWNjQ09e/YkNDT0huebZWVlMWPGjKLf60aNGvHpp58ihACux5vt27dz5syZUjGjLEIIZs+eTd26dYv6dObMmTLbViWufPnllzRr1gwrKyvc3NwYMGAAR44cKfdzzM/PZ9asWQQEBGBlZUWtWrXo0qULmzdvLmpT1r+HXq/nvffeK4qtPj4+vPXWW+Tl5Zm0K4yXe/bsoV27dlhZWeHn58evv/5a4WceHR2Nm5sbALNmzSr6TAv//5k6dSp2dnZERUUxaNAg7O3tmThxYtHncKPxdf369XTt2hVbW1vs7e0ZPHhwqX+HG/muup9U/xRdui0mT57MW2+9xaZNm3jiiScAOHPmDJ07d6ZOnTq88cYb2Nra8ueffzJixAhWrlzJyJEjAcjMzKRr166EhYXx2GOP0apVK5KSkli9ejWXLl3C1dW1zHOOHj2aM2
fO8MILL+Dj40NiYiKbN28mNja23OHPnJwcevToQWRkJM8//zy+vr4sX76cqVOnkpqayv/93/+ZtF+6dCkZGRk89dRTKIrCxx9/zKhRozh//ny5w8LdunVj+vTpfPXVV7z11ls0adIEoOhvUIczJkyYwFNPPcUTTzxBo0aNyM7Opnv37sTFxfHUU09Rv3599u3bx5tvvkl8fDxz584tev5TTz3FwoULefTRR5k+fToXLlzgm2++4fjx4+zdu7fKQ9aSdCvS0tJISkpCCEFiYiJff/01mZmZTJo0yaTdl19+ybBhw5g4cSI6nY4//viDsWPH8u+//zJ48OCidlOnTuXPP/9k8uTJdOjQgZ07d5o8Xuj48eMMGDAAT09PZs2ahcFg4H//+1/RF/qNeOGFF3B2dubdd98lOjqauXPn8vzzz7Ns2bKiNm+++SYff/wxQ4cOpX///pw8eZL+/fuTm5tb6esLIRg2bBjbt2/n8ccfJzg4mI0bN/Lqq68SFxfHF198gZubG4sXL+b9998nMzOTOXPmAKYxo6T//ve/zJ49m0GDBjFo0CCOHTtGv3790Ol0Ju2qElcef/xxFi5cyMCBA5k2bRp6vZ7du3dz4MABkzuyxc2cOZM5c+Ywbdo02rVrR3p6OkeOHOHYsWP07du33P5PmzaNRYsWMWbMGGbMmMHBgweZM2cOYWFh/P333yZtIyMjGTNmDI8//jhTpkzhl19+YerUqbRu3ZrAwMAyX9/NzY158+bxzDPPMHLkSEaNGgVgMl1Ir9fTv39/unTpwqeffoqNjQ1w4/F18eLFTJkyhf79+/PRRx+RnZ3NvHnz6NKlC8ePHy/6HrqZ76pqTUg1woIFCwQgDh8+XG4bR0dH0bJly6Kfe/fuLZo1ayZyc3OLjhmNRtGpUycREBBQdOy///2vAMRff/1V6jWNRqMQQogLFy4IQCxYsEAIIcS1a9cEID755JMK+929e3fRvXv3op/nzp0rALFkyZKiYzqdTnTs2FHY2dmJ9PR0k/PVqlVLpKSkFLX9559/BCDWrFlT4XmXL18uALF9+/ZSj3l7ewtAbNiwweT4e++9J2xtbUV4eLjJ8TfeeENotVoRGxsrhBBi9+7dAhC//fabSbsNGzaUeVyS7pTCuFDyj6WlpVi4cGGp9tnZ2SY/63Q6ERQUJHr16lV07OjRowIQL774oknbqVOnCkC8++67RceGDh0qbGxsRFxcXNGxiIgIYWZmJkp+/Xh7e4spU6aU6nufPn2K4owQQrz00ktCq9WK1NRUIYQQCQkJwszMTIwYMcLk9WbOnCkAk9csy6pVqwQgZs+ebXJ8zJgxQlEUERkZWXSse/fuIjAwsMLXE0KIxMREYWFhIQYPHmzS97feeqtUn240rmzbtk0AYvr06aXOV/wcJT/HFi1aiMGDB1fY33fffdfk3+PEiRMCENOmTTNp98orrwhAbNu2zeR8gNi1a5fJ+7e0tBQzZsyo8LxXr14t9f9MoSlTpghAvPHGGybHbzS+ZmRkCCcnJ/HEE0+YtEtISBCOjo5Fx2/0u+p+IodiHyB2dnZFq2NTUlLYtm0b48aNIyMjg6SkJJKSkkhOTqZ///5EREQQFxcHwMqVK2nRokXRHbziylsib21tjYWFBTt27KjS8OO6devw8PBgwoQJRcfMzc2ZPn06mZmZ7Ny506T9Qw89hLOzc9HPXbt2BeD8+fM3fM6y+Pr60r9/f5Njy5cvp2vXrjg7Oxd9XklJSfTp0weDwcCuXbuK2jk6OtK3b1+Tdq1bt8bOzo7t27ffUt8kqaq+/fZbNm/ezObNm1myZAk9e/Zk2rRp/PXXXybtis8jvXbtGmlpaXTt2pVjx44VHd+wYQMAzz77rMlzX3jhBZOfDQYDW7ZsYcSIEXh5eRUdb9CgAQMHDrzhvj/55JMmcaZr164YDAZiYmIA2Lp1K3q9vtL+lGfdunVotVqmT59ucnzGjBkIIW5q9fCWLVvQ6XS88M
ILJn1/8cUXS7W90biycuVKFEXh3XffLfUaFZUqcXJy4syZM0RERNxw/9etWwfAyy+/bHJ8xowZAKxdu9bkeNOmTYt
|
||
|
"text/plain": [
|
||
|
"<Figure size 640x480 with 2 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"x_min, x_max = X[:,0].min() - 1, X[:,0].max() + 1\n",
|
||
|
"y_min, y_max = X[:,1].min() - 1, X[:,1].max() + 1\n",
|
||
|
"xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))\n",
|
||
|
"\n",
|
||
|
"plt.subplot(121)\n",
|
||
|
"Z1 = model1.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)\n",
|
||
|
"plt.contourf(xx, yy, Z1, alpha=0.6, cmap=mpl.cm.jet)\n",
|
||
|
"plt.scatter(X[:,0], X[:,1], c=y, alpha=1, s=50, cmap=mpl.cm.jet, edgecolors=\"k\")\n",
|
||
|
"plt.title(\"Decision tree\")\n",
|
||
|
"plt.subplot(122)\n",
|
||
|
"\n",
|
||
|
"Z2 = model2.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)\n",
|
||
|
"plt.contourf(xx, yy, Z2, alpha=0.6, cmap=mpl.cm.jet)\n",
|
||
|
"plt.scatter(X[:,0], X[:,1],c=y,alpha=1,s=50,cmap=mpl.cm.jet,edgecolors=\"k\")\n",
|
||
|
"plt.title(\"Bagging of decision trees\")\n",
|
||
|
"plt.tight_layout()\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### [실습] 랜덤 포리스트 (random forest)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
|
||
|
"from sklearn import datasets\n",
|
||
|
"from sklearn.model_selection import train_test_split\n",
|
||
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
||
|
"from sklearn import metrics"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 8,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Class names : ['setosa' 'versicolor' 'virginica']\n",
|
||
|
"target : [0:setosa, 1:versicolor, 2:virginical]\n",
|
||
|
"No. of Data : 150\n",
|
||
|
"Featrue names : ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']\n",
|
||
|
" sepal length sepal width petal length petal width species\n",
|
||
|
"0 5.1 3.5 1.4 0.2 0\n",
|
||
|
"1 4.9 3.0 1.4 0.2 0\n",
|
||
|
"2 4.7 3.2 1.3 0.2 0\n",
|
||
|
"3 4.6 3.1 1.5 0.2 0\n",
|
||
|
"4 5.0 3.6 1.4 0.2 0\n",
|
||
|
"No. of traing data: 105\n",
|
||
|
"No. of test data: 45\n",
|
||
|
"Accuracy : 0.9333333333333333\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"iris = datasets.load_iris()\n",
|
||
|
"print('Class names :', iris.target_names)\n",
|
||
|
"print('target : [0:setosa, 1:versicolor, 2:virginical]')\n",
|
||
|
"print('No. of Data :', len(iris.data))\n",
|
||
|
"print('Featrue names :', iris.feature_names)\n",
|
||
|
"\n",
|
||
|
"data = pd.DataFrame({\n",
|
||
|
" 'sepal length': iris.data[:,0], 'sepal width': iris.data[:,1], 'petal length': iris.data[:,2],\n",
|
||
|
" 'petal width':iris.data[:,3], 'species':iris.target\n",
|
||
|
"})\n",
|
||
|
"print(data.head()) # 일부 데이터 출력\n",
|
||
|
"\n",
|
||
|
"x = data[['sepal length', 'sepal width', 'petal length', 'petal width']] # 입력\n",
|
||
|
"y = data['species'] # 출력\n",
|
||
|
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3) # 테스트 데이터 30%\n",
|
||
|
"print('No. of traing data: ', len(x_train))\n",
|
||
|
"print('No. of test data:', len(y_test))\n",
|
||
|
"\n",
|
||
|
"forest = RandomForestClassifier(n_estimators=100) # 모델 생성\n",
|
||
|
"forest.fit(x_train, y_train)\n",
|
||
|
"\n",
|
||
|
"y_pred = forest.predict(x_test) # 추론 (예측)\n",
|
||
|
"print('Accuracy :', metrics.accuracy_score(y_test, y_pred))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### [실습] 배깅 회귀 (Bagging Regression)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import numpy as np\n",
|
||
|
"import pandas as pd\n",
|
||
|
"from sklearn.datasets import load_boston # requires scikit-learn < 1.2 (load_boston is removed in 1.2)\n",
|
||
|
"# from sklearn.datasets import fetch_california_housing # replace dataset\n",
|
||
|
"from sklearn.metrics import mean_squared_error\n",
|
||
|
"from sklearn.model_selection import train_test_split\n",
|
||
|
"from sklearn.ensemble import BaggingRegressor\n",
|
||
|
"from sklearn.tree import DecisionTreeRegressor\n",
|
||
|
"import matplotlib.pyplot as plt\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \n",
|
||
|
"0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \\\n",
|
||
|
"1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n",
|
||
|
"2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n",
|
||
|
"3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n",
|
||
|
"4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n",
|
||
|
"\n",
|
||
|
" PTRATIO B LSTAT PRICE \n",
|
||
|
"0 15.3 396.90 4.98 24.0 \n",
|
||
|
"1 17.8 396.90 9.14 21.6 \n",
|
||
|
"2 17.8 392.83 4.03 34.7 \n",
|
||
|
"3 18.7 394.63 2.94 33.4 \n",
|
||
|
"4 18.7 396.90 5.33 36.2 \n",
|
||
|
"RMSE: 4.594919\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"C:\\Users\\pinb\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\sklearn\\utils\\deprecation.py:87: FutureWarning: Function load_boston is deprecated; `load_boston` is deprecated in 1.0 and will be removed in 1.2.\n",
|
||
|
"\n",
|
||
|
" The Boston housing prices dataset has an ethical problem. You can refer to\n",
|
||
|
" the documentation of this function for further details.\n",
|
||
|
"\n",
|
||
|
" The scikit-learn maintainers therefore strongly discourage the use of this\n",
|
||
|
" dataset unless the purpose of the code is to study and educate about\n",
|
||
|
" ethical issues in data science and machine learning.\n",
|
||
|
"\n",
|
||
|
" In this special case, you can fetch the dataset from the original\n",
|
||
|
" source::\n",
|
||
|
"\n",
|
||
|
" import pandas as pd\n",
|
||
|
" import numpy as np\n",
|
||
|
"\n",
|
||
|
" data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n",
|
||
|
" raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n",
|
||
|
" data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n",
|
||
|
" target = raw_df.values[1::2, 2]\n",
|
||
|
"\n",
|
||
|
" Alternative datasets include the California housing dataset (i.e.\n",
|
||
|
" :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing\n",
|
||
|
" dataset. You can load the datasets as follows::\n",
|
||
|
"\n",
|
||
|
" from sklearn.datasets import fetch_california_housing\n",
|
||
|
" housing = fetch_california_housing()\n",
|
||
|
"\n",
|
||
|
" for the California housing dataset and::\n",
|
||
|
"\n",
|
||
|
" from sklearn.datasets import fetch_openml\n",
|
||
|
" housing = fetch_openml(name=\"house_prices\", as_frame=True)\n",
|
||
|
"\n",
|
||
|
" for the Ames housing dataset.\n",
|
||
|
" warnings.warn(msg, category=FutureWarning)\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"boston = load_boston() # requires scikit-learn < 1.2\n",
|
||
|
"data = pd.DataFrame(boston.data)\n",
|
||
|
"data.columns = boston.feature_names\n",
|
||
|
"data['PRICE'] = boston.target\n",
|
||
|
"print(data.head())\n",
|
||
|
"\n",
|
||
|
"# replacement dataset for scikit-learn >= 1.2, where load_boston is no longer available\n",
|
||
|
"# california = fetch_california_housing()\n",
|
||
|
"# data = pd.DataFrame(california.data)\n",
|
||
|
"# data.columns = california.feature_names\n",
|
||
|
"# data['PRICE'] = california.target\n",
|
||
|
"# print(data.head())\n",
|
||
|
"\n",
|
||
|
"X, y = data.iloc[:,:-1],data.iloc[:,-1]\n",
|
||
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)\n",
|
||
|
"bag = BaggingRegressor(base_estimator = DecisionTreeRegressor( ), n_estimators = 10,\n",
|
||
|
"max_features=1.0, bootstrap_features=False, random_state=0)\n",
|
||
|
"bag.fit(X_train,y_train)\n",
|
||
|
"preds = bag.predict(X_test)\n",
|
||
|
"rmse = np.sqrt(mean_squared_error(y_test, preds))\n",
|
||
|
"print(\"RMSE: %f\" % (rmse))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### [실습] AdaBoost - 회귀"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import numpy as np\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"from sklearn.tree import DecisionTreeRegressor\n",
|
||
|
"from sklearn.ensemble import AdaBoostRegressor"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAADAIUlEQVR4nOydd3gU1d6A382mdwIhCSQkCKFD6E0jiaAURTAgCCiCBb2KgGK9VwThu1dFpdguolfxKkWEgF4FFTARRAREAkiTklCTUAIJgdTNfH/MzuxsspsCmy3JeZ9nn5w5c2bO2c3uzG9+VSdJkoRAIBAIBAJBPcTN0QsQCAQCgUAgcBRCEBIIBAKBQFBvEYKQQCAQCASCeosQhAQCgUAgENRbhCAkEAgEAoGg3iIEIYFAIBAIBPUWIQgJBAKBQCCotwhBSCAQCAQCQb1FCEICgUAgEAjqLUIQEgjqCUuWLEGn05GRkeHopQichAkTJhATE+PoZQgEDkUIQgKBi/HBBx+g0+no1auX3eacMGECOp1Ofbm7uxMVFcV9993HgQMH7LYOaxw4cIBZs2ZVW8ibNWuW2fvx8PAgJiaGKVOmcPny5Vpdq0AgcC7cHb0AgUBQM5YuXUpMTAw7duzg6NGjtGzZ0i7zenl58fHHHwNQWlrKsWPHWLRoEd9//z0HDhygSZMmdlmHJQ4cOMCrr75KQkJCjTQc//73v/H39+fq1ats2rSJd999lz/++INffvml9hbrRHz00UeUlZU5ehkCgUMRgpBA4EKkp6fz66+/kpyczGOPPcbSpUuZOXOmXeZ2d3fn/vvvN+vr3bs3d911F9999x2PPvqoXdZhS0aOHEmjRo0AeOyxx7jvvvv48ssv2bFjBz179rTbOsrKyiguLsbb29tucwJ4eHjYdT6BwBkRpjGBwIVYunQpDRo04M4772TkyJEsXbrU4rj9+/dz22234ePjQ2RkJP/3f/9n8cn/66+/5s4776RJkyZ4eXnRokUL5syZg8FgqNZ6wsPDAVlI0nL8+HHuvfdeQkJC8PX1pXfv3nz33XcVjj937hwPP/wwYWFheHt7ExcXx2effVZh3IoVK+jWrRsBAQEEBgbSsWNHFi5cCMi+T/feey8AiYmJqrkrNTW1Wu9BS3x8PADHjh0z69++fTuDBg0iKCgIX19f+vXrx9atWyscn5qaSvfu3fH29qZFixZ8+OGHqhlOi06nY/LkySxdupT27dvj5eXF999/D8CZM2d46KGHCAsLw8vLi/bt2/PJJ59UmOvdd9+lffv2+Pr60qBBA7p3786yZcvU/VeuXGHatGnExMTg5eVF48aNuf322/njjz/UMZZ8hK5evcr06dOJiorCy8uL1q1b89ZbbyFJksX3sHbtWjp06KCuVXkfAoGrIDRCAoELsXTpUpKSkvD09GTMmDH8+9//ZufOnfTo0UMdk5WVRWJiIqWlpbz44ov4+fmxePFifHx8KpxvyZIl+Pv788wzz+Dv789PP/3EK6+8Ql5eHm+++WaF8RcuXADAYDBw/PhxXnjhBRo2bMhdd92ljsnOzqZv375cu3aNKVOm0LBhQz777DPuvvtuVq1axT333ANAQUEBCQkJHD16lMmTJ9O8eXO++uorJkyYwOXLl5k6dSoAGzZsYMyYMfTv35833ngDgIMHD7J161amTp3KrbfeypQpU3jnnXf4+9//Ttu2bQHUvzVB8TFq0KCB2vfTTz8xePBgunXrxsyZM3Fzc+PTTz/ltttuY8uWLarmaPfu3QwaNIiIiAheffVVDAYDs2fPJjQ01OJcP/30EytXrmTy5Mk0atSImJgYsrOz6d27typkhIaGsn79eh5++GHy8vKYNm0aIJu0pkyZwsiRI5k6dSqFhYXs3buX7du3M3bsWAAef/xxVq1axeTJk2nXrh0XL17kl19+4eDBg3Tt2tXimiRJ4u677yYlJYWHH36Yzp0788MPP/Dcc89x5s
wZ5s+fbzb+l19+ITk5mSeeeIKAgADeeecdRowYwcmTJ2nYsGGNP3+BwCFIAoHAJfj9998lQNqwYYMkSZJUVlYmRUZGSlOnTjUbN23aNAmQtm/frvadO3dOCgoKkgApPT1d7b927VqFeR577DHJ19dXKiwsVPsefPBBCajwatq0qbRr1y6L82/ZskXtu3LlitS8eXMpJiZGMhgMkiRJ0oIFCyRA+uKLL9RxxcXFUp8+fSR/f38pLy9PkiRJmjp1qhQYGCiVlpZa/Wy++uorCZBSUlKsjtEyc+ZMCZAOHz4snT9/XsrIyJA++eQTycfHRwoNDZWuXr0qSZL8GcfGxkoDBw6UysrKzD635s2bS7fffrvaN3ToUMnX11c6c+aM2nfkyBHJ3d1dKn+pBSQ3Nzdp//79Zv0PP/ywFBERIV24cMGs/7777pOCgoLU/9ewYcOk9u3bV/oeg4KCpCeffLLSMQ8++KAUHR2tbq9du1YCpP/7v/8zGzdy5EhJp9NJR48eNXsPnp6eZn179uyRAOndd9+tdF6BwJkQpjGBwEVYunQpYWFhJCYmArJpYvTo0axYscLMlLVu3Tp69+5t5uMSGhrKuHHjKpxTqyW6cuUKFy5cID4+nmvXrnHo0CGzsd7e3mzYsIENGzbwww8/8OGHH+Lv78+QIUP466+/zObv2bMnt9xyi9rn7+/PpEmTyMjIUKPM1q1bR3h4OGPGjFHHeXh4MGXKFPLz8/n5558BCA4O5urVq2zYsOG6PrfKaN26NaGhocTExPDQQw/RsmVL1q9fj6+vLwBpaWkcOXKEsWPHcvHiRS5cuMCFCxe4evUq/fv3Z/PmzZSVlWEwGNi4cSPDhw83cxpv2bIlgwcPtjh3v379aNeunbotSRKrV69m6NChSJKkznXhwgUGDhxIbm6uatYKDg7m9OnT7Ny50+p7Cw4OZvv27Zw9e7ban8e6devQ6/VMmTLFrH/69OlIksT69evN+gcMGECLFi3U7U6dOhEYGMjx48erPadA4GiEaUwgcAEMBgMrVqwgMTGR9PR0tb9Xr168/fbbbNq0iTvuuAOAEydOWAytb926dYW+/fv38/LLL/PTTz+Rl5dnti83N9dsW6/XM2DAALO+IUOGEBsby0svvcTq1asrnV8xVZ04cYIOHTpw4sQJYmNjcXNzszoO4IknnmDlypUMHjyYpk2bcscddzBq1CgGDRpk4ZOqGatXryYwMJDz58/zzjvvkJ6ebiYcHjlyBIAHH3zQ6jlyc3MpLCykoKDAYgSftai+5s2bm22fP3+ey5cvs3jxYhYvXmzxmHPnzgHwwgsvsHHjRnr27EnLli254447GDt2LDfffLM6du7cuTz44INERUXRrVs3hgwZwvjx47npppusvpcTJ07QpEkTAgICzPrL/08UmjVrVuEcDRo04NKlS1bnEAicDSEICQQuwE8//URmZiYrVqxgxYoVFfYvXbpUFYSqy+XLl+nXrx+BgYHMnj2bFi1a4O3tzR9//MELL7xQrbDqyMhIWrduzebNm2s0d01o3LgxaWlp/PDDD6xfv57169fz6aefMn78eIuO1TXh1ltvVaPGhg4dSseOHRk3bhy7du3Czc1N/QzefPNNOnfubPEc/v7+FBYW1nju8j5bylz333+/VcGrU6dOgCyYHD58mG+//Zbvv/+e1atX88EHH/DKK6/w6quvAjBq1Cji4+NZs2YNP/74I2+++SZvvPEGycnJVrVUNUWv11vsl8o5VgsEzowQhAQCF2Dp0qU0btyY999/v8K+5ORk1qxZw6JFi/Dx8SE6OlrVZGg5fPiw2XZqaioXL14kOTmZW2+9Ve3XapyqQ2lpKfn5+ep2dHR0hbkA1dQWHR2t/t27dy9lZWVmWqHy4wA8PT0ZOnQoQ4cOpaysjCeeeIIPP/yQGTNm0LJlywpRWdeDv78/M2fOZOLEiaxcuZL77rtPNfsEBgZW0IZpady4Md7e3hw9erTCPkt9lg
gNDSUgIACDwVDpXAp+fn6MHj2a0aNHU1xcTFJSEv/85z956aWX1DD8iIgInnjiCZ544gnOnTtH165d+ec//2lVEIq
|
||
|
"text/plain": [
|
||
|
"<Figure size 640x480 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"rng = np.random.RandomState(1)\n",
|
||
|
"X = np.linspace(0, 6, 100)[:, np.newaxis]\n",
|
||
|
"y = np.sin(X).ravel() + np.sin(6*X).ravel() + rng.normal(0, 0.1, X.shape[0])\n",
|
||
|
"\n",
|
||
|
"regr_1 = DecisionTreeRegressor(max_depth=4)\n",
|
||
|
"regr_2 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4), n_estimators=100, random_state=rng)\n",
|
||
|
"\n",
|
||
|
"regr_1.fit(X, y)\n",
|
||
|
"regr_2.fit(X, y)\n",
|
||
|
"y_1 = regr_1.predict(X)\n",
|
||
|
"y_2 = regr_2.predict(X)\n",
|
||
|
"\n",
|
||
|
"plt.figure()\n",
|
||
|
"plt.scatter(X, y, c=\"k\", label=\"training samples\")\n",
|
||
|
"plt.plot(X, y_1, c=\"g\", label=\"n_estimators=1\", linewidth=2)\n",
|
||
|
"plt.plot(X, y_2, c=\"r\", label=\"n_estimators=100\", linewidth=2)\n",
|
||
|
"plt.xlabel(\"data\")\n",
|
||
|
"plt.ylabel(\"target\")\n",
|
||
|
"plt.title(\"AdaBoost Regression\")\n",
|
||
|
"plt.legend()\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### [실습] Gradient Boosting 기반 회귀"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import numpy as np\n",
|
||
|
"import pandas as pd\n",
|
||
|
"from sklearn import datasets\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"from sklearn.model_selection import train_test_split\n",
|
||
|
"from sklearn.metrics import mean_squared_error\n",
|
||
|
"from sklearn import ensemble\n",
|
||
|
"from sklearn.metrics import mean_squared_error, r2_score\n",
|
||
|
"from sklearn.model_selection import cross_val_predict"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 8,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"(506, 13) (506,)\n",
|
||
|
"['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'\n",
|
||
|
" 'B' 'LSTAT']\n",
|
||
|
" CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \n",
|
||
|
"0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \\\n",
|
||
|
"1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n",
|
||
|
"2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n",
|
||
|
"3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n",
|
||
|
"4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n",
|
||
|
"\n",
|
||
|
" PTRATIO B LSTAT MEDV \n",
|
||
|
"0 15.3 396.90 4.98 24.0 \n",
|
||
|
"1 17.8 396.90 9.14 21.6 \n",
|
||
|
"2 17.8 392.83 4.03 34.7 \n",
|
||
|
"3 18.7 394.63 2.94 33.4 \n",
|
||
|
"4 18.7 396.90 5.33 36.2 \n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"C:\\Users\\pinb\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\sklearn\\utils\\deprecation.py:87: FutureWarning: Function load_boston is deprecated; `load_boston` is deprecated in 1.0 and will be removed in 1.2.\n",
|
||
|
"\n",
|
||
|
" The Boston housing prices dataset has an ethical problem. You can refer to\n",
|
||
|
" the documentation of this function for further details.\n",
|
||
|
"\n",
|
||
|
" The scikit-learn maintainers therefore strongly discourage the use of this\n",
|
||
|
" dataset unless the purpose of the code is to study and educate about\n",
|
||
|
" ethical issues in data science and machine learning.\n",
|
||
|
"\n",
|
||
|
" In this special case, you can fetch the dataset from the original\n",
|
||
|
" source::\n",
|
||
|
"\n",
|
||
|
" import pandas as pd\n",
|
||
|
" import numpy as np\n",
|
||
|
"\n",
|
||
|
" data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n",
|
||
|
" raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n",
|
||
|
" data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n",
|
||
|
" target = raw_df.values[1::2, 2]\n",
|
||
|
"\n",
|
||
|
" Alternative datasets include the California housing dataset (i.e.\n",
|
||
|
" :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing\n",
|
||
|
" dataset. You can load the datasets as follows::\n",
|
||
|
"\n",
|
||
|
" from sklearn.datasets import fetch_california_housing\n",
|
||
|
" housing = fetch_california_housing()\n",
|
||
|
"\n",
|
||
|
" for the California housing dataset and::\n",
|
||
|
"\n",
|
||
|
" from sklearn.datasets import fetch_openml\n",
|
||
|
" housing = fetch_openml(name=\"house_prices\", as_frame=True)\n",
|
||
|
"\n",
|
||
|
" for the Ames housing dataset.\n",
|
||
|
" warnings.warn(msg, category=FutureWarning)\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"boston = datasets.load_boston() # Boston 집값 데이터, 13개 속성, 마지막 중간값 정보\n",
|
||
|
"print(boston.data.shape, boston.target.shape)\n",
|
||
|
"print(boston.feature_names)\n",
|
||
|
"\n",
|
||
|
"data = pd.DataFrame(boston.data, columns=boston.feature_names)\n",
|
||
|
"data = pd.concat([data, pd.Series(boston.target, name='MEDV')], axis=1)\n",
|
||
|
"print(data.head())\n",
|
||
|
"X = data.iloc[:,:-1]\n",
|
||
|
"y = data.iloc[:,-1]\n",
|
||
|
"x_training_set, x_test_set, y_training_set, y_test_set = train_test_split(X, y, test_size=0.10, random_state=42, shuffle=True)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 10,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"C:\\Users\\pinb\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\sklearn\\ensemble\\_gb.py:294: FutureWarning: The loss 'ls' was deprecated in v1.0 and will be removed in version 1.2. Use 'squared_error' which is equivalent.\n",
|
||
|
" warnings.warn(\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"R2 sq: 0.9800347273281852\n",
|
||
|
"Mean squared error: 5.88\n",
|
||
|
"Test Variance score: 0.91\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjIAAAHHCAYAAACle7JuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABlDElEQVR4nO3dd1yVZf8H8M9hI1OGgAooOHDvLYpimisVCFDJnWk4wPEz03Jkae7MWZlaKjFEe8pKy0RxgApiZS5IEWU7GCLrcP/+ME4eOYd5OAM+79eL1/Nw39e5zxdu7Xy87muIBEEQQERERKSBtFRdABEREVF1McgQERGRxmKQISIiIo3FIENEREQai0GGiIiINBaDDBEREWksBhkiIiLSWAwyREREpLEYZIiIiEhjMcgQ1VMikQgrV65UdRnlmjJlCoyNjVVdhtrav38/RCIR7t27Jznm5uYGNzc3ldX0Klk1EikSgwxROe7evYs5c+agVatWaNCgARo0aIC2bdvC398ff/zxh6rLq1Vubm4QiUQVftU0DOXl5WHlypWIiIhQSN3K1KxZM6nfRaNGjeDq6oqjR4+qurQq0eR7QKSj6gKI1NWPP/4IHx8f6OjoYOLEiejUqRO0tLRw8+ZNhIeHY9euXbh79y4cHR1VXWqtWLZsGWbMmCH5/vLly9i2bRvef/99tGnTRnK8Y8eONXqfvLw8rFq1CgDUqiehsjp37oyFCxcCAJKTk7Fnzx54eHhg165dmDVrltLrOXnyZJVfo+n3gOo3BhkiGRISEuDr6wtHR0ecOnUKdnZ2Uuc//fRT7Ny5E1pa5XdqPnv2DEZGRrVZaq157bXXpL43MDDAtm3b8Nprr5X7YafJP3N1NGnSBH5+fpLvJ02ahBYtWmDLli1yg0xxcTFKSkqgp6en8Hpq45pE6oyPlohkWL9+PZ49e4Z9+/aVCTEAoKOjg3nz5sHe3l5yrHQ8R0JCAkaMGAETExNMnDgRwIsP94ULF8Le3h76+vpo3bo1Nm7ciJc3n7937x5EIhH2799f5v1efYSzcuVKiEQixMfHY8qUKTA3N4eZmRmmTp2KvLw8qdcWFBQgMDAQ1tbWMDExwRtvvIEHDx7U8DckXcfff/+NCRMmoGHDhujfvz8A+WM1pkyZgmbNmkl+ZmtrawDAqlWr5D6uevjwIcaOHQtjY2NYW1tj0aJFEIvF5dY2atQoODk5yTzXp08fdO/eXfL9r7/+iv79+8Pc3BzGxsZo3bo13n///Ur+FqTZ2tqiTZs2uHv3ruRnFIlE2LhxI7Zu3QpnZ2fo6+vj77//BgDcvHkTXl5esLCwgIGBAbp3747//e9/Za57/fp1DB48GIaGhmjatCnWrFmDkpKSMu1k/d7z8/OxcuVKtGrVCgYGBrCzs4OHhwcSEhIqdQ8UXSORIrFHhkiGH3/8ES1atECvXr2q9Lri4mIMGzYM/fv3x8aNG9GgQQMIgoA33ngDp0+fxvTp09G5c2ecOHECixcvxsOHD7Fly5Zq1+nt7Y3mzZtj7dq1iI2NxVdffYVGjRrh008/lbSZMWMGDh48iAkTJqBv3774/fffMXLkyGq/pyxvvvkmWrZsiU8++UQqnFXE2toau3btwuzZszFu3Dh4eHgAkH5cJRaLMWzYMPTq1QsbN27Eb7/9hk2bNsHZ2RmzZ8+We20fHx9MmjQJly9fRo8ePSTHExMTERUVhQ0bNgB48eE7atQodOzYEatXr4a+vj7i4+Nx/vz5qv4aAABFRUVISkqCpaWl1PF9+/YhPz8fM2fOhL6+PiwsLHD9+nX069cPTZo0wXvvvQcjIyOEhIRg7NixOHLkCMaNGwcASE1NxaBBg1BcXCxp98UXX8DQ0LDCesRiMUaNGoVTp07B19cX8+fPR05ODn799Vf89ddfGDJkSLn3QBk1EtWIQERSsrKyBADC2LFjy5x78uSJkJGRIfnKy8uTnJs8ebIAQHjvvfekXn
Ps2DEBgLBmzRqp415eXoJIJBLi4+MFQRCEu3fvCgCEffv2lXlfAMKKFSsk369YsUIAIEybNk2q3bhx4wRLS0vJ93FxcQIA4d1335VqN2HChDLXrEhoaKgAQDh9+nSZOsaPH1+m/cCBA4WBAweWOT558mTB0dFR8n1GRobcWkp/p6tXr5Y63qVLF6Fbt27l1puVlSXo6+sLCxculDq+fv16QSQSCYmJiYIgCMKWLVsEAEJGRka515PF0dFRGDp0qOTPw7Vr1wRfX18BgDB37lxBEP67r6ampkJ6errU693d3YUOHToI+fn5kmMlJSVC3759hZYtW0qOBQQECACE6OhoybH09HTBzMxMACDcvXtXcvzV3/vXX38tABA2b95cpv6SkhJBEMq/B7VRI5Ei8dES0Suys7MBQOa0Xzc3N1hbW0u+duzYUabNq70EP/30E7S1tTFv3jyp4wsXLoQgCPj555+rXeurYzBcXV3x6NEjyc/w008/AUCZ9w4ICKj2e1amDkWT9XP+888/5b7G1NQUw4cPR0hIiFQvUXBwMHr37g0HBwcAgLm5OQDg+++/r9ZjkJMnT0r+PHTq1AmhoaF46623pHrFAMDT01PyCAcAHj9+jN9//x3e3t7IyclBZmYmMjMz8ejRIwwbNgx37tzBw4cPAby4j71790bPnj0lr7e2tpY8uizPkSNHYGVlhblz55Y5JxKJyn2tsmokqgkGGaJXmJiYAAByc3PLnNuzZw9+/fVXHDx4UOZrdXR00LRpU6ljiYmJaNy4seS6pUpn/iQmJla71tIP41INGzYEADx58kRybS0tLTg7O0u1a926dbXfU5bmzZsr9HovMzAwkAoAwIufs/RnLI+Pjw+SkpJw8eJFAC8GccfExMDHx0eqTb9+/TBjxgzY2NjA19cXISEhlQ41vXr1wq+//orffvsNFy5cQGZmJr755psyj1Re/R3Fx8dDEAR88MEHUuHY2toaK1asAACkp6cDeHEfW7ZsWea9K3MfExIS0Lp1a+joVH0kgbJqJKoJjpEheoWZmRns7Ozw119/lTlXOmZG3uJe+vr6Fc5kkkfev47LG9Sqra0t87hQhXEqiiBrHIRIJJJZR0WDdF8l72esjNGjR6NBgwYICQlB3759ERISAi0tLbz55puSNoaGhjh79ixOnz6N48eP45dffkFwcDAGDx6MkydPVvj+VlZWGDJkSIW1vPo7Kg1KixYtwrBhw2S+pkWLFhVetzZpQo1EDDJEMowcORJfffUVLl26JNVVXh2Ojo747bffkJOTI9Urc/PmTcl54L/elKdPn0q9viY9No6OjigpKZH8q7zUrVu3qn3NymrYsKHMxz+v/jwVPd6oCSMjI4waNQqhoaHYvHkzgoOD4erqisaNG0u109LSgru7O9zd3bF582Z88sknWLZsGU6fPl2pkFIdpTOqdHV1K3wPR0dH3Llzp8zxytxHZ2dnREdHo6ioCLq6ujLbyLsHyqqRqCb4aIlIhv/7v/9DgwYNMG3aNKSlpZU5X5UejxEjRkAsFmP79u1Sx7ds2QKRSIThw4cDeDGmw8rKCmfPnpVqt3Pnzmr8BC+UXnvbtm1Sx7du3Vrta1aWs7Mzbt68iYyMDMmxa9eulZkN1KBBAwBlA5yi+Pj4IDk5GV999RWuXbsm9VgJeDEO5FWdO3cG8GLqem1p1KgR3NzcsGfPHqSkpJQ5//LvbcSIEYiKisKlS5ekzh86dKjC9/H09ERmZmaZP3/Af3+O5d0DZdVIVBPskSGSoWXLljh8+DDGjx+P1q1bS1b2FQQBd+/exeHDh6GlpVVmPIwso0ePxqBBg7Bs2TLcu3cPnTp1wsmTJ/H9998jICBAavzKjBkzsG7dOsyYMQPdu3fH2bNncfv27Wr/HJ07d8b48eOxc+dOZGVloW/fvjh16hTi4+Orfc3KmjZtGjZv3oxhw4Zh+vTpSE9Px+7du9GuXTvJYGTgxSOXtm3bIjg4GK1atY
KFhQXat2+P9u3bK6SO0jV9Fi1aBG1tbXh6ekqdX716Nc6ePYuRI0fC0dER6enp2LlzJ5o2bSpZE6e27NixA/3790e
|
||
|
"text/plain": [
|
||
|
"<Figure size 640x480 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"params = {'n_estimators':500, 'max_depth':4, 'min_samples_split':2, 'learning_rate':0.01, 'loss':'ls'}\n",
|
||
|
"model = ensemble.GradientBoostingRegressor(**params)\n",
|
||
|
"model.fit(x_training_set, y_training_set)\n",
|
||
|
"model_score = model.score(x_training_set, y_training_set)\n",
|
||
|
"print('R2 sq: ', model_score)\n",
|
||
|
"\n",
|
||
|
"y_predicted = model.predict(x_test_set)\n",
|
||
|
"print('Mean squared error: %.2f'% mean_squared_error(y_test_set, y_predicted))\n",
|
||
|
"print('Test Variance score: %.2f' % r2_score(y_test_set, y_predicted))\n",
|
||
|
"\n",
|
||
|
"fig, ax = plt.subplots()\n",
|
||
|
"ax.scatter(y_test_set, y_predicted, edgecolors=(0,0,0))\n",
|
||
|
"ax.plot([y_test_set.min(), y_test_set.max()], [y_test_set.min(), y_test_set.max()], 'k--', lw=4)\n",
|
||
|
"ax.set_xlabel('Actual')\n",
|
||
|
"ax.set_ylabel('Predicted')\n",
|
||
|
"ax.set_title('Ground Truth vs Predicted')\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### [실습] Gradient Boosting 기반 분류"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 11,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from sklearn.datasets import make_hastie_10_2\n",
|
||
|
"from sklearn.ensemble import GradientBoostingClassifier\n",
|
||
|
"import matplotlib.pyplot as plt"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 14,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"(12000, 10) (12000,)\n",
|
||
|
"[[ 1.76405235 0.40015721 0.97873798 2.2408932 1.86755799 -0.97727788\n",
|
||
|
" 0.95008842 -0.15135721 -0.10321885 0.4105985 ]\n",
|
||
|
" [ 0.14404357 1.45427351 0.76103773 0.12167502 0.44386323 0.33367433\n",
|
||
|
" 1.49407907 -0.20515826 0.3130677 -0.85409574]\n",
|
||
|
" [-2.55298982 0.6536186 0.8644362 -0.74216502 2.26975462 -1.45436567\n",
|
||
|
" 0.04575852 -0.18718385 1.53277921 1.46935877]\n",
|
||
|
" [ 0.15494743 0.37816252 -0.88778575 -1.98079647 -0.34791215 0.15634897\n",
|
||
|
" 1.23029068 1.20237985 -0.38732682 -0.30230275]\n",
|
||
|
" [-1.04855297 -1.42001794 -1.70627019 1.9507754 -0.50965218 -0.4380743\n",
|
||
|
" -1.25279536 0.77749036 -1.61389785 -0.21274028]]\n",
|
||
|
"[ 1. -1. 1. -1. 1.]\n",
|
||
|
"Accuracy score (training): 0.879\n",
|
||
|
"Accuracy score (testing): 0.819\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"X, y = make_hastie_10_2(random_state=0)\n",
|
||
|
"X_train, X_test = X[:2000], X[2000:]\n",
|
||
|
"y_train, y_test = y[:2000], y[2000:]\n",
|
||
|
"print(X.shape, y.shape)\n",
|
||
|
"print(X[0:5,:])\n",
|
||
|
"print(y[0:5])\n",
|
||
|
"\n",
|
||
|
"clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0)\n",
|
||
|
"clf.fit(X_train, y_train)\n",
|
||
|
"print('Accuracy score (training): {0:.3f}'.format(clf.score(X_train, y_train)))\n",
|
||
|
"print('Accuracy score (testing): {0:.3f}'.format(clf.score(X_test, y_test)))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### [실습] XGBoost 기반 회귀"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 15,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import numpy as np\n",
|
||
|
"import pandas as pd\n",
|
||
|
"from sklearn.datasets import load_boston\n",
|
||
|
"from sklearn.metrics import mean_squared_error\n",
|
||
|
"from sklearn.model_selection import train_test_split\n",
|
||
|
"import xgboost as xgb"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 19,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \n",
|
||
|
"0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \\\n",
|
||
|
"1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n",
|
||
|
"2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n",
|
||
|
"3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n",
|
||
|
"4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n",
|
||
|
"\n",
|
||
|
" PTRATIO B LSTAT PRICE \n",
|
||
|
"0 15.3 396.90 4.98 24.0 \n",
|
||
|
"1 17.8 396.90 9.14 21.6 \n",
|
||
|
"2 17.8 392.83 4.03 34.7 \n",
|
||
|
"3 18.7 394.63 2.94 33.4 \n",
|
||
|
"4 18.7 396.90 5.33 36.2 \n",
|
||
|
"RMSE: 10.423243\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"C:\\Users\\pinb\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\sklearn\\utils\\deprecation.py:87: FutureWarning: Function load_boston is deprecated; `load_boston` is deprecated in 1.0 and will be removed in 1.2.\n",
|
||
|
"\n",
|
||
|
" The Boston housing prices dataset has an ethical problem. You can refer to\n",
|
||
|
" the documentation of this function for further details.\n",
|
||
|
"\n",
|
||
|
" The scikit-learn maintainers therefore strongly discourage the use of this\n",
|
||
|
" dataset unless the purpose of the code is to study and educate about\n",
|
||
|
" ethical issues in data science and machine learning.\n",
|
||
|
"\n",
|
||
|
" In this special case, you can fetch the dataset from the original\n",
|
||
|
" source::\n",
|
||
|
"\n",
|
||
|
" import pandas as pd\n",
|
||
|
" import numpy as np\n",
|
||
|
"\n",
|
||
|
" data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n",
|
||
|
" raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n",
|
||
|
" data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n",
|
||
|
" target = raw_df.values[1::2, 2]\n",
|
||
|
"\n",
|
||
|
" Alternative datasets include the California housing dataset (i.e.\n",
|
||
|
" :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing\n",
|
||
|
" dataset. You can load the datasets as follows::\n",
|
||
|
"\n",
|
||
|
" from sklearn.datasets import fetch_california_housing\n",
|
||
|
" housing = fetch_california_housing()\n",
|
||
|
"\n",
|
||
|
" for the California housing dataset and::\n",
|
||
|
"\n",
|
||
|
" from sklearn.datasets import fetch_openml\n",
|
||
|
" housing = fetch_openml(name=\"house_prices\", as_frame=True)\n",
|
||
|
"\n",
|
||
|
" for the Ames housing dataset.\n",
|
||
|
" warnings.warn(msg, category=FutureWarning)\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"boston = load_boston()\n",
|
||
|
"data = pd.DataFrame(boston.data)\n",
|
||
|
"data.columns = boston.feature_names\n",
|
||
|
"data['PRICE'] = boston.target\n",
|
||
|
"print(data.head())\n",
|
||
|
"X, y = data.iloc[:,:-1], data.iloc[:,-1]\n",
|
||
|
"\n",
|
||
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)\n",
|
||
|
"xg_reg = xgb.XGBRegressor(objective='reg:squarederror', colsample_bytree=0.3, learning_rate=0.1, max_depth=5, alpha=10, n_estimators=10)\n",
|
||
|
"xg_reg.fit(X_train, y_train)\n",
|
||
|
"preds = xg_reg.predict(X_test)\n",
|
||
|
"rmse = np.sqrt(mean_squared_error(y_test, preds))\n",
|
||
|
"print('RMSE: %f' % (rmse))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### [실습] LightGBM"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from lightgbm import LGBMClassifier, LGBMRegressor\n",
|
||
|
"from lightgbm import plot_importance, plot_metric, plot_tree\n",
|
||
|
"from sklearn.datasets import load_iris\n",
|
||
|
"from sklearn.model_selection import train_test_split\n",
|
||
|
"from sklearn.model_selection import cross_validate"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"C:\\Users\\pinb\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
|
||
|
" _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
|
||
|
"C:\\Users\\pinb\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
|
||
|
" _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"[1]\tvalid_0's multi_logloss: 0.95847\n",
|
||
|
"[2]\tvalid_0's multi_logloss: 0.832184\n",
|
||
|
"[3]\tvalid_0's multi_logloss: 0.731164\n",
|
||
|
"[4]\tvalid_0's multi_logloss: 0.641056\n",
|
||
|
"[5]\tvalid_0's multi_logloss: 0.571726\n",
|
||
|
"[6]\tvalid_0's multi_logloss: 0.507286\n",
|
||
|
"[7]\tvalid_0's multi_logloss: 0.454933\n",
|
||
|
"[8]\tvalid_0's multi_logloss: 0.410205\n",
|
||
|
"[9]\tvalid_0's multi_logloss: 0.372194\n",
|
||
|
"[10]\tvalid_0's multi_logloss: 0.333919\n",
|
||
|
"[11]\tvalid_0's multi_logloss: 0.310212\n",
|
||
|
"[12]\tvalid_0's multi_logloss: 0.282326\n",
|
||
|
"[13]\tvalid_0's multi_logloss: 0.257165\n",
|
||
|
"[14]\tvalid_0's multi_logloss: 0.240836\n",
|
||
|
"[15]\tvalid_0's multi_logloss: 0.225383\n",
|
||
|
"[16]\tvalid_0's multi_logloss: 0.211583\n",
|
||
|
"[17]\tvalid_0's multi_logloss: 0.199289\n",
|
||
|
"[18]\tvalid_0's multi_logloss: 0.186269\n",
|
||
|
"[19]\tvalid_0's multi_logloss: 0.171556\n",
|
||
|
"[20]\tvalid_0's multi_logloss: 0.168245\n",
|
||
|
"[21]\tvalid_0's multi_logloss: 0.161065\n",
|
||
|
"[22]\tvalid_0's multi_logloss: 0.151371\n",
|
||
|
"[23]\tvalid_0's multi_logloss: 0.148081\n",
|
||
|
"[24]\tvalid_0's multi_logloss: 0.143843\n",
|
||
|
"[25]\tvalid_0's multi_logloss: 0.140169\n",
|
||
|
"[26]\tvalid_0's multi_logloss: 0.138303\n",
|
||
|
"[27]\tvalid_0's multi_logloss: 0.134058\n",
|
||
|
"[28]\tvalid_0's multi_logloss: 0.130884\n",
|
||
|
"[29]\tvalid_0's multi_logloss: 0.128082\n",
|
||
|
"[30]\tvalid_0's multi_logloss: 0.124975\n",
|
||
|
"[31]\tvalid_0's multi_logloss: 0.122225\n",
|
||
|
"[32]\tvalid_0's multi_logloss: 0.120298\n",
|
||
|
"[33]\tvalid_0's multi_logloss: 0.117257\n",
|
||
|
"[34]\tvalid_0's multi_logloss: 0.115021\n",
|
||
|
"[35]\tvalid_0's multi_logloss: 0.115037\n",
|
||
|
"[36]\tvalid_0's multi_logloss: 0.115831\n",
|
||
|
"[37]\tvalid_0's multi_logloss: 0.113318\n",
|
||
|
"[38]\tvalid_0's multi_logloss: 0.115651\n",
|
||
|
"[39]\tvalid_0's multi_logloss: 0.115772\n",
|
||
|
"[40]\tvalid_0's multi_logloss: 0.114953\n",
|
||
|
"[41]\tvalid_0's multi_logloss: 0.117056\n",
|
||
|
"[42]\tvalid_0's multi_logloss: 0.115412\n",
|
||
|
"[43]\tvalid_0's multi_logloss: 0.118359\n",
|
||
|
"[44]\tvalid_0's multi_logloss: 0.117129\n",
|
||
|
"[45]\tvalid_0's multi_logloss: 0.119174\n",
|
||
|
"[46]\tvalid_0's multi_logloss: 0.117789\n",
|
||
|
"[47]\tvalid_0's multi_logloss: 0.121333\n",
|
||
|
"[48]\tvalid_0's multi_logloss: 0.120375\n",
|
||
|
"[49]\tvalid_0's multi_logloss: 0.124128\n",
|
||
|
"[50]\tvalid_0's multi_logloss: 0.123394\n",
|
||
|
"[51]\tvalid_0's multi_logloss: 0.126631\n",
|
||
|
"[52]\tvalid_0's multi_logloss: 0.129833\n",
|
||
|
"[53]\tvalid_0's multi_logloss: 0.129069\n",
|
||
|
"[54]\tvalid_0's multi_logloss: 0.135166\n",
|
||
|
"[55]\tvalid_0's multi_logloss: 0.134996\n",
|
||
|
"[56]\tvalid_0's multi_logloss: 0.13912\n",
|
||
|
"[57]\tvalid_0's multi_logloss: 0.138818\n",
|
||
|
"[58]\tvalid_0's multi_logloss: 0.142758\n",
|
||
|
"[59]\tvalid_0's multi_logloss: 0.142228\n",
|
||
|
"[60]\tvalid_0's multi_logloss: 0.142928\n",
|
||
|
"[61]\tvalid_0's multi_logloss: 0.142513\n",
|
||
|
"[62]\tvalid_0's multi_logloss: 0.143485\n",
|
||
|
"[63]\tvalid_0's multi_logloss: 0.143408\n",
|
||
|
"[64]\tvalid_0's multi_logloss: 0.148199\n",
|
||
|
"[65]\tvalid_0's multi_logloss: 0.148074\n",
|
||
|
"[66]\tvalid_0's multi_logloss: 0.156199\n",
|
||
|
"[67]\tvalid_0's multi_logloss: 0.15898\n",
|
||
|
"[68]\tvalid_0's multi_logloss: 0.157612\n",
|
||
|
"[69]\tvalid_0's multi_logloss: 0.162526\n",
|
||
|
"[70]\tvalid_0's multi_logloss: 0.166269\n",
|
||
|
"[71]\tvalid_0's multi_logloss: 0.168114\n",
|
||
|
"[72]\tvalid_0's multi_logloss: 0.173203\n",
|
||
|
"[73]\tvalid_0's multi_logloss: 0.181871\n",
|
||
|
"[74]\tvalid_0's multi_logloss: 0.181307\n",
|
||
|
"[75]\tvalid_0's multi_logloss: 0.186251\n",
|
||
|
"[76]\tvalid_0's multi_logloss: 0.185765\n",
|
||
|
"[77]\tvalid_0's multi_logloss: 0.190847\n",
|
||
|
"[78]\tvalid_0's multi_logloss: 0.190228\n",
|
||
|
"[79]\tvalid_0's multi_logloss: 0.195371\n",
|
||
|
"[80]\tvalid_0's multi_logloss: 0.199459\n",
|
||
|
"[81]\tvalid_0's multi_logloss: 0.198517\n",
|
||
|
"[82]\tvalid_0's multi_logloss: 0.203972\n",
|
||
|
"[83]\tvalid_0's multi_logloss: 0.213262\n",
|
||
|
"[84]\tvalid_0's multi_logloss: 0.212185\n",
|
||
|
"[85]\tvalid_0's multi_logloss: 0.217603\n",
|
||
|
"[86]\tvalid_0's multi_logloss: 0.227068\n",
|
||
|
"[87]\tvalid_0's multi_logloss: 0.225914\n",
|
||
|
"[88]\tvalid_0's multi_logloss: 0.230099\n",
|
||
|
"[89]\tvalid_0's multi_logloss: 0.229018\n",
|
||
|
"[90]\tvalid_0's multi_logloss: 0.23464\n",
|
||
|
"[91]\tvalid_0's multi_logloss: 0.24434\n",
|
||
|
"[92]\tvalid_0's multi_logloss: 0.243782\n",
|
||
|
"[93]\tvalid_0's multi_logloss: 0.24814\n",
|
||
|
"[94]\tvalid_0's multi_logloss: 0.25793\n",
|
||
|
"[95]\tvalid_0's multi_logloss: 0.257366\n",
|
||
|
"[96]\tvalid_0's multi_logloss: 0.261762\n",
|
||
|
"[97]\tvalid_0's multi_logloss: 0.260774\n",
|
||
|
"[98]\tvalid_0's multi_logloss: 0.270632\n",
|
||
|
"[99]\tvalid_0's multi_logloss: 0.269316\n",
|
||
|
"[100]\tvalid_0's multi_logloss: 0.269535\n",
|
||
|
"[101]\tvalid_0's multi_logloss: 0.279374\n",
|
||
|
"[102]\tvalid_0's multi_logloss: 0.278105\n",
|
||
|
"[103]\tvalid_0's multi_logloss: 0.279826\n",
|
||
|
"[104]\tvalid_0's multi_logloss: 0.282811\n",
|
||
|
"[105]\tvalid_0's multi_logloss: 0.29269\n",
|
||
|
"[106]\tvalid_0's multi_logloss: 0.297696\n",
|
||
|
"[107]\tvalid_0's multi_logloss: 0.297028\n",
|
||
|
"[108]\tvalid_0's multi_logloss: 0.29694\n",
|
||
|
"[109]\tvalid_0's multi_logloss: 0.30682\n",
|
||
|
"[110]\tvalid_0's multi_logloss: 0.306206\n",
|
||
|
"[111]\tvalid_0's multi_logloss: 0.303895\n",
|
||
|
"[112]\tvalid_0's multi_logloss: 0.300907\n",
|
||
|
"[113]\tvalid_0's multi_logloss: 0.304274\n",
|
||
|
"[114]\tvalid_0's multi_logloss: 0.314218\n",
|
||
|
"[115]\tvalid_0's multi_logloss: 0.312988\n",
|
||
|
"[116]\tvalid_0's multi_logloss: 0.317589\n",
|
||
|
"[117]\tvalid_0's multi_logloss: 0.323073\n",
|
||
|
"[118]\tvalid_0's multi_logloss: 0.333026\n",
|
||
|
"[119]\tvalid_0's multi_logloss: 0.332652\n",
|
||
|
"[120]\tvalid_0's multi_logloss: 0.337212\n",
|
||
|
"[121]\tvalid_0's multi_logloss: 0.334481\n",
|
||
|
"[122]\tvalid_0's multi_logloss: 0.340022\n",
|
||
|
"[123]\tvalid_0's multi_logloss: 0.350061\n",
|
||
|
"[124]\tvalid_0's multi_logloss: 0.351676\n",
|
||
|
"[125]\tvalid_0's multi_logloss: 0.348515\n",
|
||
|
"[126]\tvalid_0's multi_logloss: 0.358595\n",
|
||
|
"[127]\tvalid_0's multi_logloss: 0.356737\n",
|
||
|
"[128]\tvalid_0's multi_logloss: 0.351512\n",
|
||
|
"[129]\tvalid_0's multi_logloss: 0.361591\n",
|
||
|
"[130]\tvalid_0's multi_logloss: 0.35978\n",
|
||
|
"[131]\tvalid_0's multi_logloss: 0.357317\n",
|
||
|
"[132]\tvalid_0's multi_logloss: 0.367439\n",
|
||
|
"[133]\tvalid_0's multi_logloss: 0.365665\n",
|
||
|
"[134]\tvalid_0's multi_logloss: 0.365745\n",
|
||
|
"[135]\tvalid_0's multi_logloss: 0.375832\n",
|
||
|
"[136]\tvalid_0's multi_logloss: 0.374115\n",
|
||
|
"[137]\tvalid_0's multi_logloss: 0.376748\n",
|
||
|
"avg fit time: 0.5514350891113281 (+/- 0.3701610138582717)\n",
|
||
|
"avg fit time: 0.010002517700195312 (+/- 0.009552237668971902)\n",
|
||
|
"avg fit time: 0.9600000000000002 (+/- 0.04898979485566355)\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<Axes: >"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 2,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHHCAYAAABDUnkqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABiWUlEQVR4nO3dd3hUZdoG8Hv6JJn03kgCoRMgJJSAUjSAiChWBKSpKAK7Yj7WFUGaK6iriAVBXRULKFYsNGNoIkhNqKGmAel10jPlfH+EjISEZBKSnMzM/buuXLtz5pwzzzwg3Lzve86RCIIggIiIiMhKSMUugIiIiKglMdwQERGRVWG4ISIiIqvCcENERERWheGGiIiIrArDDREREVkVhhsiIiKyKgw3REREZFUYboiIiMiqMNwQEdavXw+JRIKUlJRW+4ylS5dCIpFYzHnFlpKSAolEgvXr1zfreIlEgqVLl7ZoTUSWguGGqA3VhAiJRIJ9+/bVeV8QBAQGBkIikeCee+5p1me8//77zf4LkZpm48aNWL16tdhlENENGG6IRKBWq7Fx48Y62/fs2YMrV65ApVI1+9zNCTdTpkxBeXk5goKCmv25Ylm0aBHKy8tF+ezWDDdBQUEoLy/HlClTmnV8eXk5Fi1a1MJVEVkGhhsiEdx999349ttvodfra23fuHEjIiIi4OPj0yZ1lJaWAgBkMhnUarVFTe/U1C6Xy6FWq0WupnEVFRUwGo1m7y+RSKBWqyGTyZr1eWq1GnK5vFnHElk6hhsiEUycOBF5eXmIjY01bauqqsJ3332HSZMm1XuM0WjE6tWr0bNnT6jVanh7e+Ppp59GQUGBaZ/g4GCcPn0ae/bsMU1/DR8+HMDfU2J79uzB7Nmz4eXlhYCAgFrv3bjmZtu2bRg2bBgcHR3h5OSE/v371zvidKN9+/ahf//+UKvV6NSpEz744IM6+zS0puTG9SI162rOnDmDSZMmwdXVFbfddlut9248fu7cudi8eTN69eoFlUqFnj17Yvv27XU+a/fu3YiMjKxVqznreIYPH44tW7YgNTXV1Ovg4GDTOSUSCb7++mssWrQI/v7+sLe3h1arRX5+PubPn4+wsDBoNBo4OTlhzJgxOH78eKP9mT59OjQaDa5evYrx48dDo9HA09MT8+fPh8FgMKuHFy9exPTp0+Hi4gJnZ2fMmDEDZWVltY4tLy/HP//5T3h4eMDR0RH33nsvrl69ynU8ZDEY64lEEBwcjKioKHz11VcYM2YMgOogUVRUhEcffRTvvPNOnWOefvpprF+/HjNmzMA///lPJCcn47333kN8fDz+/PNPKBQKrF69Gv/4xz+g0WiwcOFCAIC3t3et88yePRuenp5YvHixafSjPuvXr8fjjz+Onj17YsGCBXBxcUF8fDy2b99+0wAGACdPnsSoUaPg6emJpUuXQq/XY8mSJXXqaI6HH34YnTt3xooVKyAIQoP77tu3Dz/88ANmz54NR0dHvPPOO3jwwQeRlpYGd3d3AEB8fDzuuusu+Pr6YtmyZTAYDFi+fDk8PT0brWXhwoUoKirClStX8NZbbwEANBpNrX1efvllKJVKzJ8/H5WVlVAqlThz5gw2b96Mhx9+GCEhIcjKysIHH3yAYcOG4cyZM/Dz82vwcw0GA0aPHo2BAwfijTfewO+//44333wTnTp1wjPPPNNo3Y888ghCQkKwcuVKHDt2DP/73//g5eWF1157zbTP9OnT8c0332DKlCkYNGgQ9uzZg7FjxzZ6bqJ2QyCiNvPpp58KAITDhw8L7733nuDo6CiUlZUJgiAIDz/8sDBixAhBEAQhKChIGDt2rOm4P/74QwAgbNiwodb5tm/fXmd7z549hWHDht30s2+77TZBr9fX+15ycrIgCIJQWFgoODo6CgMHDhTKy8tr7Ws0Ghv8juPHjxfUarWQmppq2nbmzBlBJpMJ1/+Rk5ycLAAQPv300zrnACAsWbLE9H
rJkiUCAGHixIl19q1578bjlUqlcPHiRdO248ePCwCEd99917Rt3Lhxgr29vXD16lXTtgsXLghyubzOOeszduxYISgoqM72Xbt2CQCEjh07mn59a1RUVAgGg6HWtuTkZEGlUgnLly+vte3G/kybNk0AUGs/QRCE8PBwISIiok4P6uvh448/Xmu/+++/X3B3dze9Pnr0qABAmDdvXq39pk+fXuecRO0Vp6WIRPLII4+gvLwcv/76K4qLi/Hrr7/edETk22+/hbOzM0aOHInc3FzTT0REBDQaDXbt2mX2586cObPRdRyxsbEoLi7GCy+8UGc9S0PTNQaDATt27MD48ePRoUMH0/bu3btj9OjRZtd4M7NmzTJ73+joaHTq1Mn0unfv3nByckJSUpKp1t9//x3jx4+vNVoSGhpqGk27VdOmTYOdnV2tbSqVClKp1FRDXl4eNBoNunbtimPHjpl13hv7cPvtt5u+V3OOzcvLg1arBQDT1N3s2bNr7fePf/zDrPMTtQecliISiaenJ6Kjo7Fx40aUlZXBYDDgoYceqnffCxcuoKioCF5eXvW+n52dbfbnhoSENLrPpUuXAAC9evUy+7wAkJOTg/LycnTu3LnOe127dsXWrVubdL4bmVN7jevDVQ1XV1fTGqXs7GyUl5cjNDS0zn71bWuO+uo1Go14++238f777yM5ObnWWpma6bKGqNXqOtNm13+vxtzYF1dXVwBAQUEBnJyckJqaCqlUWqf2luoJUVtguCES0aRJkzBz5kxkZmZizJgxcHFxqXc/o9EILy8vbNiwod73zVkjUuPGkQSx3GwE6MaFsddrSu03G50SGlmr05Lqq3fFihV46aWX8Pjjj+Pll1+Gm5sbpFIp5s2bZ9bVVM29eqqx49uyL0StjeGGSET3338/nn76afz111/YtGnTTffr1KkTfv/9dwwZMqTRv+Bb4nLumumcU6dONelf7J6enrCzs8OFCxfqvHfu3Llar2tGDAoLC2ttT01NbWK1zePl5QW1Wo2LFy/Wea++bfVpTq+/++47jBgxAh9//HGt7YWFhfDw8Gjy+VpaUFAQjEYjkpOTa43AmdsTovaAa26IRKTRaLB27VosXboU48aNu+l+jzzyCAwGA15++eU67+n1+loBwcHBoU5gaKpRo0bB0dERK1euREVFRa33GvoXvkwmw+jRo7F582akpaWZticmJmLHjh219nVycoKHhwf27t1ba/v7779/S7WbSyaTITo6Gps3b0Z6erpp+8WLF7Ft2zazzuHg4ICioqImf+6NPfz2229x9erVJp2ntdSsjbrx1+Hdd98VoxyiZuHIDZHIpk2b1ug+w4YNw9NPP42VK1ciISEBo0aNgkKhwIULF/Dtt9/i7bffNq3XiYiIwNq1a/Gf//wHoaGh8PLywh133NGkmpycnPDWW2/hySefRP/+/U33ljl+/DjKysrw2Wef3fTYZcuWYfv27bj99tsxe/Zs6PV6vPvuu+jZsydOnDhRa98nn3wSr776Kp588klERkZi7969OH/+fJNqvRVLly7Fb7/9hiFDhuCZZ56BwWDAe++9h169eiEhIaHR4yMiIrBp0ybExMSgf//+0Gg0DYZUALjnnnuwfPlyzJgxA4MHD8bJkyexYcMGdOzYsYW+1a2JiIjAgw8+iNWrVyMvL890KXjNr4sl3eiRbBfDDZGFWLduHSIiIvDBBx/gxRdfhFwuR3BwMB577DEMGTLEtN/ixYuRmpqK119/HcXFxRg2bFiTww0APPHEE/Dy8sKrr76Kl19+GQqFAt26dcNzzz3X4HG9e/fGjh07EBMTg8WLFyMgIADLli1DRkZGnXCzePFi5OTk4LvvvsM333yDMWPGYNu2bTddON3SIiIisG3bNsyfPx8vvfQSAgMDsXz5ciQmJuLs2bONHj979mwkJCTg008/xVtvvYWgoKBGw82LL76I0tJSbNy4EZs2bUK/fv2wZcsWvPDCCy31tW7Z55
9/Dh8fH3z11Vf48ccfER0djU2bNqFr164WcTdoIonAVWRERLWMHz8ep0+frnftkK1KSEhAeHg4vvzyS0yePFnscog
|
||
|
"text/plain": [
|
||
|
"<Figure size 640x480 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA30AAAPxCAYAAABO+MQaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABeq0lEQVR4nO3de5yWc/748fc9NU0qmUpJNp1kKxWtylZsDlFytsuyUcTuIpbCD19byinEZpfUd9lvfR3XHizWIZLz+Zh1SCuKlsgiUStTc/3+8Oj+GhXTNDO3Pj2fj8c8dF/Xdd/3537PPDSvrvuQy7IsCwAAAJJUVOgFAAAAUHNEHwAAQMJEHwAAQMJEHwAAQMJEHwAAQMJEHwAAQMJEHwAAQMJEHwAAQMJEHwAAQMJEHwAU2LRp0yKXy8X8+fMLvRQAEiT6AKh1qyJnTV9nnnlmjdzn448/HmPHjo3FixfXyO1vzJYtWxZjx46NBx98sNBLAWAN6hZ6AQBsvM4999xo165dhW1du3atkft6/PHHY9y4cXHUUUdFaWlpjdxHVR155JFx2GGHRUlJSaGXUiXLli2LcePGRUTErrvuWtjFALAa0QdAwey9997Rs2fPQi9jvSxdujQaNmy4XrdRp06dqFOnTjWtqPaUl5fHF198UehlAPAtPL0TgO+su+++O3bZZZdo2LBhbLrpprHPPvvEK6+8UuGYf/zjH3HUUUdF+/bto379+tGyZcsYPnx4fPjhh/ljxo4dG6effnpERLRr1y7/VNL58+fH/PnzI5fLxbRp01a7/1wuF2PHjq1wO7lcLl599dX42c9+Fk2aNImdd945v//666+PHXfcMTbZZJNo2rRpHHbYYbFgwYJvfZxrek1f27ZtY999940HH3wwevbsGZtsskl069Yt/xTKW265Jbp16xb169ePHXfcMV544YUKt3nUUUdFo0aN4s0334yBAwdGw4YNo1WrVnHuuedGlmUVjl26dGmceuqp0bp16ygpKYnvf//7cemll652XC6XixNPPDFuuOGG2G677aKkpCSmTJkSzZs3j4iIcePG5We7am6V+f58dbZz587Nn43dbLPN4uijj45ly5atNrPrr78+evfuHQ0aNIgmTZrEj370o7j33nsrHFOZnx+AjYEzfQAUzCeffBL//ve/K2zbfPPNIyLiuuuui2HDhsXAgQPj4osvjmXLlsXkyZNj5513jhdeeCHatm0bEREzZsyIN998M44++uho2bJlvPLKK/H73/8+XnnllXjyyScjl8vFwQcfHP/85z/jpptuiokTJ+bvo3nz5vHBBx+s87oPOeSQ6NixY1x44YX5MLrgggti9OjRceihh8axxx4bH3zwQVxxxRXxox/9KF544YUqPaV07ty58bOf/Sx++ctfxhFHHBGXXnpp7LfffjFlypT4r//6rzjhhBMiImL8+PFx6KGHxpw5c6Ko6P/+PXflypUxaNCg+OEPfxiXXHJJTJ8+Pc4555xYsWJFnHvuuRERkWVZ7L///vHAAw/EMcccEzvssEPcc889cfrpp8c777wTEydOrLCm+++/P/70pz/FiSeeGJtvvnlsv/32MXny5Dj++OPjoIMOioMPPjgiIrp37x4Rlfv+fNWhhx4a7dq1i/Hjx8fzzz8f11xzTbRo0SIuvvji/DHjxo2LsWPHRt++fePcc8+NevXqxVNPPRX3339/7LXXXhFR+Z8fgI1CBgC1bOrUqVlErPEry7Ls008/zUpLS7Of//znFa733nvvZZtttlmF7cuWLVvt9m+66aYsIrKHH344v23ChAlZRGTz5s2rcOy8efOyiMimTp262u1ERHbOOefkL59zzjlZRGSHH354hePmz5+f1alTJ7vgggsqbH/ppZeyunXrrrZ9bfP46tratGmTRUT2+OOP57fdc889WURkm2yySfbWW2/lt//3f/93FhHZAw88kN82bNiwLCKyk046Kb+tvLw822
effbJ69eplH3zwQZZlWXbrrbdmEZGdf/75Fdb0k5/8JMvlctncuXMrzKOoqCh75ZVXKhz7wQcfrDarVSr7/Vk12+HDh1c49qCDDsqaNWuWv/z6669nRUVF2UEHHZStXLmywrHl5eVZlq3bzw/AxsDTOwEomEmTJsWMGTMqfEV8eXZo8eLFcfjhh8e///3v/FedOnVip512igceeCB/G5tsskn+z59//nn8+9//jh/+8IcREfH888/XyLqPO+64CpdvueWWKC8vj0MPPbTCelu2bBkdO3assN510aVLl+jTp0/+8k477RQREbvvvntsvfXWq21/8803V7uNE088Mf/nVU/P/OKLL+K+++6LiIi77ror6tSpE7/61a8qXO/UU0+NLMvi7rvvrrC9f//+0aVLl0o/hnX9/nx9trvsskt8+OGHsWTJkoiIuPXWW6O8vDzGjBlT4azmqscXsW4/PwAbA0/vBKBgevfuvcY3cnn99dcj4su4WZPGjRvn//zRRx/FuHHj4o9//GMsWrSownGffPJJNa72/3z9HUdff/31yLIsOnbsuMbji4uLq3Q/Xw27iIjNNtssIiJat269xu0ff/xxhe1FRUXRvn37Ctu23XbbiIj86wffeuutaNWqVWy66aYVjuvcuXN+/1d9/bF/m3X9/nz9MTdp0iQivnxsjRs3jjfeeCOKioq+MTzX5ecHYGMg+gD4zikvL4+IL1+X1bJly9X21637f399HXroofH444/H6aefHjvssEM0atQoysvLY9CgQfnb+SZff03ZKitXrlzrdb569mrVenO5XNx9991rfBfORo0afes61mRt7+i5tu3Z1954pSZ8/bF/m3X9/lTHY1uXnx+AjYH/6wHwndOhQ4eIiGjRokUMGDBgrcd9/PHHMXPmzBg3blyMGTMmv33VmZ6vWlvcrTqT9PUPbf/6Ga5vW2+WZdGuXbv8mbTvgvLy8njzzTcrrOmf//xnRET+jUzatGkT9913X3z66acVzva99tpr+f3fZm2zXZfvT2V16NAhysvL49VXX40ddthhrcdEfPvPD8DGwmv6APjOGThwYDRu3DguvPDCKCsrW23/qnfcXHVW6OtngS6//PLVrrPqs/S+HneNGzeOzTffPB5++OEK26+66qpKr/fggw+OOnXqxLhx41ZbS5Zlq308QW268sorK6zlyiuvjOLi4thjjz0iImLw4MGxcuXKCsdFREycODFyuVzsvffe33ofDRo0iIjVZ7su35/KOvDAA6OoqCjOPffc1c4Urrqfyv78AGwsnOkD4DuncePGMXny5DjyyCPjBz/4QRx22GHRvHnzePvtt+POO++Mfv36xZVXXhmNGzeOH/3oR3HJJZdEWVlZbLXVVnHvvffGvHnzVrvNHXfcMSIizj777DjssMOiuLg49ttvv2jYsGEce+yxcdFFF8Wxxx4bPXv2jIcffjh/RqwyOnToEOeff36cddZZMX/+/DjwwANj0003jXnz5sXf/va3+MUvfhGnnXZatc2nsurXrx/Tp0+PYcOGxU477RR333133HnnnfFf//Vf+c/W22+//WK33XaLs88+O+bPnx/bb7993HvvvXHbbbfFKaeckj9r9k022WST6NKlS9x8882x7bbbRtOmTaNr167RtWvXSn9/KmubbbaJs88+O84777zYZZdd4uCDD46SkpJ45plnolWrVjF+/PhK//wAbCxEHwDfST/72c+iVatWcdFFF8WECRNi+fLlsdVWW8Uuu+wSRx99dP64G2+8MU466aSYNGlSZFkWe+21V9x9993RqlWrCrfXq1evOO+882LKlCkxffr0KC8vj3nz5kXDhg1jzJgx8cEHH8Rf/vKX+NOf/hR777133H333dGiRYtKr/fMM8+MbbfdNiZOnBjjxo2LiC/fcGWvvfaK/fffv3qGso7q1KkT06dPj+OPPz5OP/302HTTTeOcc86p8FTLoqKiuP3222PMmDFx8803x9SpU6
Nt27YxYcKEOPXUUyt9X9dcc02cdNJJMXLkyPjiiy/inHPOia5du1b6+7Muzj333GjXrl1cccUVcfbZZ0eDBg2ie/f
|
||
|
"text/plain": [
|
||
|
"<Figure size 1000x1200 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAACI4AAAH9CAYAAAB2/6WWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAACFPklEQVR4nOzde7xVdZ0//s/hIje5eAMBBQID73gJBUtR0FFLNC1uWjoVMDjlpAOFF7AScmrC6jtdIDDLaATBMmHSaoBJS0UDFS8ZJCioIIgKKoginN8f85tP67M6e7M5nCv7+fzr9X68917rszd777Mvi/WuqKysrAwAAAAAAAAAAJSdJvW9AAAAAAAAAAAA6ocDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAypQDRwAAAAAAAAAAylSz+l4AAAAAsG948803k/qdd96JeevWrUlvy5YtMe/atSvpZS/73nvv1chadu7cWa3ttG7dOqlbtGhRre20bds25mbN0q9jWrVqVWUOIYQOHToU7LVs2bJaawEAAADIcsYRAAAAAAAAAIAy5cARAAAAAAAAAIAy5cARAAAAAAAAAIAy1Wz3FwEAAABq2yuvvBLzunXrCvZee+21pJetX3/99YK9fL1p06aCvTfffDPpZet33nkn6W3dujVQPyoqKmLu0KFD0mvdunXMbdq0SXoHHXRQzAceeGDBXjbv7noHH3xwUh9yyCExH3rooUmvW7duVa4TAAAAqB/OOAIAAAAAAAAAUKYcOAIAAAAAAAAAUKYqKisrK+t7EQAAAFDf8iNYVq1aVWUOIYTnn38+5jVr1iS99evXx/zSSy8lvWydvVwIIbz33nslrbNFixZJXepokRDS8SL50SLZul27dkmvffv2Mbds2TLpZceg5K+Xvez++++f9Nq2bRtzs2bpJN3sbazuKJP89fL3W6nyY3t27txZre288cYbBXvZcT/bt29Pelu2bIl527ZtSS972c2bNye97OP57bffTnrZsUTVHX1U7Hp7Ij9ip2vXrjEfdthhSa9Lly4xH3744UkvWx9xxBFJr1evXgW3mR33AwAAAOXKGUcAAAAAAAAAAMqUA0cAAAAAAAAAAMqUA0cAAAAAAAAAAMpURWVlZWV9LwIAAAB2J/vxdfXq1UnvqaeeivnPf/5z0lu1alXMzz33XMHeyy+/XPJaunTpEnO3bt0K9g477LCk17Vr1yovF0IIhx9+eMFetm7Tpk3J64S6smvXrqR+9dVXY37llVeS3osvvhhz/nm3bt26Ki8XQgjr168v2FuzZk3Mb7/9dsF1tmjRIql79uwZ8xFHHJH0evXqFXPv3r2T3jHHHBPz8ccfn/Q6dOhQcP8AAADQEDnjCAAAAAAAAABAmXLgCAAAAAAAAABAmTKqBgAAgFr11ltvxfzYY48lvSeffDLm7LiZfC+EEJ555pmY86MoKioqYu7Ro0fSy46fyI6e2F0vW+dHWLRq1SoADdOGDRuSOjuiqti4qmK9lStXJr3NmzcX3H927NRxxx2X9IrVJ5xwQtI76qijYm7SxP/9AgAAoPb41AkAAAAAAAAAUKYcOAIAAAAAAAAAUKYcOAIAAAAAAAAAUKYqKisrK+t7EQAAADQ8O3fuTOq//OUvMS9btizpZet879FHH415x44dSa9Dhw4xH3PMMUkvXx999NExn3zyyUmvb9++Mb
dt2zYA1KZ169bF/Oc//znpPfPMMzHnXw+LXXb79u1Jb//99485+xoXQvoa+JGPfCTpnXHGGTF36tSp6hsAAAAAGc44AgAAAAAAAABQphw4AgAAAAAAAABQpoyqAQAA2Mft2rUrqZcvXx7zokWLkt7ChQtjfuihh5LeW2+9FXObNm2SXnZswqmnnpr0+vfvH/Mpp5yS9A477LCiawfYl2XHdz355JNJ75FHHqky5+uVK1cmvexXfb169Up6Z511VsyDBw9OeoMGDYq5Y8eOu107AAAA+w5nHAEAAAAAAAAAKFMOHAEAAAAAAAAAKFMOHAEAAAAAAAAAKFMVldnBpwAAADQaL7/8csz33ntv0lu4cGHMixcvTnqbNm2K+ZBDDkl6gwYNivnMM89Mev3794/52GOPTXrNmjUrcdUA1KQ33ngjqR955JGYH3744aS3aNGiKi8XQgg7d+6M+bjjjkt6Z599dsznnHNO0jvrrLNibtGiRanLBgAAoAFxxhEAAAAAAAAAgDLlwBEAAAAAAAAAgDJlVA0AAEAD8vzzzyf1/PnzY543b17Se+ihh2Ju1apV0jvttNNizo4YyNcnnnhi0mvSxP8vACgHW7duTersWJvsuLN8/dhjjyW97N+f7LizEEIYOnRozB//+MeTXrt27fZswQAAANQa3wgCAAAAAAAAAJQpB44AAAAAAAAAAJQpB44AAAAAAAAAAJSpisrKysr6XgQAAMC+bO3atUn9k5/8JKnnzZsX8zPPPJP0Dj744JgvvPDCpHfxxRfHfPbZZye9li1bVm+xAFDEiy++mNS/+tWvYr777ruT3gMPPBBz8+bNk94555wT82WXXZb0Pv7xj8fcokWL6i4VAACAEjnjCAAAAAAAAABAmXLgCAAAAAAAAABAmTKqBgAAoJree++9mO+5556k9+Mf/zjm//7v/056hxxySFIPHz485uz4mRBCOP3002Nu2rRp9RcLAHVs06ZNMc+fPz/p/eIXv4j5t7/9bdLr0KFDzJ/61KeS3qhRo5L62GOP3dtlAgAAlD1nHAEAAAAAAAAAKFMOHAEAAAAAAAAAKFMOHAEAAAAAAAAAKFMVlZWVlfW9CAAAgIZq48aNMX/3u99NejNnzoz5jTfeSHrnnXdezJ/73OeS3gUXXJDUzZs339tlAkCj9fLLLyf17bffHvNtt92W9FatWpXUp556aszjx49PepdccknMTZr4/3MAAACF+MQEAAAAAAAAAFCmHDgCAAAAAAAAAFCmjKoBAADK3tq1a2OeOnVq0vvxj38c8/7775/0vvCFL8T82c9+Nul17dq1JpcIAGUp/9Xl73//+6T+4Q9/GPMvf/nLpNe7d++YJ0yYkPQuu+yymI2MAwAAyp0zjgAAAAAAAAAAlCkHjgAAAAAAAAAAlCkHjgAAAAAAAAAAlKmKyvygUAAAgH3Q66+/HvMNN9yQ9H784x/H3Llz56Q3fvz4mD/3uc8lvdatW9fkEgGAvfCXv/wlqb/5zW/G/J//+Z9Jr0uXLjH/27/9W9IbOXJkLawOAACg4XLGEQAAAAAAAACAMuXAEQAAAAAAAACAMmVUDQAAsM/YtWtXzLfddlvSu+6662Ju3rx50ps8eXLMl19+edLLXxYAaHzWrFmT1Nm//T/5yU+S3sCBA2P+wQ9+kPSOOuqoWlgdAABA/XLGEQAAAAAAAACAMuXAEQAAAAAAAACAMuXAEQAAAAAAAACAMlVRWVlZWd+LAAAAqI5Vq1Yl9ac+9amYly5dmvQ+//nPx3zTTTclvXbt2tXC6gCAxuCRRx5J6ux7hieffDLpXXvttTF/5StfSXpNmzathdUBAADUPmccAQAAAAAAAAAoUw4cAQAAAAAAAAAoU0bVAAAAjcp9990X82WXXZb0evToEfNPf/rTpHf88cfX5rIAgH3Ezp07Y54+fXrSGz9+fMynn3560ps9e3bMBx10UC2tDgAAoOY54wgAAAAAAAAAQJly4AgAAA
AAAAAAQJly4AgAAAAAAAAAQJmqqKysrKzvRQAAABTy9a9/PalvvPHGmD/96U8nvWnTpsXcqlWr2l0YAFB2HnvssZg
|
||
|
"text/plain": [
|
||
|
"<Figure size 2800x1400 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"iris = load_iris()\n",
|
||
|
"X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=123)\n",
|
||
|
"lgbmc = LGBMClassifier(n_estimators=400)\n",
|
||
|
"evals = [(X_test, y_test)]\n",
|
||
|
"lgbmc.fit(X_train, y_train, early_stopping_rounds=100, eval_metric='logloss', eval_set=evals, verbose=True)\n",
|
||
|
"preds = lgbmc.predict(X_test)\n",
|
||
|
"\n",
|
||
|
"cross_val = cross_validate(\n",
|
||
|
" estimator=lgbmc,\n",
|
||
|
" X=iris.data, y=iris.target,\n",
|
||
|
" cv=5\n",
|
||
|
")\n",
|
||
|
"\n",
|
||
|
"print('avg fit time: {} (+/- {})'.format(cross_val['fit_time'].mean(), cross_val['fit_time'].std()))\n",
|
||
|
"print('avg score time: {} (+/- {})'.format(cross_val['score_time'].mean(), cross_val['score_time'].std()))\n",
|
||
|
"print('avg test score: {} (+/- {})'.format(cross_val['test_score'].mean(), cross_val['test_score'].std()))\n",
|
||
|
"\n",
|
||
|
"plot_metric(lgbmc)\n",
|
||
|
"plot_importance(lgbmc, figsize=(10,12))\n",
|
||
|
"plot_tree(lgbmc, figsize=(28,14))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.10.11"
|
||
|
},
|
||
|
"orig_nbformat": 4
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|