|
|
|
@ -22,6 +22,7 @@
|
|
|
|
|
- 이 메타 모델이 최종 예측을 생성한다.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### 배깅(Bagging) 예제 코드
|
|
|
|
|
|
|
|
|
@ -64,7 +65,7 @@
|
|
|
|
|
plt.show()
|
|
|
|
|
````
|
|
|
|
|
|
|
|
|
|
![result](./images/11_2.png)
|
|
|
|
|
![result](./images/11_1.png)
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
@ -190,3 +191,127 @@
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
</details>
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### 부스팅(Boosting) 예제 코드
|
|
|
|
|
|
|
|
|
|
<details>
|
|
|
|
|
<summary>Code View</summary>
|
|
|
|
|
|
|
|
|
|
<summary>AdaBoost - Regression</summary>
|
|
|
|
|
<div markdown="1">
|
|
|
|
|
|
|
|
|
|
````python
|
|
|
|
|
import numpy as np
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
from sklearn.tree import DecisionTreeRegressor
|
|
|
|
|
from sklearn.ensemble import AdaBoostRegressor
|
|
|
|
|
|
|
|
|
|
# Synthetic 1-D regression problem: two superimposed sine waves plus
# Gaussian noise (sigma = 0.1) drawn from a seeded generator.
rng = np.random.RandomState(1)
X = np.linspace(0, 6, 100).reshape(-1, 1)
x_flat = X.ravel()
y = np.sin(x_flat) + np.sin(6 * x_flat) + rng.normal(0, 0.1, X.shape[0])

# Baseline: one depth-4 regression tree.
single_tree = DecisionTreeRegressor(max_depth=4)
# Boosted model: 100 depth-4 trees combined by AdaBoost. It shares `rng`,
# so it is fitted only after the noise above has been drawn.
boosted_trees = AdaBoostRegressor(
    DecisionTreeRegressor(max_depth=4),
    n_estimators=100,
    random_state=rng,
)

# fit() returns the estimator itself, so fitting and predicting can be chained.
tree_prediction = single_tree.fit(X, y).predict(X)
boosted_prediction = boosted_trees.fit(X, y).predict(X)

# Overlay both fitted curves on the training samples.
plt.figure()
plt.scatter(X, y, c="k", label="training samples")
for prediction, color, label in (
    (tree_prediction, "g", "n_estimators=1"),
    (boosted_prediction, "r", "n_estimators=100"),
):
    plt.plot(X, prediction, c=color, label=label, linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("AdaBoost Regression")
plt.legend()
plt.show()
|
|
|
|
|
````
|
|
|
|
|
|
|
|
|
|
![result](./images/11_2.png)
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<summary>Gradient Boosting - Regression</summary>
|
|
|
|
|
<div markdown="1">
|
|
|
|
|
|
|
|
|
|
````python
|
|
|
|
|
import numpy as np
|
|
|
|
|
import pandas as pd
|
|
|
|
|
from sklearn import datasets
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
|
from sklearn.metrics import mean_squared_error
|
|
|
|
|
from sklearn import ensemble
|
|
|
|
|
from sklearn.metrics import mean_squared_error, r2_score
|
|
|
|
|
from sklearn.model_selection import cross_val_predict
|
|
|
|
|
|
|
|
|
|
# Boston housing data: 13 features per sample; the target (MEDV) is the
# median home value.
# `datasets.load_boston()` was removed in scikit-learn 1.2, so the data is
# read from the original StatLib file instead (the replacement suggested by
# the scikit-learn removal notice). After the 22-line header, every record
# spans two physical lines: 11 values on the first, 3 on the second
# (B, LSTAT and finally MEDV).
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
boston_data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
boston_target = raw_df.values[1::2, 2]
feature_names = np.array([
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
])
print(boston_data.shape, boston_target.shape)
print(feature_names)

# One DataFrame holding the features plus the target as the last column.
data = pd.DataFrame(boston_data, columns=feature_names)
data = pd.concat([data, pd.Series(boston_target, name='MEDV')], axis=1)
print(data.head())

# Features are every column but the last; the target is the last column.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Hold out 10% of the samples for testing; the seed keeps the split reproducible.
x_training_set, x_test_set, y_training_set, y_test_set = train_test_split(
    X, y, test_size=0.10, random_state=42, shuffle=True
)
|
|
|
|
|
````
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<summary>Result</summary>
|
|
|
|
|
<div markdown="1">
|
|
|
|
|
|
|
|
|
|
````plaintext
|
|
|
|
|
(506, 13) (506,)
|
|
|
|
|
['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
|
|
|
|
|
'B' 'LSTAT']
|
|
|
|
|
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX
|
|
|
|
|
0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0
|
|
|
|
|
1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0
|
|
|
|
|
2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0
|
|
|
|
|
3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0
|
|
|
|
|
4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0
|
|
|
|
|
|
|
|
|
|
PTRATIO B LSTAT MEDV
|
|
|
|
|
0 15.3 396.90 4.98 24.0
|
|
|
|
|
1 17.8 396.90 9.14 21.6
|
|
|
|
|
2 17.8 392.83 4.03 34.7
|
|
|
|
|
3 18.7 394.63 2.94 33.4
|
|
|
|
|
4 18.7 396.90 5.33 36.2
|
|
|
|
|
````
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<div markdown="1">
|
|
|
|
|
|
|
|
|
|
````python
|
|
|
|
|
# 'loss' is deliberately left unset. Squared error is the default loss of
# GradientBoostingRegressor, but its explicit name changed from 'ls' to
# 'squared_error' in scikit-learn 1.0 and 'ls' was removed in 1.2, so
# relying on the default works on every version.
params = {
    'n_estimators': 500,
    'max_depth': 4,
    'min_samples_split': 2,
    'learning_rate': 0.01,
}
model = ensemble.GradientBoostingRegressor(**params)
model.fit(x_training_set, y_training_set)

# R^2 on the training data (not a generalization estimate).
model_score = model.score(x_training_set, y_training_set)
print('R2 sq: ', model_score)

# Evaluate on the held-out test split.
y_predicted = model.predict(x_test_set)
print('Mean squared error: %.2f'% mean_squared_error(y_test_set, y_predicted))
print('Test Variance score: %.2f' % r2_score(y_test_set, y_predicted))

# Predicted vs. actual values; the dashed diagonal marks perfect prediction.
fig, ax = plt.subplots()
ax.scatter(y_test_set, y_predicted, edgecolors=(0,0,0))
ax.plot([y_test_set.min(), y_test_set.max()], [y_test_set.min(), y_test_set.max()], 'k--', lw=4)
ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
ax.set_title('Ground Truth vs Predicted')
plt.show()
|
|
|
|
|
````
|
|
|
|
|
|
|
|
|
|
![result](./images/11_3.png)
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</details>
|
|
|
|
|