diff --git a/README.md b/README.md
index 91c028c..3b1f3ca 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,7 @@
 - This meta model produces the final prediction.
+---

 ### Bagging Example Code[¶]()
@@ -64,7 +65,7 @@
 plt.show()
 ````

-![result](./images/11_2.png)
+![result](./images/11_1.png)
@@ -190,3 +191,127 @@
+
+---
+
+### Boosting Example Code[¶]()
+
+<details>
+<summary>Code View</summary>
+
+AdaBoost - Regression
+<br>
+
+````python
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.ensemble import AdaBoostRegressor
+
+# 1D toy regression problem: a noisy sum of two sine waves
+rng = np.random.RandomState(1)
+X = np.linspace(0, 6, 100)[:, np.newaxis]
+y = np.sin(X).ravel() + np.sin(6*X).ravel() + rng.normal(0, 0.1, X.shape[0])
+
+# A single decision tree vs. an AdaBoost ensemble of 100 such trees
+regr_1 = DecisionTreeRegressor(max_depth=4)
+regr_2 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4), n_estimators=100, random_state=rng)
+
+regr_1.fit(X, y)
+regr_2.fit(X, y)
+y_1 = regr_1.predict(X)
+y_2 = regr_2.predict(X)
+
+plt.figure()
+plt.scatter(X, y, c="k", label="training samples")
+plt.plot(X, y_1, c="g", label="n_estimators=1", linewidth=2)
+plt.plot(X, y_2, c="r", label="n_estimators=100", linewidth=2)
+plt.xlabel("data")
+plt.ylabel("target")
+plt.title("AdaBoost Regression")
+plt.legend()
+plt.show()
+````
+
+![result](./images/11_2.png)
+
+<br>
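+
+To see how the fit improves as weak learners are added, a minimal sketch (an assumption on top of the example above: it reuses `regr_2`, `X`, `y`, and `plt`; `staged_predict` yields the ensemble's prediction after each boosting iteration):
+
+````python
+from sklearn.metrics import mean_squared_error
+
+# Training MSE after each boosting stage of the fitted AdaBoost model
+errors = [mean_squared_error(y, y_stage) for y_stage in regr_2.staged_predict(X)]
+
+plt.plot(range(1, len(errors) + 1), errors)
+plt.xlabel("boosting iterations")
+plt.ylabel("training MSE")
+plt.show()
+````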
+
+Gradient Boosting - Regression
+<br>
+
+````python
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn import datasets
+from sklearn import ensemble
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error, r2_score
+
+# Boston housing data: 13 features, the target is the median home value (MEDV).
+# Note: load_boston was removed in scikit-learn 1.2; see the fetch_openml alternative below.
+boston = datasets.load_boston()
+print(boston.data.shape, boston.target.shape)
+print(boston.feature_names)
+
+data = pd.DataFrame(boston.data, columns=boston.feature_names)
+data = pd.concat([data, pd.Series(boston.target, name='MEDV')], axis=1)
+print(data.head())
+
+X = data.iloc[:, :-1]
+y = data.iloc[:, -1]
+x_training_set, x_test_set, y_training_set, y_test_set = train_test_split(X, y, test_size=0.10, random_state=42, shuffle=True)
+````
+
+
+Result
+<br>
+
+````plaintext
+(506, 13) (506,)
+['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
+ 'B' 'LSTAT']
+      CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD    TAX
+0  0.00632  18.0   2.31   0.0  0.538  6.575  65.2  4.0900  1.0  296.0
+1  0.02731   0.0   7.07   0.0  0.469  6.421  78.9  4.9671  2.0  242.0
+2  0.02729   0.0   7.07   0.0  0.469  7.185  61.1  4.9671  2.0  242.0
+3  0.03237   0.0   2.18   0.0  0.458  6.998  45.8  6.0622  3.0  222.0
+4  0.06905   0.0   2.18   0.0  0.458  7.147  54.2  6.0622  3.0  222.0
+
+   PTRATIO       B  LSTAT  MEDV
+0     15.3  396.90   4.98  24.0
+1     17.8  396.90   9.14  21.6
+2     17.8  392.83   4.03  34.7
+3     18.7  394.63   2.94  33.4
+4     18.7  396.90   5.33  36.2
+````
+
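+Since `load_boston` was removed in scikit-learn 1.2, a hedged equivalent for newer versions (an assumption: it pulls the "boston" dataset from OpenML, whose CHAS/RAD columns may arrive as categoricals and are cast back to floats here):
+
+````python
+from sklearn.datasets import fetch_openml
+
+boston = fetch_openml(name="boston", version=1, as_frame=True)
+X = boston.data.astype(float)  # cast any categorical columns (CHAS/RAD) back to numeric
+y = boston.target
+````
+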
+<br>
+
+````python
+# Gradient boosting: 500 shallow trees fitted sequentially, each on the previous residuals.
+# Note: the 'ls' loss was renamed 'squared_error' in scikit-learn 1.0.
+params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2, 'learning_rate': 0.01, 'loss': 'ls'}
+model = ensemble.GradientBoostingRegressor(**params)
+model.fit(x_training_set, y_training_set)
+
+# R^2 on the training set
+model_score = model.score(x_training_set, y_training_set)
+print('R2 sq: ', model_score)
+
+# Held-out performance
+y_predicted = model.predict(x_test_set)
+print('Mean squared error: %.2f' % mean_squared_error(y_test_set, y_predicted))
+print('Test Variance score: %.2f' % r2_score(y_test_set, y_predicted))
+
+# Predicted vs. actual: points on the dashed diagonal are perfect predictions
+fig, ax = plt.subplots()
+ax.scatter(y_test_set, y_predicted, edgecolors=(0, 0, 0))
+ax.plot([y_test_set.min(), y_test_set.max()], [y_test_set.min(), y_test_set.max()], 'k--', lw=4)
+ax.set_xlabel('Actual')
+ax.set_ylabel('Predicted')
+ax.set_title('Ground Truth vs Predicted')
+plt.show()
+````
+
+![result](./images/11_3.png)
+
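+How the fit evolves over the 500 boosting iterations can be checked with `staged_predict` and the fitted model's `train_score_` attribute. A minimal sketch (an assumption on top of the example above: it reuses `model`, `params`, the train/test splits, `np`, and `plt`):
+
+````python
+# Test-set error after each boosting stage
+test_score = np.zeros(params['n_estimators'])
+for i, y_stage in enumerate(model.staged_predict(x_test_set)):
+    test_score[i] = mean_squared_error(y_test_set, y_stage)
+
+iterations = np.arange(params['n_estimators']) + 1
+plt.plot(iterations, model.train_score_, 'b-', label='training loss')
+plt.plot(iterations, test_score, 'r-', label='test MSE')
+plt.xlabel('boosting iterations')
+plt.ylabel('loss')
+plt.legend(loc='upper right')
+plt.show()
+````
+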
+
+</details>
diff --git a/images/11_1.png b/images/11_1.png
new file mode 100644
index 0000000..9937058
Binary files /dev/null and b/images/11_1.png differ
diff --git a/images/11_2.png b/images/11_2.png
index 9937058..c151468 100644
Binary files a/images/11_2.png and b/images/11_2.png differ
diff --git a/images/11_3.png b/images/11_3.png
new file mode 100644
index 0000000..32590b9
Binary files /dev/null and b/images/11_3.png differ