|
|
|
@ -313,5 +313,176 @@ plt.show()
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<summary>Gradient Boosting - Classification</summary>
|
|
|
|
|
<div markdown="1">
|
|
|
|
|
|
|
|
|
|
````python
|
|
|
|
|
from sklearn.datasets import make_hastie_10_2
|
|
|
|
|
from sklearn.ensemble import GradientBoostingClassifier
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
X, y = make_hastie_10_2(random_state=0)
|
|
|
|
|
X_train, X_test = X[:2000], X[2000:]
|
|
|
|
|
y_train, y_test = y[:2000], y[2000:]
|
|
|
|
|
print(X.shape, y.shape)
|
|
|
|
|
print(X[0:5,:])
|
|
|
|
|
print(y[0:5])
|
|
|
|
|
|
|
|
|
|
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0)
|
|
|
|
|
clf.fit(X_train, y_train)
|
|
|
|
|
print('Accuracy score (training): {0:.3f}'.format(clf.score(X_train, y_train)))
|
|
|
|
|
print('Accuracy score (testing): {0:.3f}'.format(clf.score(X_test, y_test)))
|
|
|
|
|
````
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<summary>Result</summary>
|
|
|
|
|
<div markdown="1">
|
|
|
|
|
|
|
|
|
|
````plaintext
|
|
|
|
|
(12000, 10) (12000,)
|
|
|
|
|
[[ 1.76405235 0.40015721 0.97873798 2.2408932 1.86755799 -0.97727788
|
|
|
|
|
0.95008842 -0.15135721 -0.10321885 0.4105985 ]
|
|
|
|
|
[ 0.14404357 1.45427351 0.76103773 0.12167502 0.44386323 0.33367433
|
|
|
|
|
1.49407907 -0.20515826 0.3130677 -0.85409574]
|
|
|
|
|
[-2.55298982 0.6536186 0.8644362 -0.74216502 2.26975462 -1.45436567
|
|
|
|
|
0.04575852 -0.18718385 1.53277921 1.46935877]
|
|
|
|
|
[ 0.15494743 0.37816252 -0.88778575 -1.98079647 -0.34791215 0.15634897
|
|
|
|
|
1.23029068 1.20237985 -0.38732682 -0.30230275]
|
|
|
|
|
[-1.04855297 -1.42001794 -1.70627019 1.9507754 -0.50965218 -0.4380743
|
|
|
|
|
-1.25279536 0.77749036 -1.61389785 -0.21274028]]
|
|
|
|
|
[ 1. -1. 1. -1. 1.]
|
|
|
|
|
Accuracy score (training): 0.879
|
|
|
|
|
Accuracy score (testing): 0.819
|
|
|
|
|
````
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<summary>XGBoost - Regression</summary>
|
|
|
|
|
<div markdown="1">
|
|
|
|
|
|
|
|
|
|
````python
|
|
|
|
|
import numpy as np
|
|
|
|
|
import pandas as pd
|
|
|
|
|
from sklearn.datasets import load_boston
|
|
|
|
|
from sklearn.metrics import mean_squared_error
|
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
|
import xgboost as xgb
|
|
|
|
|
|
|
|
|
|
boston = load_boston()  # NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2; this example requires scikit-learn < 1.2
|
|
|
|
|
data = pd.DataFrame(boston.data)
|
|
|
|
|
data.columns = boston.feature_names
|
|
|
|
|
data['PRICE'] = boston.target
|
|
|
|
|
print(data.head())
|
|
|
|
|
X, y = data.iloc[:,:-1], data.iloc[:,-1]
|
|
|
|
|
|
|
|
|
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
|
|
|
|
|
xg_reg = xgb.XGBRegressor(objective='reg:squarederror', colsample_bytree=0.3, learning_rate=0.1, max_depth=5, alpha=10, n_estimators=10)
|
|
|
|
|
xg_reg.fit(X_train, y_train)
|
|
|
|
|
preds = xg_reg.predict(X_test)
|
|
|
|
|
rmse = np.sqrt(mean_squared_error(y_test, preds))
|
|
|
|
|
print('RMSE: %f' % (rmse))
|
|
|
|
|
````
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<summary>Result</summary>
|
|
|
|
|
<div markdown="1">
|
|
|
|
|
|
|
|
|
|
````plaintext
|
|
|
|
|
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX
|
|
|
|
|
0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0
|
|
|
|
|
1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0
|
|
|
|
|
2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0
|
|
|
|
|
3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0
|
|
|
|
|
4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0
|
|
|
|
|
|
|
|
|
|
PTRATIO B LSTAT PRICE
|
|
|
|
|
0 15.3 396.90 4.98 24.0
|
|
|
|
|
1 17.8 396.90 9.14 21.6
|
|
|
|
|
2 17.8 392.83 4.03 34.7
|
|
|
|
|
3 18.7 394.63 2.94 33.4
|
|
|
|
|
4 18.7 396.90 5.33 36.2
|
|
|
|
|
RMSE: 10.423243
|
|
|
|
|
````
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<summary>LightGBM</summary>
|
|
|
|
|
<div markdown="1">
|
|
|
|
|
|
|
|
|
|
````python
|
|
|
|
|
from lightgbm import LGBMClassifier, LGBMRegressor
|
|
|
|
|
from lightgbm import plot_importance, plot_metric, plot_tree
|
|
|
|
|
from sklearn.datasets import load_iris
|
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
|
from sklearn.model_selection import cross_validate
|
|
|
|
|
|
|
|
|
|
iris = load_iris()
|
|
|
|
|
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=123)
|
|
|
|
|
lgbmc = LGBMClassifier(n_estimators=400)
|
|
|
|
|
evals = [(X_test, y_test)]
|
|
|
|
|
# Train with early stopping on the held-out set, logging the metric every round.
# The early_stopping_rounds= and verbose= arguments of fit() were removed in
# LightGBM 4.0; the callbacks below are the equivalent and also work on 3.3+.
from lightgbm import early_stopping, log_evaluation
lgbmc.fit(X_train, y_train, eval_metric='logloss', eval_set=evals, callbacks=[early_stopping(stopping_rounds=100), log_evaluation(period=1)])
|
|
|
|
|
preds = lgbmc.predict(X_test)
|
|
|
|
|
|
|
|
|
|
cross_val = cross_validate(
|
|
|
|
|
estimator=lgbmc,
|
|
|
|
|
X=iris.data, y=iris.target,
|
|
|
|
|
cv=5
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
print('avg fit time: {} (+/- {})'.format(cross_val['fit_time'].mean(), cross_val['fit_time'].std()))
|
|
|
|
|
# Report the scoring time per fold (the label previously said "fit time" by copy-paste).
print('avg score time: {} (+/- {})'.format(cross_val['score_time'].mean(), cross_val['score_time'].std()))
|
|
|
|
|
# Report the cross-validated test score (the label previously said "fit time" by copy-paste).
print('avg test score: {} (+/- {})'.format(cross_val['test_score'].mean(), cross_val['test_score'].std()))
|
|
|
|
|
|
|
|
|
|
plot_metric(lgbmc)
|
|
|
|
|
plot_importance(lgbmc, figsize=(10,12))
|
|
|
|
|
plot_tree(lgbmc, figsize=(28,14))
|
|
|
|
|
````
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<summary>Result</summary>
|
|
|
|
|
<div markdown="1">
|
|
|
|
|
|
|
|
|
|
````plaintext
|
|
|
|
|
[1] valid_0's multi_logloss: 0.95847
|
|
|
|
|
[2] valid_0's multi_logloss: 0.832184
|
|
|
|
|
[3] valid_0's multi_logloss: 0.731164
|
|
|
|
|
[4] valid_0's multi_logloss: 0.641056
|
|
|
|
|
[5] valid_0's multi_logloss: 0.571726
|
|
|
|
|
[6] valid_0's multi_logloss: 0.507286
|
|
|
|
|
[7] valid_0's multi_logloss: 0.454933
|
|
|
|
|
[8] valid_0's multi_logloss: 0.410205
|
|
|
|
|
[9] valid_0's multi_logloss: 0.372194
|
|
|
|
|
[10] valid_0's multi_logloss: 0.333919
|
|
|
|
|
[11] valid_0's multi_logloss: 0.310212
|
|
|
|
|
[12] valid_0's multi_logloss: 0.282326
|
|
|
|
|
[13] valid_0's multi_logloss: 0.257165
|
|
|
|
|
[14] valid_0's multi_logloss: 0.240836
|
|
|
|
|
[15] valid_0's multi_logloss: 0.225383
|
|
|
|
|
[16] valid_0's multi_logloss: 0.211583
|
|
|
|
|
[17] valid_0's multi_logloss: 0.199289
|
|
|
|
|
[18] valid_0's multi_logloss: 0.186269
|
|
|
|
|
[19] valid_0's multi_logloss: 0.171556
|
|
|
|
|
[20] valid_0's multi_logloss: 0.168245
|
|
|
|
|
[21] valid_0's multi_logloss: 0.161065
|
|
|
|
|
[22] valid_0's multi_logloss: 0.151371
|
|
|
|
|
[23] valid_0's multi_logloss: 0.148081
|
|
|
|
|
[24] valid_0's multi_logloss: 0.143843
|
|
|
|
|
[25] valid_0's multi_logloss: 0.140169
|
|
|
|
|
...
|
|
|
|
|
[137] valid_0's multi_logloss: 0.376748
|
|
|
|
|
avg fit time: 0.5514350891113281 (+/- 0.3701610138582717)
|
|
|
|
|
avg fit time: 0.010002517700195312 (+/- 0.009552237668971902)
|
|
|
|
|
avg fit time: 0.9600000000000002 (+/- 0.04898979485566355)
|
|
|
|
|
````
|
|
|
|
|
|
|
|
|
|
![result](./images/11_4.png)
|
|
|
|
|
![result](./images/11_5.png)
|
|
|
|
|
![result](./images/11_6.png)
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
</details>
|
|
|
|
|