From f8bc1216ef76ea1a8d26910ce1c06f2e92f75afd Mon Sep 17 00:00:00 2001 From: "hy.kim" Date: Wed, 20 Sep 2023 17:26:23 +0900 Subject: [PATCH] init --- regression.ipynb | 179 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 regression.ipynb diff --git a/regression.ipynb b/regression.ipynb new file mode 100644 index 0000000..301b074 --- /dev/null +++ b/regression.ipynb @@ -0,0 +1,179 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 선형 회귀 (Linear Regression)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Coefficients: [[2.50779446]]\n", + "Intercept: [4.64634325]\n" + ] + } + ], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.model_selection import train_test_split\n", + "import numpy as np\n", + "\n", + "# Generate example data\n", + "X = np.random.rand(100, 1) * 10 # 100 random samples, scaled by 10\n", + "y = 2.5 * X + 5 + np.random.randn(100, 1) * 2 # y = 2.5x + 5 + noise\n", + "\n", + "# Split the data: 20% held out for testing (no random_state, so the split differs on every run)\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n", + "\n", + "# Fit the linear regression model\n", + "model = LinearRegression()\n", + "model.fit(X_train, y_train)\n", + "\n", + "# Predict on the held-out set\n", + "y_pred = model.predict(X_test)\n", + "\n", + "print(\"Coefficients:\", model.coef_)\n", + "print(\"Intercept:\", model.intercept_)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean Squared Error: 4061.83\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from sklearn import datasets\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "# Load the data\n", + "diabetes = datasets.load_diabetes()\n", + "X = diabetes.data[:, 
np.newaxis, 2] # use only the BMI feature (column index 2), kept 2-D via np.newaxis\n", + "y = diabetes.target\n", + "\n", + "# Split the data: 20% held out for testing, fixed random_state for a repeatable split\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# Fit the linear regression model\n", + "model = LinearRegression()\n", + "model.fit(X_train, y_train)\n", + "\n", + "# Predict and evaluate\n", + "y_pred = model.predict(X_test)\n", + "mse = mean_squared_error(y_test, y_pred)\n", + "print(f\"Mean Squared Error: {mse:.2f}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 로지스틱 회귀 (Logistic Regression)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Generate example data (np is not imported in this cell; it relies on the numpy import from an earlier cell)\n", + "X = np.random.rand(100, 1) * 10 # 100 random samples, scaled by 10\n", + "y = (X > 5).astype(int).ravel() # label is 1 where X is greater than 5, otherwise 0\n", + "\n", + "# Split the data: 20% held out for testing (no random_state, so the split differs on every run)\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n", + "\n", + "# Fit the logistic regression model\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)\n", + "\n", + "# Predict on the held-out set\n", + "y_pred = model.predict(X_test)\n", + "\n", + "print(\"Coefficients:\", model.coef_)\n", + "print(\"Intercept:\", model.intercept_)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 95.61%\n" + ] + } + ], + "source": [ + "from sklearn import datasets\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "# Load the data\n", + "cancer = datasets.load_breast_cancer()\n", + "X = cancer.data\n", + "y = cancer.target\n", + "\n", + "# Split the data: 20% held out for testing, fixed random_state for a repeatable split\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# Fit the logistic regression model\n", + "model = 
LogisticRegression(max_iter=10000) # raise max_iter to help convergence\n", + "model.fit(X_train, y_train)\n", + "\n", + "# Predict and evaluate\n", + "y_pred = model.predict(X_test)\n", + "acc = accuracy_score(y_test, y_pred)\n", + "print(f\"Accuracy: {acc*100:.2f}%\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}