{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 선형 회귀 (Linear Regression)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Coefficients: [[2.50779446]]\n", "Intercept: [4.64634325]\n" ] } ], "source": [ "from sklearn.linear_model import LinearRegression\n", "from sklearn.model_selection import train_test_split\n", "import numpy as np\n", "\n", "# 예제 데이터 생성\n", "X = np.random.rand(100, 1) * 10 # 100개의 랜덤 데이터\n", "y = 2.5 * X + 5 + np.random.randn(100, 1) * 2 # y = 2.5x + 5 + 잡음\n", "\n", "# 데이터 분할\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n", "\n", "# 선형 회귀 모델 학습\n", "model = LinearRegression()\n", "model.fit(X_train, y_train)\n", "\n", "# 예측\n", "y_pred = model.predict(X_test)\n", "\n", "print(\"Coefficients:\", model.coef_)\n", "print(\"Intercept:\", model.intercept_)\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean Squared Error: 4061.83\n" ] } ], "source": [ "import numpy as np\n", "from sklearn import datasets\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.metrics import mean_squared_error\n", "\n", "# 데이터 로드\n", "diabetes = datasets.load_diabetes()\n", "X = diabetes.data[:, np.newaxis, 2] # BMI feature만 사용\n", "y = diabetes.target\n", "\n", "# 데이터 분할\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", "\n", "# 선형 회귀 모델 학습\n", "model = LinearRegression()\n", "model.fit(X_train, y_train)\n", "\n", "# 예측 및 평가\n", "y_pred = model.predict(X_test)\n", "mse = mean_squared_error(y_test, y_pred)\n", "print(f\"Mean Squared Error: {mse:.2f}\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 로지스틱 회귀 (Logistic Regression)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression\n", "from sklearn.model_selection import train_test_split\n", "\n", "# 예제 데이터 생성\n", "X = np.random.rand(100, 1) * 10 # 100개의 랜덤 데이터\n", "y = (X > 5).astype(int).ravel() # X가 5보다 크면 1, 아니면 0\n", "\n", "# 데이터 분할\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n", "\n", "# 로지스틱 회귀 모델 학습\n", "model = LogisticRegression()\n", "model.fit(X_train, y_train)\n", "\n", "# 예측\n", "y_pred = model.predict(X_test)\n", "\n", "print(\"Coefficients:\", model.coef_)\n", "print(\"Intercept:\", model.intercept_)\n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 95.61%\n" ] } ], "source": [ "from sklearn import datasets\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import accuracy_score\n", "\n", "# 데이터 로드\n", "cancer = datasets.load_breast_cancer()\n", "X = cancer.data\n", "y = cancer.target\n", "\n", "# 데이터 분할\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", "\n", "# 로지스틱 회귀 모델 학습\n", "model = LogisticRegression(max_iter=10000) # max_iter를 증가시켜 수렴을 도움\n", "model.fit(X_train, y_train)\n", "\n", "# 예측 및 평가\n", "y_pred = model.predict(X_test)\n", "acc = accuracy_score(y_test, y_pred)\n", "print(f\"Accuracy: {acc*100:.2f}%\")\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.11" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }