# 선형 회귀 (Linear Regression)

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np

# Build a reproducible synthetic dataset: y = 2.5x + 5 plus Gaussian noise.
# A seeded generator keeps the printed coefficients stable across
# Restart Kernel -> Run All instead of changing on every execution.
rng = np.random.default_rng(42)
X = rng.random((100, 1)) * 10  # 100 samples, one feature, uniform in [0, 10)
y = 2.5 * X + 5 + rng.standard_normal((100, 1)) * 2  # noise with std 2

# Hold out 20% of the samples for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a linear regression model on the training split.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out split.
y_pred = model.predict(X_test)

# The fitted values should land near the generating slope 2.5 and intercept 5.
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)


Coefficients: [[2.50779446]]
Intercept: [4.64634325]


In [9]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the diabetes dataset and keep only column 2 (the BMI feature)
# as a single-column feature matrix.
diabetes = datasets.load_diabetes()
bmi_column = diabetes.data[:, 2]
X = bmi_column[:, np.newaxis]  # shape (n_samples, 1)
y = diabetes.target

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training chain.
model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out split and score it with mean squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")


Mean Squared Error: 4061.83


# 로지스틱 회귀 (Logistic Regression)

In [None]:
# numpy is imported here so the cell does not depend on an earlier cell
# having left `np` in the kernel namespace.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Build a reproducible synthetic dataset; the seeded generator keeps the
# results stable across Restart Kernel -> Run All.
rng = np.random.default_rng(42)
X = rng.random((100, 1)) * 10  # 100 samples, one feature, uniform in [0, 10)
y = (X > 5).astype(int).ravel()  # label 1 when x > 5, else 0; flattened to 1-D

# Hold out 20% of the samples for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a logistic regression classifier on the training split.
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict class labels for the held-out split.
y_pred = model.predict(X_test)

print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)


In [11]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the breast cancer dataset; every feature column is used.
cancer = datasets.load_breast_cancer()
X, y = cancer.data, cancer.target

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# max_iter is raised to help the solver converge.
# fit() returns the estimator itself, so construction and training chain.
model = LogisticRegression(max_iter=10000).fit(X_train, y_train)

# Predict on the held-out split and report accuracy as a percentage.
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy: {acc*100:.2f}%")


Accuracy: 95.61%
