ํผ์ ๊ณต๋ถํ๋ ๋จธ์ ๋ฌ๋+๋ฅ๋ฌ๋์๋ ์ฌ๋ฌ๊ฐ์ ํน์ฑ(๊ธธ์ด, ๋๊ป, ๋์ด)์ ์ฌ์ฉํ์ฌ ์์ธกํ ๋ ์ธ์ ์๋๋ฐ ๋ค์คํ๊ท ์์ ๋ฅผ ์๋์ฒ๋ผ ์ ๊ณตํ๊ณ ์์ต๋๋ค.
multiple_regression.ipynb์ ๋์ด์ ๋ฌด๊ฒ๋ฅผ ์์ธกํ๋ ์์ ๋ฅผ ์๋์์ ์ค๋ช ํฉ๋๋ค.
- ๋ฐ์ดํฐ๋ฅผ ์๋์ ๊ฐ์ด pandas๋ก ์ฝ์ด์ฌ ์ ์์ต๋๋ค. CSV ํ์ผ์๋ length, height, width๋ก ๋๋ฏธ(perch) ๋ฐ์ดํฐ๊ฐ ์ ๋ฆฌ๋์ด ์์ต๋๋ค.
import pandas as pd

# Fetch the perch measurement table over HTTP, then expose it both as a
# DataFrame (df) and as a plain NumPy array (perch_full).
PERCH_CSV_URL = 'https://bit.ly/perch_csv_data'
df = pd.read_csv(PERCH_CSV_URL)
perch_full = df.to_numpy()
print(perch_full)
이때 읽어진 데이터의 형태는 아래와 같습니다.
[[ 8.4 2.11 1.41]
[13.7 3.53 2. ]
[15. 3.82 2.43]
[16.2 4.59 2.63]
[17.4 4.59 2.94]
[18. 5.22 3.32]
[18.7 5.2 3.12]
[19. 5.64 3.05]- Weight์ ๋ํ ๋ฐ์ดํฐ๋ฅผ ์ค๋นํ๊ณ , Train/Test Set์ ์ค๋นํฉ๋๋ค.
import numpy as np

# Target weights for the perch samples: 56 values, laid out 7 per row.
# The order matters because each weight is paired by position with a row of
# perch_full when the data is split.
perch_weight = np.array([
    5.9, 32.0, 40.0, 51.5, 70.0, 100.0, 78.0,
    80.0, 85.0, 85.0, 110.0, 115.0, 125.0, 130.0,
    120.0, 120.0, 130.0, 135.0, 110.0, 130.0, 150.0,
    145.0, 150.0, 170.0, 225.0, 145.0, 188.0, 180.0,
    197.0, 218.0, 300.0, 260.0, 265.0, 250.0, 250.0,
    300.0, 320.0, 514.0, 556.0, 840.0, 685.0, 700.0,
    700.0, 690.0, 900.0, 650.0, 820.0, 850.0, 900.0,
    1015.0, 820.0, 1100.0, 1000.0, 1100.0, 1000.0, 1000.0,
])
from sklearn.model_selection import train_test_split
# Split the data into a training set and a test set
train_input, test_input, train_target, test_target = train_test_split(
perch_full, perch_weight, random_state=42)
- 다중회귀를 수행합니다.
from sklearn.preprocessing import PolynomialFeatures

# Expand the raw measurements into polynomial terms up to degree 2
# (degree=2 is also the library default); include_bias=False is passed so no
# constant column is requested.
poly = PolynomialFeatures(degree=2, include_bias=False)
# Fit the expansion on the training inputs only and transform them in one step.
train_poly = poly.fit_transform(train_input)
test_poly = poly.transform(test_input)- Ridge๋ก ๊ท์ ๋ฅผ ์ํํฉ๋๋ค.
from sklearn.linear_model import Ridge

# Train a ridge regression with its default settings on the polynomial
# features (fit returns the fitted estimator), then print its score on the
# training set.
ridge = Ridge().fit(train_poly, train_target)
print(ridge.score(train_poly, train_target))
print(ridge.score(test_poly, test_target))
이때의 결과는 아래와 같습니다.
0.9894023149360563
0.9853164821839827alpha๋ก ๊ท์ ๋กค ์กฐ์ ํฉ๋๋ค.
import matplotlib.pyplot as plt

# Sweep the ridge penalty strength (alpha) over several orders of magnitude
# and record the train/test scores obtained for each value.
train_score = []
test_score = []
alpha_list = [0.001, 0.01, 0.1, 1, 10, 100]
for alpha in alpha_list:
    # Create the ridge model for this alpha
    ridge = Ridge(alpha=alpha)
    # Train the ridge model
    ridge.fit(train_poly, train_target)
    # Store the training score and the test score
    train_score.append(ridge.score(train_poly, train_target))
    test_score.append(ridge.score(test_poly, test_target))

# The alphas are spaced by factors of 10, so they are plotted at log10(alpha)
# to spread the points evenly. Note the x-axis label still reads 'alpha'
# although the plotted positions are the base-10 logarithms.
log_alphas = np.log10(alpha_list)
plt.plot(log_alphas, train_score)
plt.plot(log_alphas, test_score)
plt.xlabel('alpha')
plt.ylabel('R^2')
plt.show()๊ฒฐ๊ณผ๋ ์๋์ ๊ฐ์ต๋๋ค.
- Lasso๋ก ๊ท์ ๋ฅผ ์ํํฉ๋๋ค.
from sklearn.linear_model import Lasso

# Train a lasso regression with its default settings on the polynomial
# features (fit returns the fitted estimator), then print its score on the
# training set.
lasso = Lasso().fit(train_poly, train_target)
print(lasso.score(train_poly, train_target))
print(lasso.score(test_poly, test_target))
결과는 아래와 같습니다.
0.9886724935434511
0.9851391569633392alpha๋ก ๊ท์ ์ ์ ๋๋ฅผ ์กฐ์ ํด๋ด ๋๋ค.
# Sweep the lasso penalty strength (alpha) over several orders of magnitude
# and record the train/test scores obtained for each value.
train_score = []
test_score = []
alpha_list = [0.001, 0.01, 0.1, 1, 10, 100]
for alpha in alpha_list:
    # Create the lasso model for this alpha; max_iter=10000 gives the solver
    # a larger iteration budget in which to converge.
    lasso = Lasso(alpha=alpha, max_iter=10000)
    # Train the lasso model
    lasso.fit(train_poly, train_target)
    # Store the training score and the test score
    train_score.append(lasso.score(train_poly, train_target))
    test_score.append(lasso.score(test_poly, test_target))

# The alphas are spaced by factors of 10, so they are plotted at log10(alpha)
# to spread the points evenly. Note the x-axis label still reads 'alpha'
# although the plotted positions are the base-10 logarithms.
log_alphas = np.log10(alpha_list)
plt.plot(log_alphas, train_score)
plt.plot(log_alphas, test_score)
plt.xlabel('alpha')
plt.ylabel('R^2')
plt.show()
이때의 결과는 아래와 같습니다.

