import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
from sklearn.linear_model import LinearRegression as LR
# Read the training and test tables; both files carry a header row.
train, test = map(pd.read_csv, ("train.csv", "test.csv"))
# sample.csv is read without a header row, so pandas labels its columns
# with integers; column 1 is later overwritten with the predictions.
sample = pd.read_csv("sample.csv", header=None)
# Quick look at how many training rows fall on each weekday.
train["week"].value_counts()
水 43
木 43
金 41
火 41
月 39
Name: week, dtype: int64
# Preview the one-hot encoding of the weekday column: one indicator column
# per distinct value of train["week"]. The result is only displayed, not stored.
pd.get_dummies(train["week"])
月 木 水 火 金
0 1 0 0 0 0
1 0 0 0 1 0
2 0 0 1 0 0
3 0 1 0 0 0
4 0 0 0 0 1
... ... ... ... ... ...
202 0 0 1 0 0
203 0 1 0 0 0
204 0 0 0 0 1
205 1 0 0 0 0
206 0 0 0 1 0
207 rows × 5 columns
# Build the training feature matrix: the numeric temperature column plus
# one-hot weekday indicators (week_<day>). It is constructed the same way as
# testX so that both matrices share the same kind of columns.
trainX = pd.get_dummies(train[["week","temperature"]])
trainX
temperature week_月 week_木 week_水 week_火 week_金
0 19.8 1 0 0 0 0
1 17.0 0 0 0 1 0
2 15.5 0 0 1 0 0
3 15.2 0 1 0 0 0
4 16.1 0 0 0 0 1
... ... ... ... ... ... ...
202 24.8 0 0 1 0 0
203 25.4 0 1 0 0 0
204 27.1 0 0 0 0 1
205 26.6 1 0 0 0 0
206 28.1 0 0 0 1 0
207 rows × 6 columns
# Regression target: the "y" column of the training table.
y = train.loc[:, "y"]
# LR is scikit-learn's LinearRegression; fit it on the training features.
model = LR()
model.fit(X=trainX, y=y)
LinearRegression()
# Fitted coefficients, one per feature column of trainX, in column order.
model.coef_
array([-2.53878074, 8.26339936, -9.47240196, -2.02873774, 1.85251984,
1.38522051])
# Fitted intercept (constant term) of the linear model.
model.intercept_
135.69119841401601
# One-hot encode the test features the same way as the training features,
# then force the column set and order to match trainX. get_dummies derives
# its columns from the categories present in the frame it is given, so a
# weekday missing from (or only present in) test.csv would otherwise produce
# features that do not line up with what the model was fitted on.
testX = pd.get_dummies(test[["week","temperature"]])
testX = testX.reindex(columns=trainX.columns, fill_value=0)
testX
temperature week_月 week_木 week_水 week_火 week_金
0 20.2 0 0 1 0 0
1 23.9 0 1 0 0 0
2 28.7 0 0 0 0 1
3 21.5 1 0 0 0 0
4 22.1 0 0 0 1 0
5 23.3 0 0 1 0 0
6 22.5 0 1 0 0 0
7 26.1 0 0 0 0 1
8 26.8 0 0 0 1 0
9 15.6 0 0 1 0 0
10 18.6 0 1 0 0 0
11 23.0 0 0 0 0 1
12 23.9 1 0 0 0 0
13 20.2 0 0 0 1 0
14 15.5 0 0 1 0 0
15 14.7 0 1 0 0 0
16 18.7 0 0 0 0 1
17 22.7 1 0 0 0 0
18 19.1 0 0 0 1 0
19 19.6 0 0 1 0 0
20 19.6 0 1 0 0 0
21 18.8 0 0 0 0 1
22 18.9 0 0 0 1 0
23 16.0 0 0 1 0 0
24 19.2 0 1 0 0 0
25 20.1 0 0 0 0 1
26 21.2 1 0 0 0 0
27 14.7 0 0 0 1 0
28 14.4 0 0 1 0 0
29 19.5 0 1 0 0 0
30 15.8 0 0 0 0 1
31 14.3 1 0 0 0 0
32 16.1 0 0 0 1 0
33 14.9 0 0 1 0 0
34 10.5 0 1 0 0 0
35 14.9 0 0 0 0 1
36 11.4 0 0 0 1 0
37 9.2 0 0 1 0 0
38 15.1 0 1 0 0 0
39 15.3 0 0 0 0 1
# Predict the target for every row of the test feature matrix.
pred = model.predict(testX)
# Display the predictions (one value per test row).
pred
array([ 82.37908978, 65.54193684, 64.21341177, 89.37081192,
81.43666396, 74.5088695 , 69.09622987, 70.81424168,
69.5043945 , 94.05748117, 78.99747474, 78.68446197,
83.27773815, 86.26034736, 94.31135925, 88.89871962,
89.60121914, 86.32427504, 89.05300617, 83.90235823,
76.45869401, 89.34734106, 89.56076232, 93.04196888,
77.4742063 , 86.0469261 , 90.13244614, 100.22364142,
97.10401806, 76.71257208, 96.96368327, 107.65003323,
96.66934838, 95.83462769, 99.56159871, 99.24858594,
108.60161785, 110.30567789, 87.88320732, 98.23307364])
# Overwrite column 1 of the sample frame with the model's predictions.
sample[1] = pred
# Write the result without the row index and without a header line.
# index/header are documented as booleans, so pass False explicitly
# rather than relying on None being treated as falsy.
sample.to_csv("submit3.csv", index=False, header=False)