之前,主要是用sklearn之类的库来实现回归算法,现在转pytorch,适应一下。
一,代码
import torch
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
import time
# --- Data loading and preprocessing (script-level) ---
# Wall-clock timer for the whole run; reported at the end of the script.
start = time.perf_counter()
# NOTE(review): assumes both CSVs contain the columns 'Area'..'Neighborhood'
# plus a 'Price' target column — verify against the actual files.
o_train = pd.read_csv('./回归分析/train.csv')
o_test = pd.read_csv('./回归分析/test.csv')
# Concatenate train+test so one-hot encoding below produces an identical
# column set for both splits.
all_features = pd.concat((o_train.loc[:, 'Area':'Neighborhood'], o_test.loc[:, 'Area':'Neighborhood']))
all_labels = pd.concat((o_train.loc[:, 'Price'], o_test.loc[:, 'Price']))
# Split columns by dtype: numeric columns get z-score standardization,
# object (categorical) columns get one-hot encoding.
numeric_feats = all_features.dtypes[all_features.dtypes != 'object'].index
object_feats = all_features.dtypes[all_features.dtypes == 'object'].index
# NOTE(review): the mean/std here are computed over train+test combined —
# this leaks test-set statistics into the scaling; consider train-only stats.
all_features[numeric_feats] = all_features[numeric_feats].apply(lambda x: (x-x.mean())/(x.std()))
# One-hot encode categoricals; dummy_na=True adds an explicit NaN indicator column.
all_features = pd.get_dummies(all_features, prefix=object_feats, dummy_na=True)
# Fill any remaining NaNs (numeric columns) with the column mean.
all_features = all_features.fillna(all_features.mean())
# Standardize the target too; mean/std are kept so predictions can be
# mapped back to the original price scale later.
mean = all_labels.mean()
std = all_labels.std()
all_labels = (all_labels - mean)/std
# Re-split into train/test by the original row count and convert to float32 tensors.
num_train = o_train.shape[0]
train_features = all_features[:num_train].values.astype(np.float32)
test_features = all_features[num_train:].values.astype(np.float32)
train_labels = all_labels[:num_train].values.astype(np.float32)
test_labels = all_labels[num_train:].values.astype(np.float32)
train_features = torch.from_numpy(train_features)
train_labels = torch.from_numpy(train_labels).unsqueeze(1)  # (N,) -> (N, 1) to match model output
test_features = torch.from_numpy(test_features)
test_labels = torch.from_numpy(test_labels).unsqueeze(1)
# Mini-batch loaders: shuffle training data each epoch; keep test order fixed.
train_set = TensorDataset(train_features, train_labels)
test_set = TensorDataset(test_features, test_labels)
train_data = DataLoader(dataset=train_set, batch_size=64, shuffle=True)
test_data = DataLoader(dataset=test_set, batch_size=64, shuffle=False)
class Net(torch.nn.Module):
    """Simple three-layer MLP regressor: n_feature -> 600 -> 1200 -> n_output.

    ReLU activations follow each hidden layer; the output layer is linear,
    as appropriate for regression.
    """

    def __init__(self, n_feature, n_output):
        super(Net, self).__init__()
        # Attribute names kept as layer1/2/3 so any saved state_dict
        # remains loadable.
        self.layer1 = torch.nn.Linear(n_feature, 600)
        self.layer2 = torch.nn.Linear(600, 1200)
        self.layer3 = torch.nn.Linear(1200, n_output)

    def forward(self, x):
        """Forward pass: two ReLU hidden layers, then a linear projection."""
        hidden = torch.relu(self.layer1(x))
        hidden = torch.relu(self.layer2(hidden))
        return self.layer3(hidden)
# --- Training / evaluation loop (script-level) ---
# Infer the input width from the data instead of hard-coding 44, so the
# script survives any change in the one-hot encoded feature count.
net = Net(train_features.shape[1], 1)
optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
criterion = torch.nn.MSELoss()

losses = []       # per-epoch mean training loss
eval_losses = []  # per-epoch mean test loss

for i in range(100):
    # -- training pass --
    net.train()
    train_loss = 0
    for tdata, tlabel in train_data:
        y_ = net(tdata)
        loss = criterion(y_, tlabel)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss = train_loss + loss.item()
    losses.append(train_loss/len(train_data))

    # -- evaluation pass --
    net.eval()
    eval_loss = 0
    # no_grad: evaluation needs no autograd graph, saving time and memory.
    with torch.no_grad():
        for edata, elabel in test_data:
            y_ = net(edata)
            loss = criterion(y_, elabel)
            eval_loss = eval_loss + loss.item()
    eval_losses.append(eval_loss/len(test_data))

    print('训练次数:{},训练集损失:{},测试集损失:{}'.format(i, train_loss/len(train_data), eval_loss/len(test_data)))

# Final prediction over the whole test set; undo the label standardization
# (y * std + mean) before reporting on the original price scale.
with torch.no_grad():
    y_ = net(test_features)
y_pre = y_ * std + mean
print('测试集预测值:{}'.format(y_pre.squeeze().detach().cpu().numpy()))
print('模型平均误差:{}'.format((abs(y_pre-(test_labels*std+mean)).mean().cpu().item())))

end = time.perf_counter()
print('模型运行时间:{}'.format(end-start))
二,效果