# test2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

def draw(path, y_true, y_predict):
    """Plot observed values against predictions and save the figure.

    Both curves are drawn over the sorted index of ``y_true``.

    Args:
        path: Destination handed to ``plt.savefig``.
        y_true: pandas Series of observed values; its index is the x-axis.
        y_predict: Predictions in the same row order as ``y_true``.
    """
    # Align predictions with y_true by position. Going through a plain array
    # stops pandas from re-indexing (and NaN-filling) an already-labelled
    # Series.
    y_predict = pd.Series(np.asarray(y_predict).ravel(), index=y_true.index)

    # Sort copies rather than sorting in place: an in-place sort would reorder
    # the caller's series and silently misalign it with any prediction array
    # the caller compares it to afterwards.
    y_true = y_true.sort_index()
    y_predict = y_predict.sort_index()

    fig = plt.figure(figsize=(10, 6))
    try:
        plt.plot(y_true.index, y_true, marker='o', markersize=1)
        plt.plot(y_true.index, y_predict, marker='x', markersize=1)
        plt.savefig(path)
    finally:
        # Release the figure; this function is called many times in a loop.
        plt.close(fig)

def getMetrics(y_true, y_predict):
    """Compute regression error metrics for a set of predictions.

    Args:
        y_true: Observed target values.
        y_predict: Predicted values, in the same order as ``y_true``.

    Returns:
        dict with the keys ``'MAE'``, ``'RMSE'`` and ``'MAPE'``.
    """
    # Score against the y_true argument, not a module-level variable.
    # RMSE is taken as sqrt(MSE) so the code does not depend on the
    # `squared=` keyword, which newer scikit-learn releases deprecate/remove.
    mse = metrics.mean_squared_error(y_true, y_predict)
    return {
        'MAE': metrics.mean_absolute_error(y_true, y_predict),
        'RMSE': float(np.sqrt(mse)),
        'MAPE': metrics.mean_absolute_percentage_error(y_true, y_predict),
    }

def linear_regression(X_train, X_test, y_train):
    """Fit ordinary least squares on the training data and predict ``X_test``.

    Args:
        X_train: Training features.
        X_test: Features to predict for.
        y_train: Training targets.

    Returns:
        Predictions for ``X_test``.
    """
    # The `normalize` argument was removed from LinearRegression in
    # scikit-learn 1.2 and now raises TypeError, so it is no longer passed.
    linreg = LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1)
    linreg.fit(X_train, y_train)
    return linreg.predict(X_test)

def SVR_regression(X_train, X_test, y_train):
    """Fit an SVR with default hyper-parameters and predict ``X_test``."""
    regressor = SVR()
    regressor.fit(X_train, y_train)
    return regressor.predict(X_test)

def decision_tree_regression(X_train, X_test, y_train):
    """Fit a default DecisionTreeRegressor and predict ``X_test``."""
    tree = DecisionTreeRegressor()
    tree.fit(X_train, y_train)
    return tree.predict(X_test)

def random_forest(X_train, X_test, y_train):
    """Fit a default RandomForestRegressor and predict ``X_test``."""
    forest = RandomForestRegressor()
    forest.fit(X_train, y_train)
    return forest.predict(X_test)

def LSTM_regression(X_train, X_test, y_train):
    """Fit a single-layer LSTM on the training data and predict ``X_test``.

    Every row is fed to the network as a sequence of length one: the 2-D
    feature tables are reshaped to ``(rows, 1, features)``.

    Args:
        X_train: 2-D training features (DataFrame or array).
        X_test: 2-D features to predict for.
        y_train: Training targets, one per row of ``X_train``.

    Returns:
        1-D array with one prediction per row of ``X_test``.
    """
    train_X = np.asarray(X_train)
    test_X = np.asarray(X_test)
    train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
    test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
    train_y = np.asarray(y_train)

    model = Sequential()
    model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')
    # No validation_data: this function is not given the test targets, and it
    # must not reach for module-level variables to get them.
    model.fit(train_X, train_y, epochs=100, batch_size=72, verbose=2,
              shuffle=False)

    y_predict = model.predict(test_X)
    # Dense(1) yields shape (rows, 1); flatten to one value per test row.
    return y_predict.reshape((test_X.shape[0],))

if __name__ == '__main__':
    # Script entry point: for every (year, season) slice of one station's
    # hourly air-quality CSV, train each regressor to predict PM2.5, save a
    # prediction plot under result/, and finally write one metrics CSV per
    # model.
    import os

    FileList = [
        'data/PRSA_Data_Aotizhongxin_20130301-20170228.csv',
        'data/PRSA_Data_Changping_20130301-20170228.csv',
    ]
    # Only the first file in FileList is processed.
    rawData = pd.read_csv(FileList[0])
    rawData['season'] = (rawData['month'] - 1) // 3 + 1
    # to_datetime assembles timestamps directly from the component columns.
    rawData["Date"] = pd.to_datetime(rawData[["year", "month", "day", "hour"]])
    rawData.index = rawData["Date"]
    rawData.sort_index(inplace=True)
    rawData = rawData.groupby(['year', 'season'])

    # Model name -> function(X_train, X_test, y_train) returning predictions.
    models = {
        "linear_regression": linear_regression,
        "SVR_regression": SVR_regression,
        "decision_tree_regression": decision_tree_regression,
        "random_forest": random_forest,
        "LSTM_regression": LSTM_regression,
    }
    resultSet = {model_name: {} for model_name in models}

    # The plots are saved under result/; make sure the directory exists.
    os.makedirs('result', exist_ok=True)

    for item in rawData:
        print('*' * 20, item[0], '*' * 20)
        path = str(item[0][0]) + '-sea' + str(item[0][1])
        data = pd.DataFrame(item[1][["year", "month", "day", "hour", "PM2.5",
                                     "TEMP", "PRES", "DEWP", "RAIN", "wd",
                                     "WSPM"]])
        # Work on a copy so the added columns do not write through to a slice.
        dataset = data[["PM2.5", "TEMP", "PRES", "DEWP", "RAIN", "WSPM"]].copy()
        dataset["PM2.5(t-1)"] = data["PM2.5"].shift(1).values
        # Attach the raw wind direction BEFORE dropping rows, so rows with a
        # missing direction are dropped too and the encoded column always has
        # the same length as the frame it is assigned to.
        dataset["WD"] = data["wd"]
        dataset.dropna(axis=0, how='any', inplace=True)

        # Normalize the data.
        encoder = LabelEncoder()
        dataset["WD"] = encoder.fit_transform(dataset["WD"])
        # Shifted min-max scaling: the +1 in numerator and denominator keeps
        # every scaled value strictly positive and avoids 0/0 on constant
        # columns.
        dataset = dataset.apply(
            lambda x: (x - np.min(x) + 1) / (np.max(x) - np.min(x) + 1))

        X = dataset[["PM2.5(t-1)", "TEMP", "PRES", "DEWP", "RAIN", "WD", "WSPM"]]
        y = dataset["PM2.5"]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

        for model_name, fit_predict in models.items():
            y_predict = fit_predict(X_train, X_test, y_train)
            # Score first, and hand the plot a copy, so plotting can never
            # reorder the series that the metrics are computed from.
            resultSet[model_name][path] = getMetrics(y_test, y_predict)
            draw('result/' + model_name + '-' + path, y_test.copy(), y_predict)

    for key, result in resultSet.items():
        result = pd.DataFrame(result).T
        result.to_csv(key + ".csv")
        print(result)
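# Previous article: Linux system user and user-group management
#
#
# Next article: 34 JavaScript shorthand optimization techniques for beginners and veterans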