整个数据集的划分(特征值和目标值未分离)
import random
def randSplit(dataSet, rate):
    """Randomly split a DataFrame into a training part and a test part.

    Rows are assigned at random using the standard-library ``random``
    module, so ``random.seed`` makes the split reproducible.  The input
    is never modified, and any kind of index (strings, dates,
    non-contiguous integers, ...) is accepted because rows are selected
    by position rather than by label.

    Args:
        dataSet: The pandas DataFrame to split.
        rate: Fraction of rows that go to the training part; must lie in
            ``[0, 1]``.  The training part gets ``int(n * rate)`` rows.

    Returns:
        A ``(train, test)`` tuple of new DataFrames, each re-indexed
        from 0.

    Raises:
        ValueError: If ``rate`` is outside ``[0, 1]``.
    """
    if not 0 <= rate <= 1:
        raise ValueError("rate must be between 0 and 1, got %r" % (rate,))

    n = dataSet.shape[0]
    m = int(n * rate)

    # Draw a random permutation: labels[pos] is the random label handed to
    # the row at position pos.
    labels = list(range(n))
    random.shuffle(labels)

    # Invert the permutation so that order[k] is the position of the row
    # that received label k.  Taking the rows labelled 0..m-1 for training
    # and m..n-1 for testing, in label order, yields the same rows in the
    # same order as shuffling a default 0..n-1 index and selecting by
    # label, but without touching the caller's index.
    order = [0] * n
    for pos, label in enumerate(labels):
        order[label] = pos

    train = dataSet.iloc[order[:m]].reset_index(drop=True)
    test = dataSet.iloc[order[m:]].reset_index(drop=True)
    return train, test
特征值和目标值分离的数据集划分
# Split the features X and the targets Y into training and test sets.
# Rows are assigned at random; test_size=0.25 holds out 25% of the samples
# for testing.  No random_state is passed, so the split differs between runs.
# NOTE(review): X and Y are not defined in this snippet; they must already
# exist (with matching numbers of rows) before this line runs.
from sklearn.model_selection import train_test_split
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.25)