# Walk-through of common scikit-learn preprocessing steps on a small sample
# matrix: binarization, mean removal, min-max scaling, L1/L2 normalization,
# and label encoding. The arrays shown in comments are the expected results.

# --- Import libraries ---
import numpy as np
from sklearn import preprocessing as pp

input_data = np.array([
    [2.1, -1.9, 5.5],
    [-1.5, 2.4, 3.5],
    [0.5, -7.9, 5.6],
    [5.9, 2.3, -5.8],
])

# --- Binarization ---
# Values strictly greater than the threshold become 1, everything else 0
# (note that 0.5 itself maps to 0).
data_bin = pp.Binarizer(threshold=0.5).transform(input_data)
# array([[1., 0., 1.],
#        [0., 1., 1.],
#        [0., 0., 1.],
#        [1., 1., 0.]])

# --- Mean removal (standardization) ---
# Column statistics before scaling.
input_data.mean(0)  # array([ 1.75 , -1.275,  2.2  ])
input_data.std(0)   # array([2.71431391, 4.20022321, 4.69414529])

data_scale = pp.scale(input_data)

# Column statistics after scaling: mean ~0 (up to floating-point error), std 1.
data_scale.mean(0)  # array([1.11022302e-16, 0.00000000e+00, 0.00000000e+00])
data_scale.std(0)   # array([1., 1., 1.])

# --- Scaling ---
# Rescale every column linearly into the range [0, 1].
data_minmax = pp.MinMaxScaler(feature_range=(0, 1)).fit_transform(input_data)
# array([[0.48648649, 0.58252427, 0.99122807],
#        [0.        , 1.        , 0.81578947],
#        [0.27027027, 0.        , 1.        ],
#        [1.        , 0.99029126, 0.        ]])

# --- Normalization ---
# L1 normalization: the absolute values in each row sum to 1.
data_L1 = pp.normalize(input_data, norm="l1")
# array([[ 0.22105263, -0.2       ,  0.57894737],
#        [-0.2027027 ,  0.32432432,  0.47297297],
#        [ 0.03571429, -0.56428571,  0.4       ],
#        [ 0.42142857,  0.16428571, -0.41428571]])

# L2 normalization: the squared values in each row sum to 1.
data_L2 = pp.normalize(input_data, norm="l2")
# array([[ 0.33946114, -0.30713151,  0.88906489],
#        [-0.33325106,  0.53320169,  0.7775858 ],
#        [ 0.05156558, -0.81473612,  0.57753446],
#        [ 0.68706914,  0.26784051, -0.6754239 ]])

# --- Label encoding ---
input_labels = ['red', 'black', 'red', 'green', 'black', 'yellow', 'white']

# Unique labels in first-seen order. A plain set() would give an order that
# changes between interpreter runs (string hash randomization), making the
# encoded output below impossible to reproduce.
test_labels = list(dict.fromkeys(input_labels))
# ['red', 'black', 'green', 'yellow', 'white']

# Create AND train the label encoder. The encoder must be fitted before
# transform()/inverse_transform() can be called; classes are stored sorted:
# black=0, green=1, red=2, white=3, yellow=4.
encoder = pp.LabelEncoder()
encoder.fit(input_labels)

# Labels ==> integer codes
values = encoder.transform(test_labels)
# array([2, 0, 1, 4, 3])

# Integer codes ==> labels
test = np.array([1, 3, 4])
labels = encoder.inverse_transform(test)
# array(['green', 'white', 'yellow'], dtype='<U6')