baseline_v2_changeModel(cnn): score: 267.2897
# 2021.05.08
# Replaced the LightGBM model with a CNN model
# Original write-up; likes are appreciated. Please credit the source when reposting.
Open questions:
what the Dense layers contribute, and how many are needed
how to structure the CNN
import os
import gc
import math
import pandas as pd
import numpy as np
import lightgbm as lgb
#import xgboost as xgb
from catboost import CatBoostRegressor
from sklearn.linear_model import SGDRegressor, LinearRegression, Ridge
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tqdm import tqdm
import matplotlib.pyplot as plt
import time
import warnings
warnings.filterwarnings('ignore')
train = pd.read_csv('train.csv')
test=pd.read_csv('testA.csv')
train.head()
  | id | heartbeat_signals | label
---|---|---|---
0 | 0 | 0.9912297987616655,0.9435330436439665,0.764677... | 0.0 |
1 | 1 | 0.9714822034884503,0.9289687459588268,0.572932... | 0.0 |
2 | 2 | 1.0,0.9591487564065292,0.7013782792997189,0.23... | 2.0 |
3 | 3 | 0.9757952826275774,0.9340884687738161,0.659636... | 0.0 |
4 | 4 | 0.0,0.055816398940721094,0.26129357194994196,0... | 2.0 |
def reduce_mem_usage(df):
start_mem = df.memory_usage().sum() / 1024**2
print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
for col in df.columns:
col_type = df[col].dtype
if col_type != object:
c_min = df[col].min()
c_max = df[col].max()
if str(col_type)[:3] == 'int':
if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
df[col] = df[col].astype(np.int8)
elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
df[col] = df[col].astype(np.int16)
elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
df[col] = df[col].astype(np.int32)
elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
df[col] = df[col].astype(np.int64)
else:
if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
df[col] = df[col].astype(np.float16)
elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
df[col] = df[col].astype(np.float32)
else:
df[col] = df[col].astype(np.float64)
else:
df[col] = df[col].astype('category')
end_mem = df.memory_usage().sum() / 1024**2
print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
return df
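One side effect of the downcast, visible in the tables below: float16 keeps only about 3 significant decimal digits, so the signal values get rounded. A minimal check on the first s_0 value:

import numpy as np
x = 0.9912297987616655             # raw first value of heartbeat_signals
print(np.float16(x))               # 0.9912
print(float(np.float16(x)))        # 0.9912109375, displayed as 0.991211 below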
# Simple preprocessing: expand each comma-separated signal string into one float column per time step
train_list = []
for items in train.values:
train_list.append([items[0]] + [float(i) for i in items[1].split(',')] + [items[2]])
train = pd.DataFrame(np.array(train_list))
train.columns = ['id'] + ['s_'+str(i) for i in range(len(train_list[0])-2)] + ['label']
train = reduce_mem_usage(train)
test_list=[]
for items in test.values:
test_list.append([items[0]] + [float(i) for i in items[1].split(',')])
test = pd.DataFrame(np.array(test_list))
test.columns = ['id'] + ['s_'+str(i) for i in range(len(test_list[0])-1)]
test = reduce_mem_usage(test)
Memory usage of dataframe is 157.93 MB
Memory usage after optimization is: 39.67 MB
Decreased by 74.9%
Memory usage of dataframe is 31.43 MB
Memory usage after optimization is: 7.90 MB
Decreased by 74.9%
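For reference, the same expansion can be done without the explicit Python loop; a sketch using pandas' vectorized string split, assuming it is applied to the freshly loaded train frame (before the loop above):

signals = train['heartbeat_signals'].str.split(',', expand=True).astype('float32')
signals.columns = ['s_' + str(i) for i in range(signals.shape[1])]
train = pd.concat([train[['id']], signals, train[['label']]], axis=1)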
test
  | id | s_0 | s_1 | s_2 | s_3 | s_4 | s_5 | s_6 | s_7 | s_8 | ... | s_195 | s_196 | s_197 | s_198 | s_199 | s_200 | s_201 | s_202 | s_203 | s_204
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 100000.0 | 0.991699 | 1.000000 | 0.631836 | 0.136230 | 0.041412 | 0.102722 | 0.120850 | 0.123413 | 0.107910 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
1 | 100001.0 | 0.607422 | 0.541504 | 0.340576 | 0.000000 | 0.090698 | 0.164917 | 0.195068 | 0.168823 | 0.198853 | ... | 0.389893 | 0.386963 | 0.367188 | 0.364014 | 0.360596 | 0.357178 | 0.350586 | 0.350586 | 0.350586 | 0.36377 |
2 | 100002.0 | 0.975098 | 0.670898 | 0.686523 | 0.708496 | 0.718750 | 0.716797 | 0.720703 | 0.701660 | 0.596680 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
3 | 100003.0 | 0.995605 | 0.916992 | 0.520996 | 0.000000 | 0.221802 | 0.404053 | 0.490479 | 0.527344 | 0.518066 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
4 | 100004.0 | 1.000000 | 0.888184 | 0.745605 | 0.531738 | 0.380371 | 0.224609 | 0.091125 | 0.057648 | 0.003914 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
19995 | 119995.0 | 1.000000 | 0.833008 | 0.634277 | 0.639160 | 0.624023 | 0.598145 | 0.613770 | 0.624023 | 0.628906 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19996 | 119996.0 | 1.000000 | 0.826172 | 0.452148 | 0.082214 | 0.000000 | 0.137085 | 0.201050 | 0.165649 | 0.158081 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19997 | 119997.0 | 0.951660 | 0.916504 | 0.667480 | 0.352051 | 0.255371 | 0.197388 | 0.173584 | 0.141968 | 0.134521 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19998 | 119998.0 | 0.927734 | 0.677246 | 0.242920 | 0.055359 | 0.102112 | 0.072266 | 0.021011 | 0.038300 | 0.048553 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19999 | 119999.0 | 0.665527 | 0.526855 | 0.516602 | 0.376465 | 0.489258 | 0.480713 | 0.459229 | 0.482910 | 0.469971 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
20000 rows × 206 columns
# A smarter way to delete a row or column is drop(): it does not modify the original df,
# but returns a new DataFrame holding the result after deletion (effectively a new table).
# drop() deletes rows by default; pass axis=1 to delete columns.
x_train = train.drop(['id','label'], axis=1)
y_train = train['label']
x_test=test.drop(['id'], axis=1)
x_train
  | s_0 | s_1 | s_2 | s_3 | s_4 | s_5 | s_6 | s_7 | s_8 | s_9 | ... | s_195 | s_196 | s_197 | s_198 | s_199 | s_200 | s_201 | s_202 | s_203 | s_204
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 0.991211 | 0.943359 | 0.764648 | 0.618652 | 0.379639 | 0.190796 | 0.040222 | 0.026001 | 0.031708 | 0.065552 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1 | 0.971680 | 0.929199 | 0.572754 | 0.178467 | 0.122986 | 0.132324 | 0.094421 | 0.089600 | 0.030487 | 0.040497 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2 | 1.000000 | 0.958984 | 0.701172 | 0.231812 | 0.000000 | 0.080688 | 0.128418 | 0.187500 | 0.280762 | 0.328369 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
3 | 0.975586 | 0.934082 | 0.659668 | 0.249878 | 0.237061 | 0.281494 | 0.249878 | 0.249878 | 0.241455 | 0.230713 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
4 | 0.000000 | 0.055817 | 0.261230 | 0.359863 | 0.433105 | 0.453613 | 0.499023 | 0.542969 | 0.616699 | 0.676758 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
99995 | 1.000000 | 0.677734 | 0.222412 | 0.257080 | 0.204712 | 0.054657 | 0.026154 | 0.118164 | 0.244873 | 0.328857 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
99996 | 0.926758 | 0.906250 | 0.637207 | 0.415039 | 0.374756 | 0.382568 | 0.358887 | 0.341309 | 0.336426 | 0.317139 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
99997 | 0.925781 | 0.587402 | 0.633301 | 0.632324 | 0.639160 | 0.614258 | 0.599121 | 0.517578 | 0.403809 | 0.253174 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
99998 | 1.000000 | 0.994629 | 0.829590 | 0.458252 | 0.264160 | 0.240234 | 0.213745 | 0.189331 | 0.203857 | 0.210815 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
99999 | 0.925781 | 0.916504 | 0.404297 | 0.000000 | 0.262939 | 0.385498 | 0.361084 | 0.332764 | 0.339844 | 0.350586 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
100000 rows × 205 columns
x_test
  | s_0 | s_1 | s_2 | s_3 | s_4 | s_5 | s_6 | s_7 | s_8 | s_9 | ... | s_195 | s_196 | s_197 | s_198 | s_199 | s_200 | s_201 | s_202 | s_203 | s_204
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 0.991699 | 1.000000 | 0.631836 | 0.136230 | 0.041412 | 0.102722 | 0.120850 | 0.123413 | 0.107910 | 0.110535 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
1 | 0.607422 | 0.541504 | 0.340576 | 0.000000 | 0.090698 | 0.164917 | 0.195068 | 0.168823 | 0.198853 | 0.153564 | ... | 0.389893 | 0.386963 | 0.367188 | 0.364014 | 0.360596 | 0.357178 | 0.350586 | 0.350586 | 0.350586 | 0.36377 |
2 | 0.975098 | 0.670898 | 0.686523 | 0.708496 | 0.718750 | 0.716797 | 0.720703 | 0.701660 | 0.596680 | 0.487061 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
3 | 0.995605 | 0.916992 | 0.520996 | 0.000000 | 0.221802 | 0.404053 | 0.490479 | 0.527344 | 0.518066 | 0.545410 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
4 | 1.000000 | 0.888184 | 0.745605 | 0.531738 | 0.380371 | 0.224609 | 0.091125 | 0.057648 | 0.003914 | 0.007820 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
19995 | 1.000000 | 0.833008 | 0.634277 | 0.639160 | 0.624023 | 0.598145 | 0.613770 | 0.624023 | 0.628906 | 0.624023 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19996 | 1.000000 | 0.826172 | 0.452148 | 0.082214 | 0.000000 | 0.137085 | 0.201050 | 0.165649 | 0.158081 | 0.165649 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19997 | 0.951660 | 0.916504 | 0.667480 | 0.352051 | 0.255371 | 0.197388 | 0.173584 | 0.141968 | 0.134521 | 0.127075 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19998 | 0.927734 | 0.677246 | 0.242920 | 0.055359 | 0.102112 | 0.072266 | 0.021011 | 0.038300 | 0.048553 | 0.017532 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
19999 | 0.665527 | 0.526855 | 0.516602 | 0.376465 | 0.489258 | 0.480713 | 0.459229 | 0.482910 | 0.469971 | 0.399170 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 |
20000 rows × 205 columns
y_train
0 0.0
1 0.0
2 2.0
3 0.0
4 2.0
...
99995 0.0
99996 2.0
99997 3.0
99998 2.0
99999 0.0
Name: label, Length: 100000, dtype: float16
# from keras.utils.np_utils import to_categorical
# y_train = to_categorical(y_train)
# y_train
# Competition metric: total absolute error between the predicted probability matrix
# and the one-hot ground truth, summed over all samples and all 4 classes.
def abs_sum(y_pre, y_tru):
    y_pre = np.array(y_pre)
    y_tru = np.array(y_tru)
    loss = np.sum(np.abs(y_pre - y_tru))
    return loss
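A tiny worked example of the metric (hypothetical numbers); each row contributes the absolute differences across the four class columns:

y_tru = [[1, 0, 0, 0],
         [0, 0, 1, 0]]
y_pre = [[0.9, 0.1, 0.0, 0.0],
         [0.2, 0.0, 0.8, 0.0]]
print(abs_sum(y_pre, y_tru))   # (0.1 + 0.1) + (0.2 + 0.2) = 0.6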
from keras.models import Sequential
from keras.layers import Dense # for fully connected layers dense will be used
from keras.layers import Conv1D, MaxPooling1D, Flatten
from keras.optimizers import Adam
# avoid overfitting by normalizing the samples
from keras.layers import BatchNormalization  # moved here in recent Keras; older versions used keras.layers.normalization
# cnn
def build_model():
model = Sequential()
    # filters = number of convolution kernels (output channels)
    # padding='same': zero-pad the input so the output keeps the input length
    model.add(Conv1D(filters = 64, kernel_size = 6, activation='relu', padding = 'same', input_shape = (205, 1))) # input_shape is per-sample: (205, 1), not (100000, 205, 1)
    # output: (None, 205, 64)
    # Normalization to avoid overfitting
    # roughly: BatchNormalization keeps the data in a sensitive range; it sits between the linear transform and the activation: https://www.bilibili.com/video/BV1Lx411j7GT?from=search&seid=5048435414489430319
    model.add(BatchNormalization())
    # output: (None, 205, 64); the values are only re-distributed so the following activation separates them better
    # Pooling
    model.add(MaxPooling1D(pool_size=(3), strides = (2), padding = 'same'))
    # output: (None, 103, 64); stride 2 halves the length, and with padding='same' the output is ceil(205/2) = 103
model.add(Conv1D(filters = 64, kernel_size = 6, activation='relu', padding = 'same'))# (none, 103, 64)
model.add(BatchNormalization()) #:(none, 103, 64)
model.add(MaxPooling1D(pool_size=(3), strides = (2), padding = 'same')) #:(none, 52, 64)
model.add(Conv1D( filters = 64, kernel_size = 6, activation='relu', padding = 'same'))#:(none, 52, 64)
model.add(BatchNormalization()) #:(none, 52, 64)
model.add(MaxPooling1D(pool_size=(3), strides = (2), padding = 'same')) #:(none, 26, 64)
# Flatten
model.add(Flatten())
    # output: (None, 1664); Flatten lays the (26, 64) feature map out flat: 26 * 64 = 1664
    # Fully connected head
    # Dense layers: units (an integer > 0) is the layer's output dimensionality.
    # When a Flatten layer is followed by Dense, the preceding input length must be
    # fixed, otherwise the Dense output dimension cannot be inferred.
model.add(Dense(units = 64, activation='relu'))
# Hidden Layer
model.add(Dense(units = 64, activation='relu'))
# Output Layer
model.add(Dense(units = 4, activation='softmax'))
# loss = 'categorical_crossentropy'
model.compile(optimizer = 'Adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
return model
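The shapes in the comments above can be checked by hand: Conv1D with padding='same' preserves the length, and each stride-2 pooling outputs ceil(L/2), giving the chain 205 -> 103 -> 52 -> 26:

import math
L = 205
for _ in range(3):             # three MaxPooling1D layers, stride 2, padding 'same'
    L = math.ceil(L / 2)
    print(L)                   # 103, 52, 26
print(L * 64)                  # 1664 inputs to the first Dense layer after Flatten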
model = build_model()
# Shapes below are per batch; None stands for the (unspecified) batch size
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1d (Conv1D) (None, 205, 64) 448
_________________________________________________________________
batch_normalization (BatchNo (None, 205, 64) 256
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 103, 64) 0
_________________________________________________________________
conv1d_1 (Conv1D) (None, 103, 64) 24640
_________________________________________________________________
batch_normalization_1 (Batch (None, 103, 64) 256
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 52, 64) 0
_________________________________________________________________
conv1d_2 (Conv1D) (None, 52, 64) 24640
_________________________________________________________________
batch_normalization_2 (Batch (None, 52, 64) 256
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 26, 64) 0
_________________________________________________________________
flatten (Flatten) (None, 1664) 0
_________________________________________________________________
dense (Dense) (None, 64) 106560
_________________________________________________________________
dense_1 (Dense) (None, 64) 4160
_________________________________________________________________
dense_2 (Dense) (None, 4) 260
=================================================================
Total params: 161,476
Trainable params: 161,092
Non-trainable params: 384
_________________________________________________________________
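The parameter counts follow from the standard formulas: Conv1D has (kernel_size * in_channels + 1) * filters parameters, BatchNormalization has 4 * channels (gamma and beta trainable, the two moving statistics not), and Dense has (inputs + 1) * units. A quick check against the summary:

conv1 = (6 * 1 + 1) * 64       # 448
conv23 = (6 * 64 + 1) * 64     # 24640 each for conv1d_1 and conv1d_2
bn = 4 * 64                    # 256 per BatchNormalization layer, half non-trainable
d1 = (1664 + 1) * 64           # 106560
d2 = (64 + 1) * 64             # 4160
d3 = (64 + 1) * 4              # 260
print(conv1 + 2 * conv23 + 3 * bn + d1 + d2 + d3)   # 161476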
from keras.utils.np_utils import to_categorical
def cv_model(clf, train_x, train_y, test_x, clf_name):
folds = 5
seed = 2021
    # K-fold cross-validation (KFold)
    # n_splits: number of folds
    # shuffle: shuffle before splitting; the folds together cover the whole training set
    # random_state: an inherently random process needs a fixed random state to be
    # reproducible; with it, every run produces the same splits, without it each run
    # splits the data differently and results cannot be compared.
kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
    # prepare the test matrix for prediction (reshape to (n_samples, 205, 1))
test = np.zeros((test_x.shape[0],4))
test_x = test_x.iloc[:, :].values
test_x = test_x.reshape(len(test_x),test_x.shape[1],1)
cv_scores = []
onehot_encoder = OneHotEncoder(sparse=False)
    for i, (train_index, test_index) in enumerate(kf.split(train_x, train_y)):  # use the parameters, not the globals
        print('************************************ {} ************************************'.format(str(i+1)))
        x_kf_train, y_kf_train, x_kf_test, y_kf_test = train_x.iloc[train_index], train_y[train_index], train_x.iloc[test_index], train_y[test_index]
if clf_name == "cnn":
x_kf_train = x_kf_train.iloc[:, :].values
x_kf_test = x_kf_test.iloc[:, :].values
x_kf_train = x_kf_train.reshape(len(x_kf_train),x_kf_train.shape[1],1)
x_kf_test = x_kf_test.reshape(len(x_kf_test),x_kf_test.shape[1],1)
y_kf_train = to_categorical(y_kf_train)
y_kf_test = to_categorical(y_kf_test)
history = model.fit(x_kf_train,y_kf_train, epochs = 15, batch_size = 32, validation_data=(x_kf_test, y_kf_test))
x_kf_test_pred = model.predict(x_kf_test)
test_pred = model.predict(test_x)
print("y_kf_test++++++:")
print(y_kf_test)
        print('predicted probability matrix test_pred:')
print(test_pred)
print("abs_sum++++++:")
score=abs_sum(y_kf_test, x_kf_test_pred)
cv_scores.append(score)
print("cv_scores+++++:")
print(cv_scores)
        # test += test_pred
    # test = test / kf.n_splits
    # NOTE: the fold-averaging above is commented out, so only the LAST fold's
    # test predictions are returned.
    return test_pred
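Note that build_model() was called once, outside cv_model, so the same network keeps training from fold to fold; each fold's validation rows were training rows in earlier folds, which is why cv_scores falls from ~473 to ~83 in the logs below. A sketch of a leak-free variant that rebuilds the model per fold and averages the test predictions, as the commented-out lines intended (not the code that produced the logs below):

def cv_model_averaged(train_x, train_y, test_x, folds=5, seed=2021):
    kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
    test_arr = test_x.values.reshape(len(test_x), test_x.shape[1], 1)
    test_pred = np.zeros((len(test_x), 4))
    for trn_idx, val_idx in kf.split(train_x):
        model = build_model()                       # fresh weights every fold
        x_trn = train_x.iloc[trn_idx].values.reshape(-1, train_x.shape[1], 1)
        x_val = train_x.iloc[val_idx].values.reshape(-1, train_x.shape[1], 1)
        y_trn = to_categorical(train_y.iloc[trn_idx])
        y_val = to_categorical(train_y.iloc[val_idx])
        model.fit(x_trn, y_trn, epochs=15, batch_size=32,
                  validation_data=(x_val, y_val))
        test_pred += model.predict(test_arr) / folds    # average over folds
    return test_pred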
# folds = 5
# seed = 2021
# kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
# # test = np.zeros((X_text.shape[0],4))
# cv_scores = []
# onehot_encoder = OneHotEncoder(sparse=False)
# for i, (train_index, valid_index) in enumerate(kf.split(x_train, y_train)):
# print('************************************ {} ************************************'.format(str(i+1)))
# # .iloc selects rows by position; i takes values in [1, folds]
# # in effect each split yields four pieces: training (trn_) x, training y, validation (val_) x, validation y
# trn_x, trn_y, val_x, val_y = x_train[train_index], y_train[train_index], x_train[valid_index], y_train[valid_index]
# ohe = OneHotEncoder()
# trn_y = ohe.fit_transform(trn_y.values.reshape(-1,1))
# val_y = ohe.transform(val_y.values.reshape(-1,1))
# trn_x =trn_x.values.reshape(len(trn_x),trn_x.values.shape[1],1)
# val_x = val_x.values.reshape(len(val_x),val_x.values.shape[1],1)
# history = model.fit(trn_x,trn_y, epochs = 15, batch_size = 32, validation_data=(val_x, val_y))
# model.evaluate(trn_x, trn_y)
def lgb_model(x_train, y_train, x_test):  # name kept from the v1 LightGBM baseline; it now runs the CNN
    lgb_test = cv_model(lgb, x_train, y_train, x_test, "cnn")  # the lgb argument is unused inside cv_model
    return lgb_test
lgb_test = lgb_model(x_train, y_train, x_test)
************************************ 1 ************************************
Epoch 1/15
2500/2500 [==============================] - 89s 35ms/step - loss: 0.1964 - accuracy: 0.9370 - val_loss: 0.1091 - val_accuracy: 0.9664
Epoch 2/15
2500/2500 [==============================] - 93s 37ms/step - loss: 0.0664 - accuracy: 0.9800 - val_loss: 0.0643 - val_accuracy: 0.9805
Epoch 3/15
2500/2500 [==============================] - 108s 43ms/step - loss: 0.0505 - accuracy: 0.9834 - val_loss: 0.0625 - val_accuracy: 0.9823
Epoch 4/15
2500/2500 [==============================] - 111s 44ms/step - loss: 0.0363 - accuracy: 0.9884 - val_loss: 0.0549 - val_accuracy: 0.9809
Epoch 5/15
2500/2500 [==============================] - 108s 43ms/step - loss: 0.0325 - accuracy: 0.9897 - val_loss: 0.0411 - val_accuracy: 0.9883
Epoch 6/15
2500/2500 [==============================] - 107s 43ms/step - loss: 0.0246 - accuracy: 0.9921 - val_loss: 0.0623 - val_accuracy: 0.9807
Epoch 7/15
2500/2500 [==============================] - 110s 44ms/step - loss: 0.0260 - accuracy: 0.9919 - val_loss: 0.0427 - val_accuracy: 0.9879
Epoch 8/15
2500/2500 [==============================] - 99s 40ms/step - loss: 0.0184 - accuracy: 0.9941 - val_loss: 0.0471 - val_accuracy: 0.9865
Epoch 9/15
2500/2500 [==============================] - 100s 40ms/step - loss: 0.0185 - accuracy: 0.9942 - val_loss: 0.0419 - val_accuracy: 0.9885
Epoch 10/15
2500/2500 [==============================] - 101s 40ms/step - loss: 0.0173 - accuracy: 0.9943 - val_loss: 0.0379 - val_accuracy: 0.9901
Epoch 11/15
2500/2500 [==============================] - 102s 41ms/step - loss: 0.0141 - accuracy: 0.9958 - val_loss: 0.0404 - val_accuracy: 0.9900
Epoch 12/15
2500/2500 [==============================] - 99s 40ms/step - loss: 0.0127 - accuracy: 0.9960 - val_loss: 0.0385 - val_accuracy: 0.9900
Epoch 13/15
2500/2500 [==============================] - 96s 39ms/step - loss: 0.0128 - accuracy: 0.9957 - val_loss: 0.0440 - val_accuracy: 0.9908
Epoch 14/15
2500/2500 [==============================] - 103s 41ms/step - loss: 0.0111 - accuracy: 0.9962 - val_loss: 0.0438 - val_accuracy: 0.9902
Epoch 15/15
2500/2500 [==============================] - 100s 40ms/step - loss: 0.0098 - accuracy: 0.9967 - val_loss: 0.0379 - val_accuracy: 0.9907
y_kf_test++++++:
[[0. 0. 1. 0.]
[1. 0. 0. 0.]
[0. 0. 1. 0.]
...
[1. 0. 0. 0.]
[0. 0. 1. 0.]
[0. 0. 1. 0.]]
predicted probability matrix test_pred:
[[9.99999881e-01 1.23368892e-07 6.21502979e-12 4.32745534e-10]
[7.47138074e-06 1.64497091e-04 9.99590218e-01 2.37837623e-04]
[1.05034046e-11 1.90382871e-16 1.06615223e-08 1.00000000e+00]
...
[1.77340873e-03 1.38662233e-06 9.98224914e-01 2.29101516e-07]
[9.99994159e-01 5.82347275e-06 2.35584338e-12 1.23620975e-10]
[9.99747932e-01 1.21477584e-04 2.38122061e-06 1.28281055e-04]]
abs_sum++++++:
cv_scores+++++:
[473.23671531677246]
************************************ 2 ************************************
Epoch 1/15
2500/2500 [==============================] - 93s 37ms/step - loss: 0.0187 - accuracy: 0.9948 - val_loss: 0.0102 - val_accuracy: 0.9966
Epoch 2/15
2500/2500 [==============================] - 103s 41ms/step - loss: 0.0122 - accuracy: 0.9961 - val_loss: 0.0097 - val_accuracy: 0.9966
Epoch 3/15
2500/2500 [==============================] - 97s 39ms/step - loss: 0.0122 - accuracy: 0.9958 - val_loss: 0.0206 - val_accuracy: 0.9940
Epoch 4/15
2500/2500 [==============================] - 91s 37ms/step - loss: 0.0116 - accuracy: 0.9963 - val_loss: 0.0133 - val_accuracy: 0.9960
Epoch 5/15
2500/2500 [==============================] - 103s 41ms/step - loss: 0.0098 - accuracy: 0.9969 - val_loss: 0.0182 - val_accuracy: 0.9953
Epoch 6/15
2500/2500 [==============================] - 96s 39ms/step - loss: 0.0087 - accuracy: 0.9971 - val_loss: 0.0145 - val_accuracy: 0.9952
Epoch 7/15
2500/2500 [==============================] - 95s 38ms/step - loss: 0.0074 - accuracy: 0.9978 - val_loss: 0.0347 - val_accuracy: 0.9913
Epoch 8/15
2500/2500 [==============================] - 103s 41ms/step - loss: 0.0088 - accuracy: 0.9972 - val_loss: 0.0179 - val_accuracy: 0.9956
Epoch 9/15
2500/2500 [==============================] - 90s 36ms/step - loss: 0.0082 - accuracy: 0.9976 - val_loss: 0.0256 - val_accuracy: 0.9941
Epoch 10/15
2500/2500 [==============================] - 104s 41ms/step - loss: 0.0073 - accuracy: 0.9979 - val_loss: 0.0212 - val_accuracy: 0.9944
Epoch 11/15
2500/2500 [==============================] - 100s 40ms/step - loss: 0.0067 - accuracy: 0.9980 - val_loss: 0.0263 - val_accuracy: 0.9926
Epoch 12/15
2500/2500 [==============================] - 87s 35ms/step - loss: 0.0073 - accuracy: 0.9977 - val_loss: 0.0159 - val_accuracy: 0.9960
Epoch 13/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0069 - accuracy: 0.9981 - val_loss: 0.0376 - val_accuracy: 0.9902
Epoch 14/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0060 - accuracy: 0.9982 - val_loss: 0.0246 - val_accuracy: 0.9942
Epoch 15/15
2500/2500 [==============================] - 82s 33ms/step - loss: 0.0060 - accuracy: 0.9981 - val_loss: 0.0292 - val_accuracy: 0.9940
y_kf_test++++++:
[[0. 0. 1. 0.]
[0. 0. 0. 1.]
[0. 0. 0. 1.]
...
[1. 0. 0. 0.]
[1. 0. 0. 0.]
[1. 0. 0. 0.]]
predicted probability matrix test_pred:
[[1.0000000e+00 3.1578247e-11 8.9162606e-17 9.0605463e-16]
[7.5351809e-26 4.6311908e-30 1.0000000e+00 2.6168691e-38]
[1.3659213e-14 1.1359105e-21 7.8721543e-11 1.0000000e+00]
...
[1.6125210e-04 1.5620843e-05 9.9982315e-01 1.6944726e-10]
[1.0000000e+00 6.7933081e-10 7.0405877e-13 2.2816355e-14]
[9.9999905e-01 4.4103444e-07 9.4723184e-08 4.0850134e-07]]
abs_sum++++++:
cv_scores+++++:
[473.23671531677246, 290.0407085418701]
************************************ 3 ************************************
Epoch 1/15
2500/2500 [==============================] - 86s 34ms/step - loss: 0.0106 - accuracy: 0.9969 - val_loss: 0.0042 - val_accuracy: 0.9987
Epoch 2/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0067 - accuracy: 0.9981 - val_loss: 0.0051 - val_accuracy: 0.9985
Epoch 3/15
2500/2500 [==============================] - 88s 35ms/step - loss: 0.0068 - accuracy: 0.9979 - val_loss: 0.0072 - val_accuracy: 0.9981
Epoch 4/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0053 - accuracy: 0.9984 - val_loss: 0.0107 - val_accuracy: 0.9962
Epoch 5/15
2500/2500 [==============================] - 84s 33ms/step - loss: 0.0068 - accuracy: 0.9980 - val_loss: 0.0065 - val_accuracy: 0.9977
Epoch 6/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0057 - accuracy: 0.9981 - val_loss: 0.0177 - val_accuracy: 0.9950
Epoch 7/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0058 - accuracy: 0.9984 - val_loss: 0.0072 - val_accuracy: 0.9980
Epoch 8/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0056 - accuracy: 0.9984 - val_loss: 0.0077 - val_accuracy: 0.9977
Epoch 9/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0040 - accuracy: 0.9986 - val_loss: 0.0161 - val_accuracy: 0.9967
Epoch 10/15
2500/2500 [==============================] - 86s 34ms/step - loss: 0.0059 - accuracy: 0.9983 - val_loss: 0.0135 - val_accuracy: 0.9963
Epoch 11/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0057 - accuracy: 0.9984 - val_loss: 0.0137 - val_accuracy: 0.9967
Epoch 12/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0040 - accuracy: 0.9989 - val_loss: 0.0100 - val_accuracy: 0.9974
Epoch 13/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0052 - accuracy: 0.9985 - val_loss: 0.0145 - val_accuracy: 0.9966
Epoch 14/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0044 - accuracy: 0.9987 - val_loss: 0.0199 - val_accuracy: 0.9956
Epoch 15/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0053 - accuracy: 0.9987 - val_loss: 0.0189 - val_accuracy: 0.9955
y_kf_test++++++:
[[1. 0. 0. 0.]
[0. 0. 1. 0.]
[1. 0. 0. 0.]
...
[0. 0. 1. 0.]
[0. 0. 0. 1.]
[1. 0. 0. 0.]]
predicted probability matrix test_pred:
[[1.0000000e+00 3.4608899e-11 6.7611266e-16 1.6289031e-16]
[3.2447400e-17 3.6295522e-13 1.0000000e+00 1.7152423e-38]
[2.6966924e-25 2.5888265e-33 4.9505679e-23 1.0000000e+00]
...
[2.8026802e-04 1.5653444e-05 9.9970406e-01 6.4556320e-09]
[9.9999952e-01 4.2466991e-07 4.3031992e-13 6.4092606e-11]
[9.9703240e-01 2.8589300e-03 1.0849427e-04 6.7704690e-08]]
abs_sum++++++:
cv_scores+++++:
[473.23671531677246, 290.0407085418701, 216.68724060058594]
************************************ 4 ************************************
Epoch 1/15
2500/2500 [==============================] - 82s 33ms/step - loss: 0.0065 - accuracy: 0.9981 - val_loss: 0.0026 - val_accuracy: 0.9990
Epoch 2/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0050 - accuracy: 0.9986 - val_loss: 0.0045 - val_accuracy: 0.9987
Epoch 3/15
2500/2500 [==============================] - 84s 33ms/step - loss: 0.0044 - accuracy: 0.9987 - val_loss: 0.0028 - val_accuracy: 0.9989
Epoch 4/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0053 - accuracy: 0.9984 - val_loss: 0.0108 - val_accuracy: 0.9976
Epoch 5/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0043 - accuracy: 0.9989 - val_loss: 0.0116 - val_accuracy: 0.9972
Epoch 6/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0031 - accuracy: 0.9990 - val_loss: 0.0087 - val_accuracy: 0.9975
Epoch 7/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0030 - accuracy: 0.9990 - val_loss: 0.0230 - val_accuracy: 0.9944
Epoch 8/15
2500/2500 [==============================] - 82s 33ms/step - loss: 0.0052 - accuracy: 0.9987 - val_loss: 0.0126 - val_accuracy: 0.9963
Epoch 9/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0049 - accuracy: 0.9987 - val_loss: 0.0222 - val_accuracy: 0.9937
Epoch 10/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0036 - accuracy: 0.9990 - val_loss: 0.0134 - val_accuracy: 0.9966
Epoch 11/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0032 - accuracy: 0.9990 - val_loss: 0.0124 - val_accuracy: 0.9967
Epoch 12/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0046 - accuracy: 0.9989 - val_loss: 0.0068 - val_accuracy: 0.9976
Epoch 13/15
2500/2500 [==============================] - 84s 33ms/step - loss: 0.0043 - accuracy: 0.9989 - val_loss: 0.0149 - val_accuracy: 0.9965
Epoch 14/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0033 - accuracy: 0.9990 - val_loss: 0.0086 - val_accuracy: 0.9979
Epoch 15/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0027 - accuracy: 0.9992 - val_loss: 0.0172 - val_accuracy: 0.9952
y_kf_test++++++:
[[1. 0. 0. 0.]
[1. 0. 0. 0.]
[1. 0. 0. 0.]
...
[0. 0. 0. 1.]
[1. 0. 0. 0.]
[0. 1. 0. 0.]]
predicted probability matrix test_pred:
[[1.0000000e+00 1.0942575e-12 3.9219293e-17 9.0291727e-18]
[1.8889039e-13 1.7051572e-07 9.9999988e-01 8.8278994e-23]
[1.4202803e-20 5.8440978e-34 1.3529702e-17 1.0000000e+00]
...
[3.3775454e-05 2.1584659e-04 9.9975032e-01 4.9212700e-12]
[9.9991751e-01 6.5086162e-05 2.6754990e-06 1.4732053e-05]
[9.9999952e-01 8.1493896e-09 4.8628596e-07 3.6478176e-09]]
abs_sum++++++:
cv_scores+++++:
[473.23671531677246, 290.0407085418701, 216.68724060058594, 255.52966451644897]
************************************ 5 ************************************
Epoch 1/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0068 - accuracy: 0.9983 - val_loss: 0.0020 - val_accuracy: 0.9994
Epoch 2/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0041 - accuracy: 0.9990 - val_loss: 0.0135 - val_accuracy: 0.9972
Epoch 3/15
2500/2500 [==============================] - 84s 33ms/step - loss: 0.0033 - accuracy: 0.9990 - val_loss: 0.0023 - val_accuracy: 0.9995
Epoch 4/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0044 - accuracy: 0.9987 - val_loss: 0.0044 - val_accuracy: 0.9987
Epoch 5/15
2500/2500 [==============================] - 84s 33ms/step - loss: 0.0037 - accuracy: 0.9990 - val_loss: 0.0044 - val_accuracy: 0.9989
Epoch 6/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0033 - accuracy: 0.9991 - val_loss: 0.0128 - val_accuracy: 0.9973
Epoch 7/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0042 - accuracy: 0.9989 - val_loss: 0.0045 - val_accuracy: 0.9988
Epoch 8/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0041 - accuracy: 0.9990 - val_loss: 0.0029 - val_accuracy: 0.9988
Epoch 9/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0035 - accuracy: 0.9991 - val_loss: 0.0089 - val_accuracy: 0.9969
Epoch 10/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0032 - accuracy: 0.9991 - val_loss: 0.0076 - val_accuracy: 0.9974
Epoch 11/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0031 - accuracy: 0.9992 - val_loss: 0.0062 - val_accuracy: 0.9981
Epoch 12/15
2500/2500 [==============================] - 84s 33ms/step - loss: 0.0041 - accuracy: 0.9988 - val_loss: 0.0058 - val_accuracy: 0.9981
Epoch 13/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0043 - accuracy: 0.9989 - val_loss: 0.0087 - val_accuracy: 0.9975
Epoch 14/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0030 - accuracy: 0.9992 - val_loss: 0.0033 - val_accuracy: 0.9990
Epoch 15/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0028 - accuracy: 0.9992 - val_loss: 0.0074 - val_accuracy: 0.9981
y_kf_test++++++:
[[0. 0. 1. 0.]
[1. 0. 0. 0.]
[1. 0. 0. 0.]
...
[1. 0. 0. 0.]
[1. 0. 0. 0.]
[1. 0. 0. 0.]]
predicted probability matrix test_pred:
[[1.0000000e+00 2.9956503e-13 1.9854391e-16 2.4101917e-17]
[7.2484188e-19 1.7757707e-14 1.0000000e+00 0.0000000e+00]
[2.4454344e-29 0.0000000e+00 1.4663728e-33 1.0000000e+00]
...
[8.1580965e-06 1.2090248e-04 9.9987090e-01 1.8680077e-12]
[1.0000000e+00 4.4534781e-10 1.6605388e-13 7.2103205e-15]
[1.0000000e+00 1.0435092e-11 6.3772593e-13 2.8722074e-14]]
abs_sum++++++:
cv_scores+++++:
[473.23671531677246, 290.0407085418701, 216.68724060058594, 255.52966451644897, 83.4570825099945]
temp=pd.DataFrame(lgb_test)
result=pd.read_csv('sample_submit.csv')
result['label_0']=temp[0]
result['label_1']=temp[1]
result['label_2']=temp[2]
result['label_3']=temp[3]
result.to_csv('submit_baseline_v2.3.csv',index=False)
submit_data=pd.read_csv('submit_baseline_v2.3.csv')
submit_data
  | id | label_0 | label_1 | label_2 | label_3
---|---|---|---|---|---
0 | 100000 | 1.000000e+00 | 2.995650e-13 | 1.985439e-16 | 2.410192e-17 |
1 | 100001 | 7.248419e-19 | 1.775771e-14 | 1.000000e+00 | 0.000000e+00 |
2 | 100002 | 2.445434e-29 | 0.000000e+00 | 1.466373e-33 | 1.000000e+00 |
3 | 100003 | 1.000000e+00 | 1.637765e-21 | 2.459309e-21 | 1.862687e-24 |
4 | 100004 | 1.000000e+00 | 3.080988e-10 | 6.932140e-15 | 5.327876e-19 |
... | ... | ... | ... | ... | ... |
19995 | 119995 | 9.999999e-01 | 6.929825e-08 | 2.932834e-08 | 4.951478e-10 |
19996 | 119996 | 1.000000e+00 | 4.902514e-08 | 2.717561e-08 | 6.707961e-12 |
19997 | 119997 | 8.158096e-06 | 1.209025e-04 | 9.998709e-01 | 1.868008e-12 |
19998 | 119998 | 1.000000e+00 | 4.453478e-10 | 1.660539e-13 | 7.210321e-15 |
19999 | 119999 | 1.000000e+00 | 1.043509e-11 | 6.377259e-13 | 2.872207e-14 |
20000 rows × 5 columns
# Post-process: when the model is confident (max probability > 0.9), snap the row
# to a hard one-hot 0/1 label; otherwise keep the raw probabilities.
for index, row in submit_data.iterrows():
    row_max = max(list(row)[1:])        # max over the four label columns (skip id)
    if row_max > 0.9:
        for i in range(1, 5):
            submit_data.iloc[index, i] = 1 if row[i] > 0.9 else 0
submit_data
  | id | label_0 | label_1 | label_2 | label_3
---|---|---|---|---|---
0 | 100000 | 1.0 | 0.0 | 0.0 | 0.0 |
1 | 100001 | 0.0 | 0.0 | 1.0 | 0.0 |
2 | 100002 | 0.0 | 0.0 | 0.0 | 1.0 |
3 | 100003 | 1.0 | 0.0 | 0.0 | 0.0 |
4 | 100004 | 1.0 | 0.0 | 0.0 | 0.0 |
... | ... | ... | ... | ... | ... |
19995 | 119995 | 1.0 | 0.0 | 0.0 | 0.0 |
19996 | 119996 | 1.0 | 0.0 | 0.0 | 0.0 |
19997 | 119997 | 0.0 | 0.0 | 1.0 | 0.0 |
19998 | 119998 | 1.0 | 0.0 | 0.0 | 0.0 |
19999 | 119999 | 1.0 | 0.0 | 0.0 | 0.0 |
20000 rows × 5 columns
submit_data.to_csv('submit_baseline_v2.3.1.csv',index=False)
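For reference, the row-wise loop above can be vectorized: each softmax row sums to 1, so at most one probability can exceed 0.9, and snapping confident rows to the one-hot of their argmax is equivalent (a sketch):

cols = ['label_0', 'label_1', 'label_2', 'label_3']
probs = submit_data[cols].values
confident = probs.max(axis=1) > 0.9
probs[confident] = np.eye(4)[probs.argmax(axis=1)][confident]
submit_data[cols] = probs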