def train_add_hat(x, features):
    """Cap the given columns of the training data at their 95th percentile.

    For every column in ``features`` the 95th percentile is computed with
    ``numpy.percentile`` and each value strictly greater than that threshold
    is replaced by the threshold itself.

    Args:
        x: Table-like object supporting ``copy()``, ``drop(col, axis=1)`` and
            column assignment (presumably a pandas DataFrame). It is not
            modified; the work is done on a copy.
        features: Iterable of column names to cap.

    Returns:
        A ``(df, q95_dict)`` tuple: the capped copy, and a dict mapping each
        processed column name to the threshold that was used, so the same
        caps can be applied to other data later.

    Note:
        Each processed column is dropped and re-added, so it ends up as the
        last column of the returned frame.
    """
    import numpy as np

    df = x.copy()
    q95_dict = {}
    for col in features:
        q95 = np.percentile(df[col], 95)
        q95_dict[col] = q95
        # Compare against the computed percentile, not the literal 95.
        capped = [q95 if value > q95 else value for value in np.array(df[col])]
        # Drop and re-add so the capped column moves to the end, keeping the
        # column order callers already get from this function.
        df = df.drop(col, axis=1)
        df[col] = capped
    return df, q95_dict
# Apply the same caps to the test set
def add_hat(x, features, q95_dict):
    """Cap the given columns using previously computed thresholds.

    Each value in a listed column that is strictly greater than the
    column's threshold in ``q95_dict`` is replaced by that threshold.

    Args:
        x: Table-like object supporting ``copy()``, ``drop(col, axis=1)`` and
            column assignment (presumably a pandas DataFrame). It is not
            modified; the work is done on a copy.
        features: Iterable of column names to cap.
        q95_dict: Mapping from column name to its cap value.

    Returns:
        The capped copy of ``x``.

    Raises:
        KeyError: If a name in ``features`` has no entry in ``q95_dict``.

    Note:
        Each processed column is dropped and re-added, so it ends up as the
        last column of the returned frame.
    """
    import numpy as np

    df = x.copy()
    for col in features:
        q95 = q95_dict[col]
        capped = [q95 if value > q95 else value for value in np.array(df[col])]
        # Drop and re-add so the capped column moves to the end, keeping the
        # column order callers already get from this function.
        df = df.drop(col, axis=1)
        df[col] = capped
    return df