前言
根据自己的需求，自定义了一个简单的 CustomImputer，代码如下：
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.multiclass import type_of_target
from sklearn.preprocessing import OneHotEncoder, StandardScaler
try:
from sklearn.impute import SimpleImputer as Imputer
except:
from sklearn.preprocessing import Imputer
from sklearn.pipeline import FeatureUnion, Pipeline
import inspect
class CustomImputer(BaseEstimator, TransformerMixin):
    """Fill missing values with a per-column statistic or a fixed value.

    The input is expected to be a pandas object (``DataFrame`` or ``Series``):
    the implementation calls ``mean``/``median``/``mode`` and ``fillna`` on it.

    Parameters
    ----------
    strategy : str, default="mean"
        Statistic learned in ``fit`` when ``custom_value`` is None.
        One of ``"mean"``, ``"median"`` or ``"mode"``.
    custom_value : object, default=None
        When not None, it is used as the fill value as-is and ``strategy``
        is ignored (and not validated). Any value accepted by ``fillna``
        works, including falsy ones such as ``0``.

    Attributes
    ----------
    statistics_ : object
        Fill value(s) set by ``fit``: ``custom_value`` itself, or the result
        of the chosen statistic computed on the training data.
    """

    def __init__(self, strategy="mean", custom_value=None):
        # Store the constructor arguments verbatim under their own names.
        # Explicit assignment replaces the former frame-introspection loop,
        # whose result depended on how the interpreter exposes frame locals.
        self.strategy = strategy
        self.custom_value = custom_value

    @staticmethod
    def _numeric_statistic(X, name):
        """Return ``X.<name>()``, limited to numeric columns for DataFrames."""
        reducer = getattr(X, name)
        if isinstance(X, pd.DataFrame):
            # Without numeric_only=True recent pandas raises on text columns
            # instead of skipping them; skipping is the behaviour wanted here.
            return reducer(numeric_only=True)
        return reducer()

    def fit(self, X, y=None):
        """Learn the fill value(s) from ``X``.

        Parameters
        ----------
        X : pandas.DataFrame or pandas.Series
            Training data.
        y : ignored
            Present only for scikit-learn API compatibility.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``custom_value`` is None and ``strategy`` is not supported.
        """
        # `is not None` (rather than truthiness) so that 0, 0.0, "" and
        # array-like custom values are honoured instead of ignored/raising.
        if self.custom_value is not None:
            self.statistics_ = self.custom_value
            return self

        allowed_strategies = ["mean", "median", "mode"]
        if self.strategy not in allowed_strategies:
            raise ValueError("Can only use these strategies: {0} "
                             " got strategy={1}".format(allowed_strategies,
                                                        self.strategy))

        if self.strategy == "mode":
            # mode() may return several rows on ties; keep the first one.
            self.statistics_ = X.mode().iloc[0]
        else:
            self.statistics_ = self._numeric_statistic(X, self.strategy)
        return self

    def transform(self, X):
        """Impute all missing values in ``X``.

        ``X`` is filled in place (existing callers rely on that side effect)
        and is also returned, so that ``fit_transform`` and pipelines receive
        the imputed data instead of ``None``.
        """
        # fillna(..., inplace=True) is documented to return None, so its
        # result must not be used as the return value of transform.
        X.fillna(self.statistics_, inplace=True)
        return X
# Demo: blank out the last four rows of the housing data, then impute them
# with the column means learned from the same frame.
data = pd.read_csv("data/kc_house_data.csv")
data.iloc[-4:, :] = np.nan

# fit() returns the estimator itself, so construction and fitting can be chained.
custom_imputer = CustomImputer(strategy="mean").fit(data)

# transform() fills `data` in place; inspect the previously blanked rows.
custom_imputer.transform(data)
data.tail()