自定义sklearn CustomImputer

前言

根据自己的需求，自定义了一个简单的 CustomImputer，代码如下：

import numpy as np 
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.multiclass import type_of_target
from sklearn.preprocessing import OneHotEncoder, StandardScaler
try:
    from sklearn.impute import SimpleImputer as Imputer
except:
    from sklearn.preprocessing import Imputer
from sklearn.pipeline import FeatureUnion, Pipeline
import inspect


class CustomImputer(BaseEstimator, TransformerMixin):
    """Fill missing values in a pandas object with a statistic or a fixed value.

    Parameters
    ----------
    strategy : {"mean", "median", "mode"}, default="mean"
        Statistic computed from the data passed to ``fit``. Only consulted
        when ``custom_value`` is None.
    custom_value : object, default=None
        When not None, this value is handed to ``fillna`` as-is instead of a
        computed statistic (so anything ``fillna`` accepts works: a scalar,
        a dict, a Series, ...). Falsy values such as ``0`` are honoured.

    Attributes
    ----------
    statistics_ : object
        The fill value(s) chosen by ``fit``.
    """

    def __init__(self, strategy="mean", custom_value=None):
        # Plain attribute assignment, one per constructor argument, with no
        # validation here: this is the shape BaseEstimator.get_params and
        # sklearn.base.clone rely on.
        self.strategy = strategy
        self.custom_value = custom_value

    def fit(self, X, y=None):
        """Determine the fill value(s) from ``X``.

        Parameters
        ----------
        X : pandas.DataFrame or pandas.Series
            Data to derive the statistic from.
        y : ignored
            Present only for scikit-learn API compatibility.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``custom_value`` is None and ``strategy`` is not one of
            "mean", "median" or "mode".
        """
        # Compare against None explicitly: a truthiness test would discard
        # legitimate fill values like 0 or "" and would raise on array-likes.
        if self.custom_value is not None:
            self.statistics_ = self.custom_value
            return self

        allowed_strategies = ["mean", "median", "mode"]
        if self.strategy not in allowed_strategies:
            raise ValueError("Can only use these strategies: {0} "
                             " got strategy={1}".format(allowed_strategies,
                                                        self.strategy))

        if self.strategy == "mode":
            # mode() may return several rows on ties; the first row is used.
            self.statistics_ = X.mode().iloc[0]
        else:
            # "mean" or "median": call the pandas method of the same name.
            reducer = getattr(X, self.strategy)
            if isinstance(X, pd.DataFrame):
                # Skip non-numeric columns; recent pandas raises TypeError
                # on them instead of dropping them silently.
                self.statistics_ = reducer(numeric_only=True)
            else:
                self.statistics_ = reducer()

        return self

    def transform(self, X):
        """Impute all missing values in ``X``.

        ``X`` is modified in place (existing callers rely on that) and the
        same object is returned so the transformer can be chained.

        Parameters
        ----------
        X : pandas.DataFrame or pandas.Series
            Data whose missing entries are filled with ``statistics_``.

        Returns
        -------
        The same ``X`` object, with missing values filled.
        """
        # fillna(..., inplace=True) returns None, so return X explicitly.
        X.fillna(self.statistics_, inplace=True)
        return X

# Demo: blank out the last four rows of the housing data, then impute them.
data = pd.read_csv("data/kc_house_data.csv")
data.iloc[-4:] = np.nan

custom_imputer = CustomImputer(strategy="mean")
# fit() returns the imputer itself, so transform() can be chained onto it.
# transform() fills `data` in place, which is why `data` is inspected below.
custom_imputer.fit(data).transform(data)
data.tail()
上一篇:Unity回合制游戏集合


下一篇:MyBatis-Plus代码生成器