Source code for sklearn_utils.preprocessing.dict_input
import copy
from sklearn.base import TransformerMixin, clone
from sklearn.feature_extraction import DictVectorizer
[docs]class DictInput(TransformerMixin):
"""Converts a preprocessing step to accept list of dict."""
[docs] def __init__(self, transformer, feature_selection=False, sparse=False):
'''
:param transformer: Sklearn transformer
:param feature_selection: is this transformer perform feature selection.
'''
self.transformer = transformer
self.feature_selection = feature_selection
self.sparse = sparse
[docs] def fit(self, X, y=None):
self.dict_vectorizer_ = DictVectorizer(sparse=self.sparse)
self.transformer.fit(self.dict_vectorizer_.fit_transform(X, y), y)
if self.feature_selection:
names = self.transformer.get_support()
self.clone_dict_vectorizer_ = copy.deepcopy(self.dict_vectorizer_)
self.clone_dict_vectorizer_.restrict(names)
return self
[docs] def transform(self, X):
'''
:param X: features.
'''
inverser_tranformer = self.dict_vectorizer_
if self.feature_selection:
inverser_tranformer = self.clone_dict_vectorizer_
return inverser_tranformer.inverse_transform(
self.transformer.transform(
self.dict_vectorizer_.transform(X)))
class DfInput(DictInput):
def __init__(self, transformer, feature_selection=False, sparse=False):
# TODO: implement this.
raise NotImplementedError()