Source code for sklearn_utils.utils.skutils_io

import math
import json
import gzip
import pickle

import pandas as pd

from .data_utils import map_dict_list


class SkUtilsIO:
    '''
    IO class to read and write dataset to file.

    A dataset is a pair ``(X, y)`` where ``X`` is a list of dicts (one
    dict of feature values per sample) and ``y`` is the list of labels.

    Only the JSON methods honour the ``gz`` flag; ``from_csv`` always
    reads ``path`` as given.
    '''

    def __init__(self, path, gz=False):
        '''
        :param path: file name with path.
        :param gz: when true, the JSON methods read/write a gzip
            compressed file named ``<path>.gz`` instead of ``path``.
        '''
        self.path = path
        self.gz = gz

    def _open(self, mode):
        '''
        Opens the JSON backing file in text mode.

        :param mode: ``'r'`` or ``'w'``.
        :return: text file object; the caller is responsible for closing
            it (use it in a ``with`` statement).
        '''
        # JSON text is UTF-8 by specification, so the encoding is pinned
        # rather than left to the platform's locale default.
        if self.gz:
            # gzip.open defaults to binary mode; the 't' suffix asks for text.
            return gzip.open('%s.gz' % self.path, mode + 't',
                             encoding='utf-8')
        return open(self.path, mode, encoding='utf-8')

    def from_csv(self, label_column='labels'):
        '''
        Read dataset from csv.

        The first row of the file is used as the column header.

        :param label_column: name of the column that holds the labels.
        :return: tuple ``(X, y)``; ``X`` is built from every column except
            ``label_column`` (one dict per row), ``y`` is the list of
            values of ``label_column``.
        '''
        df = pd.read_csv(self.path, header=0)
        X = df.loc[:, df.columns != label_column].to_dict('records')
        # The predicate is falsy for 0 and for NaN/inf values.
        # NOTE(review): presumably map_dict_list drops the entries for
        # which if_func is falsy - confirm in data_utils. math.isfinite
        # raises TypeError for non-numeric values, so feature columns are
        # expected to be numeric.
        X = map_dict_list(X, if_func=lambda k, v: v and math.isfinite(v))
        y = list(df[label_column].values)
        return X, y

    def to_csv(self, X, y):
        '''
        Writes dataset to csv.

        :param X: dataset as list of dict.
        :param y: labels.
        :raises NotImplementedError: always; not implemented yet.
        '''
        # TODO: implement this
        raise NotImplementedError

    def from_json(self):
        '''
        Reads dataset from json.

        Expects the layout written by ``to_json``: a JSON array of
        ``[label, sample]`` pairs.

        :return: two-element list ``[X, y]``; ``[[], []]`` when the file
            holds an empty dataset.
        '''
        with self._open('r') as file:
            pairs = json.load(file)

        if not pairs:
            # zip(*[]) yields nothing, which would return [] and break
            # ``X, y = io.from_json()`` for an empty dataset.
            return [[], []]

        # Transpose the (label, sample) rows into columns, then reverse
        # the column order so that samples come before labels.
        return [list(column) for column in zip(*pairs)][::-1]

    def to_json(self, X, y):
        '''
        Writes dataset to json.

        The file holds a JSON array of ``[label, sample]`` pairs. If ``X``
        and ``y`` differ in length the extra items are not written
        (``zip`` stops at the shorter one).

        :param X: dataset as list of dict.
        :param y: labels.
        '''
        with self._open('w') as file:
            json.dump(list(zip(y, X)), file)

    def from_pickle(self):
        '''
        Reads dataset from pickle.

        :raises NotImplementedError: always; not implemented yet.
        '''
        raise NotImplementedError

    def to_pickle(self, X, y):
        '''
        Writes dataset to pickle.

        :param X: dataset as list of dict.
        :param y: labels.
        :raises NotImplementedError: always; not implemented yet.
        '''
        raise NotImplementedError