class BaseLoader(object):
    """Read a text corpus from a file on disk.

    :param data_name: label stored on the loader as ``data_name``
    :param data_path: path of the file that ``load`` opens
    """

    def __init__(self, data_name, data_path):
        super(BaseLoader, self).__init__()
        self.data_name = data_name
        self.data_path = data_path

    def load(self):
        """Return the whole file at ``data_path`` as one string.

        The file is decoded as UTF-8.

        :return: string
        """
        with open(self.data_path, "r", encoding="utf-8") as f:
            return f.read()


class ToyLoader0(BaseLoader):
    """
    For charLM

    Loads the corpus lower-cased and split into whitespace-separated tokens.
    """

    def __init__(self, name, path):
        super(ToyLoader0, self).__init__(name, path)

    def load(self):
        """Return the corpus as a list of lower-cased tokens.

        The file is read through ``BaseLoader.load`` so that it is decoded
        as UTF-8 on every platform instead of with the locale default.

        :return: list of strings
        """
        corpus = super(ToyLoader0, self).load().lower()
        # NOTE(review): the substitution pattern used to be empty, which
        # matches at every position and interleaves "unk" between all
        # characters. The literal "<unk>" placeholder is assumed to be the
        # intended target -- confirm against the corpus format.
        corpus = corpus.replace("<unk>", "unk")
        return corpus.split()