Source code for pysummarization.nlpbase.autoabstractor.n_gram_auto_abstractor

# -*- coding: utf-8 -*-
from pysummarization.nlpbase.auto_abstractor import AutoAbstractor
from pysummarization.n_gram import Ngram


class NgramAutoAbstractor(AutoAbstractor):
    '''
    The object for automatic summarization.
    The minimum unit of token is N-gram.

    Attributes:
        n_gram:     The `Ngram` object used by `tokenize`. It has no usable
                    default: reading it before it is assigned raises `TypeError`.
        n:          N of N-gram. Defaults to 2.
    '''

    # The object of N-gram. Stays `None` until `n_gram` is assigned.
    __n_gram = None

    def get_n_gram(self):
        '''
        Getter of the N-gram object.

        Returns:
            The stored `Ngram` instance.

        Raises:
            TypeError: If the stored value is not an `Ngram`
                       (including the initial `None`).
        '''
        if not isinstance(self.__n_gram, Ngram):
            raise TypeError("The type of n_gram must be Ngram.")
        return self.__n_gram

    def set_n_gram(self, value):
        '''
        Setter of the N-gram object.

        Args:
            value:  The `Ngram` instance to store.

        Raises:
            TypeError: If `value` is not an `Ngram`.
        '''
        if not isinstance(value, Ngram):
            raise TypeError("The type of n_gram must be Ngram.")
        self.__n_gram = value

    n_gram = property(get_n_gram, set_n_gram)

    # N of N-gram.
    __n = 2

    def get_n(self):
        '''
        Getter of N.

        Returns:
            The stored `int`.

        Raises:
            TypeError: If the stored value is not an `int`.
        '''
        if not isinstance(self.__n, int):
            raise TypeError("The type of n must be int.")
        return self.__n

    def set_n(self, value):
        '''
        Setter of N.

        Args:
            value:  The `int` to store. Only the type is checked here;
                    the value range is not validated.

        Raises:
            TypeError: If `value` is not an `int`.
        '''
        if not isinstance(value, int):
            raise TypeError("The type of n must be int.")
        self.__n = value

    n = property(get_n, set_n)

    def tokenize(self, data):
        '''
        Tokenize sentence.

        Runs the parent class's `tokenize`, then replaces `self.token` with
        the N-gram tokens built from it:

            [n-gram, n-gram, n-gram, ...]

        Args:
            data:   The value handed to the parent class's `tokenize` unchanged.

        Raises:
            TypeError: If `n_gram` has not been set to an `Ngram`.
        '''
        # NOTE(review): the parent `tokenize` is expected to populate
        # `self.token`; that is not visible from this class - confirm.
        super().tokenize(data)
        token_tuple_zip = self.n_gram.generate_tuple_zip(self.token, self.n)
        # Each item is concatenated with no separator into a single token.
        # Presumably each item is a tuple of `n` consecutive tokens - verify
        # against `Ngram.generate_tuple_zip`.
        self.token = ["".join(token_tuple) for token_tuple in token_tuple_zip]