Source code for pysummarization.n_gram

#!/user/bin/env python
# -*- coding: utf-8 -*-


[docs]class Ngram(object): ''' N-gram '''
[docs] def generate_ngram_data_set(self, token_list, n=2): ''' Generate the N-gram's pair. Args: token_list: The list of tokens. n N Returns: zip of Tuple(Training N-gram data, Target N-gram data) ''' n_gram_tuple_zip = self.generate_tuple_zip(token_list, n) n_gram_tuple_list = [n_gram_tuple for n_gram_tuple in n_gram_tuple_zip] n_gram_data_set = self.generate_tuple_zip(n_gram_tuple_list, 2) return n_gram_data_set
[docs] def generate_skip_gram_data_set(self, token_list): ''' Generate the Skip-gram's pair. Args: token_list: The list of tokens. Returns: zip of Tuple(Training N-gram data, Target N-gram data) ''' n_gram_tuple_zip = self.generate_tuple_zip(token_list, 3) skip_gram_list = [] for pre, point, post in n_gram_tuple_zip: skip_gram_list.append((point, pre)) skip_gram_list.append((point, post)) return zip(skip_gram_list)
[docs] def generate_tuple_zip(self, token_list, n=2): ''' Generate the N-gram. Args: token_list: The list of tokens. n N Returns: zip of Tuple(N-gram) ''' return zip(*[token_list[i:] for i in range(n)])