Source code for pysummarization.vectorizabletoken.tfidf_vectorizer

# -*- coding: utf-8 -*-
import nltk
from pysummarization.vectorizable_token import VectorizableToken


class TfidfVectorizer(VectorizableToken):
    '''
    Vectorize tokens with TF-IDF weights.

    The weights are obtained from an `nltk.TextCollection` that is built
    once, at construction time, from the supplied token lists.
    '''

    # Document collection. This class-level placeholder is replaced on each
    # instance by the `nltk.TextCollection` created in `__init__`.
    __collection = []

    def __init__(self, token_list_list):
        '''
        Build the document collection used for weighting.

        Args:
            token_list_list:    The list of list of tokens.
        '''
        self.__collection = nltk.TextCollection(token_list_list)

    def vectorize(self, token_list):
        '''
        Compute the TF-IDF weight of every token in a token list.

        NOTE(review): the collection itself is passed as the `text`
        argument of `tf_idf`, so the term-frequency part is presumably
        measured against the whole collection rather than against
        `token_list` -- confirm that this is intended.

        Args:
            token_list:   The list of tokens.

        Returns:
            A list holding one `tf_idf` result per token, in the same
            order as `token_list`.
        '''
        corpus = self.__collection
        weight_list = []
        for token in token_list:
            weight_list.append(corpus.tf_idf(token, corpus))
        return weight_list