Source code for pysummarization.similarityfilter.tanimoto

# -*- coding: utf-8 -*-
from pysummarization.similarity_filter import SimilarityFilter


class Tanimoto(SimilarityFilter):
    '''
    Concrete class for filtering mutually similar sentences.

    Similarity between two token lists is measured with the
    Tanimoto coefficient.
    '''

    def calculate(self, token_list_x, token_list_y):
        '''
        Calculate similarity with the Tanimoto coefficient.

        Concrete method.

        Args:
            token_list_x:    [token, token, token, ...]
            token_list_y:    [token, token, token, ...]

        Returns:
            Similarity as a `float`: the number of matching tokens divided
            by `len(token_list_x) + len(token_list_y) - matches`.
            `0.0` is returned when that denominator is zero, which only
            happens when both token lists are empty.
        '''
        # NOTE: every element of `token_list_x` that also occurs in
        # `token_list_y` is counted, so a token repeated in `token_list_x`
        # contributes once per repetition.
        match_count = sum(
            1 for token in token_list_x if token in token_list_y
        )
        denominator = len(token_list_x) + len(token_list_y) - match_count

        # Two empty lists share nothing and have nothing to compare;
        # report "no similarity" instead of raising ZeroDivisionError.
        if denominator == 0:
            return 0.0

        # Convert before dividing so the ratio is never truncated by
        # integer division (Python 2 semantics of `/` on two ints).
        return float(match_count) / denominator