Source code for pysummarization.vectorizablesentence.lstm_rtrbm

# -*- coding: utf-8 -*-
from logging import getLogger
import numpy as np
from pysummarization.vectorizable_sentence import VectorizableSentence

# `Builder` in `Builder Pattern`.
from pydbm.dbm.builders.lstm_rt_rbm_simple_builder import LSTMRTRBMSimpleBuilder
# LSTM and Contrastive Divergence for function approximation.
from pydbm.approximation.rtrbmcd.lstm_rt_rbm_cd import LSTMRTRBMCD
# Logistic Function as activation function.
from pydbm.activation.logistic_function import LogisticFunction
# Tanh Function as activation function.
from pydbm.activation.tanh_function import TanhFunction
# Stochastic Gradient Descent (SGD) as optimizer.
from pydbm.optimization.optparams.sgd import SGD


class LSTMRTRBM(VectorizableSentence):
    '''
    Vectorize sentences by LSTM-RTRBM.

    The LSTM-RTRBM model integrates the ability of LSTM to memorize and
    retrieve useful history information with the advantage of RBM in
    high-dimensional data modelling (Lyu, Q., Wu, Z., Zhu, J., & Meng, H.,
    2015, June). Like the RTRBM, the LSTM-RTRBM also has recurrent hidden
    units.

    References:
        - Boulanger-Lewandowski, N., Bengio, Y., & Vincent, P. (2012). Modeling temporal dependencies in high-dimensional sequences: Application to polyphonic music generation and transcription. arXiv preprint arXiv:1206.6392.
        - Lyu, Q., Wu, Z., Zhu, J., & Meng, H. (2015, June). Modelling high-dimensional sequences with LSTM-RTRBM: Application to polyphonic music generation. In IJCAI (pp. 4138-4139).
        - Lyu, Q., Wu, Z., & Zhu, J. (2015, October). Polyphonic music modelling with LSTM-RTRBM. In Proceedings of the 23rd ACM International Conference on Multimedia (pp. 991-994). ACM.
        - Sutskever, I., Hinton, G. E., & Taylor, G. W. (2009). The recurrent temporal restricted Boltzmann machine. In Advances in Neural Information Processing Systems (pp. 1601-1608).
    '''
    def vectorize(self, sentence_list):
        '''
        Vectorize sentences with the trained LSTM-RTRBM.

        Args:
            sentence_list:  The list of tokenized sentences:
                            [
                                [`token`, `token`, `token`, ...],
                                [`token`, `token`, `token`, ...],
                                [`token`, `token`, `token`, ...]
                            ]

        Returns:
            `np.ndarray` of tokens:
            [vector of token, vector of token, vector of token]
        '''
        test_observed_arr = self.__setup_dataset(
            sentence_list,
            self.__token_master_list,
            self.__seq_len
        )
        inferenced_arr = self.__rbm.inference(
            test_observed_arr,
            training_count=1,
            r_batch_size=-1
        )
        return inferenced_arr
    def learn(
        self,
        sentence_list,
        token_master_list,
        hidden_neuron_count=1000,
        training_count=1,
        batch_size=100,
        learning_rate=1e-03,
        seq_len=5
    ):
        '''
        Learn the LSTM-RTRBM model.

        Args:
            sentence_list:          The `list` of sentences.
            token_master_list:      Unique `list` of tokens.
            hidden_neuron_count:    The number of units in the hidden layer.
            training_count:         The number of training iterations.
            batch_size:             Batch size of mini-batch training.
            learning_rate:          Learning rate.
            seq_len:                The length of one sequence.
        '''
        observed_arr = self.__setup_dataset(sentence_list, token_master_list, seq_len)
        visible_num = observed_arr.shape[-1]

        # `Builder` in `Builder Pattern` for LSTM-RTRBM.
        rnnrbm_builder = LSTMRTRBMSimpleBuilder()
        # Learning rate.
        rnnrbm_builder.learning_rate = learning_rate
        # Set units in the visible layer.
        rnnrbm_builder.visible_neuron_part(LogisticFunction(), visible_num)
        # Set units in the hidden layer.
        rnnrbm_builder.hidden_neuron_part(LogisticFunction(), hidden_neuron_count)
        # Set units in the RNN layer.
        rnnrbm_builder.rnn_neuron_part(TanhFunction())
        # Set graph and approximation function, delegating `SGD`, which is-a `OptParams`.
        rnnrbm_builder.graph_part(LSTMRTRBMCD(opt_params=SGD()))
        # Building.
        rbm = rnnrbm_builder.get_result()

        # Learning.
        rbm.learn(
            # The `np.ndarray` of observed data points.
            observed_arr,
            # Training count.
            training_count=training_count,
            # Batch size.
            batch_size=batch_size
        )

        self.__rbm = rbm
        self.__token_master_list = token_master_list
        self.__seq_len = seq_len
    def __setup_dataset(self, sentence_list, token_master_list, seq_len):
        observed_list = [None] * len(sentence_list)
        for i in range(len(sentence_list)):
            arr_list = [None] * seq_len
            for j in range(seq_len):
                # One-hot encode the `j`-th token of the `i`-th sentence.
                arr = np.zeros(len(token_master_list))
                try:
                    token = sentence_list[i][j]
                    arr[token_master_list.index(token)] = 1
                except IndexError:
                    # Sentences shorter than `seq_len` are zero-padded;
                    # longer sentences are truncated by the loop bound.
                    pass
                finally:
                    arr = arr.astype(np.float64)
                    arr_list[j] = arr

            observed_list[i] = arr_list

        observed_arr = np.array(observed_list)
        return observed_arr
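

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module): a minimal, hypothetical
# example of driving `LSTMRTRBM`. The toy corpus, the hyperparameter values,
# and the way `token_master_list` is derived are illustrative assumptions;
# only `learn` and `vectorize` are the public API defined above.
if __name__ == "__main__":
    # Tokenized sentences (hypothetical toy corpus).
    sentence_list = [
        ["natural", "language", "processing"],
        ["language", "models", "vectorize", "sentences"]
    ]
    # Unique token master list, sorted for a stable one-hot index order.
    token_master_list = sorted(set(token for s in sentence_list for token in s))

    vectorizer = LSTMRTRBM()
    # Train on one-hot encoded token sequences of length `seq_len`.
    vectorizer.learn(
        sentence_list,
        token_master_list,
        hidden_neuron_count=100,
        training_count=1,
        batch_size=2,
        learning_rate=1e-03,
        seq_len=5
    )
    # Each sentence becomes a sequence of token vectors, as described in
    # the `vectorize` docstring above.
    inferenced_arr = vectorizer.vectorize(sentence_list)
    print(inferenced_arr.shape)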