# Source code for pysummarization.iteratabledata.token_iterator

# -*- coding: utf-8 -*-
from accelbrainbase.iteratable_data import IteratableData
from pysummarization.vectorizable_token import VectorizableToken
import numpy as np


class TokenIterator(IteratableData):
    '''
    Iterator that draws mini-batches of token-vector sequences.

    Tokens are vectorized by a `VectorizableToken`, sliced into
    overlapping sequences of length `seq_len`, shuffled, and split
    into training and test sets.
    '''

    # is-a `VectorizableToken`.
    __vectorizable_token = None

    def get_vectorizable_token(self):
        ''' getter '''
        return self.__vectorizable_token

    def set_vectorizable_token(self, value):
        ''' setter '''
        if isinstance(value, VectorizableToken) is False:
            raise TypeError("The type of `vectorizable_token` must be `VectorizableToken`.")
        self.__vectorizable_token = value

    vectorizable_token = property(get_vectorizable_token, set_vectorizable_token)

    def __init__(
        self,
        vectorizable_token,
        token_arr,
        epochs=1000,
        batch_size=25,
        seq_len=5,
        test_size=0.3,
        norm_mode=None,
        noiseable_data=None
    ):
        '''
        Init.

        Args:
            vectorizable_token:     is-a `VectorizableToken`.
            token_arr:              `np.ndarray` of tokens.
            epochs:                 `int` of epochs.
            batch_size:             `int` of batch size.
            seq_len:                `int` of length of series.
            test_size:              `float` of rate of test data.
                                    training data : test data = (1 - test_size) : test_size
            norm_mode:              How to normalize the observed data points.
                                    - `z_score`: Z-Score normalization.
                                    - `min_max`: Min-max normalization.
                                    - others : This class will not normalize the data.
            noiseable_data:         is-a `NoiseableData`.
        '''
        self.vectorizable_token = vectorizable_token
        vector_list = vectorizable_token.vectorize(token_list=token_arr.tolist())
        vector_arr = np.array(vector_list)

        # Build overlapping windows of `seq_len` consecutive token vectors.
        observed_list = []
        for i in range(seq_len, vector_arr.shape[0]):
            observed_list.append(vector_arr[i-seq_len:i])
        observed_arr = np.array(observed_list)
        self.observed_arr = observed_arr

        # Shuffle once, then split into training/test by `test_size`.
        training_row = int(observed_arr.shape[0] * (1 - test_size))
        key_arr = np.arange(observed_arr.shape[0])
        np.random.shuffle(key_arr)
        training_arr = observed_arr[key_arr[:training_row]]
        test_arr = observed_arr[key_arr[training_row:]]

        self.training_arr = training_arr
        self.test_arr = test_arr
        self.epochs = epochs
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.__norm_mode = norm_mode
        self.__noiseable_data = noiseable_data

    def generate_learned_samples(self):
        '''
        Draw and generate data.

        Returns:
            `Tuple` data. The shape is ...
            - observed data points in training (`np.ndarray`).
            - supervised data in training (same array as the first element).
            - observed data points in test (`np.ndarray`).
            - supervised data in test (same array as the third element).
        '''
        for epoch in range(self.epochs):
            training_key_arr = np.arange(self.training_arr.shape[0])
            test_key_arr = np.arange(self.test_arr.shape[0])
            np.random.shuffle(training_key_arr)
            np.random.shuffle(test_key_arr)

            training_batch_arr = self.training_arr[training_key_arr[:self.batch_size]]
            test_batch_arr = self.test_arr[test_key_arr[:self.batch_size]]

            training_batch_arr = self.pre_normalize(training_batch_arr)
            test_batch_arr = self.pre_normalize(test_batch_arr)

            if self.__noiseable_data is not None:
                training_batch_arr = self.__noiseable_data.noise(training_batch_arr)

            # Autoencoder-style pairs: the target equals the observation.
            yield training_batch_arr, training_batch_arr, test_batch_arr, test_batch_arr

    def generate_inferenced_samples(self):
        '''
        Draw and generate data for inference.

        The targets will be drawn from all observed sequences
        in ascending order, batch by batch.

        Returns:
            `Tuple` data. The shape is ...
            - `None`.
            - `None`.
            - observed data points in test (`np.ndarray`).
            - `None`.
        '''
        i = 0
        # Trailing samples that do not fill a whole batch are skipped,
        # matching the original behavior.
        while i + self.batch_size < self.observed_arr.shape[0]:
            test_batch_arr = self.observed_arr[i:i+self.batch_size]
            test_batch_arr = self.pre_normalize(test_batch_arr)
            i = i + self.batch_size
            yield None, None, test_batch_arr, None

    def pre_normalize(self, arr):
        '''
        Normalize before observation.

        Args:
            arr:    Tensor.

        Returns:
            Tensor.
        '''
        if self.__norm_mode == "min_max":
            # Add an epsilon only when the array is constant,
            # to avoid division by zero.
            if arr.max() != arr.min():
                n = 0.0
            else:
                n = 1e-08
            arr = (arr - arr.min()) / (arr.max() - arr.min() + n)
        elif self.__norm_mode == "z_score":
            # Bug fix: the arrays built by `__init__` are plain `np.ndarray`,
            # which has no `asnumpy()` method. Guard with `hasattr` so that
            # MXNet NDArray inputs (which do define `asnumpy()`) keep working.
            if hasattr(arr, "asnumpy"):
                std = arr.asnumpy().std()
            else:
                std = arr.std()
            if std == 0:
                std += 1e-08
            arr = (arr - arr.mean()) / std

        arr = arr * self.__scale
        return arr

    def set_readonly(self, value):
        ''' setter '''
        raise TypeError("This property must be read-only.")

    def get_epochs(self):
        ''' getter '''
        return self.__epochs

    def set_epochs(self, value):
        ''' setter '''
        self.__epochs = value

    epochs = property(get_epochs, set_epochs)

    def get_batch_size(self):
        ''' getter '''
        return self.__batch_size

    def set_batch_size(self, value):
        ''' setter '''
        self.__batch_size = value

    batch_size = property(get_batch_size, set_batch_size)

    def get_seq_len(self):
        ''' getter '''
        return self.__seq_len

    def set_seq_len(self, value):
        ''' setter '''
        self.__seq_len = value

    seq_len = property(get_seq_len, set_seq_len)

    # Default normalization mode (overridden per-instance in `__init__`).
    __norm_mode = "z_score"

    def get_norm_mode(self):
        ''' getter '''
        return self.__norm_mode

    def set_norm_mode(self, value):
        ''' setter '''
        self.__norm_mode = value

    norm_mode = property(get_norm_mode, set_norm_mode)

    # Scale factor applied after normalization in `pre_normalize`.
    __scale = 1.0

    def get_scale(self):
        ''' getter '''
        return self.__scale

    def set_scale(self, value):
        ''' setter '''
        self.__scale = value

    scale = property(get_scale, set_scale)