Почему обучение модели прогноза временных рядов RNN прекращается после нескольких итераций с использованием GluonTS? - PullRequest
0 голосов
/ 22 апреля 2020

Я пытаюсь построить модель RNN для прогнозирования временных рядов с использованием GluonTS. Я следую официальному примеру документации здесь: https://gluon-ts.mxnet.io/examples/extended_forecasting_tutorial/extended_tutorial.html#5 .4-From-feedforward-to-RNN

У меня есть классифицированные данные временных рядов ( Github Gist of the CSV здесь ), который я пытаюсь использовать для построения модели прогноза. Первая эпоха обучения модели начинается, но заканчивается через пару итераций.

Это сообщение об ошибке, которое я получаю:

INFO:root:Using CPU
INFO:root:Start model training
learning rate from ``lr_scheduler`` has been overwritten by ``learning_rate`` in optimizer.
INFO:root:Epoch[0] Learning rate is 0.001
  0%|                                                                                                                             | 0/100 [00:00<?, ?it/s]INFO:root:Number of parameters in MyProbTrainRNN: 20082
  3%|██▉                                                                                             | 3/100 [00:00<00:32,  3.02it/s, avg_epoch_loss=1.06]
Traceback (most recent call last):
  File "rnn_model.py", line 346, in <module>
    predictor = estimator.train(train_ds)
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/model/estimator.py", line 223, in train
    return self.train_model(training_data, validation_data).predictor
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/model/estimator.py", line 208, in train_model
    validation_iter=validation_data_loader,
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/trainer/_base.py", line 297, in __call__
    epoch_loss = loop(epoch_no, train_iter)
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/trainer/_base.py", line 230, in loop
    for batch_no, data_entry in enumerate(it, start=1):
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/tqdm/std.py", line 1108, in __iter__
    for obj in iterable:
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/dataset/loader.py", line 199, in __iter__
    self.batch_size - 1
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/dataset/loader.py", line 166, in _emit_batches_while_buffer_larger_than
    yield self._buffer.next_batch()
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/dataset/loader.py", line 54, in next_batch
    batch = {k: self.stack(v[:n]) for k, v in self._buffers.items()}
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/dataset/loader.py", line 54, in <dictcomp>
    batch = {k: self.stack(v[:n]) for k, v in self._buffers.items()}
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/dataset/loader.py", line 65, in stack
    return mx.nd.array(data, dtype=data.dtype, ctx=self.ctx)
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/mxnet/ndarray/utils.py", line 146, in array
    return _array(source_array, ctx=ctx, dtype=dtype)
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/mxnet/ndarray/ndarray.py", line 3284, in array
    arr = empty(source_array.shape, ctx, dtype)
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/mxnet/ndarray/ndarray.py", line 4735, in empty
    return NDArray(handle=_new_alloc_handle(shape, ctx, False, dtype))
  File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/mxnet/ndarray/ndarray.py", line 174, in _new_alloc_handle
    ctypes.c_int(int(_DTYPE_NP_TO_MX[np.dtype(dtype).type])),
KeyError: <class 'numpy.object_'>

Вот мой код:

import mxnet as mx
from mxnet import gluon
import numpy as np
import pandas as pd
from datetime import timedelta

# Imports related to GluonTS 

from gluonts.distribution.distribution_output import DistributionOutput
from gluonts.distribution.gaussian import GaussianOutput
from gluonts.model.estimator import GluonEstimator
from gluonts.dataset.common import ListDataset
from gluonts.block.scaler import MeanScaler, NOPScaler
from gluonts.model.predictor import Predictor, RepresentableBlockPredictor
from gluonts.core.component import validated
from gluonts.trainer import Trainer
from gluonts.support.util import copy_parameters
from gluonts.dataset.field_names import FieldName
from mxnet.gluon import HybridBlock
from gluonts.transform import (
    AddObservedValuesIndicator,
    Chain,
    Transformation,
    ExpectedNumInstanceSampler,
    InstanceSplitter
)

# Random Seeds 
mx.random.seed(0)
np.random.seed(0)


class MyProbRNN(gluon.HybridBlock):
    def __init__(self,
                 prediction_length,
                 context_length,
                 distr_output,
                 num_cells,
                 num_layers,
                 num_sample_paths=100,
                 scaling=True,
                 **kwargs
     ) -> None:
        super().__init__(**kwargs)
        self.prediction_length = prediction_length
        self.context_length = context_length
        self.distr_output = distr_output
        self.num_cells = num_cells
        self.num_layers = num_layers
        self.num_sample_paths = num_sample_paths
        self.proj_distr_args = distr_output.get_args_proj()
        self.scaling = scaling

        with self.name_scope():
            self.rnn = mx.gluon.rnn.HybridSequentialRNNCell()
            for k in range(self.num_layers):
                cell = mx.gluon.rnn.LSTMCell(hidden_size=self.num_cells)
                cell = mx.gluon.rnn.ResidualCell(cell) if k > 0 else cell
                self.rnn.add(cell)

            if scaling:
                self.scaler = MeanScaler(keepdims=True)
            else:
                self.scaler = NOPScaler(keepdims=True)

    def compute_scale(self, past_target, past_observed_values):
        # scale is computed on the context length last units of the past target
        # scale shape is (batch_size, 1, *target_shape)
        _, scale = self.scaler(
            past_target.slice_axis(
                axis=1, begin=-self.context_length, end=None
            ),
            past_observed_values.slice_axis(
                axis=1, begin=-self.context_length, end=None
            ),
        )

        return scale

    def unroll_encoder(self,
                       F,
                       past_target,
                       past_observed_values,
                       future_target=None,
                       future_observed_values=None):
        # overall target field
        # input target from -(context_length + prediction_length + 1) to -1
        if future_target is not None:  # during training
            target_in = F.concat(
                past_target, future_target, dim=-1
            ).slice_axis(
                axis=1, begin=-(self.context_length + self.prediction_length + 1), end=-1
            )

            # overall observed_values field
            # input observed_values corresponding to target_in
            observed_values_in = F.concat(
                past_observed_values, future_observed_values, dim=-1
            ).slice_axis(
                axis=1, begin=-(self.context_length + self.prediction_length + 1), end=-1
            )

            rnn_length = self.context_length + self.prediction_length
        else:  # during inference
            target_in = past_target.slice_axis(
                axis=1, begin=-(self.context_length + 1), end=-1
            )

            # overall observed_values field
            # input observed_values corresponding to target_in
            observed_values_in = past_observed_values.slice_axis(
                axis=1, begin=-(self.context_length + 1), end=-1
            )

            rnn_length = self.context_length

        # compute scale
        scale = self.compute_scale(target_in, observed_values_in)

        # scale target_in
        target_in_scale = F.broadcast_div(target_in, scale)

        # compute network output
        net_output, states = self.rnn.unroll(
            inputs=target_in_scale,
            length=rnn_length,
            layout="NTC",
            merge_outputs=True,
        )

        return net_output, states, scale


class MyProbTrainRNN(MyProbRNN):
    def hybrid_forward(self,
                       F,
                       past_target,
                       future_target,
                       past_observed_values,
                       future_observed_values):

        net_output, _, scale = self.unroll_encoder(F,
                                                   past_target,
                                                   past_observed_values,
                                                   future_target,
                                                   future_observed_values)

        # output target from -(context_length + prediction_length) to end
        target_out = F.concat(
            past_target, future_target, dim=-1
        ).slice_axis(
            axis=1, begin=-(self.context_length + self.prediction_length), end=None
        )

        # project network output to distribution parameters domain
        distr_args = self.proj_distr_args(net_output)

        # compute distribution
        distr = self.distr_output.distribution(distr_args, scale=scale)

        # negative log-likelihood
        loss = distr.loss(target_out)
        return loss


class MyProbPredRNN(MyProbTrainRNN):
    def sample_decoder(self, F, past_target, states, scale):
        # repeat fields: from (batch_size, past_target_length) to
        # (batch_size * num_sample_paths, past_target_length)
        repeated_states = [
            s.repeat(repeats=self.num_sample_paths, axis=0)
            for s in states
        ]
        repeated_scale = scale.repeat(repeats=self.num_sample_paths, axis=0)

        # first decoder input is the last value of the past_target, i.e.,
        # the previous value of the first time step we want to forecast
        decoder_input = past_target.slice_axis(
            axis=1, begin=-1, end=None
        ).repeat(
            repeats=self.num_sample_paths, axis=0
        )

        # list with samples at each time step
        future_samples = []

        # for each future time step we draw new samples for this time step and update the state
        # the drawn samples are the inputs to the rnn at the next time step
        for k in range(self.prediction_length):
            rnn_outputs, repeated_states = self.rnn.unroll(
                inputs=decoder_input,
                length=1,
                begin_state=repeated_states,
                layout="NTC",
                merge_outputs=True,
            )

            # project network output to distribution parameters domain
            distr_args = self.proj_distr_args(rnn_outputs)

            # compute distribution
            distr = self.distr_output.distribution(distr_args, scale=repeated_scale)

            # draw samples (batch_size * num_samples, 1)
            new_samples = distr.sample()

            # append the samples of the current time step
            future_samples.append(new_samples)

            # update decoder input for the next time step
            decoder_input = new_samples

        samples = F.concat(*future_samples, dim=1)

        # (batch_size, num_samples, prediction_length)
        return samples.reshape(shape=(-1, self.num_sample_paths, self.prediction_length))

    def hybrid_forward(self, F, past_target, past_observed_values):
        # unroll encoder over context_length
        net_output, states, scale = self.unroll_encoder(F,
                                                        past_target,
                                                        past_observed_values)

        samples = self.sample_decoder(F, past_target, states, scale)

        return samples


class MyProbRNNEstimator(GluonEstimator):
    @validated()
    def __init__(
            self,
            prediction_length: int,
            context_length: int,
            freq: str,
            distr_output: DistributionOutput,
            num_cells: int,
            num_layers: int,
            num_sample_paths: int = 100,
            scaling: bool = True,
            trainer: Trainer = Trainer()
    ) -> None:
        super().__init__(trainer=trainer)
        self.prediction_length = prediction_length
        self.context_length = context_length
        self.freq = freq
        self.distr_output = distr_output
        self.num_cells = num_cells
        self.num_layers = num_layers
        self.num_sample_paths = num_sample_paths
        self.scaling = scaling

    def create_transformation(self):
        # Feature transformation that the model uses for input.
        return Chain(
            [
                AddObservedValuesIndicator(
                    target_field=FieldName.TARGET,
                    output_field=FieldName.OBSERVED_VALUES,
                ),
                InstanceSplitter(
                    target_field=FieldName.TARGET,
                    is_pad_field=FieldName.IS_PAD,
                    start_field=FieldName.START,
                    forecast_start_field=FieldName.FORECAST_START,
                    train_sampler=ExpectedNumInstanceSampler(num_instances=1),
                    past_length=self.context_length + 1,
                    future_length=self.prediction_length,
                    time_series_fields=[
                        FieldName.FEAT_DYNAMIC_REAL,
                        FieldName.OBSERVED_VALUES,
                    ],
                ),

            ]
        )

    def create_training_network(self) -> MyProbTrainRNN:
        return MyProbTrainRNN(
            prediction_length=self.prediction_length,
            context_length=self.context_length,
            distr_output=self.distr_output,
            num_cells=self.num_cells,
            num_layers=self.num_layers,
            num_sample_paths=self.num_sample_paths,
            scaling=self.scaling
        )

    def create_predictor(
            self, transformation: Transformation, trained_network: HybridBlock
    ) -> Predictor:
        prediction_network = MyProbPredRNN(
            prediction_length=self.prediction_length,
            context_length=self.context_length,
            distr_output=self.distr_output,
            num_cells=self.num_cells,
            num_layers=self.num_layers,
            num_sample_paths=self.num_sample_paths,
            scaling=self.scaling
        )

        copy_parameters(trained_network, prediction_network)

        return RepresentableBlockPredictor(
            input_transform=transformation,
            prediction_net=prediction_network,
            batch_size=self.trainer.batch_size,
            freq=self.freq,
            prediction_length=self.prediction_length,
            ctx=self.trainer.ctx,
        )


estimator = MyProbRNNEstimator(
        prediction_length=24,
        context_length=48,
        freq="1H",
        num_cells=40,
        num_layers=2,
        distr_output=GaussianOutput(),
        trainer=Trainer(ctx="cpu",
                        epochs=5,
                        learning_rate=1e-3,
                        hybridize=False,
                        num_batches_per_epoch=100
                       )
    )



# df = pd.read_csv('categorized_timeseries.csv', index_col=None, parse_dates=False)
df = pd.read_csv('10216800.csv', index_col=None, names=['Timestamp', 'kw'], parse_dates=True)

prediction_length = 96*7
freq = "15min"
start = pd.Timestamp(df['Timestamp'][0], freq=freq)
target = df['kw'].to_numpy()
fdr = np.empty((len(target), 2))
fdr.fill(1)

# # train dataset: cut the last window of length "prediction_length", add "target" and "start" fields
train_ds = ListDataset([{FieldName.TARGET: target[:-prediction_length], FieldName.START:start, FieldName.FEAT_DYNAMIC_REAL:fdr[:-prediction_length]}], freq=freq)
# # test dataset: use the whole dataset, add "target" and "start" fields
test_ds = ListDataset([{FieldName.TARGET:target[-prediction_length:], FieldName.START:start, FieldName.FEAT_DYNAMIC_REAL:fdr[-prediction_length:]}], freq=freq)

predictor = estimator.train(train_ds)

Чтобы исправить ошибку, я попытался изменить размер пакета, однако, это, кажется, не оказывает влияния. Любое направление о том, как решить эту проблему, поможет.

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...