I am trying to build an RNN model for time series forecasting using GluonTS. I am following the official documentation example here: https://gluon-ts.mxnet.io/examples/extended_forecasting_tutorial/extended_tutorial.html#5.4-From-feedforward-to-RNN
I have categorized time series data (GitHub Gist of the CSV here) that I am trying to use to build a forecasting model. The first training epoch starts, but training fails after a couple of iterations.
This is the error message I get:
INFO:root:Using CPU
INFO:root:Start model training
learning rate from ``lr_scheduler`` has been overwritten by ``learning_rate`` in optimizer.
INFO:root:Epoch[0] Learning rate is 0.001
0%| | 0/100 [00:00<?, ?it/s]INFO:root:Number of parameters in MyProbTrainRNN: 20082
3%|██▉ | 3/100 [00:00<00:32, 3.02it/s, avg_epoch_loss=1.06]
Traceback (most recent call last):
File "rnn_model.py", line 346, in <module>
predictor = estimator.train(train_ds)
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/model/estimator.py", line 223, in train
return self.train_model(training_data, validation_data).predictor
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/model/estimator.py", line 208, in train_model
validation_iter=validation_data_loader,
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/trainer/_base.py", line 297, in __call__
epoch_loss = loop(epoch_no, train_iter)
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/trainer/_base.py", line 230, in loop
for batch_no, data_entry in enumerate(it, start=1):
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/tqdm/std.py", line 1108, in __iter__
for obj in iterable:
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/dataset/loader.py", line 199, in __iter__
self.batch_size - 1
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/dataset/loader.py", line 166, in _emit_batches_while_buffer_larger_than
yield self._buffer.next_batch()
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/dataset/loader.py", line 54, in next_batch
batch = {k: self.stack(v[:n]) for k, v in self._buffers.items()}
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/dataset/loader.py", line 54, in <dictcomp>
batch = {k: self.stack(v[:n]) for k, v in self._buffers.items()}
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/gluonts/dataset/loader.py", line 65, in stack
return mx.nd.array(data, dtype=data.dtype, ctx=self.ctx)
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/mxnet/ndarray/utils.py", line 146, in array
return _array(source_array, ctx=ctx, dtype=dtype)
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/mxnet/ndarray/ndarray.py", line 3284, in array
arr = empty(source_array.shape, ctx, dtype)
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/mxnet/ndarray/ndarray.py", line 4735, in empty
return NDArray(handle=_new_alloc_handle(shape, ctx, False, dtype))
File "/Users/sayon/Documents/Codes/Python/probabilistic_forecast/forecast/lib/python3.7/site-packages/mxnet/ndarray/ndarray.py", line 174, in _new_alloc_handle
ctypes.c_int(int(_DTYPE_NP_TO_MX[np.dtype(dtype).type])),
KeyError: <class 'numpy.object_'>
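If I read the traceback correctly, the failure happens when the data loader stacks a batch and hands MXNet a NumPy array whose dtype is object: the _DTYPE_NP_TO_MX lookup table has no entry for numpy.object_. A minimal standalone snippet (my own illustration, not from the tutorial) reproduces the same KeyError:

import numpy as np
import mxnet as mx

# Any None or mixed-type entry makes NumPy fall back to dtype=object,
# which MXNet cannot convert to an NDArray:
bad = np.array([1.0, None, 2.0])
print(bad.dtype)  # object
mx.nd.array(bad)  # KeyError: <class 'numpy.object_'>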
Here is my code:
import mxnet as mx
from mxnet import gluon
import numpy as np
import pandas as pd
from datetime import timedelta
# Imports related to GluonTS
from gluonts.distribution.distribution_output import DistributionOutput
from gluonts.distribution.gaussian import GaussianOutput
from gluonts.model.estimator import GluonEstimator
from gluonts.dataset.common import ListDataset
from gluonts.block.scaler import MeanScaler, NOPScaler
from gluonts.model.predictor import Predictor, RepresentableBlockPredictor
from gluonts.core.component import validated
from gluonts.trainer import Trainer
from gluonts.support.util import copy_parameters
from gluonts.dataset.field_names import FieldName
from mxnet.gluon import HybridBlock
from gluonts.transform import (
    AddObservedValuesIndicator,
    Chain,
    Transformation,
    ExpectedNumInstanceSampler,
    InstanceSplitter,
)
# Random Seeds
mx.random.seed(0)
np.random.seed(0)
class MyProbRNN(gluon.HybridBlock):
    def __init__(self,
                 prediction_length,
                 context_length,
                 distr_output,
                 num_cells,
                 num_layers,
                 num_sample_paths=100,
                 scaling=True,
                 **kwargs) -> None:
        super().__init__(**kwargs)
        self.prediction_length = prediction_length
        self.context_length = context_length
        self.distr_output = distr_output
        self.num_cells = num_cells
        self.num_layers = num_layers
        self.num_sample_paths = num_sample_paths
        self.proj_distr_args = distr_output.get_args_proj()
        self.scaling = scaling

        with self.name_scope():
            self.rnn = mx.gluon.rnn.HybridSequentialRNNCell()
            for k in range(self.num_layers):
                cell = mx.gluon.rnn.LSTMCell(hidden_size=self.num_cells)
                cell = mx.gluon.rnn.ResidualCell(cell) if k > 0 else cell
                self.rnn.add(cell)

            if scaling:
                self.scaler = MeanScaler(keepdims=True)
            else:
                self.scaler = NOPScaler(keepdims=True)
    def compute_scale(self, past_target, past_observed_values):
        # scale is computed on the context length last units of the past target
        # scale shape is (batch_size, 1, *target_shape)
        _, scale = self.scaler(
            past_target.slice_axis(
                axis=1, begin=-self.context_length, end=None
            ),
            past_observed_values.slice_axis(
                axis=1, begin=-self.context_length, end=None
            ),
        )
        return scale
    def unroll_encoder(self,
                       F,
                       past_target,
                       past_observed_values,
                       future_target=None,
                       future_observed_values=None):
        # overall target field
        # input target from -(context_length + prediction_length + 1) to -1
        if future_target is not None:  # during training
            target_in = F.concat(
                past_target, future_target, dim=-1
            ).slice_axis(
                axis=1, begin=-(self.context_length + self.prediction_length + 1), end=-1
            )

            # overall observed_values field
            # input observed_values corresponding to target_in
            observed_values_in = F.concat(
                past_observed_values, future_observed_values, dim=-1
            ).slice_axis(
                axis=1, begin=-(self.context_length + self.prediction_length + 1), end=-1
            )

            rnn_length = self.context_length + self.prediction_length
        else:  # during inference
            target_in = past_target.slice_axis(
                axis=1, begin=-(self.context_length + 1), end=-1
            )

            # overall observed_values field
            # input observed_values corresponding to target_in
            observed_values_in = past_observed_values.slice_axis(
                axis=1, begin=-(self.context_length + 1), end=-1
            )

            rnn_length = self.context_length

        # compute scale
        scale = self.compute_scale(target_in, observed_values_in)

        # scale target_in
        target_in_scale = F.broadcast_div(target_in, scale)

        # compute network output
        net_output, states = self.rnn.unroll(
            inputs=target_in_scale,
            length=rnn_length,
            layout="NTC",
            merge_outputs=True,
        )

        return net_output, states, scale
class MyProbTrainRNN(MyProbRNN):
    def hybrid_forward(self,
                       F,
                       past_target,
                       future_target,
                       past_observed_values,
                       future_observed_values):
        net_output, _, scale = self.unroll_encoder(F,
                                                   past_target,
                                                   past_observed_values,
                                                   future_target,
                                                   future_observed_values)

        # output target from -(context_length + prediction_length) to end
        target_out = F.concat(
            past_target, future_target, dim=-1
        ).slice_axis(
            axis=1, begin=-(self.context_length + self.prediction_length), end=None
        )

        # project network output to distribution parameters domain
        distr_args = self.proj_distr_args(net_output)

        # compute distribution
        distr = self.distr_output.distribution(distr_args, scale=scale)

        # negative log-likelihood
        loss = distr.loss(target_out)
        return loss
class MyProbPredRNN(MyProbTrainRNN):
    def sample_decoder(self, F, past_target, states, scale):
        # repeat fields: from (batch_size, past_target_length) to
        # (batch_size * num_sample_paths, past_target_length)
        repeated_states = [
            s.repeat(repeats=self.num_sample_paths, axis=0)
            for s in states
        ]
        repeated_scale = scale.repeat(repeats=self.num_sample_paths, axis=0)

        # first decoder input is the last value of the past_target, i.e.,
        # the previous value of the first time step we want to forecast
        decoder_input = past_target.slice_axis(
            axis=1, begin=-1, end=None
        ).repeat(
            repeats=self.num_sample_paths, axis=0
        )

        # list with samples at each time step
        future_samples = []

        # for each future time step we draw new samples for this time step and update the state
        # the drawn samples are the inputs to the rnn at the next time step
        for k in range(self.prediction_length):
            rnn_outputs, repeated_states = self.rnn.unroll(
                inputs=decoder_input,
                length=1,
                begin_state=repeated_states,
                layout="NTC",
                merge_outputs=True,
            )

            # project network output to distribution parameters domain
            distr_args = self.proj_distr_args(rnn_outputs)

            # compute distribution
            distr = self.distr_output.distribution(distr_args, scale=repeated_scale)

            # draw samples (batch_size * num_samples, 1)
            new_samples = distr.sample()

            # append the samples of the current time step
            future_samples.append(new_samples)

            # update decoder input for the next time step
            decoder_input = new_samples

        samples = F.concat(*future_samples, dim=1)

        # (batch_size, num_samples, prediction_length)
        return samples.reshape(shape=(-1, self.num_sample_paths, self.prediction_length))

    def hybrid_forward(self, F, past_target, past_observed_values):
        # unroll encoder over context_length
        net_output, states, scale = self.unroll_encoder(F,
                                                        past_target,
                                                        past_observed_values)
        samples = self.sample_decoder(F, past_target, states, scale)
        return samples
class MyProbRNNEstimator(GluonEstimator):
    @validated()
    def __init__(
        self,
        prediction_length: int,
        context_length: int,
        freq: str,
        distr_output: DistributionOutput,
        num_cells: int,
        num_layers: int,
        num_sample_paths: int = 100,
        scaling: bool = True,
        trainer: Trainer = Trainer()
    ) -> None:
        super().__init__(trainer=trainer)
        self.prediction_length = prediction_length
        self.context_length = context_length
        self.freq = freq
        self.distr_output = distr_output
        self.num_cells = num_cells
        self.num_layers = num_layers
        self.num_sample_paths = num_sample_paths
        self.scaling = scaling

    def create_transformation(self):
        # Feature transformation that the model uses for input.
        return Chain(
            [
                AddObservedValuesIndicator(
                    target_field=FieldName.TARGET,
                    output_field=FieldName.OBSERVED_VALUES,
                ),
                InstanceSplitter(
                    target_field=FieldName.TARGET,
                    is_pad_field=FieldName.IS_PAD,
                    start_field=FieldName.START,
                    forecast_start_field=FieldName.FORECAST_START,
                    train_sampler=ExpectedNumInstanceSampler(num_instances=1),
                    past_length=self.context_length + 1,
                    future_length=self.prediction_length,
                    time_series_fields=[
                        FieldName.FEAT_DYNAMIC_REAL,
                        FieldName.OBSERVED_VALUES,
                    ],
                ),
            ]
        )

    def create_training_network(self) -> MyProbTrainRNN:
        return MyProbTrainRNN(
            prediction_length=self.prediction_length,
            context_length=self.context_length,
            distr_output=self.distr_output,
            num_cells=self.num_cells,
            num_layers=self.num_layers,
            num_sample_paths=self.num_sample_paths,
            scaling=self.scaling,
        )

    def create_predictor(
        self, transformation: Transformation, trained_network: HybridBlock
    ) -> Predictor:
        prediction_network = MyProbPredRNN(
            prediction_length=self.prediction_length,
            context_length=self.context_length,
            distr_output=self.distr_output,
            num_cells=self.num_cells,
            num_layers=self.num_layers,
            num_sample_paths=self.num_sample_paths,
            scaling=self.scaling,
        )
        copy_parameters(trained_network, prediction_network)
        return RepresentableBlockPredictor(
            input_transform=transformation,
            prediction_net=prediction_network,
            batch_size=self.trainer.batch_size,
            freq=self.freq,
            prediction_length=self.prediction_length,
            ctx=self.trainer.ctx,
        )
estimator = MyProbRNNEstimator(
    prediction_length=24,
    context_length=48,
    freq="1H",
    num_cells=40,
    num_layers=2,
    distr_output=GaussianOutput(),
    trainer=Trainer(
        ctx="cpu",
        epochs=5,
        learning_rate=1e-3,
        hybridize=False,
        num_batches_per_epoch=100,
    ),
)
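# Note: the estimator above is configured with freq="1H" while the dataset
# below is built with freq="15min"; flagging the mismatch here, though the
# crash itself happens mid-training, not at dataset construction.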
# df = pd.read_csv('categorized_timeseries.csv', index_col=None, parse_dates=False)
df = pd.read_csv('10216800.csv', index_col=None, names=['Timestamp', 'kw'], parse_dates=True)

prediction_length = 96 * 7
freq = "15min"
start = pd.Timestamp(df['Timestamp'][0], freq=freq)
target = df['kw'].to_numpy()
fdr = np.empty((len(target), 2))
fdr.fill(1)

# train dataset: cut the last window of length "prediction_length", add "target" and "start" fields
train_ds = ListDataset(
    [{FieldName.TARGET: target[:-prediction_length],
      FieldName.START: start,
      FieldName.FEAT_DYNAMIC_REAL: fdr[:-prediction_length]}],
    freq=freq,
)

# test dataset: use the whole dataset, add "target" and "start" fields
test_ds = ListDataset(
    [{FieldName.TARGET: target[-prediction_length:],
      FieldName.START: start,
      FieldName.FEAT_DYNAMIC_REAL: fdr[-prediction_length:]}],
    freq=freq,
)

predictor = estimator.train(train_ds)
To fix the error I tried changing the batch size, but that does not seem to have any effect. Any pointers on how to resolve this would help.
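For reference, a quick sanity check (a sketch over the same df, target and fdr variables defined above) that I can run to see whether anything reaches MXNet with dtype=object:

# Inspect dtypes and shapes of everything fed into ListDataset.
print(df.dtypes)             # 'kw' should be numeric, not object
print(target.dtype)          # expect float64; object would explain the KeyError
print(fdr.dtype, fdr.shape)  # filled with 1.0, shape (len(target), 2)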