Question

Я пытаюсь использовать тензор потока для создания агента DQN.Я черпаю вдохновение из этого хранилища: https://github.com/seann999/dodge_tfjs/blob/master/agent.js

Я написал класс Agent, который состоит из двух нейронных сетей, созданных с помощью этой функции:

const DEFAULT_OPTS: any = {
  activation: 'sigmoid',
};

export function createModel(inputSize: number, outputSize: number, opts: any = {}): tf.Model {
  const model = tf.sequential();
  opts = Object.assign({}, DEFAULT_OPTS, opts);
  /* INPUT */
  model.add(
    tf.layers.dense({
      inputDim: inputSize,
      units: 64,
      activation: opts.activation,
    })
  );

  /* HIDDEN */
  model.add(
    tf.layers.dense({
      inputDim: 64,
      units: 128,
      activation: opts.activation,
    })
  );
  model.add(
    tf.layers.dense({
      inputDim: 128,
      units: 64,
      activation: opts.activation,
    })
  );

  /* OUTPUT */
  model.add(
    tf.layers.dense({
      inputDim: 64,
      units: outputSize,
      activation: 'relu',
    })
  );

  return model;
}

И я написал метод обучения, который вычисляетпотеря и попытаться минимизировать ошибку (все еще в значительной степени вдохновленный работой seann999).

  constructor(config: AgentConfig) {
    this.config = Object.assign({}, DEFAULT_CONFIG, config);
    this.memory = new Memory(config.memorySize);
    this.Q = createModel(config.inputSize, config.outputSize);
    this.QTarget = createModel(config.inputSize, config.outputSize);
    this.optimizer = tf.train.adam(this.config.learningRate);
    // Get weights refs
    this.weights = [];
    for (const w of this.Q.weights) {
      this.weights.push((w as any).val);
    }
    this.updateTarget();
  }

...

  public async learn() {
    if (this.stats.learnCount % this.config.refreshTargetEvery === 0) {
      this.updateTarget();
    }
    const batchSize = 32;
    if (this.memory.getLength() > batchSize) {
      const batch = this.memory.getBatch(batchSize);
      // Batch tensors
      const batchState = tf.tensor2d(batch.map((el: any) => el.state)).asType('float32');
      const batchAction = tf
        .oneHot(tf.tensor1d(batch.map((el: any) => actions.indexOf(el.action)), 'int32'), actions.length)
        .asType('float32');
      const batchReward = tf.tensor1d(batch.map((el: any) => el.reward)).asType('float32');
      const batchNextState = tf.tensor2d(batch.map((el: any) => el.nextState)).asType('float32');
      const batchDone = tf.tensor1d(batch.map((el: any) => el.done)).asType('float32');

      // prodict nextState with targetNet
      const targets = this.calcTarget(batchReward, batchNextState, batchDone).asType('float32');

      const loss = this.optimizer.minimize(
        () => {
          const x = tf.variable(batchState);
          const predictions = (this.Q.predict(x) as tf.Tensor).argMax(1).asType('float32');
          return tf.losses.meanSquaredError(targets, predictions) as any;
        },
        true,
        this.weights
      );
      console.log('loss');
      console.log(loss);
    }
    // qMaxNextState = reward + game + this.QTarget.predict()
    this.stats.learnCount++;
    return;
  }

  private calcTarget(batchReward: any, batchNextState: any, batchDone: any) {
    return tf.tidy(() => {
      const maxQ = (this.QTarget.predict(batchNextState) as tf.Tensor).argMax(1).asType('float32');
      const targets = batchReward.add(maxQ.mul(tf.scalar(this.config.rewardDiscount)).mul(batchDone));
      return targets;
    });
  }

Но когда я выполняю свой код, я получаю ошибку.Ошибка возникает во время функции optimizer.minimize.Трассировка:

TypeError: Cannot read property 'values' of undefined
    at NodeJSKernelBackend.getInputTensorIds (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-node/dist/nodejs_kernel_backend.js:99:22)
    at NodeJSKernelBackend.executeSingleOutput (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-node/dist/nodejs_kernel_backend.js:123:73)
    at NodeJSKernelBackend.subtract (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-node/dist/nodejs_kernel_backend.js:248:21)
    at environment_1.ENV.engine.runKernel.$a (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/ops/binary_ops.ts:202:33)
    at /home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:206:22
    at Engine.scopedRun (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:167:19)
    at Engine.runKernel (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:202:10)
    at sub_ (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/ops/binary_ops.ts:201:21)
    at Object.sub (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/ops/operation.ts:46:24)
    at Tensor.sub (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/tensor.ts:842:22)
    at Object.$x (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/ops/unary_ops.ts:372:46)
    at _loop_1 (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/tape.ts:171:43)
    at Object.backpropagateGradients (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/dist/tape.js:112:9)
    at /home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:500:7
    at /home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:156:20
    at Engine.scopedRun (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:167:19)

Я пытался выяснить, почему он это делает, когда я исследую строку кода в тензорном потоке, где выдается проблема.

NodeJSKernelBackend.prototype.getInputTensorIds = function (tensors) {
    var ids = [];
    for (var i = 0; i < tensors.length; i++) {
        var info = this.tensorMap.get(tensors[i].dataId);
        /*if (!info) {
            console.log('tensors[i]')
            console.log(this.tensorMap)
            console.log(tensors[i])
            console.log(info)
         }*/
        if (info.values != null) {
            info.id =
                this.binding.createTensor(info.shape, info.dtype, info.values);
            info.values = null;
            this.tensorMap.set(tensors[i].dataId, info);
        }
        ids.push(info.id);
    }
    return ids;
};

Я вошел в систему, когда информацияимеет значение null, и я вижу тензор

Tensor {
  isDisposedInternal: true,
  shape: [ 32, 64 ],
  dtype: 'float32',
  size: 2048,
  strides: [ 64 ],
  dataId: {},
  id: 1494,
  rankType: '2' }

Но метод tenorMap.get не может получить его, и это делает информацию неопределенной.Я пытаюсь понять, почему это происходит, чтобы исправить мой код, но я изо всех сил.

Я новичок в тензорном потоке, любая помощь может быть отличной.

Спасибо

РЕДАКТИРОВАТЬ: я не знаю, почему, но когда я изменяю свою функцию активации с «сигмоида» на «рэлу», это работает.Если кто-то понимает, я хотел бы знать причину.

Оптимизация tfjs tenorflowjs с проблемой нестандартных потерь getInputTensorIds

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 0 ]

Оптимизация tfjs tenorflowjs с проблемой нестандартных потерь getInputTensorIds

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 0 ]

Похожие темы