Я пытаюсь создать упрощенный пример rl4j на основе существующих примеров Gym и Malmo. Дана синусоида, и ИИ должен сказать, находимся ли мы на вершине волны, внизу или где-то еще (n oop).
SineRider - это «Игра», State - это значение функция синуса (только одна двойная)
Проблема в том, что она никогда не вызывает функцию step в SineRider для получения вознаграждения. Что я не так?
package aiexample
import org.deeplearning4j.gym.StepReply
import org.deeplearning4j.rl4j.learning.sync.qlearning.QLearning
import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.QLearningDiscreteDense
import org.deeplearning4j.rl4j.mdp.MDP
import org.deeplearning4j.rl4j.network.dqn.DQNFactoryStdDense
import org.deeplearning4j.rl4j.space.DiscreteSpace
import org.deeplearning4j.rl4j.space.Encodable
import org.deeplearning4j.rl4j.space.ObservationSpace
import org.nd4j.linalg.api.ndarray.INDArray
import org.nd4j.linalg.factory.Nd4j
import org.nd4j.linalg.learning.config.Adam
import kotlin.math.sin
object Example {
var ql: QLearning.QLConfiguration = QLearning.QLConfiguration(
123, //Random seed
1000, //Max step By epoch
8000, //Max step
1000, //Max size of experience replay
32, //size of batches
100, //target update (hard)
0, //num step noop warmup
0.05, //reward scaling
0.99, //gamma
10.0, //td-error clipping
0.1f, //min epsilon
2000, //num step for eps greedy anneal
true //double DQN
var net: DQNFactoryStdDense.Configuration = DQNFactoryStdDense.Configuration.builder()
fun main(args: Array<String>) {
private fun simpleSine() {
val mdp = Env.create()
val dql = QLearningDiscreteDense(mdp, net, ql)
class Action(val name:String) {
companion object {
val noop = Action("noop")
val top = Action("top")
val bottom = Action("bottom")
class State(private val inputs: DoubleArray): Encodable {
override fun toArray(): DoubleArray {
return inputs
class SineObservationSpace: ObservationSpace<State> {
override fun getLow(): INDArray {
return Nd4j.create(doubleArrayOf(-1.0))
override fun getHigh(): INDArray {
return Nd4j.create(doubleArrayOf(1.0))
override fun getName(): String {
return "Discrete"
override fun getShape(): IntArray {
return intArrayOf(1)
class SineRider{
companion object {
val actions = mapOf(
0 to Action.noop,
1 to Action.top,
2 to Action.bottom)
var i = 0.0
fun step(action:Int): Double{
val act = actions[action]
if(act == Action.top){
return if(i > 0.9) 1.0 else -1.0
if(act == Action.bottom){
return if(i < -0.9) 1.0 else -1.0
if(act == Action.noop){
return if(i < 0.9 && i > -0.9) 1.0 else -1.0
return 0.0
fun reset(){
fun next(){
i += 0.1
fun state(): State {
val sine = sin(i)
return State(arrayOf(sine).toDoubleArray())
class Env(private val sineRider: SineRider) : MDP<State, Int, DiscreteSpace> {
private val actionSpace = DiscreteSpace(3)
private var done = false
override fun getObservationSpace(): ObservationSpace<State> {
return SineObservationSpace()
override fun getActionSpace(): DiscreteSpace {
return actionSpace
override fun step(action: Int): StepReply<State> {
val reward = sineRider.step(action)
val state = sineRider.state()
return StepReply(state, reward, true, null)
override fun isDone(): Boolean {
return true
override fun reset(): State? {
done = false
return sineRider.state()
override fun close() {
override fun newInstance(): Env {
return create()
companion object {
fun create() : Env {
val sinRider = SineRider()
return Env(sinRider)