Получение ValueError: не удалось преобразовать строку в число с плавающей точкой: "'H4'" при подаче набора данных, содержащего строки и числа - PullRequest
0 голосов
/ 25 февраля 2019

Я создаю нейронную сеть для классификации действий моего покерного бота из моей игры в покер. Я использую простой код нейронной сети для выполнения своей задачи. Но когда я помещаю свой собственный набор данных в код, возникает ошибка. Принимают ли нейронные сети наборы данных, содержащие и строки, и числа, как у меня?

Ошибка говорит:

ValueError: не удалось преобразовать строку в число с плавающей точкой: "'H4'"

Вот мой код:

from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
import numpy
import matplotlib.pyplot as plt
# Train a small binary classifier on the poker data set, save it, and plot
# the training history.

# seed NumPy's random number generator
numpy.random.seed(2)

# load the dataset
# NOTE: numpy.loadtxt converts every field to float by default, so a CSV that
# contains quoted text fields such as 'H4' fails here with
# "ValueError: could not convert string to float". Text columns have to be
# encoded as numbers before the data can be used below.
dataset = numpy.loadtxt("monteCarlo.csv", delimiter=",")

# split the csv data into input (X) and output (Y) variables
# NOTE(review): this assumes 9 columns per row (8 features + 1 label) --
# confirm against the actual CSV layout.
X = dataset[:, 0:8]
Y = dataset[:, 8]

# split x, y into train and test parts (20% of the rows are held out)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# create the model, adding dense layers one by one: ReLU activations in the
# hidden layers and a single sigmoid unit as the output
model = Sequential()
model.add(Dense(15, input_dim=8, activation='relu'))  # first layer requires the input_dim param
model.add(Dense(10, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dropout(.2))
model.add(Dense(1, activation='sigmoid'))

# compile the model with the adam optimizer (adamax is an alternative)
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])

# fit the model to the data (training the network); the held-out split is
# passed as validation data so the history also contains val_* entries
history = model.fit(x_train, y_train, epochs=1000, batch_size=20, validation_data=(x_test, y_test))

# save the model
model.save('pokerClassifier.h5')

# evaluate the model
# NOTE(review): this scores the full data set (training and test rows
# together), so the "Test loss" label below is not a held-out score.
scores = model.evaluate(X, Y, verbose=1)
print('Test loss: ',scores[0])
print('accuracy: ',scores[1]*100 ,'%')

# plot accuracy for the training and validation data
# NOTE(review): the history keys are 'acc'/'val_acc' in the Keras release
# this script targets; newer releases may name them 'accuracy'/'val_accuracy'.
plt.figure(1)
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epochs')
plt.legend(['train','test'], loc='upper left')
plt.show()

# plot loss for the training and validation data
plt.figure(2)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epochs')
plt.legend(['train','test'], loc='upper left')

plt.show()

Вот мой CSV или набор данных:

'H4','D7','D3','C5','C6',0.82,'C'

'H4','D1','D3','C2','C6',0.22,'F'

'H4','D7','D9','C9','C9',0.55,'C'

'H4','D7','D3','C5','C6',0.82,'C'

'H4','D1','D3','C2','C6',0.22,'F'

'H4','D7','D9','C9','C9',0.55,'C'

'H4','D7','D3','C5','C6',0.82,'C'

'H4','D1','D3','C2','C6',0.22,'F'

'H4','D7','D9','C9','C9',0.55,'C'

'H4','D7','A3','C5','C6',0.84,'C'

'H4','D1','D3','C9','C6',0.44,'F'

1 Ответ

0 голосов
/ 26 февраля 2019

loadtxt со строковым dtype:

In [4]: data = np.loadtxt('h4.csv', delimiter=',', dtype='U4')                  
In [5]: data                                                                    
Out[5]: 
array([["'H4'", "'D7'", "'D3'", "'C5'", "'C6'", '0.82', "'C'"],
       ["'H4'", "'D1'", "'D3'", "'C2'", "'C6'", '0.22', "'F'"],
       ["'H4'", "'D7'", "'D9'", "'C9'", "'C9'", '0.55', "'C'"],
       ["'H4'", "'D7'", "'D3'", "'C5'", "'C6'", '0.82', "'C'"],
       ["'H4'", "'D1'", "'D3'", "'C2'", "'C6'", '0.22', "'F'"],
       ["'H4'", "'D7'", "'D9'", "'C9'", "'C9'", '0.55', "'C'"],
       ["'H4'", "'D7'", "'D3'", "'C5'", "'C6'", '0.82', "'C'"],
       ["'H4'", "'D1'", "'D3'", "'C2'", "'C6'", '0.22', "'F'"],
       ["'H4'", "'D7'", "'D9'", "'C9'", "'C9'", '0.55', "'C'"],
       ["'H4'", "'D7'", "'A3'", "'C5'", "'C6'", '0.84', "'C'"],
       ["'H4'", "'D1'", "'D3'", "'C9'", "'C6'", '0.44', "'F'"]],
      dtype='<U4')

genfromtxt с dtype=None:

In [7]: data = np.genfromtxt('h4.csv', delimiter=',', dtype=None, encoding=None)
   ...:                                                                         
In [8]: data                                                                    
Out[8]: 
array([("'H4'", "'D7'", "'D3'", "'C5'", "'C6'", 0.82, "'C'"),
       ("'H4'", "'D1'", "'D3'", "'C2'", "'C6'", 0.22, "'F'"),
       ("'H4'", "'D7'", "'D9'", "'C9'", "'C9'", 0.55, "'C'"),
       ("'H4'", "'D7'", "'D3'", "'C5'", "'C6'", 0.82, "'C'"),
       ("'H4'", "'D1'", "'D3'", "'C2'", "'C6'", 0.22, "'F'"),
       ("'H4'", "'D7'", "'D9'", "'C9'", "'C9'", 0.55, "'C'"),
       ("'H4'", "'D7'", "'D3'", "'C5'", "'C6'", 0.82, "'C'"),
       ("'H4'", "'D1'", "'D3'", "'C2'", "'C6'", 0.22, "'F'"),
       ("'H4'", "'D7'", "'D9'", "'C9'", "'C9'", 0.55, "'C'"),
       ("'H4'", "'D7'", "'A3'", "'C5'", "'C6'", 0.84, "'C'"),
       ("'H4'", "'D1'", "'D3'", "'C9'", "'C6'", 0.44, "'F'")],
      dtype=[('f0', '<U4'), ('f1', '<U4'), ('f2', '<U4'), ('f3', '<U4'), ('f4', '<U4'), ('f5', '<f8'), ('f6', '<U3')])
In [9]: data['f0']                                                              
Out[9]: 
array(["'H4'", "'H4'", "'H4'", "'H4'", "'H4'", "'H4'", "'H4'", "'H4'",
       "'H4'", "'H4'", "'H4'"], dtype='<U4')
In [11]: data['f5']                                                             
Out[11]: array([0.82, 0.22, 0.55, 0.82, 0.22, 0.55, 0.82, 0.22, 0.55, 0.84, 0.44])

В нем нет столбцов; вместо этого есть именованные поля. Это структурированный массив. Но обратите внимание, что поле 'f5' теперь загружается как число с плавающей точкой, в то время как остальные являются строками.

С pandas (обратите внимание: без header=None первая строка файла принята за заголовок):

In [15]: df = pd.read_csv('h4.csv')                                             
In [16]: df                                                                     
Out[16]: 
   'H4'  'D7'  'D3'  'C5'  'C6'  0.82  'C'
0  'H4'  'D1'  'D3'  'C2'  'C6'  0.22  'F'
1  'H4'  'D7'  'D9'  'C9'  'C9'  0.55  'C'
2  'H4'  'D7'  'D3'  'C5'  'C6'  0.82  'C'
3  'H4'  'D1'  'D3'  'C2'  'C6'  0.22  'F'
4  'H4'  'D7'  'D9'  'C9'  'C9'  0.55  'C'
5  'H4'  'D7'  'D3'  'C5'  'C6'  0.82  'C'
6  'H4'  'D1'  'D3'  'C2'  'C6'  0.22  'F'
7  'H4'  'D7'  'D9'  'C9'  'C9'  0.55  'C'
8  'H4'  'D7'  'A3'  'C5'  'C6'  0.84  'C'
9  'H4'  'D1'  'D3'  'C9'  'C6'  0.44  'F'
In [17]: df.dtypes                                                              
Out[17]: 
'H4'     object
'D7'     object
'D3'     object
'C5'     object
'C6'     object
0.82    float64
'C'      object
dtype: object
In [18]: df.values                                                              
Out[18]: 
array([["'H4'", "'D1'", "'D3'", "'C2'", "'C6'", 0.22, "'F'"],
       ["'H4'", "'D7'", "'D9'", "'C9'", "'C9'", 0.55, "'C'"],
       ["'H4'", "'D7'", "'D3'", "'C5'", "'C6'", 0.82, "'C'"],
       ["'H4'", "'D1'", "'D3'", "'C2'", "'C6'", 0.22, "'F'"],
       ["'H4'", "'D7'", "'D9'", "'C9'", "'C9'", 0.55, "'C'"],
       ["'H4'", "'D7'", "'D3'", "'C5'", "'C6'", 0.82, "'C'"],
       ["'H4'", "'D1'", "'D3'", "'C2'", "'C6'", 0.22, "'F'"],
       ["'H4'", "'D7'", "'D9'", "'C9'", "'C9'", 0.55, "'C'"],
       ["'H4'", "'D7'", "'A3'", "'C5'", "'C6'", 0.84, "'C'"],
       ["'H4'", "'D1'", "'D3'", "'C9'", "'C6'", 0.44, "'F'"]],
      dtype=object)

Обратите внимание, что здесь dtype — object, чтобы вместить смесь строк и чисел с плавающей запятой (кроме того, pandas всегда использует object вместо строковых типов numpy).

Или в виде, более близком к структурированному массиву:

In [19]: df.to_records()                                                        
Out[19]: 
rec.array([(0, "'H4'", "'D1'", "'D3'", "'C2'", "'C6'", 0.22, "'F'"),
           (1, "'H4'", "'D7'", "'D9'", "'C9'", "'C9'", 0.55, "'C'"),
           (2, "'H4'", "'D7'", "'D3'", "'C5'", "'C6'", 0.82, "'C'"),
           (3, "'H4'", "'D1'", "'D3'", "'C2'", "'C6'", 0.22, "'F'"),
           (4, "'H4'", "'D7'", "'D9'", "'C9'", "'C9'", 0.55, "'C'"),
           (5, "'H4'", "'D7'", "'D3'", "'C5'", "'C6'", 0.82, "'C'"),
           (6, "'H4'", "'D1'", "'D3'", "'C2'", "'C6'", 0.22, "'F'"),
           (7, "'H4'", "'D7'", "'D9'", "'C9'", "'C9'", 0.55, "'C'"),
           (8, "'H4'", "'D7'", "'A3'", "'C5'", "'C6'", 0.84, "'C'"),
           (9, "'H4'", "'D1'", "'D3'", "'C9'", "'C6'", 0.44, "'F'")],
          dtype=[('index', '<i8'), ("'H4'", 'O'), ("'D7'", 'O'), ("'D3'", 'O'), ("'C5'", 'O'), ("'C6'", 'O'), ('0.82', '<f8'), ("'C'", 'O')])
...