Я новичок в tenorflow. Я прочитал несколько статей и попытался использовать его для классификации текста. Набор данных представляет собой файл Excel с 2000 записями. Несколько записей я показал ниже. Я хочу сделать классификатор текста, который может классифицировать предложения по коду
sentences code
Washigton is the capital of America 98876
I love Pizza 54321
I love KFC 86543
White House is in America 89354
Canada has a very cold winters 34567
Washigton is the capital of America 98876
I love Pizza 54321
I love KFC 86543
White House is in America 89354
Washigton is the capital of America 98876
I love Pizza 54321
I love KFC 86543
White House is in America 89354
Canada has a very cold winters 34567
Washigton is the capital of America 98876
I love Pizza 54321
I love KFC 86543
White House is in America 89354
Ниже приведен код, который я использую:
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import tensorflow as tf
import pickle
# reading the excel file CPT_Dataset temp
xl = pd.ExcelFile("test.xlsx")
df = xl.parse('Sheet1') # Query 2.2 Sheet1
# shuffling the data
df = df.sample(frac=1).reset_index(drop=True)
X_train, X_test, y_train, y_test = train_test_split(
df['sentences'], df['code'], random_state=0) # splitting the data
count_vect = CountVectorizer().fit(X_train)
X_train_counts = count_vect.transform(X_train)
tfidf_transformer = TfidfTransformer().fit(X_train_counts)
X_train_tfidf = tfidf_transformer.transform(X_train_counts) # fitting the data
count_vect_test = CountVectorizer().fit(X_test)
X_test_counts = count_vect_test.transform(X_test)
tfidf_transformer_test = TfidfTransformer().fit(X_test_counts)
X_test_tfidf = tfidf_transformer_test.transform(X_test_counts)
#Defining a Multilayer Perceptron Model
def model(x, weights, bias):
layer_1 = tf.add(tf.matmul(x, weights["hidden"]), bias["hidden"])
layer_1 = tf.nn.relu(layer_1)
output_layer = tf.matmul(layer_1, weights["output"]) + bias["output"]
return output_layer
#hyperparameter
learning_rate = 0.01
training_epochs = 20
batch_size = 200
#Network parameters
n_input = 5
n_hidden = 10
n_output = 5
#Graph Nodes
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_output])
#Weights and Biases
weights = {
"hidden" : tf.Variable(tf.random_normal([n_input, n_hidden]), name="weight_hidden"),
"output" : tf.Variable(tf.random_normal([n_hidden, n_output]), name="weight_output")
}
bias = {
"hidden" : tf.Variable(tf.random_normal([n_hidden]), name="bias_hidden"),
"output" : tf.Variable(tf.random_normal([n_output]), name="bias_output")
}
#Define model
pred = model(X, weights, bias)
#Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
#Initializing global variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
for epoch in range(training_epochs):
_, c = sess.run([optimizer, cost], feed_dict={X: X_train_tfidf, Y: y_train})
if(epoch + 1) % batch_size == 0:
print ("Epoch: ", (epoch+1), "Cost: ", c)
print("Optimization Finished!")
test_result = sess.run(pred, feed_dict={X: X_test_tfidf})
correct_pred = tf.equal(tf.argmax(test_result, 1), tf.argmax(y_train, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, "float"))
print ("Accuracy:", accuracy.eval({X: X_test_tfidf, Y: y_test}))
end_time = time.time()
print ("Completed in ", end_time - start_time , " seconds")
Я получаю ошибку:
ValueError: setting an array element with a sequence.
Могу ли я узнать, как можно исправить свой код, чтобы он классифицировал предложения по коду