I'm writing logistic regression from scratch. I'm using NumPy arrays and performing operations on them. When I print the computed cost, I sometimes get [[inf]] and [[nan]] in the output. How can I avoid this? Here is the code:
import numpy as np
import pandas as pd
from random import shuffle
def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))
data = pd.read_csv("ChurnData.csv")
features = data[['tenure','age','address','income','ed','employ','churn']]
features = features.values.tolist()
shuffle(features)
raw_train, raw_test = features[:int(len(features) * 0.8)], features[int(len(features) * 0.8):]
X_train = []; y_train = []; X_test = []; y_test = []
# the last column (churn) is the label, the rest are features
for i in raw_train:
    X_train.append(i[:-1])
    y_train.append(i[-1])
X_train = np.asarray(X_train).reshape((len(X_train), len(X_train[0])))
y_train = np.asarray(y_train).reshape((len(y_train), 1))
for i in raw_test:
    X_test.append(i[:-1])
    y_test.append(i[-1])
X_test = np.asarray(X_test).reshape((len(X_test), len(X_test[0])))
y_test = np.asarray(y_test).reshape((len(y_test), 1))
weights = np.zeros((6, 1))  # one weight per feature, no bias term
epoch = 100
for i in range(epoch):
    # forward pass: predicted probabilities for the training set
    yhat = sigmoid(np.dot(X_train, weights))
    # average cross-entropy cost
    cost = -np.dot(y_train.T, np.log(yhat)) - np.dot((1 - y_train).T, np.log(1 - yhat))
    cost = cost / X_train.shape[0]
    print(cost)
    # gradient of the cost with respect to the weights
    dw = np.dot(X_train.T, (yhat - y_train))
    dw = dw / X_train.shape[0]
    weights -= 0.01 * dw
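In case it helps, I think I can reproduce the same [[inf]]/[[nan]] values with a tiny standalone snippet (the logit value 40.0 and the labels below are made up, not taken from my data), so my guess is that it comes from np.log being applied to a sigmoid output that has saturated to exactly 1.0, rather than from the CSV itself:

import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

# a large weighted sum, e.g. from an unscaled feature like income
yhat = sigmoid(np.array([[40.0]]))
print(yhat)  # [[1.]] because 1 + exp(-40) rounds to 1.0 in float64

y = np.array([[1.0]])
cost = -np.dot(y.T, np.log(yhat)) - np.dot((1 - y).T, np.log(1 - yhat))
print(cost)  # [[nan]] because the second dot product computes 0 * log(0) = 0 * (-inf)

y = np.array([[0.0]])
cost = -np.dot(y.T, np.log(yhat)) - np.dot((1 - y).T, np.log(1 - yhat))
print(cost)  # [[inf]] because log(1 - 1.0) = log(0) = -inf

Running this also triggers NumPy's "divide by zero encountered in log" RuntimeWarning.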