Я делаю проект по прогнозированию урожайности с помощью логистической регрессии.
Я успешно объявил все функции, но я не могу подобрать их в соответствии со своими требованиями к набору данных.
Так может кто-нибудь помочь мне с этим?
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import numpy as np
#Importing the datasets
df=pd.read_csv(r'E:\shardul\Projects\Agri yantra\baramati.csv')
X = df.drop(['Production'], axis = 1) #here, we would take all the
columns except 'target' as input vector
y= df['Production']
#Convert the column into categorical columns
crop=pd.get_dummies(X['crop'],drop_first=True)
X=X.drop('crop',axis=1)
# concat the dummy variables
X=pd.concat([X,crop],axis=1)
#Convert the column into categorical columns
region=pd.get_dummies(X['region'],drop_first=True)
X=X.drop('region',axis=1)
# concat the dummy variables
X=pd.concat([X,region],axis=1)
ynewtest = y
xnewtest = X
y = y[:, np.newaxis]#converting the output to an array
print(X)
print('The shape of the input is {}'.format(X.shape)) #printing the shape of the input
print('The shape of the output is {}'.format(y.shape)) #printing the shape of the output
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size =
0.33, random_state = 101)
print('The shape of the input training set is {}'.format(X_train.shape))
print('The shape of the output training set is {}'.format(y_train.shape))
print('The shape of the input testing set is {}'.format(X_test.shape))
print('The shape of the output testing set is {}'.format(y_test.shape))
#We are initially defining the sigmoid function that could be used later
def sigmoid(z):
s = 1 / (1 + np.exp(-z))
return s
#This is a function that is used to initialize the weights with 0 and biases also with 0
def initialize_with_zeros(dim):
w = np.zeros((dim, 1))
b = 0
return w, b
#this network ensures that there is a forward propagation and at the same time, returns the cost
def propagate(w, b, X, y):
m = X.shape[0]
A = sigmoid(np.dot(X, w) + b)
cost = -(1 / m) * np.sum(y * np.log(A) + (1 - y) * np.log(1 - A))
#computing the cost function or the error function
dw = (1 / m) * np.dot(X.T, (A - y)) #this is derivative of the cost
function with respect to w
db = (1 / m) * np.sum(A - y) #this is the derivative of the cost
function with respect to b
grads = {'dw': dw, 'db': db} #these values are stored in a
dictionary so as to access them later
return grads, cost
#We are trying to get the parameters w and b after modifying them using the knowledge of the cost function
def optimize(w, b, X, y, num_iterations, learning_rate, print_cost =
False):
costs = [] #This is an empty list created so that it stores all the values later
for i in range(num_iterations):
grads, cost = propagate(w, b, X, y) #we are calling the previously defined function
dw = grads['dw'] #we are accessing the derivatives of cost with respect to w
db = grads['db'] #we are accessing the derivatives of cost with respect to b
w = w - learning_rate * dw #we are modifying the parameter w so that the cost would reduce in the long run
b = b - learning_rate * db #we are modifying the parameter b so that the cost would reduce in the long run
np.squeeze(cost)
if i % 100 == 0:
costs.append(cost) #we are giving all the cost values to the empty list that was created initially
if print_cost and i % 1000 == 0:
print("cost after iteration {}: {}".format(i, cost))
plt.plot(np.squeeze(costs))
plt.ylabel('cost')
plt.xlabel('iterations (per tens)')
plt.title("Learning rate = " + str(learning_rate))
plt.show()
params = {'w': w, 'b': db} #we are storing this value in the dictionary so that it could be accessed later
grads = {'dw': dw, 'db': db} #we are storing these valeus in the dictionary so that they could be accessed later
return params, grads, costs
#This is a function that gives 1 if the activation is greater that 0.5 and 0 otherwise
def predict(w, b, X):
m = X.shape[0]
y_prediction = np.zeros((m, 1))
A = sigmoid(np.dot(X, w) + b)
for i in range(A.shape[0]):
if (A[i, 0] <= 0.5):
y_prediction[i, 0] = 0
else:
y_prediction[i, 0] = 1
return y_prediction
def model(X_train, X_test, y_train, y_test, num_iterations,
learning_rate, print_cost = True):
w, b = initialize_with_zeros(X.shape[1])
parameters, grads, costs = optimize(w, b, X, y, num_iterations,
learning_rate, print_cost = True)
w = parameters["w"]
b = parameters["b"]
y_prediction_test = predict(w, b, X_test)
y_prediction_train = predict(w, b, X_train)
print('train accuracy: {}'.format(100 - np.mean(np.abs(y_prediction_train - y_train)) * 100))
print('test accuracy: {}'.format("test accuracy: {} %".format(100 -
np.mean(np.abs(y_prediction_test - y_test)) * 100)))
d = {"costs": costs,
"y_prediction_test": y_prediction_test,
"y_prediction_train" : y_prediction_train,
"w" : w,
"b" : b,
"learning_rate" : learning_rate,
"num_iterations": num_iterations}
return d
d = model(X_train, X_test, y_train, y_test, num_iterations = 100,
learning_rate = 0.0015, print_cost = True)
xpred = xnewtest
ypred = ynewtest
i = 10 #play around with this number to access each row in the
training and test set and check the accuracy
xnewpred = xpred.iloc[i]
ynewpred = ypred.iloc[i]
print('The input values of the features are:')
print(xnewpred)
print('Predicted crop yeild is:')
print(float(ynewpred))
xnewpred = xnewpred[:, np.newaxis]
xnewpred = xnewpred.T
ynew = predict(d["w"], d["b"], xnewpred)
print('The output of the predicted value is:')
print(ynew[0][0])
train accuracy: -1172738.8888888888
test accuracy: test accuracy: -2992973.0 %
The input values of the features are:
Area 700.00
Iron\n 9.20
IndexofProductivity 100.58
WaterHoldingCapacity 60.92
FieldCapacity 60.14
PH 7.30
ElectricalConductivity 1.37
Organicmatter 0.93
Calciumcarbonate 10.88
Nitrogen 71.00
phosphorous 29.00
Potassium 229.00
Rainfall 457.00
jowar 1.00
sugarcane 0.00
wheat 0.00
daund 0.00
Name: 10, dtype: float64
Predicted crop yeild is:
21000.0
The output of the predicted value is:
1.0