import math
import numpy as np
import pandas as pd
#from pandas import DataFrame
from sklearn import preprocessing,cross_validation
from sklearn.linear_model import LogisticRegression
#from sklearn.cross_validation import train_test_split
from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel
# scale larger positive and values to between -1,1 depending on the largest
# value in the data
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
df = pd.read_excel("Cryotherapy.xlsx", header=0)
# clean up data
df.columns = ["sex","age","Time","Number_of_Warts", "Type",
"Area","Result_of_Treatment"]
x = df["Result_of_Treatment"]
X = df[["Type","Area",]]
X = np.array(X)
X = min_max_scaler.fit_transform(X)
Y = df["Result_of_Treatment"]
Y = np.array(Y)
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y,
test_size=0.4)
# train scikit learn model
clf = LogisticRegression()
clf.fit(X_train, Y_train)
accuracy = clf.score(X_test,Y_test)
print(accuracy)