Я пытаюсь определить оценку и окончательные конформные прогнозы для моей модели с моими данными. Но это дает мне следующую ошибку:
Ошибка
Traceback (most recent call last):
File "/home/maria/CP/scripts/Conformity_PredictionsV4.py", line 89, in <module>
icp.fit(X_train, y_train)
File "/home/maria/.local/lib/python3.8/site-packages/sklearn/utils/__init__.py", line 454, in _get_column_indices
raise ValueError(
ValueError: A given column is not a column of the dataframe
Пример кода
from sklearn.tree import DecisionTreeRegressor
from nonconformist.cp import IcpRegressor
from nonconformist.base import RegressorAdapter
from nonconformist.nc import RegressorNc, AbsErrorErrFunc, RegressorNormalizer, NcFactory
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
# -----------------------------------------------------------------------------
# Load Environment and Models
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Setup training, calibration and test data
# -----------------------------------------------------------------------------
df = pd.read_csv ("prepared_data.csv")
# Initial split into train/test data
train = df.loc[df['split']== 'train']
valid = df.loc[df['split']== 'valid']
# Proper Validation Set (Split the Validation set into features and target)
X_valid = valid.drop(['expression'], axis = 1)
y_valid = valid.drop(columns = ['new_host', 'split', 'sequence'])
# Create Training Set (Split the Training set into features and target)
X_train = valid.drop(['expression'], axis = 1)
y_train = valid.drop(columns = ['new_host', 'split', 'sequence'])
# Split Training set into further training set and calibration set
X_train, X_cal, y_train, y_cal = train_test_split(X_train, y_train, test_size =0.2)
# -----------------------------------------------------------------------------
# Train and calibrate underlying model
# -----------------------------------------------------------------------------
underlying_model = RegressorAdapter(DecisionTreeRegressor(min_samples_leaf=5))
print("Underlying model loaded")
model = RegressorAdapter(underlying_model)
nc = RegressorNc(model, AbsErrorErrFunc())
print("Nonconformity Function Applied")
icp = IcpRegressor(nc) # Create an inductive conformal Regressor
print("ICP Regressor Created")
#Dataset Review
print('{} instances, {} features, {} classes'.format(y_train.size,
X_train.shape[1],
np.unique(y_train).size))
icp.fit(X_train, y_train)
Пример Dataframe
new_host split sequence expression
FALSE train AQVPYGVS 0.039267878
FALSE train ASVPYGVSI 0.039267878
FALSE train STNLYGSGR 0.261456561
FALSE valid NLYGSGLVR 0.265188519
FALSE valid SLGPSNLYG 0.419680588
FALSE valid ATSLGTTNG 0.145710993
Я пробовал разделить набор данных разными способами, но у меня по-прежнему возникают проблемы с этим. В этом случае я хочу разделить данные на обучающие и тестовые наборы в соответствии со значением Data Split наблюдения. После этого я разделю набор поездов на поезд и калибровку на втором этапе. Где myfeatures, X_train и моя цель, y_train