I'm building an XGBoost model on SageMaker for the IRIS dataset. I have two files, model.py and train.py, as follows:
model.py:
import boto3, sagemaker
import pandas as pd
import numpy as np
from sagemaker import get_execution_role
from sagemaker.xgboost.estimator import XGBoost
role = get_execution_role()
bucket_name = 'my-bucket-name'
train_prefix = 'iris_data/train'
test_prefix = 'iris_data/test'
session = boto3.Session()
sg_session = sagemaker.Session(session)
# Read training data from S3
train_channel = 's3://{0}/{1}'.format(bucket_name, train_prefix)
data_channels = {'train': train_channel}
hyperparameters = {
    'max_leaf_nodes': 30
}
model = XGBoost(entry_point="train.py",
                train_instance_type="ml.m4.xlarge",
                train_instance_count=1,
                role=role,
                framework_version='0.90-2',
                sagemaker_session=sg_session,
                hyperparameters=hyperparameters)
model.fit(inputs=data_channels, logs=True)
transformer = model.transformer(instance_count=1, instance_type='ml.m4.xlarge')
test_channel = 's3://{0}/{1}'.format(bucket_name, test_prefix)
transformer.transform(test_channel, content_type='text/csv')
print('Waiting for transform job: ' + transformer.latest_transform_job.job_name)
transformer.wait()
batch_output = transformer.output_path
print(batch_output)
train.py:
from __future__ import print_function
import argparse
import os
import pandas as pd
import pickle
from xgboost import XGBClassifier
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # Hyperparameters are described here. In this simple example we are just including one hyperparameter.
    parser.add_argument('--max_leaf_nodes', type=int, default=-1)

    # SageMaker specific arguments. Defaults are set in the environment variables.
    parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
    parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
    parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])

    args = parser.parse_args()

    # Take the set of files and read them all into a single pandas dataframe
    input_files = [os.path.join(args.train, file) for file in os.listdir(args.train)]
    if len(input_files) == 0:
        raise ValueError(('There are no files in {}.\n' +
                          'This usually indicates that the channel ({}) was incorrectly specified,\n' +
                          'the data specification in S3 was incorrectly specified or the role specified\n' +
                          'does not have permission to access the data.').format(args.train, "train"))
    raw_data = [pd.read_csv(file, header=None, engine="python") for file in input_files]
    train_data = pd.concat(raw_data)

    # Labels are in the first column
    train_y = train_data.iloc[:, 0]
    train_X = train_data.iloc[:, 1:]

    # Here we support a single hyperparameter, 'max_leaf_nodes'. Note that you can add as many
    # as your training may require in the ArgumentParser above.
    max_leaf_nodes = args.max_leaf_nodes

    # Train an XGBoost classifier on the data.
    clf = XGBClassifier(max_depth=10, n_estimators=100, random_state=78432)
    clf = clf.fit(train_X, train_y)

    # Save the trained classifier.
    pickle.dump(clf, open(os.path.join(args.model_dir, "model.bin"), "wb"))


def model_fn(model_dir):
    """Deserialize and return the fitted model.

    Note that this should have the same name as the serialized model in the main method.
    """
    model = pickle.load(open(os.path.join(model_dir, "model.bin"), "rb"))
    return model
I can't use the built-in XGBoost container because, in the end, I want to use the XGBoost framework (script mode) for my work, where train.py does much more than just read data from S3. So for now, for testing, I'm checking it with the IRIS data.
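For contrast, the built-in-algorithm path I'm avoiding would look roughly like this (a minimal sketch against the SageMaker Python SDK v1, reusing the role/session/bucket variables from model.py above; the output prefix and the hyperparameter values are placeholders, not code I actually run):

from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.estimator import Estimator
from sagemaker.session import s3_input

# Built-in algorithm: no entry_point script; hyperparameters are passed straight to XGBoost
container = get_image_uri(session.region_name, 'xgboost', repo_version='0.90-2')
builtin = Estimator(container,
                    role=role,
                    train_instance_count=1,
                    train_instance_type='ml.m4.xlarge',
                    output_path='s3://{0}/iris_data/output'.format(bucket_name),  # placeholder prefix
                    sagemaker_session=sg_session)
builtin.set_hyperparameters(objective='multi:softmax', num_class=3, num_round=100)
builtin.fit({'train': s3_input(train_channel, content_type='text/csv')})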
But when I run model.py, model training completes successfully, yet the batch transform fails with the following error:
Waiting for transform job: sagemaker-xgboost-2020-03-31-17-21-48-649
......................Processing /opt/ml/code
Building wheels for collected packages: train
Building wheel for train (setup.py): started
Building wheel for train (setup.py): finished with status 'done'
Created wheel for train: filename=train-1.0.0-py2.py3-none-any.whl size=6872 sha256=dd15ce5260f45f557b284e58a126d91440fb72155eca544c64e4faa9ce48ff38
Stored in directory: /tmp/pip-ephem-wheel-cache-zlsbuj5_/wheels/95/c1/85/65aaf48b35aba88c6e896d2fd04a4b69f1cee0d81ea32993ca
Successfully built train
Installing collected packages: train
Successfully installed train-1.0.0
[2020-03-31 17:25:18 +0000] [38] [INFO] Starting gunicorn 19.10.0
[2020-03-31 17:25:18 +0000] [38] [INFO] Listening at: unix:/tmp/gunicorn.sock (38)
[2020-03-31 17:25:18 +0000] [38] [INFO] Using worker: gevent
[2020-03-31 17:25:18 +0000] [41] [INFO] Booting worker with pid: 41
[2020-03-31 17:25:18 +0000] [45] [INFO] Booting worker with pid: 45
[2020-03-31 17:25:19 +0000] [46] [INFO] Booting worker with pid: 46
[2020-03-31 17:25:19 +0000] [47] [INFO] Booting worker with pid: 47
[2020-03-31:17:25:53:INFO] No GPUs detected (normal if no gpus installed)
[2020-03-31:17:25:53:INFO] Installing module with the following command:
/miniconda3/bin/python -m pip install .
Processing /opt/ml/code
Building wheels for collected packages: train
Building wheel for train (setup.py): started
Building wheel for train (setup.py): finished with status 'done'
Created wheel for train: filename=train-1.0.0-py2.py3-none-any.whl size=6871 sha256=e8f227b103bf75716d7967683595b4e6d5caacd312a79b5231b8f653225be8d0
Stored in directory: /tmp/pip-ephem-wheel-cache-hlc0kry6/wheels/95/c1/85/65aaf48b35aba88c6e896d2fd04a4b69f1cee0d81ea32993ca
Successfully built train
Installing collected packages: train
Attempting uninstall: train
Found existing installation: train 1.0.0
Uninstalling train-1.0.0:
Successfully uninstalled train-1.0.0
Successfully installed train-1.0.0
169.254.255.130 - - [31/Mar/2020:17:25:55 +0000] "GET /ping HTTP/1.1" 200 0 "-" "Go-http-client/1.1"
[2020-03-31:17:25:55:INFO] No GPUs detected (normal if no gpus installed)
[2020-03-31:17:25:55:INFO] Installing module with the following command:
/miniconda3/bin/python -m pip install .
Processing /opt/ml/code
Building wheels for collected packages: train
Building wheel for train (setup.py): started
Building wheel for train (setup.py): finished with status 'done'
Created wheel for train: filename=train-1.0.0-py2.py3-none-any.whl size=6870 sha256=dab9513d234f721f798249797424c388f0659588903c01880dc21811e1bf4ea5
Stored in directory: /tmp/pip-ephem-wheel-cache-j30gnab9/wheels/95/c1/85/65aaf48b35aba88c6e896d2fd04a4b69f1cee0d81ea32993ca
Successfully built train
Installing collected packages: train
Attempting uninstall: train
Found existing installation: train 1.0.0
Uninstalling train-1.0.0:
Successfully uninstalled train-1.0.0
Successfully installed train-1.0.0
169.254.255.130 - - [31/Mar/2020:17:25:56 +0000] "GET /execution-parameters HTTP/1.1" 404 232 "-" "Go-http-client/1.1"
[2020-03-31:17:25:56:INFO] Determined delimiter of CSV input is ','
[2020-03-31:17:25:56:ERROR] Exception on /invocations [POST]
TypeError: float() argument must be a string or a number, not 'list'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/miniconda3/lib/python3.6/site-packages/sagemaker_containers/_functions.py", line 93, in wrapper
return fn(*args, **kwargs)
File "/miniconda3/lib/python3.6/site-packages/sagemaker_xgboost_container/serving.py", line 55, in default_input_fn
return xgb_encoders.decode(input_data, content_type)
File "/miniconda3/lib/python3.6/site-packages/sagemaker_xgboost_container/encoder.py", line 121, in decode
return decoder(obj)
File "/miniconda3/lib/python3.6/site-packages/sagemaker_xgboost_container/encoder.py", line 50, in csv_to_dmatrix
np_payload = np.array(list(map(lambda x: _clean_csv_string(x, delimiter), string_like.split('\n')))).astype(dtype)
ValueError: setting an array element with a sequence.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/miniconda3/lib/python3.6/site-packages/flask/app.py", line 2446, in wsgi_app
response = self.full_dispatch_request()
File "/miniconda3/lib/python3.6/site-packages/flask/app.py", line 1951, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/miniconda3/lib/python3.6/site-packages/flask/app.py", line 1820, in handle_user_exception
reraise(exc_type, exc_value, tb)
File "/miniconda3/lib/python3.6/site-packages/flask/_compat.py", line 39, in reraise
raise value
File "/miniconda3/lib/python3.6/site-packages/flask/app.py", line 1949, in full_dispatch_request
rv = self.dispatch_request()
File "/miniconda3/lib/python3.6/site-packages/flask/app.py", line 1935, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "/miniconda3/lib/python3.6/site-packages/sagemaker_containers/_transformer.py", line 200, in transform
self._model, request.content, request.content_type, request.accept
File "/miniconda3/lib/python3.6/site-packages/sagemaker_containers/_transformer.py", line 227, in _default_transform_fn
data = self._input_fn(content, content_type)
File "/miniconda3/lib/python3.6/site-packages/sagemaker_containers/_functions.py", line 95, in wrapper
six.reraise(error_class, error_class(e), sys.exc_info()[2])
File "/miniconda3/lib/python3.6/site-packages/six.py", line 702, in reraise
raise value.with_traceback(tb)
File "/miniconda3/lib/python3.6/site-packages/sagemaker_containers/_functions.py", line 93, in wrapper
return fn(*args, **kwargs)
File "/miniconda3/lib/python3.6/site-packages/sagemaker_xgboost_container/serving.py", line 55, in default_input_fn
return xgb_encoders.decode(input_data, content_type)
File "/miniconda3/lib/python3.6/site-packages/sagemaker_xgboost_container/encoder.py", line 121, in decode
return decoder(obj)
File "/miniconda3/lib/python3.6/site-packages/sagemaker_xgboost_container/encoder.py", line 50, in csv_to_dmatrix
np_payload = np.array(list(map(lambda x: _clean_csv_string(x, delimiter), string_like.split('\n')))).astype(dtype)
sagemaker_containers._errors.ClientError: setting an array element with a sequence.
169.254.255.130 - - [31/Mar/2020:17:25:56 +0000] "POST /invocations HTTP/1.1" 500 290 "-" "Go-http-client/1.1"
If I change the framework from XGBoost to SKLearn and run a DecisionTree model instead (roughly as sketched below), everything works fine and I can see the prediction results. Please let me know what I'm missing here and how to fix it.
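For reference, the working SKLearn variant is essentially the same code with the estimator and the classifier swapped, roughly like this (a sketch following the standard SageMaker scikit-learn example; the exact framework_version and the joblib save format are assumptions, not necessarily identical to my code):

# In model.py: swap the XGBoost estimator for the SKLearn one
from sagemaker.sklearn.estimator import SKLearn

model = SKLearn(entry_point="train.py",
                train_instance_type="ml.m4.xlarge",
                train_instance_count=1,
                role=role,
                framework_version='0.20.0',
                sagemaker_session=sg_session,
                hyperparameters=hyperparameters)

# In train.py: train a decision tree and save it with joblib instead of pickling an XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.externals import joblib

clf = DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes)
clf = clf.fit(train_X, train_y)
joblib.dump(clf, os.path.join(args.model_dir, "model.joblib"))

# ...and model_fn loads it back the same way
def model_fn(model_dir):
    return joblib.load(os.path.join(model_dir, "model.joblib"))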