В настоящее время я создаю облачную задачу, которая будет периодически импортировать новые данные в набор данных automl. Целью является http-цель облачной функции GCP. Поскольку я не хочу жестко кодировать идентификатор набора данных в облачной функции. Я хочу, чтобы он принимал идентификатор набора данных из веб-интерфейса. Поэтому я набираю код для flask таким образом.
@app.route('/train_model', methods=["POST", "GET"])
def train_model():
if request.method == 'POST':
form = request.form
model = form.get('model_name')
date = form.get('date')
dataset_id=form.get('dataset_id')
datetime_object = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
timezone = pytz.timezone('Asia/Hong_Kong')
timezone_date_time_obj = timezone.localize(datetime_object)
# Create a client.
client = tasks_v2beta3.CloudTasksClient.from_service_account_json(
"xxx.json")
# TODO(developer): Uncomment these lines and replace with your values.
project = 'xxxx'
dataset_id = dataset_id
utf = str(dataset_id, encoding='utf-8')
location = 'us-west2'
url='https://us-central1-cloudfunction.cloudfunctions.net/create_csv/' + "?dataset_id=dataset_id"
queue1 = 'testing-queue'
parent = client.queue_path(project, location, queue1)
task = {
"http_request": {
'http_method': 'POST',
'url': url
}}
# set schedule time
timestamp = timestamp_pb2.Timestamp()
timestamp.FromDatetime(timezone_date_time_obj)
task['schedule_time'] = timestamp
response = client.create_task(parent, task)
print(response)
return redirect(url_for('dataset'))
код облачной функции
import pandas
from google.cloud import datastore
from google.cloud import storage
from google.cloud import automl
project_id=123456995
compute_region='us-central1'
def create_csv(dataset_id):
datastore_client = datastore.Client() #
data = datastore_client.query(kind='{}label'.format(dataset_id))
storage_client = storage.Client()
metadata = list(data.fetch())
path = []
label = []
for result in metadata:
path.append(result['Storage_url'])
label.append(result['label'])
record = {
'Path': path,
'Label': label
}
table = pandas.DataFrame(record)
csv_pandas = table.to_csv('/tmp/label.csv', header=None, index=None) #create csv through query datatore
# upload to cloud storage bucket
bucket_name1='testing'
destination_blob_name='label.csv'
bucket = storage_client.bucket(bucket_name1)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename('/tmp/label.csv')
object = bucket.get_blob(destination_blob_name)
bucket = object.bucket
bucket_name = bucket.name
url = 'gs://' + bucket_name + '/' + object.name
#import data to the dataset
client= automl.AutoMlClient()
dataset_full_id = client.dataset_path(
project_id, "us-central1", dataset_id
)
# Get the multiple Google Cloud Storage URIs
input_uris = url.split(",")
gcs_source = automl.types.GcsSource(input_uris=input_uris)
input_config = automl.types.InputConfig(gcs_source=gcs_source)
# Import data from the input URI
client.import_data(dataset_full_id, input_config)
, но это дает мне эту ошибку.
Traceback (most recent call last):
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 346, in run_http_function
result = _function_handler.invoke_user_function(flask.request)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 217, in invoke_user_function
return call_user_function(request_or_event)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 210, in call_user_function
return self._user_function(request_or_event)
File "/user_code/main.py", line 52, in create_csv
client.import_data(dataset_full_id, input_config)
File "/env/local/lib/python3.7/site-packages/google/cloud/automl_v1/gapic/auto_ml_client.py", line 793, in import_data
request, retry=retry, timeout=timeout, metadata=metadata
File "/env/local/lib/python3.7/site-packages/google/api_core/gapic_v1/method.py", line 143, in __call__
return wrapped_func(*args, **kwargs)
File "/env/local/lib/python3.7/site-packages/google/api_core/retry.py", line 286, in retry_wrapped_func
on_error=on_error,
File "/env/local/lib/python3.7/site-packages/google/api_core/retry.py", line 184, in retry_target
return target()
File "/env/local/lib/python3.7/site-packages/google/api_core/timeout.py", line 214, in func_with_timeout
return func(*args, **kwargs)
File "/env/local/lib/python3.7/site-packages/google/api_core/grpc_helpers.py", line 59, in error_remapped_callable
six.raise_from(exceptions.from_grpc_error(exc), exc)
File "<string>", line 3, in raise_from
google.api_core.exceptions.InvalidArgument: 400 List of found errors: 1.Field: name; Message: Required field is invalid