Это решение работает для меня.
import io
import os

import boto3
import fastparquet
import pandas as pd
import pyarrow
def dynamically_read_filename_key(bucket, prefix='', suffix=''):
    """Return the first S3 object key in ``bucket`` matching prefix and suffix.

    The region and credentials are read from the ``AWS_DEFAULT_REGION``,
    ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` environment
    variables.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Prefix the key must start with. When it is a ``str`` it is
            also sent to S3 as the ``Prefix`` listing filter; any other
            value is only applied client-side through ``str.startswith``.
        suffix: Suffix the key must end with (checked with
            ``str.endswith``).

    Returns:
        The first matching key in listing order, or ``None`` when no
        listed object matches.

    Raises:
        KeyError: If one of the AWS environment variables is unset.
    """
    s3 = boto3.client(
        "s3",
        region_name=os.environ['AWS_DEFAULT_REGION'],
        aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
        aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
    )
    kwargs = {'Bucket': bucket}
    if isinstance(prefix, str):
        kwargs['Prefix'] = prefix
    # A single list_objects_v2 call only returns one page of results, so
    # walk every page; the search still stops at the first matching key.
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(**kwargs):
        # 'Contents' is absent from a page that lists no objects.
        for obj in page.get('Contents', []):
            key = obj['Key']
            if key.startswith(prefix) and key.endswith(suffix):
                return key
    return None
# Bucket to search; defined before the lookup so the name is written once.
bucket = "my-bucket"
# Key of the first ".parquet" object found under "datasets/" in the bucket.
filename = dynamically_read_filename_key(
    bucket=bucket,
    prefix="datasets/",
    suffix=".parquet",
)
if filename is None:
    # Fail here with a clear message instead of passing None downstream.
    raise FileNotFoundError(
        "no '.parquet' object found under s3://{}/datasets/".format(bucket)
    )
def parquet_read_filename_key(bucket, filename):
    """Download a Parquet object from S3 and load it into a DataFrame.

    The object is downloaded into an in-memory buffer; nothing is written
    to disk. The region and credentials are read from the
    ``AWS_DEFAULT_REGION``, ``AWS_ACCESS_KEY_ID`` and
    ``AWS_SECRET_ACCESS_KEY`` environment variables.

    Args:
        bucket: Name of the S3 bucket holding the object.
        filename: Key of the Parquet object inside the bucket.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_parquet``.

    Raises:
        KeyError: If one of the AWS environment variables is unset.
    """
    s3 = boto3.resource(
        "s3",
        region_name=os.environ['AWS_DEFAULT_REGION'],
        aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
        aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
    )
    buffer = io.BytesIO()
    s3.Object(bucket, filename).download_fileobj(buffer)
    # The download leaves the cursor wherever the last write ended; rewind
    # so the reader starts at the first byte regardless of the engine used.
    buffer.seek(0)
    return pd.read_parquet(buffer)
# Load the located Parquet object into a DataFrame.
df = parquet_read_filename_key(bucket=bucket, filename=filename)