Чтобы узнать владельца каждого EC2 instance
, я запрашиваю cloudtrail logs
, хранящийся в S3, по Athena
.
У меня есть таблица в Афинах со следующей структурой:
CREATE EXTERNAL TABLE cloudtrail_logs (
eventversion STRING,
useridentity STRUCT<
type:STRING,
principalid:STRING,
arn:STRING,
accountid:STRING,
invokedby:STRING,
accesskeyid:STRING,
userName:STRING,
sessioncontext:STRUCT<
attributes:STRUCT<
mfaauthenticated:STRING,
creationdate:STRING>,
sessionissuer:STRUCT<
type:STRING,
principalId:STRING,
arn:STRING,
accountId:STRING,
userName:STRING>>>,
eventtime STRING,
eventsource STRING,
eventname STRING,
awsregion STRING,
sourceipaddress STRING,
useragent STRING,
errorcode STRING,
errormessage STRING,
requestparameters STRING,
responseelements STRING,
additionaleventdata STRING,
requestid STRING,
eventid STRING,
resources ARRAY<STRUCT<
ARN:STRING,
accountId:STRING,
type:STRING>>,
eventtype STRING,
apiversion STRING,
readonly STRING,
recipientaccountid STRING,
serviceeventdetails STRING,
sharedeventid STRING,
vpcendpointid STRING
)
PARTITIONED BY (account string, region string, year string)
ROW FORMAT SERDE 'com.amazon.emr.hive.serde.CloudTrailSerde'
STORED AS INPUTFORMAT 'com.amazon.emr.cloudtrail.CloudTrailInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://<BUCKET>/AWSLogs/';
Я хочу найти личность пользователя, который запускает экземпляры EC2, поэтому мне нужно проанализировать поле responseelements
и получить только строки с responseelements
, которые имеют конкретное instanceID
.
поле responseelements
выглядит так:
{
"requestId":"cab34472-31cc-44cd-ae32-a84077e55cb6",
"reservationId":"r-05964c8549788ac50",
"ownerId":"xxxxxxxxxx",
"groupSet":{},
"instancesSet":{
"items":[
{"instanceId":"i-043543cb4c12",
"imageId":"ami-078df974",
"instanceState":{"code":0,"name":"pending"},
"privateDnsName":"ip-444444.eu-west-1.compute.internal",
"keyName":"key-dev","amiLaunchIndex":0,"productCodes":{},
"instanceType":"t2.large",
"launchTime":1488438050000,
"placement":{"availabilityZone":"eu-west-1b","tenancy":"default"},
"monitoring":{"state":"pending"},
"subnetId":"subnet-d8fffff",
"vpcId":"vpc-444435",
"privateIpAddress":"10.0.42.49",
"stateReason":{"code":"pending","message":"pending"},
"architecture":"x86_64",
"rootDeviceType":"ebs",
"rootDeviceName":"/dev/xvda",
"blockDeviceMapping":{},
"virtualizationType":"hvm",
"hypervisor":"xen",
"clientToken":"c6e53004-c561-437d-a642-196489ff297c_subnet-fffffffff",
"groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
"sourceDestCheck":true,
"networkInterfaceSet":{
"items":[
{"networkInterfaceId":"eni-b16b66f0",
"subnetId":"subnet-dffffff",
"vpcId":"vpc-50fffff35",
"ownerId":"xxxxxxxx",
"status":"in-use",
"macAddress":"fdsfdsfsdfqdsf",
"privateIpAddress":"10.0.42.34234213",
"privateDnsName":"ip-1dddddd.eu-west-1.compute.internal",
"sourceDestCheck":true,
"groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
"attachment":{"attachmentId":"eni-attach-45619121","deviceIndex":0,"status":"attaching","attachTime":1488438050000,"deleteOnTermination":true},
"privateIpAddressesSet":{"item":[{"privateIpAddress":"10ffffff","privateDnsName":"ip-ffffff.eu-west-1.compute.internal","primary":true}]},
"ipv6AddressesSet":{},
"tagSet":{}}]}
,"iamInstanceProfile":{"arn":"arn:aws:iam::xxxxx:instance-profile/infra-EC2InstanceProfile-1D59C5YR0LIYJ","id":"eeeeeeeeeeeeeeeeee"},
"ebsOptimized":false}
]
},
"requesterId":"226008221399"
}
Это мой запрос, который я пробовал:
SELECT DISTINCT eventsource, eventname, useridentity.userName, eventtime, json_extract(responseelements, '$.instanceId') as instance_id
FROM cloudtrail_logs
WHERE account = 'xxxxxxxxxxxxxxx'
AND eventname = 'RunInstances';
но это дает instance_id
как пустой столбец.Как правильно получить только instance_id
из resposneelement
?