Я экспериментирую со стеком Elastic — использую Filebeat для сбора .log
файлов в Elasticsearch (и визуализации через Kibana). Однако я вижу, что данные, хранящиеся в Elasticsearch, имеют довольно большой размер для одной простой строки необработанного журнала.
Например, файл .log
может содержать строку (которая занимает меньше 30 байт):
INFO This is a line of log
Но, ИМХО, Filebeat преобразует её примерно в следующее. Ого, это довольно много!
{
"_index": "filebeat-7.8.0-2020.08.04-000001",
"_type": "_doc",
"_id": "1nSruXMBx-9kAPowKFuj",
"_score": 1,
"_source": {
"@timestamp": "2020-08-04T13:31:06.863Z",
"agent": {
"version": "7.8.0",
"hostname": "minikube",
"ephemeral_id": "ad8494d4-f9de-434f-bc33-25a1d456062b",
"id": "15191fd6-5274-430a-b66a-d6d772529892",
"name": "minikube",
"type": "filebeat"
},
"message": "INFO This is a line of log",
"log": {
"file": {
"path": "/var/log/containers/storage-provisioner_kube-system_storage-provisioner-db4f5b692edf2b06d3a65438168f937e8160ff808ced837a2b1ce41d415fccb9.log"
},
"offset": 25811285
},
"stream": "stderr",
"input": {
"type": "container"
},
"kubernetes": {
"namespace": "kube-system",
"labels": {
"addonmanager_kubernetes_io/mode": "Reconcile",
"integration-test": "storage-provisioner"
},
"container": {
"name": "storage-provisioner",
"image": "gcr.io/k8s-minikube/storage-provisioner:v1.8.1"
},
"node": {
"name": "minikube"
},
"pod": {
"uid": "eb68d1c7-70c4-4ba7-8878-79419b3d57a7",
"name": "storage-provisioner"
}
},
"ecs": {
"version": "1.5.0"
},
"host": {
"mac": [
"02:42:ff:0e:4e:ae",
"36:e6:7a:83:ca:d5",
"76:bf:3e:91:6b:65",
"6a:62:f0:dc:c2:d6",
"3e:5a:f5:4e:61:85",
"02:42:ac:11:00:02",
"e6:dc:92:3d:09:95",
"1e:27:ff:d1:d2:2f",
"c6:63:33:21:38:19",
"de:19:b8:02:f0:e7",
"ca:cf:25:b4:12:e4"
],
"name": "minikube",
"hostname": "minikube",
"architecture": "x86_64",
"os": {
"codename": "Core",
"platform": "centos",
"version": "7 (Core)",
"family": "redhat",
"name": "CentOS Linux",
"kernel": "4.19.76-linuxkit"
},
"id": "83a8f1f835d84a9a9bf5417cecaf0c8e",
"containerized": true,
"ip": [
"172.18.0.1",
"172.17.0.2"
]
}
},
"fields": {
"cef.extensions.flexDate1": [],
"netflow.flow_end_microseconds": [],
"netflow.system_init_time_milliseconds": [],
"netflow.flow_end_nanoseconds": [],
"misp.observed_data.last_observed": [],
"netflow.max_flow_end_microseconds": [],
"file.mtime": [],
"aws.cloudtrail.user_identity.session_context.creation_date": [],
"netflow.min_flow_start_seconds": [],
"misp.intrusion_set.first_seen": [],
"file.created": [],
"misp.threat_indicator.valid_from": [],
"process.parent.start": [],
"azure.auditlogs.properties.activity_datetime": [],
"crowdstrike.event.ProcessStartTime": [],
"zeek.ocsp.update.this": [],
"crowdstrike.event.IncidentStartTime": [],
"netflow.observation_time_microseconds": [],
"event.start": [],
"cef.extensions.agentReceiptTime": [],
"cef.extensions.oldFileModificationTime": [],
"checkpoint.subs_exp": [],
"event.end": [],
"netflow.max_flow_end_milliseconds": [],
"netflow.min_flow_start_nanoseconds": [],
"zeek.smb_files.times.changed": [],
"crowdstrike.event.StartTimestamp": [],
"netflow.flow_start_nanoseconds": [],
"netflow.flow_start_seconds": [],
"crowdstrike.event.ProcessEndTime": [],
"zeek.x509.certificate.valid.until": [],
"misp.observed_data.first_observed": [],
"netflow.exporter.timestamp": [],
"netflow.monitoring_interval_start_milli_seconds": [],
"cef.extensions.oldFileCreateTime": [],
"event.ingested": [],
"@timestamp": [
"2020-08-04T13:31:06.863Z"
],
"zeek.ocsp.update.next": [],
"crowdstrike.event.UTCTimestamp": [],
"tls.server.not_before": [],
"cef.extensions.startTime": [],
"netflow.min_flow_start_milliseconds": [],
"azure.signinlogs.properties.created_at": [],
"cef.extensions.endTime": [],
"suricata.eve.tls.notbefore": [],
"zeek.kerberos.valid.from": [],
"cef.extensions.fileCreateTime": [],
"misp.threat_indicator.valid_until": [],
"crowdstrike.event.EndTimestamp": [],
"misp.campaign.last_seen": [],
"cef.extensions.deviceReceiptTime": [],
"netflow.observation_time_seconds": [],
"crowdstrike.metadata.eventCreationTime": [],
"cef.extensions.fileModificationTime": [],
"tls.client.not_before": [],
"zeek.smb_files.times.created": [],
"zeek.smtp.date": [],
"netflow.collection_time_milliseconds": [],
"zeek.pe.compile_time": [],
"netflow.max_flow_end_seconds": [],
"tls.client.not_after": [],
"netflow.flow_start_milliseconds": [],
"event.created": [],
"package.installed": [],
"zeek.kerberos.valid.until": [],
"suricata.eve.flow.end": [],
"netflow.observation_time_milliseconds": [],
"netflow.flow_start_microseconds": [],
"tls.server.not_after": [],
"netflow.flow_end_seconds": [],
"process.start": [],
"suricata.eve.tls.notafter": [],
"zeek.snmp.up_since": [],
"azure.enqueued_time": [],
"netflow.max_flow_end_nanoseconds": [],
"misp.intrusion_set.last_seen": [],
"netflow.min_flow_start_microseconds": [],
"netflow.observation_time_nanoseconds": [],
"cef.extensions.managerReceiptTime": [],
"file.accessed": [],
"netflow.flow_end_milliseconds": [],
"misp.campaign.first_seen": [],
"netflow.min_export_seconds": [],
"suricata.eve.flow.start": [],
"suricata.eve.timestamp": [
"2020-08-04T13:31:06.863Z"
],
"cef.extensions.deviceCustomDate1": [],
"cef.extensions.deviceCustomDate2": [],
"netflow.monitoring_interval_end_milli_seconds": [],
"file.ctime": [],
"crowdstrike.event.IncidentEndTime": [],
"zeek.smb_files.times.accessed": [],
"zeek.ocsp.revoke.time": [],
"zeek.x509.certificate.valid.from": [],
"netflow.max_export_seconds": [],
"zeek.smb_files.times.modified": [],
"kafka.block_timestamp": [],
"misp.report.published": []
}
}
ИМХО, это приведет к тому, что файлы журнала, проиндексированные в Elasticsearch (с Filebeat), будут более чем в 10 раз больше, чем необработанные .log
файлы. Это совершенно неприемлемо, поскольку дисковое пространство стоит денег.
Есть предложения, как этого избежать? Или я ошибаюсь? Спасибо за любые идеи!