Elastic Stack (Filebeat собирает журналы в Elasticsearch) тратит много дискового пространства? - PullRequest
0 голосов
/ 04 августа 2020

Я экспериментирую со стеком Elastic: использую Filebeat для сбора .log-файлов в Elasticsearch (и визуализации через Kibana). Однако я вижу, что данные, хранящиеся в Elasticsearch, занимают довольно много места для одной простой строки необработанного журнала.

Например, файл .log может содержать строку (которая занимает меньше 30 байт):

INFO This is a line of log

Но, ИМХО, Filebeat преобразует её примерно в следующее. Ого, это довольно много!

{
  "_index": "filebeat-7.8.0-2020.08.04-000001",
  "_type": "_doc",
  "_id": "1nSruXMBx-9kAPowKFuj",
  "_score": 1,
  "_source": {
    "@timestamp": "2020-08-04T13:31:06.863Z",
    "agent": {
      "version": "7.8.0",
      "hostname": "minikube",
      "ephemeral_id": "ad8494d4-f9de-434f-bc33-25a1d456062b",
      "id": "15191fd6-5274-430a-b66a-d6d772529892",
      "name": "minikube",
      "type": "filebeat"
    },
    "message": "INFO This is a line of log",
    "log": {
      "file": {
        "path": "/var/log/containers/storage-provisioner_kube-system_storage-provisioner-db4f5b692edf2b06d3a65438168f937e8160ff808ced837a2b1ce41d415fccb9.log"
      },
      "offset": 25811285
    },
    "stream": "stderr",
    "input": {
      "type": "container"
    },
    "kubernetes": {
      "namespace": "kube-system",
      "labels": {
        "addonmanager_kubernetes_io/mode": "Reconcile",
        "integration-test": "storage-provisioner"
      },
      "container": {
        "name": "storage-provisioner",
        "image": "gcr.io/k8s-minikube/storage-provisioner:v1.8.1"
      },
      "node": {
        "name": "minikube"
      },
      "pod": {
        "uid": "eb68d1c7-70c4-4ba7-8878-79419b3d57a7",
        "name": "storage-provisioner"
      }
    },
    "ecs": {
      "version": "1.5.0"
    },
    "host": {
      "mac": [
        "02:42:ff:0e:4e:ae",
        "36:e6:7a:83:ca:d5",
        "76:bf:3e:91:6b:65",
        "6a:62:f0:dc:c2:d6",
        "3e:5a:f5:4e:61:85",
        "02:42:ac:11:00:02",
        "e6:dc:92:3d:09:95",
        "1e:27:ff:d1:d2:2f",
        "c6:63:33:21:38:19",
        "de:19:b8:02:f0:e7",
        "ca:cf:25:b4:12:e4"
      ],
      "name": "minikube",
      "hostname": "minikube",
      "architecture": "x86_64",
      "os": {
        "codename": "Core",
        "platform": "centos",
        "version": "7 (Core)",
        "family": "redhat",
        "name": "CentOS Linux",
        "kernel": "4.19.76-linuxkit"
      },
      "id": "83a8f1f835d84a9a9bf5417cecaf0c8e",
      "containerized": true,
      "ip": [
        "172.18.0.1",
        "172.17.0.2"
      ]
    }
  },
  "fields": {
    "cef.extensions.flexDate1": [],
    "netflow.flow_end_microseconds": [],
    "netflow.system_init_time_milliseconds": [],
    "netflow.flow_end_nanoseconds": [],
    "misp.observed_data.last_observed": [],
    "netflow.max_flow_end_microseconds": [],
    "file.mtime": [],
    "aws.cloudtrail.user_identity.session_context.creation_date": [],
    "netflow.min_flow_start_seconds": [],
    "misp.intrusion_set.first_seen": [],
    "file.created": [],
    "misp.threat_indicator.valid_from": [],
    "process.parent.start": [],
    "azure.auditlogs.properties.activity_datetime": [],
    "crowdstrike.event.ProcessStartTime": [],
    "zeek.ocsp.update.this": [],
    "crowdstrike.event.IncidentStartTime": [],
    "netflow.observation_time_microseconds": [],
    "event.start": [],
    "cef.extensions.agentReceiptTime": [],
    "cef.extensions.oldFileModificationTime": [],
    "checkpoint.subs_exp": [],
    "event.end": [],
    "netflow.max_flow_end_milliseconds": [],
    "netflow.min_flow_start_nanoseconds": [],
    "zeek.smb_files.times.changed": [],
    "crowdstrike.event.StartTimestamp": [],
    "netflow.flow_start_nanoseconds": [],
    "netflow.flow_start_seconds": [],
    "crowdstrike.event.ProcessEndTime": [],
    "zeek.x509.certificate.valid.until": [],
    "misp.observed_data.first_observed": [],
    "netflow.exporter.timestamp": [],
    "netflow.monitoring_interval_start_milli_seconds": [],
    "cef.extensions.oldFileCreateTime": [],
    "event.ingested": [],
    "@timestamp": [
      "2020-08-04T13:31:06.863Z"
    ],
    "zeek.ocsp.update.next": [],
    "crowdstrike.event.UTCTimestamp": [],
    "tls.server.not_before": [],
    "cef.extensions.startTime": [],
    "netflow.min_flow_start_milliseconds": [],
    "azure.signinlogs.properties.created_at": [],
    "cef.extensions.endTime": [],
    "suricata.eve.tls.notbefore": [],
    "zeek.kerberos.valid.from": [],
    "cef.extensions.fileCreateTime": [],
    "misp.threat_indicator.valid_until": [],
    "crowdstrike.event.EndTimestamp": [],
    "misp.campaign.last_seen": [],
    "cef.extensions.deviceReceiptTime": [],
    "netflow.observation_time_seconds": [],
    "crowdstrike.metadata.eventCreationTime": [],
    "cef.extensions.fileModificationTime": [],
    "tls.client.not_before": [],
    "zeek.smb_files.times.created": [],
    "zeek.smtp.date": [],
    "netflow.collection_time_milliseconds": [],
    "zeek.pe.compile_time": [],
    "netflow.max_flow_end_seconds": [],
    "tls.client.not_after": [],
    "netflow.flow_start_milliseconds": [],
    "event.created": [],
    "package.installed": [],
    "zeek.kerberos.valid.until": [],
    "suricata.eve.flow.end": [],
    "netflow.observation_time_milliseconds": [],
    "netflow.flow_start_microseconds": [],
    "tls.server.not_after": [],
    "netflow.flow_end_seconds": [],
    "process.start": [],
    "suricata.eve.tls.notafter": [],
    "zeek.snmp.up_since": [],
    "azure.enqueued_time": [],
    "netflow.max_flow_end_nanoseconds": [],
    "misp.intrusion_set.last_seen": [],
    "netflow.min_flow_start_microseconds": [],
    "netflow.observation_time_nanoseconds": [],
    "cef.extensions.managerReceiptTime": [],
    "file.accessed": [],
    "netflow.flow_end_milliseconds": [],
    "misp.campaign.first_seen": [],
    "netflow.min_export_seconds": [],
    "suricata.eve.flow.start": [],
    "suricata.eve.timestamp": [
      "2020-08-04T13:31:06.863Z"
    ],
    "cef.extensions.deviceCustomDate1": [],
    "cef.extensions.deviceCustomDate2": [],
    "netflow.monitoring_interval_end_milli_seconds": [],
    "file.ctime": [],
    "crowdstrike.event.IncidentEndTime": [],
    "zeek.smb_files.times.accessed": [],
    "zeek.ocsp.revoke.time": [],
    "zeek.x509.certificate.valid.from": [],
    "netflow.max_export_seconds": [],
    "zeek.smb_files.times.modified": [],
    "kafka.block_timestamp": [],
    "misp.report.published": []
  }
}

ИМХО, это приведет к тому, что журналы, проиндексированные в Elasticsearch (с помощью Filebeat), будут занимать более чем в 10 раз больше места, чем необработанные .log-файлы. Это совершенно неприемлемо, поскольку дисковое пространство стоит денег.

Есть предложения, как этого избежать? Или я ошибаюсь? Спасибо за любые идеи!

...