Как включить мониторинг процессов системы Linux с помощью CollectD - PullRequest
0 голосов
/ 26 сентября 2019

Я пытаюсь отслеживать системные процессы на машине с Linux (например, TSDB, Kafka, Tomcat, httpd) с помощью агента CollectD.

Для этой задачи я использую следующий конвейер:

  1. Сбор метрик с помощью агента CollectD

  2. Затем отправка данных метрик в InfluxDB

  3. Визуализация состояния с использованием Grafana

Где я застрял:

  1. Мне удалось включить подсчёт запущенных, спящих и остановленных процессов, но я хочу также видеть, какие именно системные процессы стоят за этими числами (например: «Запущено: 4 [TSDB, Kafka, Tomcat, httpd]», «Остановлено: 10 [ssh, ...]»).

Ниже приведён мой текущий файл конфигурации CollectD:

# ---------------------------------------------------------------------------
# Global daemon settings
# ---------------------------------------------------------------------------
# NOTE(review): Hostname is set explicitly, so FQDNLookup is presumably not
# consulted -- confirm against collectd.conf(5) before relying on it.
Hostname    "Master-InfluxDB"
FQDNLookup  true
BaseDir     "/var/lib/collectd"
PIDFile     "/var/run/collectd.pid"
PluginDir   "/usr/lib64/collectd"
TypesDB     "/usr/share/collectd/types.db"
Interval    60

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# The logging plugins are loaded and configured before any other plugin so
# that messages emitted while the remaining plugins start up are captured.
LoadPlugin syslog
LoadPlugin logfile
<Plugin logfile>
  LogLevel "info"
  File "/var/log/collectd.log"
  Timestamp true
</Plugin>

# ---------------------------------------------------------------------------
# Plugins
# ---------------------------------------------------------------------------
# Each plugin is loaded exactly once. The original file loaded cpu, memory,
# df, processes and disk twice (once quoted, once unquoted).
LoadPlugin aggregation
LoadPlugin cpu
LoadPlugin df
LoadPlugin disk
LoadPlugin interface
LoadPlugin load
LoadPlugin memory
LoadPlugin network
LoadPlugin processes
LoadPlugin uptime
LoadPlugin users

# Average the "cpu" values, grouped by host and type instance.
<Plugin aggregation>
  <Aggregation>
    Plugin "cpu"
    Type "cpu"
    GroupBy "Host"
    GroupBy "TypeInstance"
    CalculateAverage true
  </Aggregation>
</Plugin>

<Plugin cpu>
  ReportByCpu false
  ReportByState true
  ValuesPercentage true
</Plugin>

# IgnoreSelected false: only the listed mount point is selected.
<Plugin df>
  MountPoint "/"
  IgnoreSelected false
  ReportByDevice false
  ReportInodes false
  ValuesAbsolute true
  ValuesPercentage true
</Plugin>

# IgnoreSelected false: only the listed disks are selected.
<Plugin disk>
  Disk "xvda1"
  Disk "xvda"
  IgnoreSelected false
</Plugin>

# IgnoreSelected false: only the listed interface is selected.
<Plugin interface>
  Interface "eth0"
  IgnoreSelected false
</Plugin>

<Plugin load>
  ReportRelative true
</Plugin>

<Plugin memory>
  ValuesAbsolute true
  ValuesPercentage true
</Plugin>

# Send collected values to the listener on 127.0.0.1, port 25826.
<Plugin network>
  Server "127.0.0.1" "25826"
</Plugin>

<Plugin processes>
  # Set outside a Process/ProcessMatch block, these act as defaults for the
  # selections that follow.
  CollectFileDescriptor true
  CollectContextSwitch true
  CollectMemoryMaps true

  # Per-process selections. The original file had only the documentation
  # placeholder `Process "name"`, which matches a process literally called
  # "name", so none of the services of interest were tracked individually.
  #
  # Process      selects by process name.
  # ProcessMatch selects by a regex applied to the command line; this is
  #              needed for JVM services, whose process name is just "java".
  #
  # NOTE(review): the regexes below assume the stock launcher classes of
  # Kafka, Tomcat and OpenTSDB ("TSDB" is presumed to mean OpenTSDB). Verify
  # them against `ps -ef` on the target host. "[.]" is used instead of a
  # backslash-escaped dot to avoid depending on config-file string escaping.
  Process "httpd"
  ProcessMatch "kafka" "kafka[.]Kafka"
  ProcessMatch "tomcat" "catalina[.]startup[.]Bootstrap"
  ProcessMatch "tsdb" "opentsdb[.]tools[.]TSDMain"

  # collectd itself is tracked, but without the two more detailed collectors.
  <Process "collectd">
    CollectFileDescriptor false
    CollectContextSwitch false
  </Process>
</Plugin>

# Pull in additional configuration from the drop-in directory.
Include "/etc/collectd.d"

Виджет процессов в Grafana

JSON панели процессов Grafana

{
  "aliasColors": {},
  "bars": false,
  "dashLength": 10,
  "dashes": false,
  "datasource": "InfluxDB",
  "editable": true,
  "error": false,
  "fill": 1,
  "grid": {},
  "gridPos": {
    "h": 7,
    "w": 24,
    "x": 0,
    "y": 20
  },
  "id": 9,
  "isNew": true,
  "legend": {
    "alignAsTable": true,
    "avg": true,
    "current": true,
    "max": true,
    "min": true,
    "rightSide": true,
    "show": true,
    "total": false,
    "values": true
  },
  "lines": true,
  "linewidth": 1,
  "links": [],
  "nullPointMode": "connected",
  "options": {},
  "percentage": false,
  "pointradius": 5,
  "points": false,
  "renderer": "flot",
  "seriesOverrides": [],
  "spaceLength": 10,
  "stack": true,
  "steppedLine": false,
  "targets": [
    {
      "alias": "$tag_host: Running",
      "dsType": "influxdb",
      "expr": "",
      "groupBy": [
        {
          "params": [
            "$interval"
          ],
          "type": "time"
        },
        {
          "params": [
            "null"
          ],
          "type": "fill"
        }
      ],
      "intervalFactor": 2,
      "measurement": "processes_value",
      "orderByTime": "ASC",
      "policy": "default",
      "query": "select mean(value) from \"processes_value\" WHERE \"type_instance\" = 'running' AND \"type\" = 'ps_state' AND \"host\" =~ /^$host$/ AND $timeFilter GROUP BY time($interval),\"host\"",
      "rawQuery": true,
      "refId": "A",
      "resultFormat": "time_series",
      "select": [
        [
          {
            "params": [
              "value"
            ],
            "type": "field"
          },
          {
            "params": [],
            "type": "mean"
          }
        ]
      ],
      "tags": [
        {
          "key": "host",
          "operator": "=~",
          "value": "/^$host$/"
        }
      ]
    },
    {
      "alias": "$tag_host: Blocked",
      "dsType": "influxdb",
      "expr": "",
      "groupBy": [
        {
          "params": [
            "$interval"
          ],
          "type": "time"
        },
        {
          "params": [
            "null"
          ],
          "type": "fill"
        }
      ],
      "intervalFactor": 2,
      "measurement": "processes_value",
      "orderByTime": "ASC",
      "policy": "default",
      "query": "select mean(value) from \"processes_value\" WHERE \"type_instance\" = 'blocked' AND \"type\" = 'ps_state' AND \"host\" =~ /^$host$/ AND $timeFilter GROUP BY time($interval),\"host\"",
      "rawQuery": true,
      "refId": "B",
      "resultFormat": "time_series",
      "select": [
        [
          {
            "params": [
              "value"
            ],
            "type": "field"
          },
          {
            "params": [],
            "type": "mean"
          }
        ]
      ],
      "tags": [
        {
          "key": "host",
          "operator": "=~",
          "value": "/^$host$/"
        }
      ]
    },
    {
      "alias": "$tag_host: Sleeping",
      "dsType": "influxdb",
      "groupBy": [
        {
          "params": [
            "$interval"
          ],
          "type": "time"
        },
        {
          "params": [
            "null"
          ],
          "type": "fill"
        }
      ],
      "orderByTime": "ASC",
      "policy": "default",
      "query": "select mean(value) from \"processes_value\" WHERE \"type_instance\" = 'sleeping' AND \"type\" = 'ps_state' AND \"host\" =~ /^$host$/ AND $timeFilter GROUP BY time($interval),\"host\"",
      "rawQuery": true,
      "refId": "C",
      "resultFormat": "time_series",
      "select": [
        [
          {
            "params": [
              "value"
            ],
            "type": "field"
          },
          {
            "params": [],
            "type": "mean"
          }
        ]
      ],
      "tags": []
    },
    {
      "alias": "$tag_host: Stopped",
      "dsType": "influxdb",
      "groupBy": [
        {
          "params": [
            "$interval"
          ],
          "type": "time"
        },
        {
          "params": [
            "null"
          ],
          "type": "fill"
        }
      ],
      "orderByTime": "ASC",
      "policy": "default",
      "query": "select mean(value) from \"processes_value\" WHERE \"type_instance\" = 'stopped' AND \"type\" = 'ps_state' AND \"host\" =~ /^$host$/ AND $timeFilter GROUP BY time($interval),\"host\"",
      "rawQuery": true,
      "refId": "D",
      "resultFormat": "time_series",
      "select": [
        [
          {
            "params": [
              "value"
            ],
            "type": "field"
          },
          {
            "params": [],
            "type": "mean"
          }
        ]
      ],
      "tags": []
    },
    {
      "alias": "$tag_host: Paging",
      "dsType": "influxdb",
      "groupBy": [
        {
          "params": [
            "$interval"
          ],
          "type": "time"
        },
        {
          "params": [
            "null"
          ],
          "type": "fill"
        }
      ],
      "orderByTime": "ASC",
      "policy": "default",
      "query": "select mean(value) from \"processes_value\" WHERE \"type_instance\" = 'paging' AND \"type\" = 'ps_state' AND \"host\" =~ /^$host$/ AND $timeFilter GROUP BY time($interval),\"host\"",
      "rawQuery": true,
      "refId": "E",
      "resultFormat": "time_series",
      "select": [
        [
          {
            "params": [
              "value"
            ],
            "type": "field"
          },
          {
            "params": [],
            "type": "mean"
          }
        ]
      ],
      "tags": []
    },
    {
      "alias": "$tag_host: Zombies",
      "dsType": "influxdb",
      "groupBy": [
        {
          "params": [
            "$interval"
          ],
          "type": "time"
        },
        {
          "params": [
            "null"
          ],
          "type": "fill"
        }
      ],
      "orderByTime": "ASC",
      "policy": "default",
      "query": "select mean(value) from \"processes_value\" WHERE \"type_instance\" = 'zombies' AND \"type\" = 'ps_state' AND \"host\" =~ /^$host$/ AND $timeFilter GROUP BY time($interval),\"host\"",
      "rawQuery": true,
      "refId": "F",
      "resultFormat": "time_series",
      "select": [
        [
          {
            "params": [
              "value"
            ],
            "type": "field"
          },
          {
            "params": [],
            "type": "mean"
          }
        ]
      ],
      "tags": []
    }
  ],
  "thresholds": [],
  "timeFrom": null,
  "timeRegions": [],
  "timeShift": null,
  "title": "(Stacked) Processes",
  "tooltip": {
    "msResolution": true,
    "shared": true,
    "sort": 0,
    "value_type": "individual"
  },
  "type": "graph",
  "xaxis": {
    "buckets": null,
    "mode": "time",
    "name": null,
    "show": true,
    "values": []
  },
  "yaxes": [
    {
      "format": "short",
      "label": null,
      "logBase": 1,
      "max": null,
      "min": 0,
      "show": true
    },
    {
      "format": "short",
      "label": null,
      "logBase": 1,
      "max": null,
      "min": 0,
      "show": true
    }
  ],
  "yaxis": {
    "align": false,
    "alignLevel": null
  }
}

Спасибо

...