Я пытаюсь отслеживать системный процесс на моей машине с Linux (например, TSDB, Kafka, Tomcat, httpd), используя CollectD Agent
Для этого случая использования я использую следующий конвейер:
Сбор метрик с помощью агента CollectD
Затем отправка данных метрик в InfluxDB
Визуализация состояния с использованием Grafana
Где я застрял:
- Я успешно включил количество запущенных, спящих, остановленных процессов, но я хочу отследить за сценой количество запущенных системных процессов (например, Запуск 4 [TSDB, Kafka, Tomcat, httpd], Остановлено 10 [ssh, ...])
Прикреплен вывод файла конфигурации моего текущего CollectD
Hostname "Master-InfluxDB"
FQDNLookup true
BaseDir "/var/lib/collectd"
PIDFile "/var/run/collectd.pid"
PluginDir "/usr/lib64/collectd"
TypesDB "/usr/share/collectd/types.db"
Interval 60
LoadPlugin "cpu"
LoadPlugin "memory"
LoadPlugin "df"
LoadPlugin "processes"
LoadPlugin "disk"
LoadPlugin syslog
LoadPlugin "logfile"
<Plugin "logfile">
LogLevel "info"
File "/var/log/collectd.log"
Timestamp true
</Plugin>
LoadPlugin aggregation
LoadPlugin cpu
LoadPlugin df
LoadPlugin disk
LoadPlugin interface
LoadPlugin load
LoadPlugin memory
LoadPlugin network
LoadPlugin processes
LoadPlugin uptime
LoadPlugin users
<Plugin "aggregation">
<Aggregation>
Plugin "cpu"
Type "cpu"
GroupBy "Host"
GroupBy "TypeInstance"
CalculateAverage true
</Aggregation>
</Plugin>
<Plugin cpu>
ReportByCpu false
ReportByState true
ValuesPercentage true
</Plugin>
<Plugin df>
MountPoint "/"
IgnoreSelected false
ReportByDevice false
ReportInodes false
ValuesAbsolute true
ValuesPercentage true
</Plugin>
<Plugin disk>
Disk "xvda1"
Disk "xvda"
IgnoreSelected false
</Plugin>
<Plugin interface>
Interface "eth0"
IgnoreSelected false
</Plugin>
<Plugin load>
ReportRelative true
</Plugin>
<Plugin memory>
ValuesAbsolute true
ValuesPercentage true
</Plugin>
<Plugin network>
Server "127.0.0.1" "25826"
</Plugin>
<Plugin processes>
CollectFileDescriptor true
CollectContextSwitch true
CollectMemoryMaps true
Process "name"
<Process "collectd">
CollectFileDescriptor false
CollectContextSwitch false
</Process>
</Plugin>
Include "/etc/collectd.d"
Виджет процесса Grafana
Панель процесса Grafana JSON
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "InfluxDB",
"editable": true,
"error": false,
"fill": 1,
"grid": {},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 20
},
"id": 9,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "connected",
"options": {},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"alias": "$tag_host: Running",
"dsType": "influxdb",
"expr": "",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"intervalFactor": 2,
"measurement": "processes",
"orderByTime": "ASC",
"policy": "default",
"query": "select mean(value) from \"processes_value\" WHERE \"type_instance\" = 'running' AND \"type\" = 'ps_state' AND \"host\" =~ /^$host$/ AND $timeFilter GROUP BY time($interval),\"host\"",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"running"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": [
{
"key": "host",
"operator": "=~",
"value": "/^$host$/"
}
]
},
{
"alias": "$tag_host: Blocked",
"dsType": "influxdb",
"expr": "",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"intervalFactor": 2,
"measurement": "processes",
"orderByTime": "ASC",
"policy": "default",
"query": "select mean(value) from \"processes_value\" WHERE \"type_instance\" = 'blocked' AND \"type\" = 'ps_state' AND \"host\" =~ /^$host$/ AND $timeFilter GROUP BY time($interval),\"host\"",
"rawQuery": true,
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"blocked"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": [
{
"key": "host",
"operator": "=~",
"value": "/^$host$/"
}
]
},
{
"alias": "$tag_host: Sleeping",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "select mean(value) from \"processes_value\" WHERE \"type_instance\" = 'sleeping' AND \"type\" = 'ps_state' AND \"host\" =~ /^$host$/ AND $timeFilter GROUP BY time($interval),\"host\"",
"rawQuery": true,
"refId": "C",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"alias": "$tag_host: Stopped",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "select mean(value) from \"processes_value\" WHERE \"type_instance\" = 'stopped' AND \"type\" = 'ps_state' AND \"host\" =~ /^$host$/ AND $timeFilter GROUP BY time($interval),\"host\"",
"rawQuery": true,
"refId": "D",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"alias": "$tag_host: Paging",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "select mean(value) from \"processes_value\" WHERE \"type_instance\" = 'paging' AND \"type\" = 'ps_state' AND \"host\" =~ /^$host$/ AND $timeFilter GROUP BY time($interval),\"host\"",
"rawQuery": true,
"refId": "E",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"alias": "$tag_host: Zombies",
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "select mean(value) from \"processes_value\" WHERE \"type_instance\" = 'zombies' AND \"type\" = 'ps_state' AND \"host\" =~ /^$host$/ AND $timeFilter GROUP BY time($interval),\"host\"",
"rawQuery": true,
"refId": "F",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "(Stacked) Processes",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
Спасибо