安装Prometheus
下载Prometheus
解压安装包
[root@lhx media]# tar -xvf prometheus-2.27.0.linux-amd64.tar.gz
[root@lhx media]# mv prometheus-2.27.0.linux-amd64 /usr/local/prometheus
[root@lhx media]# echo "export PATH=$PATH:/usr/local/prometheus" >>/etc/profile
[root@lhx media]# source /etc/profile
配置参数
[root@lhx media]# cat /usr/local/prometheus/prometheus.yml
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
# - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'prometheus'
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ['localhost:9090']
检查配置文件:promtool check config prometheus.yml
创建系统用户
[root@lhx media]# groupadd prometheus
[root@lhx media]# useradd -g prometheus -m -d /data/prometheus/ -s /sbin/nologin prometheus
设置系统服务
[root@lhx media]# cat /usr/lib/systemd/system/prometheus.service
[Unit]
Description=Prometheus
Documentation=https://prometheus.io/
After=network.target
[Service]
Type=forking
User=prometheus
ExecStart=/usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml --web.enable-lifecycle --storage.tsdb.path=/data/prometheus/ --storage.tsdb.retention=60d
Restart=on-failure
[Install]
WantedBy=multi-user.target
[root@lhx media]# systemctl daemon-reload
[root@lhx media]# systemctl enable prometheus.service
[root@lhx media]# systemctl start prometheus.service
- --web.enable-lifecycle:2.0之后版本支持热加载(curl -X POST http://host:9090/-/reload)
- --storage.tsdb.path:TSDB数据保存位置
- --storage.tsdb.retention:数据保留周期
访问prometheus
配置node_exporter
添加用户
[root@lhx media]# groupadd prometheus
[root@lhx media]# useradd -g prometheus -m -d /data/prometheus/ -s /sbin/nologin prometheus
下载node_exporter
DownLoad
解压安装包
[root@lhx media]# tar -xvf node_exporter-1.1.2.linux-amd64.tar.gz
[root@lhx media]# mv node_exporter-1.1.2.linux-amd64 /usr/local/node_exporter
配置系统服务
[root@lhx media]# cat /usr/lib/systemd/system/node_exporter.service
[Unit]
Description=node_exporter
After=network.target
[Service]
Type=forking
User=prometheus
ExecStart=/usr/local/node_exporter/node_exporter
Restart=on-failure
[Install]
WantedBy=multi-user.target
[root@lhx media]# systemctl daemon-reload
[root@lhx media]# systemctl enable node_exporter.service
[root@lhx media]# systemctl start node_exporter.service
注册到prometheus
[root@lhx media]# cat /data/prometheus/linux.yml
- targets: ['118.24.119.116:9100']
labels:
name: linux-node1
[root@lhx media]# cat >> /usr/local/prometheus/prometheus.yml << EOF
- job_name: 'linux'
file_sd_configs:
- files: ['/data/prometheus/linux.yml']
refresh_interval: 5s
EOF
[root@lhx media]# curl -X POST http://localhost:9090/-/reload
查看注册服务
Supervisor
supervisor是一个用Python编写的进程管理工具,它可以很方便地监听、启动、停止、重启一个或多个进程。当一个进程意外被杀死,supervisor监听到进程死后,可以自动恢复。
安装supervisor
在线安装可以执行下列语句
[root@lhx media]# yum install supervisor -y
离线安装则需要下列软件
- supervisor:DownLoad
- python
- setuptools
- meld3
配置supervisor
[root@lhx media]# cat /etc/supervisord.conf
[supervisord]
;http_port=/var/tmp/supervisor.sock ; (default is to run a UNIX domain socket server)
;http_port=127.0.0.1:9001 ; (alternately, ip_address:port specifies AF_INET)
sockchmod=0700 ; AF_UNIX socketmode (AF_INET ignore, default 0700)
sockchown=nobody.nogroup ; AF_UNIX socket uid.gid owner (AF_INET ignores)
umask=022 ; (process file creation umask;default 022)
logfile=/var/log/supervisor/supervisord.log ; (main log file;default $CWD/supervisord.log)
logfile_maxbytes=50MB ; (max main logfile bytes b4 rotation;default 50MB)
logfile_backups=10 ; (num of main logfile rotation backups;default 10)
loglevel=info ; (logging level;default info; others: debug,warn)
pidfile=/var/run/supervisord.pid ; (supervisord pidfile;default supervisord.pid)
nodaemon=false ; (start in foreground if true;default false)
minfds=1024 ; (min. avail startup file descriptors;default 1024)
minprocs=200 ; (min. avail process descriptors;default 200)
;nocleanup=true ; (don't clean up tempfiles at start;default false)
;http_username=user ; (default is no username (open system))
;http_password=123 ; (default is no password (open system))
;childlogdir=/tmp ; ('AUTO' child log dir, default $TEMP)
;user=chrism ; (default is current user, required if root)
;directory=/tmp ; (default is not to cd during start)
;environment=KEY=value ; (key value pairs to add to environment)
[supervisorctl]
serverurl=unix:///var/tmp/supervisor.sock ; use a unix:// URL for a unix socket
;serverurl=http://127.0.0.1:9001 ; use an http:// url to specify an inet socket
;username=chris ; should be same as http_username if set
;password=123 ; should be same as http_password if set
;prompt=mysupervisor ; cmd line prompt (default "supervisor")
[program:prometheus]
command=systemctl start prometheus.service ; the program (relative uses PATH, can take args)
priority=999 ; the relative start priority (default 999)
autostart=true ; start at supervisord start (default: true)
autorestart=true ; restart at unexpected quit (default: true)
startsecs=10 ; number of secs prog must stay running (def. 10)
startretries=3 ; max # of serial start failures (default 3)
exitcodes=0,2 ; 'expected' exit codes for process (default 0,2)
stopsignal=QUIT ; signal used to kill process (default TERM)
stopwaitsecs=10 ; max num secs to wait before SIGKILL (default 10)
user=prometheus ; setuid to this UNIX account to run the program
log_stdout=true ; if true, log program stdout (default true)
log_stderr=true ; if true, log program stderr (def false)
logfile=/var/log/supervisor/cat.log ; child log path, use NONE for none; default AUTO
logfile_maxbytes=1MB ; max # logfile bytes b4 rotation (default 50MB)
logfile_backups=10 ; # of logfile backups (default 10)
启动supervisor
[root@lhx media]# supervisord -c /etc/supervisord.conf
进程管理
[root@lhx media]# supervisorctl status
[root@lhx media]# supervisorctl start prometheus
[root@lhx media]# supervisorctl stop prometheus
Grafana
Grafana是一款用Go语言开发的开源数据可视化工具,可用于数据监控、统计与展示,并可对接Prometheus的监控数据。
下载安装包
DownLoad
安装字体包
[root@lhx media]# yum install urw-fonts freetype* fontconfig -y
安装grafana
[root@lhx media]# rpm -ivh grafana-7.5.6-1.x86_64.rpm
warning: grafana-7.5.6-1.x86_64.rpm: Header V4 RSA/SHA256 Signature, key ID 24098cb6: NOKEY
Preparing... ########################################### [100%]
1:grafana ########################################### [100%]
配置参数
[root@lhx media]# cat /etc/grafana/provisioning/dashboards/sample.yaml
# # config file version
apiVersion: 1
providers:
- name: 'default'
orgId: 1
folder: ''
folderUid: ''
type: file
options:
path: /var/lib/grafana/dashboards
开启插件功能
[root@lhx media]# cat /etc/grafana/grafana.ini
plugins = /var/lib/grafana/plugins
下载Dashboard模板
DownLoad
[root@lhx media]# mkdir /var/lib/grafana/dashboards
[root@lhx media]# cp 1-node-exporter-0-16-0-18-for-prometheus_rev1.json /var/lib/grafana/dashboards
[root@lhx media]# chown -R grafana.grafana /var/lib/grafana/dashboards/1-node-exporter-0-16-0-18-for-prometheus_rev1.json
需要修改文件中的datasource为Prometheus
[root@lhx dashboards]# sed -i 's/\${DS_PROMETHEUS_111}/Prometheus/' linux.json
下载饼状图插件
DownLoad
[root@lhx media]# unzip grafana-piechart-panel-1.6.1.zip
[root@lhx media]# mkdir /var/lib/grafana/plugins
[root@lhx media]# mv grafana-piechart-panel /var/lib/grafana/plugins/
启动grafana
[root@lhx media]# systemctl start grafana-server
导入datasource
地址:http://118.24.119.116:3000/
默认用户名/密码为admin/admin
点击config->data sources->add data source
查看监控面板
点击Dashboard->Manage