拨测服务部署及配置
[TOC]
部署docker
- 下载docker安装包
[root@localhost local]# wget https://install.jishuliu.cn/docker/docker-24.0.4.tgz
--2023-04-11 18:30:36-- https://install.jishuliu.cn/docker/docker-24.0.4.tgz
Resolving package.jishuliu.cn (package.jishuliu.cn)... 58.216.106.230, 58.221.30.104, 58.221.30.105, ...
Connecting to package.jishuliu.cn (package.jishuliu.cn)|58.216.106.230|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 60730088 (58M) [application/x-compressed]
Saving to: ‘docker-24.0.4.tgz’
100%[====================================================================================================================================================================================================================================>] 60,730,088 505KB/s in 1m 59s
2023-04-11 18:32:36 (498 KB/s) - ‘docker-24.0.4.tgz’ saved [60730088/60730088]
[root@localhost local]#
- 解压并创建软连接
[root@localhost local]# tar xf docker-24.0.4.tgz
[root@localhost local]# pwd
/usr/local
[root@localhost local]# ln -s /usr/local/docker/* /usr/local/bin/
[root@localhost local]# docker -v
Docker version 19.03.9, build 9d988398e7
[root@localhost local]# which docker
/usr/local/sbin/docker
[root@localhost local]#
- 创建docker系统服务
[root@localhost local]# vim /etc/systemd/system/docker.service
# systemd unit for the Docker daemon installed from the static binary tarball.
[Unit]
Description=Docker Application Container Engine
Documentation=https://docs.docker.com
# Order the daemon after the network is fully up and after firewalld (if present).
After=network-online.target firewalld.service
Wants=network-online.target
[Service]
# dockerd notifies systemd itself once it is ready to serve requests.
Type=notify
# The socket URI needs three slashes: "unix://" plus the absolute path
# "/var/run/docker.sock". With only two slashes the path is relative to the
# daemon's working directory (the log then shows "API listen on var/run/docker.sock").
ExecStart=/usr/local/bin/dockerd -H unix:///var/run/docker.sock
ExecReload=/bin/kill -s HUP $MAINPID
# Disable the start/stop timeout.
TimeoutSec=0
RestartSec=2
Restart=always
# Give up restarting after 3 failed starts within 60 seconds.
StartLimitBurst=3
StartLimitInterval=60s
# Do not impose resource limits on the daemon.
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity
# Let dockerd manage the cgroups of its containers itself.
Delegate=yes
# On stop, kill only the dockerd main process, not every process in the cgroup.
KillMode=process
[Install]
WantedBy=multi-user.target
- 启动docker(先执行 systemctl daemon-reload 加载新建的服务文件,再执行 systemctl enable --now docker 启动并设置开机自启,然后查看服务状态)
[root@localhost local]# setenforce 0
[root@localhost local]# systemctl stop firewalld.service
[root@localhost local]# service docker status
Redirecting to /bin/systemctl status docker.service
● docker.service - Docker Application Container Engine
Loaded: loaded (/etc/systemd/system/docker.service; enabled; vendor preset: disabled)
Active: active (running) since Tue 2023-04-11 18:35:57 CST; 3s ago
Docs: https://docs.docker.com
Main PID: 16203 (dockerd)
Tasks: 34
Memory: 36.6M
CGroup: /system.slice/docker.service
├─16203 /usr/local/sbin/dockerd -H unix://var/run/docker.sock
└─16216 containerd --config /var/run/docker/containerd/containerd.toml --log-level info
Apr 11 18:35:56 localhost.localdomain dockerd[16203]: time="2023-04-11T18:35:56.827706634+08:00" level=info msg="scheme \"unix\" not registered, fallback to default scheme" module=grpc
Apr 11 18:35:56 localhost.localdomain dockerd[16203]: time="2023-04-11T18:35:56.827718906+08:00" level=info msg="ccResolverWrapper: sending update to cc: {[{unix:///var/run/docker/containerd/containerd.sock 0 <nil>}] <nil>}" module=grpc
Apr 11 18:35:56 localhost.localdomain dockerd[16203]: time="2023-04-11T18:35:56.827724351+08:00" level=info msg="ClientConn switching balancer to \"pick_first\"" module=grpc
Apr 11 18:35:56 localhost.localdomain dockerd[16203]: time="2023-04-11T18:35:56.922053025+08:00" level=info msg="Loading containers: start."
Apr 11 18:35:57 localhost.localdomain dockerd[16203]: time="2023-04-11T18:35:57.025237913+08:00" level=info msg="Default bridge (docker0) is assigned with an IP address 172.17.0.0/16. Daemon option --bip can be used to set a preferred IP address"
Apr 11 18:35:57 localhost.localdomain dockerd[16203]: time="2023-04-11T18:35:57.059798153+08:00" level=info msg="Loading containers: done."
Apr 11 18:35:57 localhost.localdomain dockerd[16203]: time="2023-04-11T18:35:57.110612008+08:00" level=info msg="Docker daemon" commit=9d988398e7 graphdriver(s)=overlay2 version=19.03.9
Apr 11 18:35:57 localhost.localdomain dockerd[16203]: time="2023-04-11T18:35:57.110788074+08:00" level=info msg="Daemon has completed initialization"
Apr 11 18:35:57 localhost.localdomain dockerd[16203]: time="2023-04-11T18:35:57.135047257+08:00" level=info msg="API listen on var/run/docker.sock"
Apr 11 18:35:57 localhost.localdomain systemd[1]: Started Docker Application Container Engine.
[root@localhost local]#
拨测
部署拨测
- 创建本地数据存储路径
[root@localhost local]# mkdir uptime-kuma
[root@localhost local]# pwd
/usr/local
- 启动拨测
[root@localhost local]# docker run -d --restart=always -p 3001:3001 -v /etc/localtime:/etc/localtime -v /usr/local/uptime-kuma:/app/data --name uptime-kuma louislam/uptime-kuma:1
Unable to find image 'louislam/uptime-kuma:1' locally
1: Pulling from louislam/uptime-kuma
3689b8de819b: Pull complete
4178a276654a: Pull complete
b46162c13de5: Pull complete
4d3ac03f17d8: Pull complete
b935255dae7e: Pull complete
792f129a81f3: Pull complete
4110002867ba: Pull complete
390f8662c74f: Pull complete
9dd174cf6e30: Pull complete
4f4fb700ef54: Pull complete
703bad70ccf2: Pull complete
Digest: sha256:cf61d3262b29e1c48cc2ac284c9264227bbc46168f408e5f4c4d6301f0629e41
Status: Downloaded newer image for louislam/uptime-kuma:1
9bb3779459611f192b53d05bea65d4760dbc69130f4aa36ecd992f8a4ed5bdd9
- 修改通知的时间为服务器时间
[root@localhost local]# docker cp uptime-kuma:/app/server/notification.js ./
[root@localhost local]# vim notification.js
# 在notification.js文件首行加入:const dayjs = require("dayjs");
# 把notification.js文件中的send方法(约第143行,行号随版本可能不同)修改为以下代码
static async send(notification, msg, monitorJSON = null, heartbeatJSON = null) {
if (this.providerList[notification.type]) {
return this.providerList[notification.type].send(notification, msg, monitorJSON, heartbeatJSON);
if (heartbeatJSON != null) {
heartbeatJSON['time'] = dayjs.utc(heartbeatJSON['time']).tz().format("YYYY-MM-DD HH:mm:ss.SSS");
}
} else {
throw new Error("Notification type is not supported");
}
}
[root@localhost local]# docker cp notification.js uptime-kuma:/app/server/
[root@localhost local]# docker restart uptime-kuma
- 浏览器访问并创建管理员账号密码
添加监控项
- 部署nginx
[root@localhost ~]# yum install -y gcc gcc-c++ libevent pcre pcre-devel zlib zlib-devel openssl openssl-devel
[root@localhost ~]# wget https://install.jishuliu.cn/nginx/nginx-1.21.3.tar.gz
[root@localhost ~]# tar xf nginx-1.21.3.tar.gz
[root@localhost ~]# cd nginx-1.21.3
[root@localhost nginx-1.21.3]# ./configure --prefix=/usr/local/nginx
[root@localhost nginx-1.21.3]# make && make install
[root@localhost nginx-1.21.3]# /usr/local/nginx/sbin/nginx
- 访问nginx测试
- 拨测服务添加监控项
- 保存查看
配置钉钉报警
配置钉钉报警前,需要先创建一个钉钉群(建群至少需要三个人),然后在群里添加报警机器人
- 设置报警机器人
- 拨测设置通知
- 测试报警,首先把nginx停掉
[root@localhost sbin]# hostname -I
192.168.9.108 172.17.0.1
[root@localhost sbin]# /usr/local/nginx/sbin/nginx -s stop
[root@localhost sbin]# ps -ef |grep nginx
root 25303 16002 0 14:09 pts/0 00:00:00 grep --color=auto nginx
[root@localhost sbin]#
- 恢复报警,把nginx重新启动
[root@localhost sbin]# /usr/local/nginx/sbin/nginx
[root@localhost sbin]# hostname -I
192.168.9.108 172.17.0.1
[root@localhost sbin]# ps -ef |grep nginx
root 25361 1 0 14:11 ? 00:00:00 nginx: master process /usr/local/nginx/sbin/nginx
nobody 25362 25361 0 14:11 ? 00:00:00 nginx: worker process
root 25392 16002 0 14:12 pts/0 00:00:00 grep --color=auto nginx
[root@localhost sbin]#
拨测集成grafana
部署Prometheus
- 下载Prometheus安装包
[root@localhost local]# wget https://install.jishuliu.cn/prometheus/prometheus-2.32.1.linux-amd64.tar.gz
--2023-04-11 14:18:51-- https://install.jishuliu.cn/prometheus/prometheus-2.32.1.linux-amd64.tar.gz
Resolving package.jishuliu.cn (package.jishuliu.cn)... 58.221.30.105, 58.221.30.104, 240e:97b:501:102:6c::31, ...
Connecting to package.jishuliu.cn (package.jishuliu.cn)|58.221.30.105|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 75073989 (72M) [application/x-gzip]
Saving to: ‘prometheus-2.32.1.linux-amd64.tar.gz’
100%[====================================================================================================================================================================================================================================>] 75,073,989 502KB/s in 2m 27s
2023-04-11 14:21:19 (497 KB/s) - ‘prometheus-2.32.1.linux-amd64.tar.gz’ saved [75073989/75073989]
[root@localhost local]#
- 拨测服务创建API密钥
- 修改Prometheus配置文件
[root@localhost local]# tar xf prometheus-2.32.1.linux-amd64.tar.gz
[root@localhost local]# mv prometheus-2.32.1.linux-amd64 prometheus
[root@localhost local]# cd prometheus
[root@localhost prometheus]# ls
console_libraries consoles LICENSE NOTICE prometheus prometheus.yml promtool
[root@localhost prometheus]# vim prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]

  # Scrape job for the probe service (Uptime Kuma) metrics.
  - job_name: 'uptime-9.108' # project name
    scrape_interval: 30s # scrape interval
    scheme: http
    static_configs:
      - targets: ['192.168.9.108:3001'] # probe service IP:port
    basic_auth:
      # API key created in the probe service; replace it with your own key.
      password: uk1_v0K5sPfQLiVUedIbX-aetwiQP7Thx4d1bDg0HmEa
- 启动prometheus
[root@localhost prometheus]# vim /etc/systemd/system/prometheus.service
# systemd unit that runs the Prometheus binary installed under /usr/local/prometheus.
[Unit]
Description=Prometheus
Documentation=https://prometheus.io/
# Order this unit after network.target.
After=network.target
[Service]
# The process started by ExecStart is the main service process (no forking).
Type=simple
# NOTE(review): the service runs as root; consider a dedicated unprivileged user.
User=root
# Use the config file and the TSDB data directory under /usr/local/prometheus.
ExecStart=/usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml --storage.tsdb.path=/usr/local/prometheus/data
# Restart the service automatically when it exits with a failure.
Restart=on-failure
[Install]
# Target the unit is attached to when it is enabled.
WantedBy=multi-user.target
[root@localhost prometheus]# systemctl daemon-reload
[root@localhost prometheus]# service prometheus start
Redirecting to /bin/systemctl start prometheus.service
[root@localhost prometheus]# service prometheus status
Redirecting to /bin/systemctl status prometheus.service
● prometheus.service - Prometheus
Loaded: loaded (/etc/systemd/system/prometheus.service; disabled; vendor preset: disabled)
Active: active (running) since Tue 2023-04-11 14:37:09 CST; 2s ago
Docs: https://prometheus.io/
Main PID: 26340 (prometheus)
Tasks: 12
Memory: 29.1M
CGroup: /system.slice/prometheus.service
└─26340 /usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml --storage.tsdb.path=/usr/local/prometheus/data
Apr 11 14:37:10 localhost.localdomain prometheus[26340]: ts=2023-04-11T06:37:10.056Z caller=head.go:522 level=info component=tsdb msg="On-disk memory mappable chunks replay completed" duration=5.474µs
Apr 11 14:37:10 localhost.localdomain prometheus[26340]: ts=2023-04-11T06:37:10.056Z caller=head.go:528 level=info component=tsdb msg="Replaying WAL, this may take a while"
Apr 11 14:37:10 localhost.localdomain prometheus[26340]: ts=2023-04-11T06:37:10.149Z caller=head.go:599 level=info component=tsdb msg="WAL segment loaded" segment=0 maxSegment=1
Apr 11 14:37:10 localhost.localdomain prometheus[26340]: ts=2023-04-11T06:37:10.150Z caller=head.go:599 level=info component=tsdb msg="WAL segment loaded" segment=1 maxSegment=1
Apr 11 14:37:10 localhost.localdomain prometheus[26340]: ts=2023-04-11T06:37:10.150Z caller=head.go:605 level=info component=tsdb msg="WAL replay completed" checkpoint_replay_duration=25.278µs wal_replay_duration=93.912477ms total_replay_duration=93.95701ms
Apr 11 14:37:10 localhost.localdomain prometheus[26340]: ts=2023-04-11T06:37:10.151Z caller=main.go:945 level=info fs_type=XFS_SUPER_MAGIC
Apr 11 14:37:10 localhost.localdomain prometheus[26340]: ts=2023-04-11T06:37:10.151Z caller=main.go:948 level=info msg="TSDB started"
Apr 11 14:37:10 localhost.localdomain prometheus[26340]: ts=2023-04-11T06:37:10.151Z caller=main.go:1129 level=info msg="Loading configuration file" filename=/usr/local/prometheus/prometheus.yml
Apr 11 14:37:10 localhost.localdomain prometheus[26340]: ts=2023-04-11T06:37:10.154Z caller=main.go:1166 level=info msg="Completed loading of configuration file" filename=/usr/local/prometheus/prometheus.yml totalDuration=3.146644ms db_storage=2.144µs remote_storage=…µs
Apr 11 14:37:10 localhost.localdomain prometheus[26340]: ts=2023-04-11T06:37:10.154Z caller=main.go:897 level=info msg="Server is ready to receive web requests."
Hint: Some lines were ellipsized, use -l to show in full.
[root@localhost prometheus]#
- 在浏览器使用prometheus查看拨测服务的数据
部署grafana
- 本次grafana使用docker部署,默认账号/密码为admin/admin
[root@localhost ~]# mkdir /usr/local/grafana
[root@localhost ~]# docker run -itd --restart=always --user 0 --name grafana -p 8080:3000 -v /etc/localtime:/etc/localtime -v /usr/local/grafana:/var/lib/grafana grafana/grafana:latest
- grafana添加数据源。选择Data Source>>>>Add data source>>>>prometheus,输入Prometheus的地址、名称等信息。
创建dashboard
- 创建筛选项
- 添加响应时间的仪表盘
- 添加拨测项状态监控
- 添加证书有效期监控
首先需要在拨测服务中添加一个使用https证书的网站作为监控项,比如https://www.baidu.com
- 用到的查询语句
#查询拨测时间语句:
monitor_response_time{job=~"$job",monitor_name=~"$monitor_name"}
#查询拨测状态语句:
monitor_status{job=~"$job",monitor_name=~"$monitor_name"}
#查询证书有效期语句:
monitor_cert_days_remaining{job=~"$job",monitor_name=~"$monitor_name"}
- 测试监控:首先停掉nginx,等待拨测报错后,查看grafana能否获取到拨测状态
[root@localhost ~]# /usr/local/nginx/sbin/nginx -s stop
扫描二维码,在手机上阅读