# Run with: docker stack deploy -c monitoring.yaml monitoring
services:

# ============================================================
# PROMETHEUS — Metrics collection
# ============================================================
  prometheus:
    image: prom/prometheus:latest
    # Fixed UID:GID for the container process.
    user: "1964:1964"
    environment:
      TZ: America/Chicago
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      # Keep 30 days of samples.
      - --storage.tsdb.retention.time=30d
      # Turns on the HTTP reload/quit endpoints.
      # NOTE(review): this service is also published through Caddy below;
      # confirm the imported `crowdsec` snippet restricts who can reach them.
      - --web.enable-lifecycle
      - --web.console.libraries=/usr/share/prometheus/console_libraries
      - --web.console.templates=/usr/share/prometheus/consoles
    volumes:
      # TSDB data (read-write) and configuration (read-only) from the host.
      - /DockerVol/prometheus/data:/prometheus
      - /DockerVol/prometheus/config:/etc/prometheus:ro
    networks:
      - netgrimoire
    deploy:
      placement:
        constraints:
          # Pinned to the node named znas; ARM architectures are excluded.
          - node.hostname == znas
          - node.platform.arch != aarch64
          - node.platform.arch != arm
      restart_policy:
        # Restart on any exit, 5s between attempts, at most 3 attempts;
        # 120s is the window used to decide whether a restart succeeded.
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      # NOTE(review): nesting was rebuilt from a flattened copy of this file;
      # `labels` is assumed to belong under `deploy` (service labels) —
      # confirm against the deployed stack.
      labels:
        # --- Caddy ---
        caddy: prometheus.netgrimoire.com
        caddy.reverse_proxy: prometheus:9090
        caddy.import: crowdsec

        # --- Uptime Kuma ---
        kuma.prometheus.http.name: Prometheus
        kuma.prometheus.http.url: https://prometheus.netgrimoire.com

        # --- Homepage ---
        homepage.group: Monitoring
        homepage.name: Prometheus
        homepage.icon: prometheus.png
        homepage.href: https://prometheus.netgrimoire.com
        homepage.description: Metrics Collection
        homepage.widget.type: prometheus
        homepage.widget.url: http://prometheus:9090

        # --- DIUN ---
        diun.enable: "true"

# ============================================================
# GRAFANA — Dashboards
# ============================================================
  grafana:
    image: grafana/grafana:latest
    # Fixed UID:GID for the container process.
    user: "1964:1964"
    environment:
      TZ: America/Chicago
      GF_SECURITY_ADMIN_USER: admin
      # The admin password is no longer stored in this file. Export
      # GRAFANA_ADMIN_PASSWORD in the shell that runs `docker stack deploy`;
      # the deploy aborts if it is unset or empty.
      # The value that used to be hard-coded here should be treated as
      # exposed and rotated. Grafana applies this setting when the admin
      # account is first created, so an existing install also needs the
      # password changed inside Grafana.
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:?must be set}"
      GF_USERS_DEFAULT_THEME: dark
      GF_SERVER_ROOT_URL: https://grafana.netgrimoire.com
      GF_FEATURE_TOGGLES_ENABLE: publicDashboards
    volumes:
      # Grafana state and provisioning files from the host (both read-write).
      - /DockerVol/grafana/data:/var/lib/grafana
      - /DockerVol/grafana/provisioning:/etc/grafana/provisioning
    networks:
      - netgrimoire
    deploy:
      placement:
        constraints:
          # Pinned to the node named znas; ARM architectures are excluded.
          - node.hostname == znas
          - node.platform.arch != aarch64
          - node.platform.arch != arm
      restart_policy:
        # Restart on any exit, 5s between attempts, at most 3 attempts;
        # 120s is the window used to decide whether a restart succeeded.
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      # NOTE(review): nesting was rebuilt from a flattened copy of this file;
      # `labels` is assumed to belong under `deploy` (service labels) —
      # confirm against the deployed stack.
      labels:
        # --- Caddy ---
        caddy: grafana.netgrimoire.com
        caddy.reverse_proxy: grafana:3000
        caddy.import: crowdsec

        # --- Uptime Kuma ---
        kuma.grafana.http.name: Grafana
        kuma.grafana.http.url: https://grafana.netgrimoire.com

        # --- Homepage ---
        homepage.group: Monitoring
        homepage.name: Grafana
        homepage.icon: grafana.png
        homepage.href: https://grafana.netgrimoire.com
        homepage.description: Metrics Dashboards
        homepage.widget.type: grafana
        homepage.widget.url: http://grafana:3000
        homepage.widget.username: admin
        # Same deploy-time variable as GF_SECURITY_ADMIN_PASSWORD above.
        homepage.widget.password: "${GRAFANA_ADMIN_PASSWORD:?must be set}"

        # --- DIUN ---
        diun.enable: "true"

# ============================================================
# ALERTMANAGER — Alert routing → ntfy
# ============================================================
  alertmanager:
    image: prom/alertmanager:latest
    # Fixed UID:GID for the container process.
    user: "1964:1964"
    environment:
      TZ: America/Chicago
    command:
      - --config.file=/etc/alertmanager/alertmanager.yml
      - --storage.path=/alertmanager
      # Public URL; matches the Caddy hostname in the labels below.
      - --web.external-url=https://alertmanager.netgrimoire.com
    volumes:
      # Alertmanager state (read-write) and configuration (read-only).
      - /DockerVol/alertmanager/data:/alertmanager
      - /DockerVol/alertmanager/config:/etc/alertmanager:ro
    networks:
      - netgrimoire
    deploy:
      placement:
        constraints:
          # Pinned to the node named znas; ARM architectures are excluded.
          - node.hostname == znas
          - node.platform.arch != aarch64
          - node.platform.arch != arm
      restart_policy:
        # Restart on any exit, 5s between attempts, at most 3 attempts;
        # 120s is the window used to decide whether a restart succeeded.
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      # NOTE(review): nesting was rebuilt from a flattened copy of this file;
      # `labels` is assumed to belong under `deploy` (service labels) —
      # confirm against the deployed stack.
      labels:
        # --- Caddy ---
        caddy: alertmanager.netgrimoire.com
        caddy.reverse_proxy: alertmanager:9093
        caddy.import: crowdsec

        # --- Uptime Kuma ---
        kuma.alertmanager.http.name: Alertmanager
        kuma.alertmanager.http.url: https://alertmanager.netgrimoire.com

        # --- Homepage ---
        homepage.group: Monitoring
        homepage.name: Alertmanager
        homepage.icon: alertmanager.png
        homepage.href: https://alertmanager.netgrimoire.com
        homepage.description: Alert Routing

        # --- DIUN ---
        diun.enable: "true"

# ============================================================
# BLACKBOX EXPORTER — HTTP/TCP/ICMP probing
# ============================================================
  blackbox:
    image: prom/blackbox-exporter:latest
    environment:
      TZ: America/Chicago
    cap_add:
      # Raw-socket capability; presumably for the ICMP probes named in the
      # banner above — confirm against blackbox.yml.
      - NET_RAW
    command:
      - --config.file=/etc/blackbox/blackbox.yml
    volumes:
      # Probe configuration, read-only.
      - /DockerVol/blackbox/config:/etc/blackbox:ro
    networks:
      - netgrimoire
    deploy:
      placement:
        constraints:
          # Pinned to the node named znas; ARM architectures are excluded.
          - node.hostname == znas
          - node.platform.arch != aarch64
          - node.platform.arch != arm
      restart_policy:
        # Restart on any exit, 5s between attempts, at most 3 attempts;
        # 120s is the window used to decide whether a restart succeeded.
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      # NOTE(review): nesting was rebuilt from a flattened copy of this file;
      # `labels` is assumed to belong under `deploy` (service labels) —
      # confirm against the deployed stack.
      labels:
        # --- Caddy ---
        caddy: blackbox.netgrimoire.com
        caddy.reverse_proxy: blackbox:9115
        caddy.import: crowdsec

        # --- Uptime Kuma ---
        kuma.blackbox.http.name: Blackbox Exporter
        kuma.blackbox.http.url: https://blackbox.netgrimoire.com

        # --- Homepage ---
        homepage.group: Monitoring
        homepage.name: Blackbox
        # Reuses the Prometheus icon.
        homepage.icon: prometheus.png
        homepage.href: https://blackbox.netgrimoire.com
        homepage.description: HTTP/TCP Probing

        # --- DIUN ---
        diun.enable: "true"

# ============================================================
# CADVISOR — Container metrics (one task per node)
# Multi-arch image — runs on aarch64 and x86_64
# ============================================================
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    # Swarm template: each task takes the hostname of the node it runs on.
    hostname: "{{.Node.Hostname}}"
    environment:
      TZ: America/Chicago
    command:
      - --docker_only=true
      - --store_container_labels=false
      # Metric groups switched off for this deployment.
      - --disable_metrics=disk,diskIO,network,tcp,udp,percpu,sched,process
    volumes:
      # Host paths exposed read-only to the container.
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
    networks:
      - netgrimoire
    deploy:
      # Global mode: no placement constraints, one task on every node.
      mode: global
      restart_policy:
        # Restart on any exit, 5s between attempts, at most 3 attempts;
        # 120s is the window used to decide whether a restart succeeded.
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      # NOTE(review): nesting was rebuilt from a flattened copy of this file;
      # `labels` is assumed to belong under `deploy` (service labels) —
      # confirm against the deployed stack.
      labels:
        # --- DIUN ---
        diun.enable: "true"

# ============================================================
# NODE EXPORTER — Host metrics (all nodes including Pi)
# Multi-arch image — runs on aarch64 and x86_64
# ============================================================
  node-exporter:
    image: prom/node-exporter:latest
    # Swarm template: each task takes the hostname of the node it runs on.
    hostname: "{{.Node.Hostname}}"
    environment:
      TZ: America/Chicago
      NODE_HOSTNAME: "{{.Node.Hostname}}"
    command:
      # The host filesystem is mounted at /host (see volumes).
      - --path.rootfs=/host
      # `$$` is the Compose escape for a literal `$` in the regex.
      - --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)
      # NOTE(review): no volume in this service mounts /etc/node-exporter,
      # so this directory exists only if the image provides it. If host
      # textfiles are intended, confirm whether the path should be reached
      # through the /host mount instead.
      - --collector.textfile.directory=/etc/node-exporter
    volumes:
      # Whole host root, read-only, with rslave mount propagation.
      - /:/host:ro,rslave
      - /etc/hostname:/etc/hostname:ro
    networks:
      - netgrimoire
    deploy:
      # Global mode: no placement constraints, one task on every node.
      mode: global
      restart_policy:
        # Restart on any exit, 5s between attempts, at most 3 attempts;
        # 120s is the window used to decide whether a restart succeeded.
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      # NOTE(review): nesting was rebuilt from a flattened copy of this file;
      # `labels` is assumed to belong under `deploy` (service labels) —
      # confirm against the deployed stack.
      labels:
        # --- DIUN ---
        diun.enable: "true"

networks:
  # Shared network attached to every service in this file. `external: true`
  # means this stack does not create it; it must exist before deploying.
  netgrimoire:
    external: true