# Monitoring stack for Docker Swarm: Prometheus, Grafana, Alertmanager,
# Blackbox Exporter, cAdvisor and Node Exporter on the external
# `netgrimoire` overlay network.
#
# Run with:
#   export GRAFANA_ADMIN_PASSWORD='<grafana admin password>'
#   docker stack deploy -c monitoring.yaml monitoring
#
# The Grafana admin password is interpolated from the deploying shell's
# environment so it is not stored in this file. The deploy aborts with an
# error if the variable is unset or empty. The password that used to be
# written here in plain text should be treated as exposed and rotated.
#
# NOTE(review): `labels` are nested under `deploy` (Swarm service labels),
# following the key order of the original file — confirm this matches what
# Caddy / Uptime Kuma / Homepage / DIUN are configured to read.
---
services:
  # ============================================================
  # PROMETHEUS — Metrics collection
  # ============================================================
  prometheus:
    image: prom/prometheus:latest
    user: "1964:1964"
    environment:
      TZ: America/Chicago
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --storage.tsdb.retention.time=30d
      # Allows config reload / shutdown through the HTTP API.
      - --web.enable-lifecycle
      - --web.console.libraries=/usr/share/prometheus/console_libraries
      - --web.console.templates=/usr/share/prometheus/consoles
    volumes:
      - /DockerVol/prometheus/data:/prometheus
      - /DockerVol/prometheus/config:/etc/prometheus:ro
    networks:
      - netgrimoire
    deploy:
      placement:
        # Pinned to the node that holds /DockerVol; ARM nodes are excluded.
        constraints:
          - node.hostname == znas
          - node.platform.arch != aarch64
          - node.platform.arch != arm
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      labels:
        # --- Caddy ---
        caddy: prometheus.netgrimoire.com
        caddy.reverse_proxy: prometheus:9090
        caddy.import: crowdsec
        caddy.import_1: authentik
        # --- Uptime Kuma ---
        kuma.prometheus.http.name: Prometheus
        kuma.prometheus.http.url: https://prometheus.netgrimoire.com
        # --- Homepage ---
        homepage.group: Monitoring
        homepage.name: Prometheus
        homepage.icon: prometheus.png
        homepage.href: https://prometheus.netgrimoire.com
        homepage.description: Metrics Collection
        homepage.widget.type: prometheus
        homepage.widget.url: http://prometheus:9090
        # --- DIUN ---
        diun.enable: "true"

  # ============================================================
  # GRAFANA — Dashboards
  # ============================================================
  grafana:
    image: grafana/grafana:latest
    user: "1964:1964"
    environment:
      TZ: America/Chicago
      GF_SECURITY_ADMIN_USER: admin
      # Supplied at deploy time; never commit the literal value.
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD must be set}"
      GF_USERS_DEFAULT_THEME: dark
      GF_SERVER_ROOT_URL: https://grafana.netgrimoire.com
      GF_FEATURE_TOGGLES_ENABLE: publicDashboards
    volumes:
      - /DockerVol/grafana/data:/var/lib/grafana
      - /DockerVol/grafana/provisioning:/etc/grafana/provisioning
    networks:
      - netgrimoire
    deploy:
      placement:
        # Pinned to the node that holds /DockerVol; ARM nodes are excluded.
        constraints:
          - node.hostname == znas
          - node.platform.arch != aarch64
          - node.platform.arch != arm
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      labels:
        # --- Caddy ---
        caddy: grafana.netgrimoire.com
        caddy.reverse_proxy: grafana:3000
        caddy.import: crowdsec
        caddy.import_1: authentik
        # --- Uptime Kuma ---
        kuma.grafana.http.name: Grafana
        kuma.grafana.http.url: https://grafana.netgrimoire.com
        # --- Homepage ---
        homepage.group: Monitoring
        homepage.name: Grafana
        homepage.icon: grafana.png
        homepage.href: https://grafana.netgrimoire.com
        homepage.description: Metrics Dashboards
        homepage.widget.type: grafana
        homepage.widget.url: http://grafana:3000
        homepage.widget.username: admin
        # Same deploy-time variable as GF_SECURITY_ADMIN_PASSWORD above.
        # The resolved value is still stored as a label on the service.
        homepage.widget.password: "${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD must be set}"
        # --- DIUN ---
        diun.enable: "true"

  # ============================================================
  # ALERTMANAGER — Alert routing → ntfy
  # ============================================================
  alertmanager:
    image: prom/alertmanager:latest
    user: "1964:1964"
    environment:
      TZ: America/Chicago
    command:
      - --config.file=/etc/alertmanager/alertmanager.yml
      - --storage.path=/alertmanager
      - --web.external-url=https://alertmanager.netgrimoire.com
    volumes:
      - /DockerVol/alertmanager/data:/alertmanager
      - /DockerVol/alertmanager/config:/etc/alertmanager:ro
    networks:
      - netgrimoire
    deploy:
      placement:
        # Pinned to the node that holds /DockerVol; ARM nodes are excluded.
        constraints:
          - node.hostname == znas
          - node.platform.arch != aarch64
          - node.platform.arch != arm
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      labels:
        # --- Caddy ---
        caddy: alertmanager.netgrimoire.com
        caddy.reverse_proxy: alertmanager:9093
        caddy.import: crowdsec
        caddy.import_1: authentik
        # --- Uptime Kuma ---
        kuma.alertmanager.http.name: Alertmanager
        kuma.alertmanager.http.url: https://alertmanager.netgrimoire.com
        # --- Homepage ---
        homepage.group: Monitoring
        homepage.name: Alertmanager
        homepage.icon: alertmanager.png
        homepage.href: https://alertmanager.netgrimoire.com
        homepage.description: Alert Routing
        # --- DIUN ---
        diun.enable: "true"

  # ============================================================
  # BLACKBOX EXPORTER — HTTP/TCP/ICMP probing
  # ============================================================
  blackbox:
    image: prom/blackbox-exporter:latest
    environment:
      TZ: America/Chicago
    command:
      - --config.file=/etc/blackbox/blackbox.yml
    volumes:
      - /DockerVol/blackbox/config:/etc/blackbox:ro
    networks:
      - netgrimoire
    deploy:
      placement:
        # Pinned to the node that holds /DockerVol; ARM nodes are excluded.
        constraints:
          - node.hostname == znas
          - node.platform.arch != aarch64
          - node.platform.arch != arm
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      labels:
        # --- Caddy ---
        caddy: blackbox.netgrimoire.com
        caddy.reverse_proxy: blackbox:9115
        caddy.import: crowdsec
        caddy.import_1: authentik
        # --- Uptime Kuma ---
        kuma.blackbox.http.name: Blackbox Exporter
        kuma.blackbox.http.url: https://blackbox.netgrimoire.com
        # --- Homepage ---
        homepage.group: Monitoring
        homepage.name: Blackbox
        homepage.icon: prometheus.png
        homepage.href: https://blackbox.netgrimoire.com
        homepage.description: HTTP/TCP Probing
        # --- DIUN ---
        diun.enable: "true"

  # ============================================================
  # CADVISOR — Container metrics (one task per node)
  # Multi-arch image — runs on aarch64 and x86_64
  # ============================================================
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    # Swarm template: each task takes the hostname of the node it runs on.
    hostname: "{{.Node.Hostname}}"
    environment:
      TZ: America/Chicago
    command:
      - --docker_only=true
      - --store_container_labels=false
      # Turns off the listed metric groups.
      - --disable_metrics=disk,diskIO,network,tcp,udp,percpu,sched,process
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
    networks:
      - netgrimoire
    deploy:
      # No placement constraints: global mode schedules on every node.
      mode: global
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      labels:
        # --- DIUN ---
        diun.enable: "true"

  # ============================================================
  # NODE EXPORTER — Host metrics (all nodes including Pi)
  # Multi-arch image — runs on aarch64 and x86_64
  # ============================================================
  node-exporter:
    image: prom/node-exporter:latest
    # Swarm template: each task takes the hostname of the node it runs on.
    hostname: "{{.Node.Hostname}}"
    environment:
      TZ: America/Chicago
      NODE_HOSTNAME: "{{.Node.Hostname}}"
    command:
      - --path.rootfs=/host
      # `$$` is an escaped literal `$`, so Compose interpolation leaves the
      # regex end-anchor intact.
      - --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)
      # NOTE(review): nothing in this file mounts a volume at
      # /etc/node-exporter — confirm whether a host mount for textfile
      # metrics is missing.
      - --collector.textfile.directory=/etc/node-exporter
    volumes:
      - /:/host:ro,rslave
      - /etc/hostname:/etc/hostname:ro
    networks:
      - netgrimoire
    deploy:
      # No placement constraints: global mode schedules on every node.
      mode: global
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      labels:
        # --- DIUN ---
        diun.enable: "true"

networks:
  # Pre-existing network shared with the rest of the swarm; not created here.
  netgrimoire:
    external: true