---
# Monitoring stack: Prometheus, Grafana, Alertmanager, cAdvisor, node-exporter.
# Run with docker stack deploy -c monitoring.yaml monitoring
#
# NOTE(review): this file was reconstructed from a copy whose newlines and
# indentation had been collapsed. The `labels` mappings are nested under
# `deploy` (service-level labels) based on key order in the original;
# confirm this matches where the Caddy / Kuma / Homepage / DIUN integrations
# expect to read them.

services:
  # ============================================================
  # PROMETHEUS — Metrics collection
  # ============================================================
  prometheus:
    image: prom/prometheus:latest
    # Runs as a fixed UID:GID; the bind-mounted data directory below must be
    # writable by this user.
    user: "1964:1964"
    environment:
      TZ: America/Chicago
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --storage.tsdb.retention.time=30d
      # NOTE(review): this flag is set while the authentik import below is
      # commented out; confirm the lifecycle endpoints are not reachable
      # unauthenticated through the reverse proxy.
      - --web.enable-lifecycle
      - --web.console.libraries=/usr/share/prometheus/console_libraries
      - --web.console.templates=/usr/share/prometheus/consoles
    volumes:
      - /DockerVol/prometheus/data:/prometheus
      # Configuration is mounted read-only.
      - /DockerVol/prometheus/config:/etc/prometheus:ro
    networks:
      - netgrimoire
    deploy:
      placement:
        constraints:
          # Pinned to the node that owns the /DockerVol bind mounts.
          - node.hostname == znas
          - node.platform.arch != aarch64
          - node.platform.arch != arm
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      labels:
        # --- Caddy ---
        caddy: prometheus.netgrimoire.com
        caddy.reverse_proxy: prometheus:9090
        caddy.import: crowdsec
        # caddy.import_1: authentik
        # --- Uptime Kuma ---
        kuma.prometheus.http.name: Prometheus
        kuma.prometheus.http.url: https://prometheus.netgrimoire.com
        # --- Homepage ---
        homepage.group: Monitoring
        homepage.name: Prometheus
        homepage.icon: prometheus.png
        homepage.href: https://prometheus.netgrimoire.com
        homepage.description: Metrics Collection
        homepage.widget.type: prometheus
        homepage.widget.url: http://prometheus:9090
        # --- DIUN ---
        diun.enable: "true"

  # ============================================================
  # GRAFANA — Dashboards
  # ============================================================
  grafana:
    image: grafana/grafana:latest
    user: "1964:1964"
    environment:
      TZ: America/Chicago
      GF_SECURITY_ADMIN_USER: admin
      # NOTE(security): plaintext credential committed with the stack file.
      # Consider moving it to a Docker secret and referencing it through
      # Grafana's `__FILE` environment variable form, then rotating it.
      # Value kept unchanged here so the running deployment is not altered.
      GF_SECURITY_ADMIN_PASSWORD: "F@lcon13"
      GF_USERS_DEFAULT_THEME: dark
      GF_SERVER_ROOT_URL: https://grafana.netgrimoire.com
      GF_FEATURE_TOGGLES_ENABLE: publicDashboards
    volumes:
      - /DockerVol/grafana/data:/var/lib/grafana
      - /DockerVol/grafana/provisioning:/etc/grafana/provisioning
    networks:
      - netgrimoire
    deploy:
      placement:
        constraints:
          - node.hostname == znas
          - node.platform.arch != aarch64
          - node.platform.arch != arm
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      labels:
        # --- Caddy ---
        caddy: grafana.netgrimoire.com
        caddy.reverse_proxy: grafana:3000
        caddy.import: crowdsec
        # caddy.import_1: authentik
        # --- Uptime Kuma ---
        kuma.grafana.http.name: Grafana
        kuma.grafana.http.url: https://grafana.netgrimoire.com
        # --- Homepage ---
        homepage.group: Monitoring
        homepage.name: Grafana
        homepage.icon: grafana.png
        homepage.href: https://grafana.netgrimoire.com
        homepage.description: Metrics Dashboards
        homepage.widget.type: grafana
        homepage.widget.url: http://grafana:3000
        homepage.widget.username: admin
        # NOTE(security): same plaintext admin credential as above, exposed
        # as a label; must be kept in sync with GF_SECURITY_ADMIN_PASSWORD.
        homepage.widget.password: "F@lcon13"
        # --- DIUN ---
        diun.enable: "true"

  # ============================================================
  # ALERTMANAGER — Alert routing → ntfy
  # ============================================================
  alertmanager:
    image: prom/alertmanager:latest
    user: "1964:1964"
    environment:
      TZ: America/Chicago
    command:
      - --config.file=/etc/alertmanager/alertmanager.yml
      - --storage.path=/alertmanager
      # Matches the public hostname published through Caddy below.
      - --web.external-url=https://alertmanager.netgrimoire.com
    volumes:
      - /DockerVol/alertmanager/data:/alertmanager
      - /DockerVol/alertmanager/config:/etc/alertmanager:ro
    networks:
      - netgrimoire
    deploy:
      placement:
        constraints:
          - node.hostname == znas
          - node.platform.arch != aarch64
          - node.platform.arch != arm
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      labels:
        # --- Caddy ---
        caddy: alertmanager.netgrimoire.com
        caddy.reverse_proxy: alertmanager:9093
        caddy.import: crowdsec
        # caddy.import_1: authentik
        # --- Uptime Kuma ---
        kuma.alertmanager.http.name: Alertmanager
        kuma.alertmanager.http.url: https://alertmanager.netgrimoire.com
        # --- Homepage ---
        homepage.group: Monitoring
        homepage.name: Alertmanager
        homepage.icon: alertmanager.png
        homepage.href: https://alertmanager.netgrimoire.com
        homepage.description: Alert Routing
        # --- DIUN ---
        diun.enable: "true"

  # ============================================================
  # CADVISOR — Container metrics (all nodes)
  # ============================================================
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    environment:
      TZ: America/Chicago
    command:
      - --docker_only=true
      - --store_container_labels=false
      - --disable_metrics=disk,diskIO,network,tcp,udp,percpu,sched,process
    volumes:
      # Host paths are all mounted read-only.
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
    networks:
      - netgrimoire
    deploy:
      # One task per eligible node rather than a fixed replica count.
      mode: global
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      placement:
        constraints:
          - node.platform.arch != aarch64
          - node.platform.arch != arm
      labels:
        # --- DIUN ---
        diun.enable: "true"

  # ============================================================
  # NODE EXPORTER — Host metrics (all nodes)
  # ============================================================
  node-exporter:
    image: prom/node-exporter:latest
    environment:
      TZ: America/Chicago
    command:
      - --path.rootfs=/host
      # `$$` is the Compose escape for a literal `$`, so the regex the
      # exporter receives ends in `($|/)`. Quoted so YAML never reinterprets
      # the regex metacharacters.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    volumes:
      # Host root filesystem, read-only, exposed at the --path.rootfs target.
      - /:/host:ro,rslave
    networks:
      - netgrimoire
    deploy:
      mode: global
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      placement:
        constraints:
          - node.platform.arch != aarch64
          - node.platform.arch != arm
      labels:
        # --- DIUN ---
        diun.enable: "true"

networks:
  # Pre-existing network; this stack attaches to it and does not create it.
  netgrimoire:
    external: true