# Prometheus Alert Rules for VEGAPULS Air Sensors
#
# Installation:
# 1. Copy this file to /etc/prometheus/rules/vegapuls-alerts.yml
# 2. Add to prometheus.yml:
#      rule_files:
#        - /etc/prometheus/rules/vegapuls-alerts.yml
# 3. Reload Prometheus: systemctl reload prometheus
#
# Validate after editing: promtool check rules <this file>

groups:
  # Alerting rules, evaluated every 60s.
  - name: ttn_vegapuls_air_alerts
    interval: 60s
    rules:
      # === Exporter Health ===

      # Fires when Prometheus cannot scrape the exporter target.
      - alert: VEGAPULSExporterDown
        expr: up{job="vegapuls-air"} == 0
        for: 5m
        labels:
          severity: critical
          component: exporter
        annotations:
          summary: "VEGAPULS Air exporter is down"
          description: "The VEGAPULS Air Prometheus exporter has been down for more than 5 minutes. Check the service status."
          runbook: "Check systemctl status vegapuls-exporter and journalctl -u vegapuls-exporter"

      # === Device Online Status ===

      # Fires per device when the exporter reports it as not online.
      - alert: VEGAPULSSensorOffline
        expr: vegapulsair_device_online == 0
        for: 10m
        labels:
          severity: warning
          component: sensor
        annotations:
          summary: "VEGAPULS sensor {{ $labels.device_id }} is offline"
          description: "Sensor {{ $labels.device_id }} has not sent an uplink for more than 19 hours and is considered offline."
          runbook: "Check sensor battery, LoRaWAN coverage, and TTN Console for error messages"

      # Fires per device when the last uplink is older than 24h (86400 s).
      # The metric is already an age in seconds ("seconds ago"), so it is
      # compared directly; subtracting it from time() would yield a value
      # near the current Unix timestamp and the alert would always fire.
      # $value is the age in seconds, as the description's humanizeDuration
      # expects.
      - alert: VEGAPULSSensorMissing
        expr: vegapulsair_last_uplink_seconds_ago > 86400
        for: 30m
        labels:
          severity: critical
          component: sensor
        annotations:
          summary: "VEGAPULS sensor {{ $labels.device_id }} missing for over 24h"
          description: "Sensor {{ $labels.device_id }} has not transmitted for over 24 hours. Last uplink: {{ $value | humanizeDuration }} ago."
          runbook: "Physical inspection required. Check sensor power and installation."

      # === Battery Monitoring ===
      # Three tiers with overlapping thresholds: a device below 10% also
      # satisfies the 20% and 30% expressions.

      - alert: VEGAPULSBatteryCritical
        expr: vegapulsair_battery_percent < 10
        for: 1h
        labels:
          severity: critical
          component: battery
        annotations:
          summary: "VEGAPULS sensor {{ $labels.device_id }} battery critically low"
          description: "Battery level at {{ $value }}%. Sensor will stop functioning soon. Immediate replacement required."
          runbook: "Schedule urgent battery replacement"

      - alert: VEGAPULSBatteryLow
        expr: vegapulsair_battery_percent < 20
        for: 6h
        labels:
          severity: warning
          component: battery
        annotations:
          summary: "VEGAPULS sensor {{ $labels.device_id }} battery low"
          description: "Battery level at {{ $value }}%. Plan battery replacement soon."
          runbook: "Schedule battery replacement within 2-4 weeks"

      - alert: VEGAPULSBatteryWarning
        expr: vegapulsair_battery_percent < 30
        for: 12h
        labels:
          severity: info
          component: battery
        annotations:
          summary: "VEGAPULS sensor {{ $labels.device_id }} battery below 30%"
          description: "Battery level at {{ $value }}%. Monitor and plan replacement."
          runbook: "Add to maintenance schedule for next quarter"

      # === Signal Quality ===

      - alert: VEGAPULSWeakSignal
        expr: vegapulsair_rssi_dbm < -120
        for: 1h
        labels:
          severity: warning
          component: network
        annotations:
          summary: "VEGAPULS sensor {{ $labels.device_id }} has weak signal"
          description: "RSSI is {{ $value }} dBm (very weak). May indicate coverage issues or antenna problems."
          runbook: "Check gateway coverage, sensor placement, and antenna connection"

      - alert: VEGAPULSPoorSNR
        expr: vegapulsair_snr_db < -15
        for: 1h
        labels:
          severity: warning
          component: network
        annotations:
          summary: "VEGAPULS sensor {{ $labels.device_id }} has poor SNR"
          description: "Signal-to-Noise Ratio is {{ $value }} dB. Signal quality is degraded."
          runbook: "Check for interference, gateway issues, or repositioning sensor"

      # === Temperature Monitoring ===

      # Fires when the reported temperature is above 60 or below -20.
      - alert: VEGAPULSTemperatureExtreme
        expr: |
          vegapulsair_temperature_celsius > 60
          or
          vegapulsair_temperature_celsius < -20
        for: 30m
        labels:
          severity: warning
          component: environment
        annotations:
          summary: "VEGAPULS sensor {{ $labels.device_id }} extreme temperature"
          description: "Temperature is {{ $value }}°C, outside normal operating range."
          runbook: "Check sensor location and environmental conditions"

      # === Data Quality ===

      # Exporter is serving requests but reports zero devices.
      # NOTE(review): `and` matches on identical label sets; confirm both
      # metrics carry the same labels, otherwise this never fires.
      - alert: VEGAPULSNoDataReceived
        expr: |
          rate(vegapulsair_exporter_requests_total[5m]) > 0
          and
          vegapulsair_devices_total == 0
        for: 15m
        labels:
          severity: warning
          component: integration
        annotations:
          summary: "VEGAPULS exporter receiving no device data"
          description: "Exporter is running and being scraped, but no device data is available. Check MQTT connection and TTN configuration."
          runbook: "Check exporter logs, TTN Console live data, and MQTT credentials"

      # Devices are known but none is online. `and` keeps the left-hand
      # sample, so $value is the registered device count.
      - alert: VEGAPULSAllDevicesOffline
        expr: |
          vegapulsair_devices_total > 0
          and
          vegapulsair_devices_online == 0
        for: 30m
        labels:
          severity: critical
          component: system
        annotations:
          summary: "All VEGAPULS sensors are offline"
          description: "{{ $value }} devices are registered but none are online. System-wide issue suspected."
          runbook: "Check TTN gateway status, network connectivity, and power supply"

      # === Performance Monitoring ===

      # More than 2 exporter requests per second, averaged over 5 minutes.
      - alert: VEGAPULSHighScrapeRate
        expr: rate(vegapulsair_exporter_requests_total[5m]) > 2
        for: 10m
        labels:
          severity: info
          component: performance
        annotations:
          summary: "High scrape rate on VEGAPULS exporter"
          description: "Prometheus is scraping at {{ $value }} requests/second. Consider increasing scrape_interval."
          runbook: "Review Prometheus configuration and adjust scrape_interval if needed"

  # === Recording Rules for Easier Querying ===
  - name: vegapuls_air_recording_rules
    interval: 60s
    rules:
      # Battery drain rate (percent per day); positive while draining.
      # deriv() is used because the battery level goes down over time:
      # rate() is counter-only and would treat every decrease as a counter
      # reset. deriv() returns the per-second slope, so * -86400 converts
      # it to percent lost per day.
      - record: vegapulsair_battery_drain_rate_percent_per_day
        expr: |
          deriv(vegapulsair_battery_percent[7d]) * -86400

      # Average signal strength per device (7 day)
      - record: vegapulsair_rssi_avg_7d
        expr: |
          avg_over_time(vegapulsair_rssi_dbm[7d])

      # Uplink frequency (uplinks per day)
      # NOTE(review): if the age metric climbs between uplinks and drops
      # back on each one, its time-average is about half the uplink
      # interval, so this would over-estimate roughly 2x. Confirm against
      # the exporter's behaviour before relying on absolute values.
      - record: vegapulsair_uplink_frequency_per_day
        expr: |
          86400 / avg_over_time(vegapulsair_last_uplink_seconds_ago[7d])

      # Device availability percentage (24h)
      - record: vegapulsair_device_availability_percent_24h
        expr: |
          avg_over_time(vegapulsair_device_online[24h]) * 100

# === Usage Examples ===
#
# Query battery drain rate:
#   vegapulsair_battery_drain_rate_percent_per_day
#
# Query devices with availability < 95%:
#   vegapulsair_device_availability_percent_24h < 95
#
# Query average RSSI over 7 days:
#   vegapulsair_rssi_avg_7d
#
# Query uplink frequency:
#   vegapulsair_uplink_frequency_per_day