# ant-node 0.10.0
#
# Pure quantum-proof network node for the Autonomi decentralized network
# Documentation
groups:
  - name: ant-node-alerts
    rules:
      # Node availability: raised when a target's `up` series has stayed at 0
      # for 2 minutes.
      # NOTE(review): `up` has no label matcher here, so every scrape target
      # known to this Prometheus is covered, not only ant-node jobs - confirm
      # that is intended.
      # NOTE(review): the description reads a `region` label; presumably it is
      # attached to the `up` series at scrape time - verify.
      - alert: AntNodeDown
        expr: up == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: 'Autonomi node unreachable'
          description: >-
            Node {{ $labels.instance }} in {{ $labels.region }} has been down
            for more than 2 minutes

      # Health status: warns once p2p_health_status has read 0 for 5 minutes.
      - alert: AntNodeUnhealthy
        expr: p2p_health_status == 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Autonomi node unhealthy'
          description: >-
            Node {{ $labels.instance }} is reporting unhealthy status for more
            than 5 minutes

      # Peer connectivity: warns when a node reports fewer than 3 peers for
      # 5 minutes. A count of 0 also satisfies this expression, so this rule
      # overlaps with the critical AntNoPeers rule.
      - alert: AntLowPeerCount
        expr: p2p_network_peer_count < 3
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Low peer count'
          description: >-
            Node {{ $labels.instance }} has only {{ $value }} peers
            (minimum 3 expected)

      # Critical peer-connectivity rule: the reported peer count has been
      # exactly 0 for 2 minutes.
      - alert: AntNoPeers
        expr: p2p_network_peer_count == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: 'Node has no peers'
          description: >-
            Node {{ $labels.instance }} has no peers - potentially isolated

      # DHT health: warns when p2p_dht_routing_table_size stays below 20 for
      # 10 minutes.
      - alert: AntLowDHTSize
        expr: p2p_dht_routing_table_size < 20
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: 'Low DHT routing table'
          description: >-
            Node {{ $labels.instance }} has only {{ $value }} DHT entries
            (minimum 20 expected)

      # Network stability: warns when the 5-minute per-second rate of
      # p2p_network_failed_connections_total stays above 0.1 for 5 minutes.
      # NOTE(review): the alert name mentions packet loss, while the expression
      # and annotations measure failed connections. The name is left as-is so
      # anything keyed on it keeps matching.
      - alert: AntHighPacketLoss
        expr: rate(p2p_network_failed_connections_total[5m]) > 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'High connection failure rate'
          description: >-
            Node {{ $labels.instance }} is experiencing high connection
            failures

  - name: ant-cluster-alerts
    rules:
      # Cluster-wide health: critical when fewer than 95% of the series
      # exporting p2p_health_status carry the value 1, sustained for 5 minutes.
      # `== bool 1` maps every series to 1 or 0 instead of filtering, so the
      # numerator is still present (as 0) when no node is healthy. A plain
      # `== 1` filter leaves sum() without input in that case, the division
      # yields nothing, and the rule would stay silent during a total outage.
      - alert: AntClusterUnhealthy
        expr: (sum(p2p_health_status == bool 1) / count(p2p_health_status)) < 0.95
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Cluster health below 95%"
          description: "Only {{ $value | humanizePercentage }} of nodes are healthy"

      # Regional issues: warns when, within one `region` label value, fewer
      # than 90% of the series exporting p2p_health_status carry the value 1
      # for 5 minutes.
      # `== bool 1` maps every series to 1 or 0 instead of filtering, so a
      # region with zero healthy nodes still produces a numerator of 0. With a
      # plain `== 1` filter that region disappears from the left-hand side and
      # the rule cannot fire for it.
      - alert: AntRegionDegraded
        expr: (sum(p2p_health_status == bool 1) by (region) / count(p2p_health_status) by (region)) < 0.9
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Region {{ $labels.region }} degraded"
          description: "Only {{ $value | humanizePercentage }} of nodes in {{ $labels.region }} are healthy"

      # Network partitioning: warns when the standard deviation of
      # p2p_network_peer_count across all reporting series stays above 10 for
      # 10 minutes. The result is a single aggregated value, so no instance
      # label is available to the annotations.
      - alert: AntPotentialPartition
        expr: stddev(p2p_network_peer_count) > 10
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: 'Possible network partition'
          description: >-
            High variance in peer counts suggests potential network partition

  - name: ant-payment-alerts
    rules:
      # Payment verification: warns when the 5-minute per-second rate of
      # payment_verification_failed_total stays above 0.01 for 5 minutes.
      - alert: AntPaymentVerificationFailures
        expr: rate(payment_verification_failed_total[5m]) > 0.01
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Payment verification failures'
          description: >-
            Node {{ $labels.instance }} has elevated payment verification
            failures

      # Quote generation: warns when the 95th percentile derived from the
      # quote_generation_duration_seconds histogram buckets (5-minute rate)
      # stays above 1 second for 5 minutes. The buckets are not aggregated,
      # so the quantile is evaluated per label set.
      - alert: AntQuoteGenerationSlow
        expr: histogram_quantile(0.95, rate(quote_generation_duration_seconds_bucket[5m])) > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Slow quote generation'
          description: >-
            95th percentile quote generation time is {{ $value }}s
            (expected < 1s)

  # NEW in 0.7.5/0.7.6: Security and Trust Alerts
  - name: ant-security-alerts
    rules:
      # IP Diversity (NEW in 0.7.5): warns when the 5-minute per-second rate
      # of ant_ip_diversity_rejections_total stays above 0.1 for 5 minutes.
      - alert: AntHighIPDiversityRejections
        expr: rate(ant_ip_diversity_rejections_total[5m]) > 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'High IP diversity rejections'
          description: >-
            Node {{ $labels.instance }} is rejecting {{ $value | humanize }}/s
            peers due to IP diversity enforcement

      # Geographic Diversity (NEW in 0.7.5): warns when the 5-minute
      # per-second rate of ant_geographic_diversity_rejections_total stays
      # above 0.1 for 5 minutes.
      - alert: AntHighGeoDiversityRejections
        expr: rate(ant_geographic_diversity_rejections_total[5m]) > 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'High geographic diversity rejections'
          description: >-
            Node {{ $labels.instance }} is rejecting {{ $value | humanize }}/s
            peers due to geographic diversity enforcement

      # Trust Threshold Violations (NEW in 0.7.6): warns when the 5-minute
      # per-second rate of ant_trust_threshold_violations_total stays above
      # 0.05 for 5 minutes.
      - alert: AntTrustViolationsHigh
        expr: rate(ant_trust_threshold_violations_total[5m]) > 0.05
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Elevated trust violations'
          description: >-
            Node {{ $labels.instance }} is seeing {{ $value | humanize }}/s
            trust threshold violations

      # Low Trust Nodes (NEW in 0.7.6): warns when ant_low_trust_nodes_current
      # stays above 10 for 10 minutes.
      - alert: AntHighLowTrustNodes
        expr: ant_low_trust_nodes_current > 10
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: 'Many low trust nodes detected'
          description: >-
            Node {{ $labels.instance }} is tracking {{ $value }} low trust
            nodes in its routing table

      # Enforcement Mode Strict (NEW in 0.7.6): informational alert raised
      # once ant_enforcement_mode_strict has equalled 1 for 1 minute.
      - alert: AntStrictEnforcementActivated
        expr: ant_enforcement_mode_strict == 1
        for: 1m
        labels:
          severity: info
        annotations:
          summary: 'Strict enforcement mode activated'
          description: >-
            Node {{ $labels.instance }} has activated strict trust
            enforcement mode

      # Close Group Failures (NEW in 0.7.6): warns when the 5-minute
      # per-second rate of ant_close_group_failure_by_type stays above 0.01
      # for 5 minutes. The description reads the series' `type` label.
      # NOTE(review): rate() presumes this metric is a counter even though its
      # name has no `_total` suffix - confirm against the exporter.
      - alert: AntCloseGroupFailures
        expr: rate(ant_close_group_failure_by_type[5m]) > 0.01
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Close group failures detected'
          description: >-
            Node {{ $labels.instance }} is experiencing close group failures of
            type {{ $labels.type }} at {{ $value | humanize }}/s

      # High Eviction Rate (NEW in 0.7.6): warns when the 5-minute per-second
      # rate of ant_eviction_by_reason, summed per instance across its other
      # labels, stays above 0.1 for 5 minutes.
      # NOTE(review): rate() presumes this metric is a counter even though its
      # name has no `_total` suffix - confirm against the exporter.
      - alert: AntHighEvictionRate
        expr: sum(rate(ant_eviction_by_reason[5m])) by (instance) > 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'High node eviction rate'
          description: >-
            Node {{ $labels.instance }} is evicting nodes
            at {{ $value | humanize }}/s