diff --git a/admin/kubekorner_geosphere_prometheus_rules.yaml b/admin/kubekorner_geosphere_prometheus_rules.yaml index 54c17819d3c9428e905051a646216c881c31c9d7..aa15d8c1c4c4313a010c75a0f7ea994f8aaa3045 100644 --- a/admin/kubekorner_geosphere_prometheus_rules.yaml +++ b/admin/kubekorner_geosphere_prometheus_rules.yaml @@ -211,23 +211,23 @@ spec: summary: "Host unusual network throughput out (instance {{ $labels.instance }})" description: "Host network interfaces are probably sending too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" - alert: HostUnusualDiskReadRate - expr: sum by (instance) (irate(node_disk_read_bytes_total[5m])) / 1024 / 1024 > 50 + expr: sum by (instance) (irate(node_disk_read_bytes_total[5m])) / 1024 / 1024 > 150 for: 5m labels: severity: warning ruleGroup: geosphere-node annotations: summary: "Host unusual disk read rate (instance {{ $labels.instance }})" - description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" + description: "Disk is probably reading too much data (> 150 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" - alert: HostUnusualDiskWriteRate - expr: sum by (instance) (irate(node_disk_written_bytes_total[5m])) / 1024 / 1024 > 50 + expr: sum by (instance) (irate(node_disk_written_bytes_total[5m])) / 1024 / 1024 > 150 for: 5m labels: severity: warning ruleGroup: geosphere-node annotations: summary: "Host unusual disk write rate (instance {{ $labels.instance }})" - description: "Disk is probably writing too much data (> 50 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" + description: "Disk is probably writing too much data (> 150 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" - alert: HostOutOfDiskSpace expr: (node_filesystem_avail_bytes{mountpoint="/"} * 100) / node_filesystem_size_bytes{mountpoint="/"} < 10 for: 5m diff --git a/admin/prometheus_kubernetes_values.yaml b/admin/prometheus_kubernetes_values.yaml index 9b338a3f017e8d3713477f33ecb7c855a3ba8223..5e82f257cebf75ea5510b5a5249f564cc2a710b5 100644 --- a/admin/prometheus_kubernetes_values.yaml +++ b/admin/prometheus_kubernetes_values.yaml @@ -43,7 +43,7 @@ alertmanager: - name: "null" - name: "geosphere-dev-team" slack_configs: - - channel: "#geo2grid" + - channel: "#geosphere" send_resolved: true color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}' title: '[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}'