# Patch for admin/kubekorner_geosphere_prometheus_rules.yaml (one hunk, starting at line 434 of the file).
# Effect: the KubernetesPodNotHealthy alert rule is disabled by turning each of its nine lines
# into a '#' comment; the rule text is kept in the file rather than deleted.
# The context lines around it (the annotations of the StatefulSet-down alert before, and the start
# of KubernetesPodCrashLooping after) are unchanged.
# NOTE(review): the patch itself records no reason for disabling the alert - confirm with the author.
# NOTE(review): the disabled rule's expr aggregates with `sum by (namespace, pod)`, while its summary
# interpolates {{ $labels.instance }}; that label is presumably absent after the aggregation, so the
# summary would render an empty instance if the rule is re-enabled as-is - verify before restoring.
# NOTE(review): this patch appears here with its line breaks collapsed into spaces; the original
# indentation of the YAML lines cannot be determined from this view.
diff --git a/admin/kubekorner_geosphere_prometheus_rules.yaml b/admin/kubekorner_geosphere_prometheus_rules.yaml index 345d74917918162a9d7b2a009e9a44c0d89b5df1..abafad6980f0a2174386004ff6b126fb14b9934e 100644 --- a/admin/kubekorner_geosphere_prometheus_rules.yaml +++ b/admin/kubekorner_geosphere_prometheus_rules.yaml @@ -434,15 +434,15 @@ spec: summary: "Kubernetes StatefulSet down (instance {{ $labels.instance }})" description: "A StatefulSet went down\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" - - alert: KubernetesPodNotHealthy - expr: min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[1h:5m]) > 0 - for: 5m - labels: - severity: critical - ruleGroup: geosphere-kubernetes - annotations: - summary: "Kubernetes Pod not healthy (instance {{ $labels.instance }})" - description: "Pod has been in a non-ready state for longer than an hour.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" +# - alert: KubernetesPodNotHealthy +# expr: min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[1h:5m]) > 0 +# for: 5m +# labels: +# severity: critical +# ruleGroup: geosphere-kubernetes +# annotations: +# summary: "Kubernetes Pod not healthy (instance {{ $labels.instance }})" +# description: "Pod has been in a non-ready state for longer than an hour.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" - alert: KubernetesPodCrashLooping expr: rate(kube_pod_container_status_restarts_total[15m]) * 60 * 5 > 5 for: 5m