diff --git a/prometheus-blackbox-exporter/Chart.yaml b/prometheus-blackbox-exporter/Chart.yaml new file mode 100644 index 0000000000000000000000000000000000000000..03a807cbb3bfd153343aabd32766129a745fbdda --- /dev/null +++ b/prometheus-blackbox-exporter/Chart.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +description: Prometheus Blackbox Exporter +name: prometheus-blackbox-exporter +version: 0.1.1 +appVersion: 0.12.0 +home: https://github.com/prometheus/blackbox_exporter +sources: + - https://github.com/prometheus/blackbox_exporter +keywords: + - prometheus + - blackbox + - monitoring +maintainers: + - name: gianrubio + email: gianrubio@gmail.com diff --git a/prometheus-blackbox-exporter/OWNERS b/prometheus-blackbox-exporter/OWNERS new file mode 100644 index 0000000000000000000000000000000000000000..fe9b2c3d33f0211fd07b721518bee7d782336470 --- /dev/null +++ b/prometheus-blackbox-exporter/OWNERS @@ -0,0 +1,4 @@ +approvers: +- gianrubio +reviewers: +- gianrubio \ No newline at end of file diff --git a/prometheus-blackbox-exporter/README.md b/prometheus-blackbox-exporter/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c1ccdfa285c2e7a16faae2a4afe109a989ae1eba --- /dev/null +++ b/prometheus-blackbox-exporter/README.md @@ -0,0 +1,85 @@ +# Prometheus Blackbox Exporter + +Prometheus exporter for blackbox testing + +Learn more: [https://github.com/prometheus/blackbox_exporter](https://github.com/prometheus/blackbox_exporter) + +## TL;DR; + +```bash +$ helm install stable/prometheus-blackbox-exporter +``` + +## Introduction + +This chart creates a Blackbox-Exporter deployment on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager. 
+ +## Prerequisites + +- Kubernetes 1.8+ with Beta APIs enabled + +## Installing the Chart + +To install the chart with the release name `my-release`: + +```bash +$ helm install --name my-release stable/prometheus-blackbox-exporter +``` + +The command deploys Blackbox Exporter on the Kubernetes cluster using the default configuration. The [configuration](#configuration) section lists the parameters that can be configured during installation. + +## Uninstalling the Chart + +To uninstall/delete the `my-release` deployment: + +```bash +$ helm delete --purge my-release +``` +The command removes all the Kubernetes components associated with the chart and deletes the release. + +## Configuration + +The following table lists the configurable parameters of the Blackbox-Exporter chart and their default values. + +| Parameter | Description | Default | +| -------------------------------------- | ----------------------------------------------- | ----------------------------- | +| `config` | Prometheus blackbox configuration | `http_2xx` and `http_400` modules (see `values.yaml`) | +| `configmapReload.name` | configmap-reload container name | `configmap-reload` | +| `configmapReload.image.repository` | configmap-reload container image repository | `jimmidyson/configmap-reload` | +| `configmapReload.image.tag` | configmap-reload container image tag | `v0.2.2` | +| `configmapReload.image.pullPolicy` | configmap-reload container image pull policy | `IfNotPresent` | +| `configmapReload.extraArgs` | Additional configmap-reload container arguments (not yet wired into the templates) | `{}` | +| `configmapReload.extraConfigmapMounts` | Additional configmap-reload configMap mounts (not yet wired into the templates) | `[]` | +| `configmapReload.resources` | configmap-reload pod resource requests & limits | `{}` | +| `extraArgs` | Optional flags for blackbox | `[]` | +| `image.repository` | container image repository | `prom/blackbox-exporter` | +| `image.tag` | container image tag | `v0.12.0` | +| `image.pullPolicy` | container image pull policy | `IfNotPresent` | +| `ingress.annotations` | Ingress 
annotations | `{}` | +| `ingress.enabled` | Enables Ingress | `false` | +| `ingress.hosts` | Ingress accepted hostnames | `[]` | +| `ingress.tls` | Ingress TLS configuration | `[]` | +| `nodeSelector` | node labels for pod assignment | `{}` | +| `podAnnotations` | annotations to add to each pod | `{}` | +| `resources` | pod resource requests & limits | `{}` | +| `restartPolicy` | container restart policy | `Always` | +| `service.type` | type of service to create | `ClusterIP` | +| `service.port` | port for the blackbox http service | `9115` | +| `service.externalIPs` | list of external IPs | `[]` | + +Specify each parameter using the `--set key=value[,key=value]` argument to `helm install`. For example, + +```bash +$ helm install --name my-release \ + --set key_1=value_1,key_2=value_2 \ + stable/prometheus-blackbox-exporter +``` + +Alternatively, a YAML file that specifies the values for the parameters can be provided while installing the chart. For example, + +```bash +# example for staging +$ helm install --name my-release -f values.yaml stable/prometheus-blackbox-exporter +``` + +> **Tip**: You can use the default [values.yaml](values.yaml) diff --git a/prometheus-blackbox-exporter/templates/NOTES.txt b/prometheus-blackbox-exporter/templates/NOTES.txt new file mode 100644 index 0000000000000000000000000000000000000000..d3ee883946859e2959d5f23379a79c375e4402ff --- /dev/null +++ b/prometheus-blackbox-exporter/templates/NOTES.txt @@ -0,0 +1 @@ +See https://github.com/prometheus/blackbox_exporter/ for how to configure Prometheus and the Blackbox Exporter. diff --git a/prometheus-blackbox-exporter/templates/_helpers.tpl b/prometheus-blackbox-exporter/templates/_helpers.tpl new file mode 100644 index 0000000000000000000000000000000000000000..c537fd623df87fe9a369cb1a60e36ee55bd31e03 --- /dev/null +++ b/prometheus-blackbox-exporter/templates/_helpers.tpl @@ -0,0 +1,31 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. 
+*/}} +{{- define "prometheus-blackbox-exporter.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +*/}} +{{- define "prometheus-blackbox-exporter.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "prometheus-blackbox-exporter.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} diff --git a/prometheus-blackbox-exporter/templates/configmap.yaml b/prometheus-blackbox-exporter/templates/configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d57bd6b9ae92340e9705ee97c75ffc31025410d --- /dev/null +++ b/prometheus-blackbox-exporter/templates/configmap.yaml @@ -0,0 +1,14 @@ +{{- if .Values.config }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" . }} + labels: + chart: {{ template "prometheus-blackbox-exporter.chart" . }} + app: {{ template "prometheus-blackbox-exporter.name" . 
}} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +data: + blackbox.yaml: | +{{ toYaml .Values.config | indent 4 }} +{{- end }} diff --git a/prometheus-blackbox-exporter/templates/deployment.yaml b/prometheus-blackbox-exporter/templates/deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6d0b1a6055bd213312c292214e409aed266bf96 --- /dev/null +++ b/prometheus-blackbox-exporter/templates/deployment.yaml @@ -0,0 +1,83 @@ +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" . }} + labels: + chart: {{ template "prometheus-blackbox-exporter.chart" . }} + app: {{ template "prometheus-blackbox-exporter.name" . }} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +spec: + replicas: {{ .Values.replicas }} + selector: + matchLabels: + app: {{ template "prometheus-blackbox-exporter.name" . }} + release: "{{ .Release.Name }}" + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + type: RollingUpdate + template: + metadata: + labels: + app: {{ template "prometheus-blackbox-exporter.name" . 
}} + release: "{{ .Release.Name }}" + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + spec: + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + restartPolicy: {{ .Values.restartPolicy }} + containers: + - name: blackbox-exporter + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + # Declared once per container: a repeated securityContext key overrides the earlier one. + securityContext: + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + args: +{{- if .Values.config }} + - "--config.file=/config/blackbox.yaml" +{{- else }} + - "--config.file=/etc/blackbox_exporter/config.yml" +{{- end }} + {{- if .Values.extraArgs }} +{{ toYaml .Values.extraArgs | indent 12 }} + {{- end }} + resources: +{{ toYaml .Values.resources | indent 12 }} + ports: + - containerPort: {{ .Values.service.port }} + name: http + livenessProbe: + httpGet: + path: /health + port: http + readinessProbe: + httpGet: + path: /health + port: http + volumeMounts: + - mountPath: /config + name: config + - name: {{ .Values.configmapReload.name }} + image: "{{ .Values.configmapReload.image.repository }}:{{ .Values.configmapReload.image.tag }}" + imagePullPolicy: "{{ .Values.configmapReload.image.pullPolicy }}" + args: + - --volume-dir=/etc/config + - --webhook-url=http://localhost:{{ .Values.service.port }}/-/reload + resources: +{{ toYaml .Values.configmapReload.resources | indent 12 }} + volumeMounts: + - mountPath: /etc/config + name: config + readOnly: true + volumes: + - name: config + configMap: + name: {{ template "prometheus-blackbox-exporter.fullname" . 
}} diff --git a/prometheus-blackbox-exporter/templates/ingress.yaml b/prometheus-blackbox-exporter/templates/ingress.yaml new file mode 100644 index 0000000000000000000000000000000000000000..50779ab8d52449b478feeea007034f065a6996a3 --- /dev/null +++ b/prometheus-blackbox-exporter/templates/ingress.yaml @@ -0,0 +1,30 @@ +{{- if .Values.ingress.enabled -}} +{{- $serviceName := include "prometheus-blackbox-exporter.fullname" . -}} +{{- $servicePort := .Values.service.port -}} +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" . }} + labels: + app: {{ template "prometheus-blackbox-exporter.name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + annotations: +{{ toYaml .Values.ingress.annotations | indent 4 }} +spec: + rules: + {{- range $host := .Values.ingress.hosts }} + - host: {{ $host }} + http: + paths: + - path: / + backend: + serviceName: {{ $serviceName }} + servicePort: {{ $servicePort }} + {{- end -}} + {{- if .Values.ingress.tls }} + tls: +{{ toYaml .Values.ingress.tls | indent 4 }} + {{- end -}} +{{- end -}} \ No newline at end of file diff --git a/prometheus-blackbox-exporter/templates/service.yaml b/prometheus-blackbox-exporter/templates/service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..621bfe0d56d00218d9fdcfc7edc0b6770e63024c --- /dev/null +++ b/prometheus-blackbox-exporter/templates/service.yaml @@ -0,0 +1,22 @@ +kind: Service +apiVersion: v1 +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" . }} + labels: + chart: {{ template "prometheus-blackbox-exporter.chart" . }} + app: {{ template "prometheus-blackbox-exporter.name" . 
}} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +spec: + type: {{ .Values.service.type }} + ports: + - name: http + port: {{ .Values.service.port }} + protocol: TCP +{{- if .Values.service.externalIPs }} + externalIPs: +{{ toYaml .Values.service.externalIPs | indent 4 }} +{{- end }} + selector: + app: {{ template "prometheus-blackbox-exporter.name" . }} + release: "{{ .Release.Name }}" diff --git a/prometheus-blackbox-exporter/values.yaml b/prometheus-blackbox-exporter/values.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00a7582fdc0831871251ab1a0cca57c9e7197f9e --- /dev/null +++ b/prometheus-blackbox-exporter/values.yaml @@ -0,0 +1,89 @@ +restartPolicy: Always + +image: + repository: prom/blackbox-exporter + tag: v0.12.0 + pullPolicy: IfNotPresent + +nodeSelector: {} + +config: + modules: + http_2xx: + prober: http + timeout: 5s + http: + headers: + Host: web.nynja.net + authorization: Basic bnluamE6bnluamEyMDE4 # FIXME(security): credential committed in plain text (base64 is not encryption); rotate it and supply it from a private values file + accept: text/event-stream + cache-control: no-cache + authority: web.dev-eu.nynja.net + referer: https://web.dev-eu.nynja.net/dashboard + valid_http_versions: ["HTTP/1.1", "HTTP/2"] + no_follow_redirects: false + preferred_ip_protocol: "ip4" + tls_config: + insecure_skip_verify: true + + http_400: + prober: http + timeout: 5s + http: + valid_status_codes: [400] + valid_http_versions: ["HTTP/1.1", "HTTP/2"] + no_follow_redirects: false + preferred_ip_protocol: "ip4" + +resources: {} + # limits: + # memory: 300Mi + # requests: + # memory: 50Mi + +service: + type: ClusterIP + port: 9115 + +## An Ingress resource can provide name-based virtual hosting and TLS +## termination among other things for Blackbox Exporter deployments which are accessed +## from outside the Kubernetes cluster. 
+## ref: https://kubernetes.io/docs/concepts/services-networking/ingress/ +ingress: + enabled: false + hosts: [] + # - chart-example.local + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + tls: [] + # Secrets must be manually created in the namespace. + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +podAnnotations: {} + +extraArgs: [] +# --history.limit=1000 + +replicas: 1 +## Monitors ConfigMap changes and POSTs to a URL +## Ref: https://github.com/jimmidyson/configmap-reload +## +configmapReload: + ## configmap-reload container name + ## + name: configmap-reload + + ## configmap-reload container image + ## + image: + repository: jimmidyson/configmap-reload + tag: v0.2.2 + pullPolicy: IfNotPresent + + ## configmap-reload resource requests and limits + ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: {} diff --git a/prometheus/values.yaml b/prometheus/values.yaml index 9619b4430865713f6a47c1206f607eb6f6c70f00..9f541a24bd96c12748251ccc76d495b78fcb055d 100644 --- a/prometheus/values.yaml +++ b/prometheus/values.yaml @@ -460,7 +460,7 @@ server: global: ## How frequently to scrape targets by default ## - scrape_interval: 1m + scrape_interval: 20s ## How long until a scrape request times out ## scrape_timeout: 10s @@ -581,7 +581,7 @@ server: ## Prometheus server data Persistent Volume size ## - size: 8Gi + size: 10Gi ## Prometheus server data Persistent Volume Storage Class ## If defined, storageClassName: @@ -772,6 +772,8 @@ alertmanagerFiles: {{ end }} {{ end }} icon_emoji: '{{ if eq .Status "firing" }}:fire:{{ else }}:sun_with_face:{{ end }}' + pagerduty_configs: + - service_key: 85e41512edcb43c8a8265ddaa9a37dff # FIXME(security): PagerDuty integration key committed in plain text; rotate it and inject it from a secret or private values file route: group_wait: 30s @@ -835,6 +837,42 @@ serverFiles: summary: "Monitoring Stack probe down" description: "The Monitoring Stack probe of {{ $labels.instance }} ({{$labels.env}}) is down" + - name: nynja-blackbox-exported-metrics + rules: + - alert: 
nynja-app-web_http_duration_seconds + expr: sum(probe_http_duration_seconds{instance="https://35.198.128.143/status"}) by (instance) > 1 + for: 120s + labels: + severity: "major" + annotations: + summary: "nynja-app-web {{ $labels.instance }} response time is above the threshold." + description: "The response time of {{ $labels.instance }} has been above the threshold of 1 second for the past 2 minutes." + - alert: messaging_http_duration_seconds + expr: sum(probe_http_duration_seconds{instance="http://35.198.84.145:8083/mqtt"}) by (instance) > 1 + for: 120s + labels: + severity: "major" + annotations: + summary: "Messaging server {{ $labels.instance }} response time is above the threshold." + description: "The response time of {{ $labels.instance }} has been above the threshold of 1 second for the past 2 minutes." + - alert: calling-service_http_duration_seconds + expr: sum(probe_http_duration_seconds{instance="http://calling-service.callconf.svc.cluster.local:41514/nynjacoin.nccs.shp.StateHolder/Fetch?account-id=alive_probe&instance-id=account%3Aalive_probe&body=Cg91c2VyLWNvbmZlcmVuY2UYAQ%3D%3D"}) by (instance) > 1 + for: 120s + labels: + severity: "major" + annotations: + summary: "Calling-service {{ $labels.instance }} response time is above the threshold." + description: "The response time of {{ $labels.instance }} has been above the threshold of 1 second for the past 2 minutes." + - alert: blackbox_http_probe_success + expr: probe_success == 0 + for: 60s + labels: + severity: "critical" + annotations: + summary: "Endpoint {{ $labels.instance }} not responding." + description: "{{ $labels.instance }} has been unresponsive for the past 1 minute." 
+ + prometheus.yml: rule_files: - /etc/config/rules @@ -1216,6 +1254,154 @@ serverFiles: action: keep regex: cassandra-exporter +# - job_name: nynja_app_web +# metrics_path: /status +# scheme: http +# static_configs: +# - targets: +# - "nynja-app-web.nynja-app.svc.cluster.local:9080" +# labels: +# env: monitoring + + - job_name: blackbox-exporter_200 + scrape_interval: 10s + metrics_path: /probe + params: + module: [http_2xx] + static_configs: + - targets: + # nynja-app-web public IP, the request is over https as we pass username password in the headers for all blackbox exported services that return 200 OK + - "https://35.198.128.143/status" + - "http://calling-service.callconf.svc.cluster.local:41514/nynjacoin.nccs.shp.StateHolder/Fetch?account-id=alive_probe&instance-id=account%3Aalive_probe&body=Cg91c2VyLWNvbmZlcmVuY2UYAQ%3D%3D" + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - source_labels: [__meta_kubernetes_service_annotation_job] + action: keep + - target_label: __address__ + replacement: blackbox-exporter-prometheus-blackbox-exporter:9115 + + - job_name: blackbox-exporter_400 + scrape_interval: 10s + metrics_path: /probe + params: + module: [http_400] + static_configs: + - targets: + - "http://35.198.84.145:8083/mqtt" # Messaging server + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - source_labels: [__meta_kubernetes_service_annotation_job] + action: keep + - target_label: __address__ + replacement: blackbox-exporter-prometheus-blackbox-exporter:9115 + + + - job_name: 'istio-mesh' + # Override the global default and scrape targets from this job every 5 seconds. 
+ scrape_interval: 5s + + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - istio-system + + relabel_configs: + - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: istio-telemetry;prometheus + + - job_name: 'envoy' + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. + + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - istio-system + + relabel_configs: + - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: istio-statsd-prom-bridge;statsd-prom + + - job_name: 'istio-policy' + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. + + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - istio-system + + + relabel_configs: + - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: istio-policy;http-monitoring + + - job_name: 'istio-telemetry' + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. + + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - istio-system + + relabel_configs: + - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: istio-telemetry;http-monitoring + + - job_name: 'pilot' + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. 
+ + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - istio-system + + relabel_configs: + - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: istio-pilot;http-monitoring + + - job_name: 'galley' + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. + + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - istio-system + + relabel_configs: + - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: istio-galley;http-monitoring networkPolicy: ## Enable creation of NetworkPolicy resources.