Thank you for reading this post, don't forget to subscribe!
Появилась задача: мониторить, когда истекают Vault-токены, которые мы создавали вручную и используем для подписания транзакций в приложении.
Решили, что сделаем свой экспортёр, а алерты будем отправлять в Slack.
Использовать будем Terraform-модуль, чтобы было проще разворачивать.
модуль:
/TEST/infra/terraform-module/vault-exporter/main.tf
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# Terraform module that deploys the Vault token-expiry exporter to Kubernetes.
# It reads a Vault token from AWS Secrets Manager and renders five manifests
# (ConfigMap, Secret, Deployment, Service, ServiceMonitor) from templates.
terraform {
  required_version = ">= 0.13"

  required_providers {
    # The aws_secretsmanager_* data sources below need the AWS provider;
    # declare it explicitly instead of relying on implicit resolution.
    aws = {
      source = "hashicorp/aws"
    }
    kubectl = {
      source  = "gavinbunney/kubectl"
      version = ">= 1.14.0"
    }
  }
}

# NOTE(review): the secret referenced here is used as the exporter's Vault
# token. The caller shown in this post passes the root token; a dedicated
# token with a narrowly scoped policy would be safer - confirm with the
# Vault administrators.
data "aws_secretsmanager_secret" "vault_root_token" {
  name = var.secret_name_vault_token
}

data "aws_secretsmanager_secret_version" "vault_root_token" {
  secret_id = data.aws_secretsmanager_secret.vault_root_token.id
}

# ConfigMap carrying exporter.py. The script is indented by 4 spaces so that
# it lines up under the `exporter.py: |` block scalar in the template.
resource "kubectl_manifest" "configmap" {
  yaml_body = templatefile("${path.module}/templates/configmap.yaml.tpl", {
    exporter_script = indent(4, file("${path.module}/templates/exporter.py")),
    namespace       = var.namespace
  })
}

# Secret with the Vault address and token; values are base64-encoded here
# because the template places them under `data:`.
resource "kubectl_manifest" "secret" {
  yaml_body = templatefile("${path.module}/templates/secret.yaml.tpl", {
    vault_addr  = base64encode(var.vault_addr),
    vault_token = base64encode(data.aws_secretsmanager_secret_version.vault_root_token.secret_string),
    namespace   = var.namespace
  })
}

resource "kubectl_manifest" "deployment" {
  yaml_body = templatefile("${path.module}/templates/deployment.yaml.tpl", {
    namespace = var.namespace
  })

  # The pod mounts the ConfigMap and reads the Secret, so create them first
  # to avoid a start-up failure loop on the first apply.
  depends_on = [
    kubectl_manifest.configmap,
    kubectl_manifest.secret,
  ]
}

resource "kubectl_manifest" "service" {
  yaml_body = templatefile("${path.module}/templates/service.yaml.tpl", {
    namespace = var.namespace
  })
}

resource "kubectl_manifest" "servicemonitor" {
  yaml_body = templatefile("${path.module}/templates/servicemonitor.yaml.tpl", {
    namespace = var.namespace
  })
}
/TEST/infra/terraform-module/vault-exporter/variables.tf
1 2 3 4 5 6 7 8 9 10 11 12 |
# Input variables of the vault-exporter module. All three are plain strings;
# the explicit type constraint makes Terraform reject accidental lists/maps.
variable "namespace" {
  description = "The namespace for the resources"
  type        = string
}

variable "vault_addr" {
  description = "The address of the Vault server"
  type        = string
}

variable "secret_name_vault_token" {
  description = "The name of the secret in AWS Secrets Manager for Vault token"
  type        = string
}
/TEST/infra/terraform-module/vault-exporter/templates/configmap.yaml.tpl
1 2 3 4 5 6 7 8 9 |
# ConfigMap holding the exporter script; it is mounted into the pod as a file.
# ${namespace} and ${exporter_script} are filled in by Terraform templatefile().
apiVersion: v1
kind: ConfigMap
metadata:
  name: vault-metrics-exporter-config
  namespace: ${namespace}
data:
  # The script body is pre-indented by 4 spaces on the Terraform side so that
  # every line after the first stays inside this block scalar.
  exporter.py: |
    ${exporter_script}
/TEST/infra/terraform-module/vault-exporter/templates/deployment.yaml.tpl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# Deployment running the exporter script on a stock Python image.
# ${namespace} is filled in by Terraform templatefile().
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vault-metrics-exporter
  namespace: ${namespace}
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vault-metrics-exporter
  template:
    metadata:
      labels:
        app: vault-metrics-exporter
    spec:
      containers:
        - name: vault-metrics-exporter
          image: python:3.12-slim
          # Dependencies are installed at container start, then the script
          # mounted from the ConfigMap is executed.
          command:
            - "sh"
            - "-c"
          args:
            - "pip install prometheus_client requests python-dateutil && python /app/exporter.py"
          env:
            # Vault connection settings come from the vault-secrets Secret.
            - name: VAULT_ADDR
              valueFrom:
                secretKeyRef:
                  name: vault-secrets
                  key: VAULT_ADDR
            - name: VAULT_TOKEN
              valueFrom:
                secretKeyRef:
                  name: vault-secrets
                  key: VAULT_TOKEN
            - name: POLLING_INTERVAL_SECONDS
              value: "120"
            - name: EXPORTER_PORT
              value: "9877"
          volumeMounts:
            - name: app-volume
              mountPath: /app
          ports:
            - containerPort: 9877
              name: http
          # Both probes only check that the metrics port accepts TCP connections.
          livenessProbe:
            tcpSocket:
              port: 9877
            initialDelaySeconds: 30
            periodSeconds: 20
            timeoutSeconds: 5
            failureThreshold: 3
            successThreshold: 1
          readinessProbe:
            tcpSocket:
              port: 9877
            initialDelaySeconds: 10
            periodSeconds: 20
            timeoutSeconds: 5
            failureThreshold: 3
            successThreshold: 1
          resources:
            requests:
              memory: "100Mi"
              cpu: "50m"
            limits:
              memory: "100Mi"
              cpu: "100m"
      volumes:
        # exporter.py from the ConfigMap appears as /app/exporter.py.
        - name: app-volume
          configMap:
            name: vault-metrics-exporter-config
/TEST/infra/terraform-module/vault-exporter/templates/secret.yaml.tpl
1 2 3 4 5 6 7 8 9 10 |
# Secret with the Vault address and token consumed by the exporter Deployment.
# ${vault_addr} and ${vault_token} arrive already base64-encoded from
# Terraform, which is why they are placed under `data:`.
apiVersion: v1
kind: Secret
metadata:
  name: vault-secrets
  namespace: ${namespace}
type: Opaque
data:
  VAULT_ADDR: ${vault_addr}
  VAULT_TOKEN: ${vault_token}
/TEST/infra/terraform-module/vault-exporter/templates/service.yaml.tpl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
# Service exposing the exporter's metrics port. The `app` label is what the
# ServiceMonitor selects on; the `http` port name is what it scrapes.
apiVersion: v1
kind: Service
metadata:
  name: vault-metrics-exporter
  namespace: ${namespace}
  labels:
    app: vault-metrics-exporter
spec:
  ports:
    - name: http
      port: 9877
      targetPort: 9877
  selector:
    app: vault-metrics-exporter
/TEST/infra/terraform-module/vault-exporter/templates/servicemonitor.yaml.tpl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
# ServiceMonitor that scrapes /metrics on the `http` port of the exporter
# Service.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: vault-metrics-exporter
  namespace: ${namespace}
  labels:
    # NOTE(review): presumably this must match the Prometheus
    # serviceMonitorSelector of the kube-prometheus-stack release - confirm.
    release: kube-prometheus-stack
spec:
  selector:
    matchLabels:
      app: vault-metrics-exporter
  endpoints:
    - port: http
      path: /metrics
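Сам скрипт: он исключает из списка токены с display_name root и approle, токены, в имени которых есть jwt-google-oauth2, а также токены, срок действия которых истёк более двух часов назад.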
/TEST/infra/terraform-module/vault-exporter/templates/exporter.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import os
import time
import logging
import requests
from prometheus_client import start_http_server, Gauge
from datetime import datetime, timedelta, timezone
from dateutil import parser

# Seconds to wait for a Vault HTTP response; without a timeout a hung
# connection would block the polling loop forever.
REQUEST_TIMEOUT_SECONDS = 10


class VaultMetricsExporter:
    """Polls Vault for token accessors and exports expiry times as a gauge.

    The ``vault_expire_token_date`` gauge holds the expiration time of each
    monitored token as a Unix timestamp, labelled by ``display_name`` and
    ``accessor``.
    """

    def __init__(self, vault_addr, vault_token, polling_interval_seconds=120):
        """Store connection settings, configure logging and create the gauge.

        Args:
            vault_addr: Base URL of the Vault server.
            vault_token: Token sent in the ``X-Vault-Token`` header.
            polling_interval_seconds: Pause between two polls of Vault.
        """
        # A trailing slash would produce "//v1/..." in the request URLs.
        self.vault_addr = vault_addr.rstrip('/') if vault_addr else vault_addr
        self.vault_token = vault_token
        self.polling_interval_seconds = polling_interval_seconds

        # Logger setup. Propagation is disabled because main() also calls
        # logging.basicConfig(); otherwise every record is printed twice.
        self.logger = logging.getLogger('VaultMetricsExporter')
        self.logger.setLevel(logging.INFO)
        self.logger.propagate = False
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

        # Prometheus metrics
        self.vault_expire_token_date = Gauge(
            'vault_expire_token_date',
            'Vault token expiration date',
            ['display_name', 'accessor'],
        )
        # (display_name, accessor) pairs currently exported, so that series
        # of revoked or long-expired tokens can be removed again.
        self._exported_labels = set()

    def run_metrics_loop(self):
        """Poll Vault forever, sleeping ``polling_interval_seconds`` between polls."""
        while True:
            self.fetch_metrics()
            time.sleep(self.polling_interval_seconds)

    def fetch_metrics(self):
        """Fetch token data from Vault and refresh the Prometheus gauge.

        Errors are logged, never raised. A failure for one accessor does not
        abort the poll; that accessor's existing series is left untouched.
        Series for tokens that are no longer listed, are filtered out, or
        expired more than two hours ago are removed from the gauge.
        """
        self.logger.info("Fetching metrics from Vault")
        try:
            all_tokens = self.get_all_tokens()
        except Exception as e:
            self.logger.error(f"Error polling Vault: {e}")
            return

        current_date = datetime.now(timezone.utc)
        exported = set()
        failed_accessors = set()

        for token in all_tokens:
            try:
                token_info = self.get_token_info(token)
                expire_time = token_info.get('expire_time')
                display_name = token_info.get('display_name')

                # Only tokens that have both fields are monitored; 'root',
                # 'approle' and any 'jwt-google-oauth2' tokens are ignored.
                if not expire_time or not display_name:
                    continue
                if display_name in ('root', 'approle') or 'jwt-google-oauth2' in display_name:
                    continue

                expire_date = parser.parse(expire_time)

                # Tokens expired more than 2 hours ago are no longer reported.
                if expire_date < current_date - timedelta(hours=2):
                    self.logger.info(f"Skipping expired token: {display_name}")
                    continue

                self.vault_expire_token_date.labels(
                    display_name=display_name, accessor=token
                ).set(int(expire_date.timestamp()))
                exported.add((display_name, token))
            except Exception as e:
                failed_accessors.add(token)
                self.logger.error(f"Error processing token accessor {token}: {e}")

        # Drop stale series, but keep those whose lookup failed this round so
        # that a transient error does not make a metric disappear.
        kept = set()
        for labels in self._exported_labels - exported:
            if labels[1] in failed_accessors:
                kept.add(labels)
                continue
            try:
                self.vault_expire_token_date.remove(*labels)
            except KeyError:
                pass
        self._exported_labels = exported | kept

        if failed_accessors:
            self.logger.error(f"Vault polled with {len(failed_accessors)} failed token lookups")
        else:
            # Log a successful poll
            self.logger.info("Vault polled successfully")

    def convert_to_timestamp(self, time_str):
        """Parse a time string and return it as an integer Unix timestamp.

        Raises:
            ValueError: if ``time_str`` cannot be parsed.
        """
        return int(parser.parse(time_str).timestamp())

    def get_all_tokens(self):
        """Return the list of token accessors known to Vault.

        Raises:
            requests.RequestException: on connection errors, timeouts or a
                non-2xx response.
        """
        response = requests.get(
            url=f"{self.vault_addr}/v1/auth/token/accessors",
            headers={"X-Vault-Token": self.vault_token},
            params={"list": "true"},
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        data = response.json().get('data') or {}
        return data.get('keys') or []

    def get_token_info(self, accessor):
        """Return the ``data`` dict of a token looked up by its accessor.

        Raises:
            requests.RequestException: on connection errors, timeouts or a
                non-2xx response.
        """
        response = requests.post(
            url=f"{self.vault_addr}/v1/auth/token/lookup-accessor",
            headers={"X-Vault-Token": self.vault_token},
            json={"accessor": accessor},
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        return response.json().get('data') or {}


def main():
    """Entry point: read settings from the environment and start the exporter.

    Environment variables: VAULT_ADDR, VAULT_TOKEN (both required),
    POLLING_INTERVAL_SECONDS (default 120), EXPORTER_PORT (default 9877).
    """
    vault_addr = os.getenv('VAULT_ADDR')
    vault_token = os.getenv('VAULT_TOKEN')
    polling_interval_seconds = int(os.getenv('POLLING_INTERVAL_SECONDS', '120'))
    exporter_port = int(os.getenv('EXPORTER_PORT', '9877'))

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('Main')

    # Fail fast instead of polling "None/v1/..." forever.
    if not vault_addr or not vault_token:
        logger.error("VAULT_ADDR and VAULT_TOKEN must be set")
        raise SystemExit(1)

    logger.info(f"Starting Vault Metrics Exporter with VAULT_ADDR={vault_addr} and POLLING_INTERVAL_SECONDS={polling_interval_seconds}")

    exporter = VaultMetricsExporter(
        vault_addr=vault_addr,
        vault_token=vault_token,
        polling_interval_seconds=polling_interval_seconds
    )

    start_http_server(exporter_port)
    logger.info(f"HTTP server started on port {exporter_port}")

    exporter.run_metrics_loop()


if __name__ == "__main__":
    main()
запускаем мы это вот так:
/TEST/infra/infrastructure/aws/test-dev/vault.tf
1 2 3 4 5 6 7 |
# Deploys the Vault token-expiry exporter into the "vault" namespace of the
# dev cluster.
module "vault_metrics_exporter" {
  source = "../../../terraform-module/vault-exporter"

  namespace  = "vault"
  vault_addr = "https://vault.dev.test.ru"

  # NOTE(review): this secret is named as the initial root token; a dedicated
  # token with a narrowly scoped policy is presumably enough - confirm.
  secret_name_vault_token = "dev/vault_initial_root_token"
}
Мы задаём namespace, адрес Vault и секрет, в котором у нас находится root token.
Также создаём алерт в Alertmanager:
/TEST/infra/infrastructure/aws/test-dev/templates/kube-prometheus-stack-values.yaml.tmpl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# Alert rules on the exporter's vault_expire_token_date gauge (Unix timestamp
# of token expiry). $value is the left-hand side of the expression, i.e. the
# number of seconds left until expiration.
- name: VaultTokenExpire
  rules:
    # Warning: fewer than 30 days (2592000 s) left.
    - alert: VaultTokenExpireMonth
      expr: vault_expire_token_date - time() < 2592000
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "Vault token {{ $labels.display_name }} is expiring in less than a month"
        # The inner query is restricted to this alert's accessor; an
        # unfiltered query followed by `first` would print the expiration
        # date of whichever token happens to come first.
        description: |
          Vault token {{ $labels.display_name }} with accessor {{ $labels.accessor }} in namespace {{ $labels.namespace }} is expiring in less than 30 days.
          {{- if $value }}
          Expiration date: {{ with printf "vault_expire_token_date{accessor='%s'}" $labels.accessor | query }}{{ . | first | value | humanizeTimestamp }}{{ end }}
          The token {{ $labels.display_name }} has time left until expiration: {{ humanizeDuration $value }}
          {{- else }}
          Expiration date: N/A
          {{- end }}
    # Critical: fewer than 2 days (172800 s) left.
    - alert: VaultTokenExpireDays
      expr: vault_expire_token_date - time() < 172800
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: "Vault token {{ $labels.display_name }} is expiring in less than 2 days"
        description: |
          Vault token {{ $labels.display_name }} with accessor {{ $labels.accessor }} in namespace {{ $labels.namespace }} is expiring in less than 2 days.
          {{- if $value }}
          Expiration date: {{ with printf "vault_expire_token_date{accessor='%s'}" $labels.accessor | query }}{{ . | first | value | humanizeTimestamp }}{{ end }}
          The token {{ $labels.display_name }} has time left until expiration: {{ humanizeDuration $value }}
          {{- else }}
          Expiration date: N/A
          {{- end }}