Here is the RabbitMQ module I have:
infra/terraform/modules/service-apps/rabbitmq.tf
locals {
  rabbit               = merge(var.service_apps[index(var.service_apps.*.chart, "rabbitmq")], var.rabbitmq)
  rabbit_auth_password = local.rabbit.enabled ? data.aws_secretsmanager_secret_version.rabbit_password_version[0].secret_string : ""

  rabbit_values = <<VALUES
global:
  storageClass: ${local.rabbit.global_storage_class}
image:
  registry: registry.infra.test.tech
  repository: docker/bitnami/rabbitmq
  tag: ${local.rabbit.image_tag}
auth:
  username: "${local.rabbit.rabbit_user_name}"
  password: "${local.rabbit_auth_password}"
clustering:
  enabled: ${local.rabbit.clustering}
replicaCount: ${local.rabbit.replica_count}
pdb:
  create: ${local.rabbit.pdb_create}
  minAvailable: 0
  maxUnavailable: 1
nodeSelector:
  ${indent(2, yamlencode(local.rabbit.node_selector))}
tolerations:
  ${indent(2, yamlencode(local.rabbit.tolerations))}
resources:
  ${indent(2, yamlencode(local.rabbit.resources))}
affinity:
  ${indent(2, yamlencode(local.rabbit.affinity))}
ingress:
  enabled: true
  hostname: "${local.rabbit.rabbitmq_dns_name}"
  ingressClassName: nginx-int
  annotations:
    nginx.ingress.kubernetes.io/proxy-body-size: "128m"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "7200"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "7200"
  existingSecret: ${local.rabbit.rabbitmq_tls_secret}
  tls:
    hosts:
      - ${local.rabbit.rabbitmq_dns_name}
persistence:
  enabled: true
  storageClass: "gp3"
  accessModes:
    - ReadWriteOnce
  mountPath: /opt/bitnami/rabbitmq/.rabbitmq/mnesia
  size: ${local.rabbit.persistence_size}
metrics:
  enabled: true
  serviceMonitor:
    enabled: true
VALUES
}

data "aws_secretsmanager_secret_version" "rabbit_password_version" {
  count     = local.rabbit.enabled ? 1 : 0
  secret_id = var.secret_id[local.rabbit.secret_name]
}

resource "helm_release" "rabbitmq" {
  count       = local.rabbit.enabled ? 1 : 0
  name        = local.rabbit.name
  chart       = local.rabbit.chart
  repository  = local.rabbit.repository
  version     = local.rabbit.chart_version
  namespace   = local.rabbit.namespace
  max_history = local.rabbit.max_history
  values      = [local.rabbit_values]
}
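For reference, the `indent(2, yamlencode(...))` pattern is what turns those HCL maps into chart-ready YAML inside the heredoc. A minimal sketch of what it renders for the dev node_selector (Terraform's yamlencode quotes keys and values; exact key order may differ):

nodeSelector:
  "dedication": "infra-ondemand"
  "provisioning": "karpenter"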
infra/terraform/modules/service-apps/main.tf
provider "kubernetes" { host = data.aws_eks_cluster.main.endpoint cluster_ca_certificate = base64decode(data.aws_eks_cluster.main.certificate_authority.0.data) token = data.aws_eks_cluster_auth.main.token } provider "kubectl" { host = data.aws_eks_cluster.main.endpoint cluster_ca_certificate = base64decode(data.aws_eks_cluster.main.certificate_authority.0.data) token = data.aws_eks_cluster_auth.main.token } provider "helm" { kubernetes { host = data.aws_eks_cluster.main.endpoint cluster_ca_certificate = base64decode(data.aws_eks_cluster.main.certificate_authority.0.data) token = data.aws_eks_cluster_auth.main.token } experiments { manifest = false } } data "aws_eks_cluster" "main" { name = var.eks_cluster_id } data "aws_eks_cluster_auth" "main" { name = var.eks_cluster_id } data "aws_caller_identity" "current" {} data "aws_secretsmanager_secret_version" "vault" { secret_id = var.secret_id[local.vault.vault_secret_name] } provider "vault" { address = "https://${local.vault.vault_dns}" token = data.aws_secretsmanager_secret_version.vault.secret_string } |
infra/terraform/modules/service-apps/variables.tf
variable "region" { description = "Target region for all infrastructure resources" default = "" } variable "project_name" { type = string description = "Project name" } variable "environment" { description = "Environment name" default = "" } variable "account_id" { type = string description = "Account ID" } variable "eks_cluster_id" { type = string description = "ID of the created EKS cluster." } variable "eks_cluster_endpoint" { type = string description = "Endpoint for EKS control plane." } variable "oidc_provider_arn" { description = "The ARN of the OIDC Provider" default = "" } variable "oidc_provider" { description = "The OIDC Provider" default = "" } variable "vpc_cidr" { type = string default = "10.0.0.0/16" description = "CIDR block of the VPC" } variable "public_subnets" { type = string default = "" description = "Public subnets of the VPC" } variable "private_subnets" { type = string default = "" description = "Private subnets of the VPC" } variable "grafana_endpoint" { type = string default = "" description = "Grafana endpoint" } variable "victoria_metrics_endpoint" { type = string default = "" description = "VM endpoint" } variable "helm_release_history_size" { description = "How much helm releases to store" default = 5 } variable "service_apps" { description = "Base infra apps list" type = any } variable "external_secrets" { type = any default = {} description = "External secrets configuration" } variable "rabbitmq" { type = any default = {} description = "Cert manager configuration" } variable "redis" { type = any default = {} description = "Redis configuration" } variable "redis_haproxy" { type = any default = {} description = "Redis haproxy configuration" } variable "vault" { type = any default = {} description = "Vault configuration" } variable "jaeger" { type = any default = {} description = "Jaeger configuration" } variable "vault_iam_role_arn" { type = map(string) default = {} description = "Vault IAM role ARN" } variable "secret_id" { type = map(string) default = {} description = "Secret ID" } variable "rds_address" { type = map(string) default = {} description = "RDS address" } variable "apps" { description = "Apps list" type = any } variable "vault_mounts" { description = "Vault mounts" type = any } variable "vault_policies" { description = "Vault policies" type = any } |
infra/terragrunt/pre/eu-central-1/pre/service-apps-dev/terragrunt.hcl
include "root" { path = find_in_parent_folders() expose = true merge_strategy = "deep" } include "env_dev" { path = find_in_parent_folders("env_dev.hcl") expose = true merge_strategy = "deep" } include "vpc" { path = find_in_parent_folders("vpc.hcl") expose = true merge_strategy = "deep" } include "vault" { path = find_in_parent_folders("vault_dev.hcl") expose = true merge_strategy = "deep" } include "service_apps_dev" { path = find_in_parent_folders("service_apps_dev.hcl") expose = true merge_strategy = "deep" } include "apps" { path = find_in_parent_folders("apps.hcl") expose = true merge_strategy = "deep" } terraform { source = "${get_path_to_repo_root()}/terraform/modules//service-apps" } dependency "eks" { config_path = "../common/aws-eks" mock_outputs_allowed_terraform_commands = ["init", "validate", "plan", "destroy"] mock_outputs = { eks_cluster_id = "test" eks_oidc_provider = "oidc.eks.us-east-1.amazonaws.com/id/D55EEBDFE5510B81EEE2381B88888888" eks_oidc_provider_arn = "arn:aws:iam::11111111:oidc-provider/oidc.eks.us-east-1.amazonaws.com/id/D55EEBDFE5510B81EEE2381B88888888" node_group_default_iam_role_arn = "arn::" node_group_default_iam_role_name = "test" eks_node_security_group_id = "sg-0f5b1b5f788888888" } } dependency "vpc" { config_path = "../common/aws-vpc" mock_outputs_allowed_terraform_commands = ["init", "validate", "plan", "destroy"] mock_outputs = { vpc_id = "vpc-0f5b1b5f788888888" vpc_cidr = "10.0.0.0/16" private_subnets = ["10.0.0.0/16"] public_subnets = ["10.0.0.0/16"] } } dependency "rds" { config_path = "../rds" mock_outputs_allowed_terraform_commands = ["init", "validate", "plan", "destroy", "import", "state", "apply"] mock_outputs = { db_instance_address = { test-preprod-db = "db.ckv.eu-central-1.rds.amazonaws.com" } } } dependency "apps_components" { config_path = "../apps-components" } generate "providers_versions" { path = "versions.tf" if_exists = "overwrite" contents = <<EOF terraform { required_version = ">= 1.7.0" required_providers { aws = { source = "hashicorp/aws" version = "${include.root.locals.tf_providers.aws}" } kubernetes = { source = "hashicorp/kubernetes" version = "${include.root.locals.tf_providers.kubernetes}" } kubectl = { source = "gavinbunney/kubectl" version = "${include.root.locals.tf_providers.kubectl}" } helm = { source = "hashicorp/helm" version = "${include.root.locals.tf_providers.helm}" } http = { source = "hashicorp/http" version = "${include.root.locals.tf_providers.http}" } vault = { source = "hashicorp/vault" version = "${include.root.locals.tf_providers.vault}" } } } EOF } inputs = { region = include.env_dev.locals.region project_name = include.env_dev.locals.project_name environment = include.env_dev.locals.environment name = include.env_dev.locals.name apps = include.apps.locals.apps service_apps = include.service_apps_dev.locals.service_apps account_id = get_aws_account_id() eks_cluster_id = dependency.eks.outputs.eks_cluster_id eks_cluster_endpoint = dependency.eks.outputs.eks_cluster_endpoint oidc_provider = dependency.eks.outputs.eks_oidc_provider oidc_provider_arn = dependency.eks.outputs.eks_oidc_provider_arn vpc_id = dependency.vpc.outputs.vpc_id vpc_cidr = include.vpc.locals.vpc.cidr private_subnets = dependency.vpc.outputs.private_subnets public_subnets = dependency.vpc.outputs.public_subnets security_groups = [dependency.eks.outputs.eks_node_security_group_id] project_name = include.env_dev.locals.project_name rds_address = dependency.rds.outputs.db_instance_address secret_id = 
dependency.apps_components.outputs.extra_secrets_ids vault_iam_role_arn = dependency.apps_components.outputs.aws_service_accounts_role_arn vault_mounts = include.vault.locals.mounts vault_policies = include.vault.locals.policies } |
infra/terragrunt/pre/eu-central-1/pre/service_apps_dev.hcl
locals {
  #----------------------------------------------------------------------------------
  # Service apps
  #----------------------------------------------------------------------------------
  service_apps = [
    #--------------------------------------------------------------------------------
    # RabbitMQ
    #--------------------------------------------------------------------------------
    {
      name                 = "rabbitmq"
      enabled              = true
      chart                = "rabbitmq"
      repository           = "https://charts.bitnami.com/bitnami"
      chart_version        = "14.4.5"
      image_tag            = "3.13.4-debian-12-r0"
      namespace            = "rabbitmq-dev"
      max_history          = 3
      rabbit_user_name     = "admin"
      secret_name          = "dev_rabbitmq_admin_password"
      rabbitmq_dns_name    = "rabbit.dev.test.dev"
      rabbitmq_tls_secret  = "tls-rabbitmq"
      global_storage_class = "gp3"
      clustering           = false
      replica_count        = 1
      pdb_create           = false
      persistence_size     = "1Gi"

      affinity = {
        nodeAffinity = {
          requiredDuringSchedulingIgnoredDuringExecution = {
            nodeSelectorTerms = [
              {
                matchExpressions = [
                  {
                    key      = "topology.kubernetes.io/zone"
                    operator = "In"
                    values   = ["eu-central-1c"]
                  }
                ]
              }
            ]
          }
        }
      }

      node_selector = {
        dedication   = "infra-ondemand"
        provisioning = "karpenter"
      }

      tolerations = [
        {
          key      = "dedicated"
          operator = "Equal"
          value    = "infra-ondemand"
          effect   = "NoSchedule"
        }
      ]

      resources = {
        requests = {
          cpu    = "100m"
          memory = "200Mi"
        }
        limits = {
          cpu    = "500m"
          memory = "500Mi"
        }
      }
    }
  ]
}
To upgrade cleanly to 15.5.3, the values have to be adjusted:
infra/terraform/modules/service-apps/rabbitmq.tf
locals {
  rabbit               = merge(var.service_apps[index(var.service_apps.*.chart, "rabbitmq")], var.rabbitmq)
  rabbit_auth_password = local.rabbit.enabled ? data.aws_secretsmanager_secret_version.rabbit_password_version[0].secret_string : ""

  rabbit_values = <<VALUES
global:
  defaultStorageClass: ${local.rabbit.global_storage_class}
  security:
    allowInsecureImages: true
image:
  registry: registry.infra.test.tech
  repository: docker/bitnami/rabbitmq
  tag: ${local.rabbit.image_tag}
auth:
  username: "${local.rabbit.rabbit_user_name}"
  password: "${local.rabbit_auth_password}"
  securePassword: false
clustering:
  enabled: ${local.rabbit.clustering}
replicaCount: ${local.rabbit.replica_count}
pdb:
  create: ${local.rabbit.pdb_create}
  minAvailable: 0
  maxUnavailable: 1
nodeSelector:
  ${indent(2, yamlencode(local.rabbit.node_selector))}
tolerations:
  ${indent(2, yamlencode(local.rabbit.tolerations))}
resources:
  ${indent(2, yamlencode(local.rabbit.resources))}
affinity:
  ${indent(2, yamlencode(local.rabbit.affinity))}
ingress:
  enabled: true
  hostname: "${local.rabbit.rabbitmq_dns_name}"
  ingressClassName: nginx-int
  annotations:
    nginx.ingress.kubernetes.io/proxy-body-size: "128m"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "7200"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "7200"
  existingSecret: ${local.rabbit.rabbitmq_tls_secret}
  tls:
    hosts:
      - ${local.rabbit.rabbitmq_dns_name}
persistence:
  enabled: true
  storageClass: "gp3"
  accessModes:
    - ReadWriteOnce
  mountPath: /opt/bitnami/rabbitmq/.rabbitmq/mnesia
  size: ${local.rabbit.persistence_size}
metrics:
  enabled: true
  serviceMonitor:
    enabled: true
usePasswordFiles: false
VALUES
}

data "aws_secretsmanager_secret_version" "rabbit_password_version" {
  count     = local.rabbit.enabled ? 1 : 0
  secret_id = var.secret_id[local.rabbit.secret_name]
}

resource "helm_release" "rabbitmq" {
  count       = local.rabbit.enabled ? 1 : 0
  name        = local.rabbit.name
  chart       = local.rabbit.chart
  repository  = local.rabbit.repository
  version     = local.rabbit.chart_version
  namespace   = local.rabbit.namespace
  max_history = local.rabbit.max_history
  values      = [local.rabbit_values]
}
Since I run my own registry, the chart has to be explicitly allowed to use it (otherwise the 15.x chart refuses to render substituted images):
global:
  storageClass: "gp3"
  security:
    allowInsecureImages: true
and move the pdb block (the snippet below is from my Redis module, which needed the same change):
master:
  count: 1
  disableCommands: []
  pdb:
    create: ${local.redis.pdb_create}
    minAvailable: 0
    maxUnavailable: 1

replica:
  replicaCount: ${local.redis.replica_replicacount}
  disableCommands: []
  pdb:
    create: ${local.redis.pdb_create}
    minAvailable: 0
    maxUnavailable: 1
fix the storage class key, which is now global.defaultStorageClass:
global:
  defaultStorageClass: ${local.rabbit.global_storage_class}
  security:
    allowInsecureImages: true
and:
auth:
  securePassword: false
plus:
usePasswordFiles: false |
We also bump the chart version and image tag, and change the repository from
repository = "https://charts.bitnami.com/bitnami"
to
repository = "oci://registry-1.docker.io/bitnamicharts"
infra/terragrunt/pre/eu-central-1/pre/service_apps_dev.hcl
locals {
  #----------------------------------------------------------------------------------
  # Service apps
  #----------------------------------------------------------------------------------
  service_apps = [
    #--------------------------------------------------------------------------------
    # RabbitMQ
    #--------------------------------------------------------------------------------
    {
      name                 = "rabbitmq"
      enabled              = true
      chart                = "rabbitmq"
      repository           = "oci://registry-1.docker.io/bitnamicharts"
      chart_version        = "15.5.3"
      image_tag            = "4.0.9-debian-12-r1"
      namespace            = "rabbitmq-dev"
      max_history          = 3
      rabbit_user_name     = "admin"
      secret_name          = "dev_rabbitmq_admin_password"
      rabbitmq_dns_name    = "rabbit.dev.test.dev"
      rabbitmq_tls_secret  = "tls-rabbitmq"
      global_storage_class = "gp3"
      clustering           = false
      replica_count        = 1
      pdb_create           = false
      persistence_size     = "1Gi"

      affinity = {
        nodeAffinity = {
          requiredDuringSchedulingIgnoredDuringExecution = {
            nodeSelectorTerms = [
              {
                matchExpressions = [
                  {
                    key      = "topology.kubernetes.io/zone"
                    operator = "In"
                    values   = ["eu-central-1c"]
                  }
                ]
              }
            ]
          }
        }
      }

      node_selector = {
        dedication   = "infra-ondemand"
        provisioning = "karpenter"
      }

      tolerations = [
        {
          key      = "dedicated"
          operator = "Equal"
          value    = "infra-ondemand"
          effect   = "NoSchedule"
        }
      ]

      resources = {
        requests = {
          cpu    = "100m"
          memory = "200Mi"
        }
        limits = {
          cpu    = "500m"
          memory = "500Mi"
        }
      }
    }
  ]
}
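From here the rollout is an ordinary Terragrunt run from the stack directory (a sketch given the layout above; hold off on apply until the feature flags below are enabled):

cd infra/terragrunt/pre/eu-central-1/pre/service-apps-dev
terragrunt plan    # review the helm_release.rabbitmq changes first
terragrunt apply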
Before upgrading, you MUST enable the restart_streams feature flag.
To be safe, it's best to enable the following feature flags as well:
rabbitmqctl enable_feature_flag restart_streams
rabbitmqctl enable_feature_flag message_containers
rabbitmqctl enable_feature_flag message_containers_deaths_v2
rabbitmqctl enable_feature_flag stream_filtering
rabbitmqctl enable_feature_flag stream_sac_coordinator_unblock_group
rabbitmqctl enable_feature_flag stream_update_config_command
They can be enabled from inside the rabbitmq container.
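Or via kubectl from outside; a sketch assuming the chart's default StatefulSet pod naming (rabbitmq-0) in the rabbitmq-dev namespace:

# enable the required flag without a shell session
kubectl exec -n rabbitmq-dev rabbitmq-0 -- rabbitmqctl enable_feature_flag restart_streams
# check that every flag is now in the enabled state
kubectl exec -n rabbitmq-dev rabbitmq-0 -- rabbitmqctl list_feature_flags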
I also hit a problem when enabling the flags:
missing_clustered_nodes
It happened because we had scaled the cluster down from 3 nodes to 1, while RabbitMQ itself just kept listing those nodes as unavailable.
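You can see which nodes the broker still remembers with cluster_status (run inside the container, as above); the departed replicas show up among the disk nodes but not among the running ones:

rabbitmqctl cluster_status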
To fix it, the stale nodes have to be removed from the cluster:
rabbitmqctl forget_cluster_node rabbit@rabbitmq-1.rabbitmq-headless.rabbitmq.svc.cluster.local
rabbitmqctl forget_cluster_node rabbit@rabbitmq-2.rabbitmq-headless.rabbitmq.svc.cluster.local
After that, the flags enabled cleanly and the upgrade went through. If you skip the flags, RabbitMQ's log shows this error:
rabbitmq 05:41:50.83 INFO  ==> ** Starting RabbitMQ **
2025-04-29 05:42:03.510037+00:00 [error] <0.216.0> Feature flags: `restart_streams`: required feature flag not enabled! It must be enabled before upgrading RabbitMQ.
2025-04-29 05:42:03.567214+00:00 [error] <0.216.0> Failed to initialize feature flags registry: {disabled_required_feature_flag,
2025-04-29 05:42:03.567214+00:00 [error] <0.216.0>                                               restart_streams}
2025-04-29 05:42:03.593682+00:00 [error] <0.216.0>
2025-04-29 05:42:03.593682+00:00 [error] <0.216.0> BOOT FAILED
2025-04-29 05:42:03.593682+00:00 [error] <0.216.0> ===========
2025-04-29 05:42:03.593682+00:00 [error] <0.216.0> Error during startup: {error,failed_to_initialize_feature_flags_registry}
2025-04-29 05:42:03.593682+00:00 [error] <0.216.0>

BOOT FAILED
===========
Error during startup: {error,failed_to_initialize_feature_flags_registry}

2025-04-29 05:42:04.597407+00:00 [notice] <0.45.0> Application rabbit exited with reason: {failed_to_initialize_feature_flags_registry,{rabbit,start,[normal,[]]}}
{exit,terminating,[{application_controller,call,2,[{file,"application_controller.erl"},{line,511}]},{application,'-ensure_all_started/3-lc$^0/1-0-',1,[{file,"application.erl"},{line,367}]},{application,ensure_all_started,3,[{file,"application.erl"},{line,367}]},{rabbit,'-start_it/1-fun-0-',1,[{file,"rabbit.erl"},{line,425}]},{timer,tc,2,[{file,"timer.erl"},{line,595}]},{rabbit,start_it,1,[{file,"rabbit.erl"},{line,421}]},{init,start_it,1,[]},{init,start_em,1,[]}]}
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>   crasher:
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     initial call: application_master:init/3
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     pid: <0.215.0>
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     registered_name: []
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     exception exit: {failed_to_initialize_feature_flags_registry,
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>                         {rabbit,start,[normal,[]]}}
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>       in function application_master:init/3 (application_master.erl, line 143)
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     ancestors: [application_controller,<0.10.0>]
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     message_queue_len: 1
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     messages: [{'EXIT',<0.216.0>,normal}]
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     links: [<0.45.0>]
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     dictionary: []
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     trap_exit: true
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     status: running
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     heap_size: 233
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     stack_size: 29
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>     reductions: 67
2025-04-29 05:42:04.597695+00:00 [error] <0.215.0>   neighbours:
Kernel pid terminated (application_controller) ("{application_start_failure,rabbit,{failed_to_initialize_feature_flags_registry,{rabbit,start,[normal,[]]}}}")
Crash dump is being written to: erl_crash.dump..