AWS - FailedInvocations с экземпляром EC2 - PullRequest
0 голосов
/ 04 февраля 2020

В настоящее время я пытаюсь создать cronjob, используя AWS Fargate и Cloudwatch Events. Но это приводит к FailedInvocations, и я не знаю почему.

Сначала я настроил переменные:

# AWS region used for the deployment (defaults to Frankfurt, eu-central-1).
variable "region" {
  default = "eu-central-1"
  type    = string
}

# Application name; it is combined with the environment to form
# local.namespace and is also used as the ECR repository name.
variable "app" {
  default = "fargate"
  type    = string
}

# Name of the environment being built; second half of local.namespace.
variable "environment" {
  default = "dev"
  type    = string
}

# Schedule expression handed to the CloudWatch Events rule
# (rate(...) or cron(...) syntax). The default fires once per minute.
variable "schedule_expression" {
  default = "rate(1 minute)"
  type    = string
}

# Tag mutability setting applied to the ECR repository (MUTABLE by default).
variable "image_tag_mutability" {
  default = "MUTABLE"
  type    = string
}

# Availability zones to spread the network over. One private subnet, one
# public subnet, one EIP, one NAT gateway and one private route table are
# created per entry.
variable "availability_zones" {
  type    = list(string)
  default = ["eu-central-1a", "eu-central-1b"]
}

# CPU units for the task; used both at task level and for the container.
variable "task_cpu" {
  default = "256"
  type    = string
}

# Memory for the task; used both at task level and for the container.
variable "task_memory" {
  default = "512"
  type    = string
}

# SAML session names that are granted access to the ECR repository.
# Empty by default, i.e. no assumed-role principal is listed.
variable "saml_users" {
  default = []
  type    = list(string)
}

# Values derived from the input variables.

locals {
  # "<app>-<environment>", used to name several resources.
  namespace = format("%s-%s", var.app, var.environment)

  # Presumably the CloudWatch Logs group name for the task; it is not
  # referenced anywhere in the visible configuration.
  log_group = "/fargate/task/${local.namespace}"
}

Затем я создал роль пользователя, работающую с моей учетной записью saml:

# Trust policy for the developers role: principals federated through the
# account's "simplesaml" SAML provider may assume the role, provided the
# SAML audience equals the AWS sign-in endpoint.
data "aws_iam_policy_document" "developers" {
  statement {
    actions = ["sts:AssumeRoleWithSAML"]
    effect  = "Allow"

    condition {
      test     = "StringEquals"
      variable = "SAML:aud"
      values   = ["https://signin.aws.amazon.com/saml"]
    }

    principals {
      type        = "Federated"
      identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:saml-provider/simplesaml"]
    }
  }
}

# Role assumed by developers who sign in through SAML.
resource "aws_iam_role" "developers" {
  name               = "developers"
  description        = "The user role with SAML identification"
  assume_role_policy = data.aws_iam_policy_document.developers.json

  # 43200 seconds = 12 hours.
  max_session_duration = "43200"
}

# Look up the AWS-managed ReadOnlyAccess policy ...
data "aws_iam_policy" "ReadOnlyAccess" {
  arn = "arn:aws:iam::aws:policy/ReadOnlyAccess"
}

# ... and attach it to the developers role.
resource "aws_iam_role_policy_attachment" "developers_ReadOnlyAccess" {
  role       = aws_iam_role.developers.name
  policy_arn = data.aws_iam_policy.ReadOnlyAccess.arn
}

Также ECR:

# ECR repository for the application image, named after var.app.
resource "aws_ecr_repository" "app" {
  image_tag_mutability = var.image_tag_mutability
  name                 = var.app
}

# Repository policy that gives the listed SAML users access to the repo.
resource "aws_ecr_repository_policy" "app" {
  policy     = data.aws_iam_policy_document.ecr.json
  repository = aws_ecr_repository.app.name
}

# ECR repository policy document: allows the listed actions for the
# assumed-role session of every user in var.saml_users.
data "aws_iam_policy_document" "ecr" {
  statement {
    principals {
      type = "AWS"

      # One explicit principal per SAML user, because assumed-role ARNs
      # cannot contain a wildcard.
      identifiers = [
        for user in var.saml_users :
        "arn:aws:sts::${data.aws_caller_identity.current.account_id}:assumed-role/${aws_iam_role.developers.name}/${user}"
      ]
    }

    actions = [
      "ecr:GetDownloadUrlForLayer",
      "ecr:BatchGetImage",
      "ecr:BatchCheckLayerAvailability",
      "ecr:PutImage",
      "ecr:InitiateLayerUpload",
      "ecr:UploadLayerPart",
      "ecr:CompleteLayerUpload",
      "ecr:DescribeRepositories",
      "ecr:GetRepositoryPolicy",
      "ecr:ListImages",
      "ecr:DescribeImages",
      "ecr:DeleteRepository",
      "ecr:BatchDeleteImage",
      "ecr:SetRepositoryPolicy",
      "ecr:DeleteRepositoryPolicy",
      "ecr:GetLifecyclePolicy",
      "ecr:PutLifecyclePolicy",
      "ecr:DeleteLifecyclePolicy",
      "ecr:GetLifecyclePolicyPreview",
      "ecr:StartLifecyclePolicyPreview",
    ]
  }
}

# Exposes the URL of the ECR repository so that scripts can push and pull
# images.
output "docker_registry" {
  value = aws_ecr_repository.app.repository_url
}

Теперь нужна VPC:

# VPC hosting all networking resources below (10.10.0.0/16).
resource "aws_vpc" "main" {
  cidr_block = "10.10.0.0/16"
}

# One private subnet per availability zone. Each is a /24 carved out of the
# VPC range, numbered 0 .. N-1.
resource "aws_subnet" "private" {
  count = length(var.availability_zones)

  vpc_id            = aws_vpc.main.id
  availability_zone = var.availability_zones[count.index]
  cidr_block        = cidrsubnet(aws_vpc.main.cidr_block, 8, count.index)
}

# One public subnet per availability zone. The /24 network numbers continue
# after the private ones (N .. 2N-1) so the ranges never overlap.
resource "aws_subnet" "public" {
  count = length(var.availability_zones)

  vpc_id                  = aws_vpc.main.id
  availability_zone       = var.availability_zones[count.index]
  cidr_block              = cidrsubnet(aws_vpc.main.cidr_block, 8, length(var.availability_zones) + count.index)
  map_public_ip_on_launch = true
}

# Internet gateway attached to the VPC; used by the public subnets.
resource "aws_internet_gateway" "gw" {
  vpc_id = aws_vpc.main.id
}

# Default route of the VPC's main route table points at the internet
# gateway. The public subnets have no explicit association, so they use
# this table.
resource "aws_route" "internet_access" {
  destination_cidr_block = "0.0.0.0/0"
  gateway_id             = aws_internet_gateway.gw.id
  route_table_id         = aws_vpc.main.main_route_table_id
}

# One Elastic IP per availability zone, consumed by the NAT gateways that
# give the private subnets outbound internet connectivity.
resource "aws_eip" "gw" {
  count = length(var.availability_zones)

  vpc = true

  # Created only after the internet gateway exists.
  depends_on = [aws_internet_gateway.gw]
}

# One NAT gateway per availability zone, placed in the matching public
# subnet and bound to the matching Elastic IP.
resource "aws_nat_gateway" "gw" {
  count = length(var.availability_zones)

  allocation_id = aws_eip.gw[count.index].id
  subnet_id     = aws_subnet.public[count.index].id
}

# One route table per availability zone for the private subnets; all
# non-local traffic leaves through that zone's NAT gateway.
resource "aws_route_table" "private" {
  count = length(var.availability_zones)

  vpc_id = aws_vpc.main.id

  route {
    cidr_block     = "0.0.0.0/0"
    nat_gateway_id = aws_nat_gateway.gw[count.index].id
  }
}

# Explicitly bind every private subnet to its own route table so it does
# not fall back to the VPC's main route table.
resource "aws_route_table_association" "private" {
  count = length(var.availability_zones)

  route_table_id = aws_route_table.private[count.index].id
  subnet_id      = aws_subnet.private[count.index].id
}

# Security group named after the namespace. It declares no inline rules;
# rules are attached through separate aws_security_group_rule resources.
resource "aws_security_group" "sg" {
  vpc_id      = aws_vpc.main.id
  name        = local.namespace
  description = "Default security group"
}

# Egress rule: any protocol, any port, any destination.
resource "aws_security_group_rule" "ecs_task_egress_rule" {
  security_group_id = aws_security_group.sg.id

  type        = "egress"
  description = "Allows task to establish connections to all resources"
  protocol    = "-1"
  from_port   = "0"
  to_port     = "0"
  cidr_blocks = ["0.0.0.0/0"]
}

Наконец, кластер ECS и события Cloudwatch:

# ECS cluster that the service and the scheduled task target.
resource "aws_ecs_cluster" "cluster" {
  name = "cluster"
}

# Fargate task definition for the cron job: a single container named "app"
# that runs the image from the application's ECR repository.
resource "aws_ecs_task_definition" "cron" {
  family                   = "cron"
  network_mode             = "awsvpc"
  requires_compatibilities = ["FARGATE"]
  execution_role_arn       = aws_iam_role.ecs-tasks.arn
  cpu                      = var.task_cpu
  memory                   = var.task_memory

  # Built with jsonencode() instead of a hand-written heredoc so the JSON is
  # well-formed whatever the interpolated values contain. The former
  # container-level "networkMode" key is dropped: network mode is a
  # task-level setting (network_mode above), not a container definition
  # field.
  container_definitions = jsonencode([
    {
      name  = "app"
      image = aws_ecr_repository.app.repository_url

      # The variables are strings; the container definition needs numbers.
      cpu    = tonumber(var.task_cpu)
      memory = tonumber(var.task_memory)

      portMappings = []
    }
  ])
}

# Fargate service for the cron task definition. desired_count is 0, so the
# service itself keeps no task running.
resource "aws_ecs_service" "service" {
  name            = "service"
  cluster         = aws_ecs_cluster.cluster.id
  task_definition = aws_ecs_task_definition.cron.arn
  launch_type     = "FARGATE"
  desired_count   = 0

  network_configuration {
    # Every private subnet, one per availability zone.
    subnets         = aws_subnet.private[*].id
    security_groups = [aws_security_group.sg.id]
  }
}

# Trust policy letting the ECS tasks service (ecs-tasks.amazonaws.com)
# assume the task execution role.
data "aws_iam_policy_document" "ecs-tasks_assume_role_policy" {
  statement {
    principals {
      identifiers = ["ecs-tasks.amazonaws.com"]
      type        = "Service"
    }

    actions = ["sts:AssumeRole"]
  }
}

# Role used as the execution role of the task definition.
resource "aws_iam_role" "ecs-tasks" {
  assume_role_policy = data.aws_iam_policy_document.ecs-tasks_assume_role_policy.json
  name               = "${local.namespace}-ecs"
}

# Attach the AWS-managed AmazonECSTaskExecutionRolePolicy so the execution
# role can pull from ECR and write CloudWatch logs.
resource "aws_iam_role_policy_attachment" "ecs-tasks_AmazonECSTaskExecutionRolePolicy" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
  role       = aws_iam_role.ecs-tasks.name
}

# Role that CloudWatch Events assumes when the scheduled rule fires.
resource "aws_iam_role" "events" {
  name               = "${local.namespace}-events"
  assume_role_policy = data.aws_iam_policy_document.events_assume_role_policy.json
}

# Trust policy: only the CloudWatch Events service may assume the role.
data "aws_iam_policy_document" "events_assume_role_policy" {
  statement {
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["events.amazonaws.com"]
    }
  }
}

# Permissions of the events role. A trust policy alone grants nothing:
# without these statements CloudWatch Events is not allowed to start the
# ECS task, and every scheduled invocation fails.
data "aws_iam_policy_document" "events_run_task" {
  # Start the cron task definition, and only on this cluster.
  statement {
    effect    = "Allow"
    actions   = ["ecs:RunTask"]
    resources = [aws_ecs_task_definition.cron.arn]

    condition {
      test     = "ArnEquals"
      variable = "ecs:cluster"
      values   = [aws_ecs_cluster.cluster.arn]
    }
  }

  # RunTask hands the task execution role to ECS, which requires
  # iam:PassRole on that role.
  statement {
    effect    = "Allow"
    actions   = ["iam:PassRole"]
    resources = [aws_iam_role.ecs-tasks.arn]

    condition {
      test     = "StringLike"
      variable = "iam:PassedToService"
      values   = ["ecs-tasks.amazonaws.com"]
    }
  }
}

# Attach the permissions above to the events role as an inline policy.
resource "aws_iam_role_policy" "events_run_task" {
  name   = "${local.namespace}-events-run-task"
  role   = aws_iam_role.events.id
  policy = data.aws_iam_policy_document.events_run_task.json
}

# CloudWatch Events rule that fires on var.schedule_expression.
resource "aws_cloudwatch_event_rule" "scheduled_task" {
  name                = local.namespace
  description         = "Runs fargate task ${local.namespace}: ${var.schedule_expression}"
  schedule_expression = var.schedule_expression
  is_enabled          = true
}

# Target of the scheduled rule: run one instance of the cron task on the
# ECS cluster each time the rule fires.
resource "aws_cloudwatch_event_target" "scheduled_task" {
  rule      = aws_cloudwatch_event_rule.scheduled_task.name
  target_id = local.namespace
  arn       = aws_ecs_cluster.cluster.arn

  # Role that CloudWatch Events assumes to start the task. It must be
  # allowed to call ecs:RunTask and to pass the task execution role.
  role_arn = aws_iam_role.events.arn

  ecs_target {
    task_count          = 1
    task_definition_arn = aws_ecs_task_definition.cron.arn

    # launch_type defaults to EC2 when omitted. The task definition is
    # Fargate-only, so the launch type has to be stated explicitly or the
    # invocation fails.
    launch_type      = "FARGATE"
    platform_version = "LATEST"

    # Tasks using the awsvpc network mode need their subnets and security
    # groups supplied at run time. The private subnets reach the internet
    # through the NAT gateways, so no public IP is assigned.
    network_configuration {
      subnets          = aws_subnet.private[*].id
      security_groups  = [aws_security_group.sg.id]
      assign_public_ip = false
    }
  }
}

Единственный способ добиться запуска сервиса — установить desired_count равным 1, но я хочу настроить cron с запуском раз в 5 минут.

...