Terraform: изменение размера EMR разрушает весь кластер - PullRequest
0 голосов
/ 03 мая 2018

Я развернул кластер EMR с помощью Terraform, и он работает нормально. Я запускаю 3 типа групп экземпляров, а именно CORE, MASTER и TASK. Теперь мне нужно изменить размер группы экземпляров TASK с помощью Terraform, но это оказывается проблемой, поскольку Terraform фактически уничтожает весь кластер, а затем создаёт его заново.

Вот модуль:

# Loads configurations/default.json from disk through a template_file data
# source; the rendered result is what gets passed to the EMR module as its
# "configurations" argument.
data "template_file" "emr_configurations" {
  template = "${file("configurations/default.json")}"
}

# Instantiates the local "terraform-aws-emr-cluster/" module to create the
# "testclustersetup" EMR cluster (release emr-5.13.0) with three instance
# groups: one MASTER, one TASK (with an autoscaling policy) and one CORE.
module "emr" {
source = "terraform-aws-emr-cluster/"

name          = "testclustersetup"
vpc_id        = "vpc-b231a9d9"
release_label = "emr-5.13.0"

# Applications installed on the cluster.
applications = [
  "Hadoop",
  "Ganglia",
  "Spark",
  "Zeppelin",
]

# Rendered contents of configurations/default.json.
configurations = "${data.template_file.emr_configurations.rendered}"
key_name       = "jumpbox"
subnet_id      = "subnet-3a3db747"

# All instance groups are handed to the module as one list; the module's
# main.tf forwards this list unchanged to aws_emr_cluster.instance_group.
instance_groups = [
  {
    name           = "MasterInstanceGroup"
    instance_role  = "MASTER"
    instance_type  = "m3.xlarge"
    instance_count = "1"
  },
  # TASK group: this is the group whose instance_count the question is about
  # resizing.
  # NOTE(review): this group has instance_role "TASK" but is named
  # "CoreInstanceGroup", the same name as the CORE group below — looks like a
  # copy-paste slip; confirm before renaming, since the plan output marks
  # `name` as "forces new resource".
  {
    name           = "CoreInstanceGroup"
    instance_role  = "TASK"
    instance_type  = "m3.xlarge"
    instance_count = "3"
    bid_price      = "0.30"
    # Autoscaling policy (JSON): capacity between 1 and 4 instances; one rule
    # that adds 1 instance when the average YARNMemoryAvailablePercentage over
    # a 300-second period is below 15 percent, with a 300-second cooldown.
    autoscaling_policy = <<EOF
{
"Constraints": {
"MinCapacity": 1,
"MaxCapacity": 4
},
"Rules": [
{
  "Name": "ScaleOutMemoryPercentage",
  "Description": "Scale out if YARNMemoryAvailablePercentage is less than 15",
  "Action": {
    "SimpleScalingPolicyConfiguration": {
      "AdjustmentType": "CHANGE_IN_CAPACITY",
      "ScalingAdjustment": 1,
      "CoolDown": 300
    }
  },
  "Trigger": {
    "CloudWatchAlarmDefinition": {
      "ComparisonOperator": "LESS_THAN",
      "EvaluationPeriods": 1,
      "MetricName": "YARNMemoryAvailablePercentage",
      "Namespace": "AWS/ElasticMapReduce",
      "Period": 300,
      "Statistic": "AVERAGE",
      "Threshold": 15.0,
      "Unit": "PERCENT"
    }
  }
}
]
}
EOF
  },
  # CORE group.
  # NOTE(review): instance_count is an unquoted number here but a quoted
  # string in the other two groups — confirm the mixed types are intended.
  {
    name           = "CoreInstanceGroup"
    instance_role  = "CORE"
    instance_type  = "m3.xlarge"
    instance_count = 1
    bid_price      = "0.30"
  },
]

# Disabled: per-role master/core sizing inputs (the instance_groups list above
# is used instead).
##master_node_type = "m3.xlarge"
##core_node_type   = "m3.xlarge"
##core_node_count  = 2


# Disabled: bootstrap action inputs.
#bootstrap_name = "runif"
#bootstrap_uri  = "s3://handmade-s3-emr/bootstrap-actions/run-if"
#bootstrap_args = ["instance.isMaster=true", "echo running on master node"]

# Log destination passed to the module as log_uri.
log_uri        = "s3n://handmade-s3-emr/"

# Values the module uses for the Project / Environment tags.
project     = "Test"
environment = "Staging"
}

Изменение, которое я сейчас вношу, — это количество экземпляров в группе экземпляров TASK. Может кто-нибудь подсказать, как мне изменить количество экземпляров, скажем, с 3 на 4?

Файл main.tf модуля, используемого выше, выглядит так:

# The EMR cluster itself. Name, release, applications, configurations, EC2
# attributes, instance groups, log location and tags all come from module
# variables; security groups and IAM roles come from sibling resources in the
# same module.
resource "aws_emr_cluster" "cluster" {
  name           = "${var.name}"
  release_label  = "${var.release_label}"
  applications   = "${var.applications}"
  configurations = "${var.configurations}"

  # Networking, SSH key and instance profile for the cluster's EC2 instances.
  ec2_attributes {
    key_name                          = "${var.key_name}"
    subnet_id                         = "${var.subnet_id}"
    emr_managed_master_security_group = "${aws_security_group.emr_master.id}"
    emr_managed_slave_security_group  = "${aws_security_group.emr_slave.id}"
    instance_profile                  = "${aws_iam_instance_profile.emr_ec2_instance_profile.name}"
    ###instance_profile                  = "${aws_iam_instance_profile.emr_profile.name}"
    service_access_security_group     = "${aws_security_group.emr_service_access.id}"
  }
################################################
# All instance groups (MASTER / CORE / TASK) are declared inline on the
# cluster, taken verbatim from var.instance_groups.
# NOTE(review): in the plan output accompanying this file, a changed
# instance_group entry appears as a removed element plus a new element whose
# instance_type and name are marked "forces new resource", and the whole
# cluster is planned as "-/+ destroy and then create replacement". Resizing a
# group through this attribute therefore replaces the cluster.
instance_group = "${var.instance_groups}"

#####################################################
# Disabled: per-role master/core sizing arguments.
##  master_instance_type = "${var.master_node_type}"
##  core_instance_type   = "${var.core_node_type}"
##  core_instance_count  = "${var.core_node_count}"

  scale_down_behavior = "TERMINATE_AT_TASK_COMPLETION"
# Disabled: bootstrap action.
#  bootstrap_action {
#    path = "${var.bootstrap_uri}"
#    name = "${var.bootstrap_name}"
#    args = "${var.bootstrap_args}"
#  }

  # Single step named "Setup Hadoop Debugging": runs command-runner.jar with
  # the "state-pusher-script" argument; the cluster is terminated if it fails.
  step {
   action_on_failure = "TERMINATE_CLUSTER"
   name   = "Setup Hadoop Debugging"
   hadoop_jar_step {
     jar  = "command-runner.jar"
     args = ["state-pusher-script"]
    }
  }

  # Differences in `step` are ignored when planning. create_before_destroy is
  # present but commented out.
  lifecycle {
    ignore_changes = ["step"]
    #create_before_destroy = true
  }

  log_uri      = "${var.log_uri}"
  service_role = "${aws_iam_role.emr_service_role.name}"
  ##service_role = "${aws_iam_role.iam_emr_service_role.name}"

  visible_to_all_users = true
  # IAM role for the autoscaling_policy defined on instance groups.
  autoscaling_role = "${aws_iam_role.emr_autoscale_role.name}"

  tags {
    Name        = "${var.name}"
    Project     = "${var.project}"
    Environment = "${var.environment}"
  }
}

ВЫВОД TERRAFORM PLAN

------------------------------------------------------------------------

An execution plan has been generated and is shown below.
Resource actions are indicated with the following symbols:
  ~ update in-place
-/+ destroy and then create replacement

Terraform will perform the following actions:

-/+ module.emr.aws_emr_cluster.cluster (new resource required)
      id:                                                 "j-1QKCMWBZM0F7M" => <computed> (forces new resource)
      applications.#:                                     "4" => "4"
      applications.1557323817:                            "Spark" => "Spark"
      applications.2430401924:                            "Ganglia" => "Ganglia"
      applications.3006569794:                            "Hadoop" => "Hadoop"
      applications.3652851695:                            "Zeppelin" => "Zeppelin"
      autoscaling_role:                                   "Stagingautoscaling_role" => "Stagingautoscaling_role"
      cluster_state:                                      "WAITING" => <computed>
      configurations:                                     "[\n  {\n    \"Classification\": \"core-site\",\n    \"Properties\": {\n      \"hadoop.security.groups.cache.secs\": \"250\"\n    }\n  },\n  {\n    \"Classification\": \"mapred-site\",\n    \"Properties\": {\n      \"mapred.tasktracker.map.tasks.maximum\": \"2\",\n      \"mapreduce.map.sort.spill.percent\": \"0.90\",\n      \"mapreduce.tasktracker.reduce.tasks.maximum\": \"5\"\n    }\n  }\n]\n" => "[\n  {\n    \"Classification\": \"core-site\",\n    \"Properties\": {\n      \"hadoop.security.groups.cache.secs\": \"250\"\n    }\n  },\n  {\n    \"Classification\": \"mapred-site\",\n    \"Properties\": {\n      \"mapred.tasktracker.map.tasks.maximum\": \"2\",\n      \"mapreduce.map.sort.spill.percent\": \"0.90\",\n      \"mapreduce.tasktracker.reduce.tasks.maximum\": \"5\"\n    }\n  }\n]\n"
      core_instance_type:                                 "m3.xlarge" => <computed>
      ec2_attributes.#:                                   "1" => "1"
      ec2_attributes.0.emr_managed_master_security_group: "sg-97cf9efa" => "sg-97cf9efa"
      ec2_attributes.0.emr_managed_slave_security_group:  "sg-20f1a04d" => "sg-20f1a04d"
      ec2_attributes.0.instance_profile:                  "StagingJobFlowInstanceProfile" => "StagingJobFlowInstanceProfile"
      ec2_attributes.0.key_name:                          "jumpbox" => "jumpbox"
      ec2_attributes.0.service_access_security_group:     "sg-94ce9ff9" => "sg-94ce9ff9"
      ec2_attributes.0.subnet_id:                         "subnet-3a3db747" => "subnet-3a3db747"
      instance_group.#:                                   "3" => "3"
      instance_group.1995334535.autoscaling_policy:       "" => ""
      instance_group.1995334535.bid_price:                "0.30" => "0.30"
      instance_group.1995334535.ebs_config.#:             "0" => "0"
      instance_group.1995334535.instance_count:           "1" => "1"
      instance_group.1995334535.instance_role:            "CORE" => "CORE"
      instance_group.1995334535.instance_type:            "m3.xlarge" => "m3.xlarge"
      instance_group.1995334535.name:                     "CoreInstanceGroup" => "CoreInstanceGroup"
      instance_group.255882833.autoscaling_policy:        "" => ""
      instance_group.255882833.bid_price:                 "" => ""
      instance_group.255882833.ebs_config.#:              "0" => "0"
      instance_group.255882833.instance_count:            "1" => "1"
      instance_group.255882833.instance_role:             "MASTER" => "MASTER"
      instance_group.255882833.instance_type:             "m3.xlarge" => "m3.xlarge"
      instance_group.255882833.name:                      "MasterInstanceGroup" => "MasterInstanceGroup"
      instance_group.3047466039.autoscaling_policy:       "" => "{\"Constraints\":{\"MaxCapacity\":4,\"MinCapacity\":1},\"Rules\":[{\"Action\":{\"SimpleScalingPolicyConfiguration\":{\"AdjustmentType\":\"CHANGE_IN_CAPACITY\",\"CoolDown\":300,\"ScalingAdjustment\":1}},\"Description\":\"Scale out if YARNMemoryAvailablePercentage is less than 15\",\"Name\":\"ScaleOutMemoryPercentage\",\"Trigger\":{\"CloudWatchAlarmDefinition\":{\"ComparisonOperator\":\"LESS_THAN\",\"EvaluationPeriods\":1,\"MetricName\":\"YARNMemoryAvailablePercentage\",\"Namespace\":\"AWS/ElasticMapReduce\",\"Period\":300,\"Statistic\":\"AVERAGE\",\"Threshold\":15,\"Unit\":\"PERCENT\"}}}]}"
      instance_group.3047466039.bid_price:                "" => "0.30"
      instance_group.3047466039.ebs_config.#:             "" => "0"
      instance_group.3047466039.instance_count:           "" => "3"
      instance_group.3047466039.instance_role:            "" => "TASK"
      instance_group.3047466039.instance_type:            "" => "m3.xlarge" (forces new resource)
      instance_group.3047466039.name:                     "" => "CoreInstanceGroup" (forces new resource)
      instance_group.3499703317.autoscaling_policy:       "{\n\"Constraints\": {\n\"MinCapacity\": 1,\n\"MaxCapacity\": 4\n},\n\"Rules\": [\n{\n  \"Name\": \"ScaleOutMemoryPercentage\",\n  \"Description\": \"Scale out if YARNMemoryAvailablePercentage is less than 15\",\n  \"Action\": {\n    \"SimpleScalingPolicyConfiguration\": {\n      \"AdjustmentType\": \"CHANGE_IN_CAPACITY\",\n      \"ScalingAdjustment\": 1,\n      \"CoolDown\": 300\n    }\n  },\n  \"Trigger\": {\n    \"CloudWatchAlarmDefinition\": {\n      \"ComparisonOperator\": \"LESS_THAN\",\n      \"EvaluationPeriods\": 1,\n      \"MetricName\": \"YARNMemoryAvailablePercentage\",\n      \"Namespace\": \"AWS/ElasticMapReduce\",\n      \"Period\": 300,\n      \"Statistic\": \"AVERAGE\",\n      \"Threshold\": 15.0,\n      \"Unit\": \"PERCENT\"\n    }\n  }\n}\n]\n}\n" => ""
      instance_group.3499703317.bid_price:                "0.30" => ""
      instance_group.3499703317.ebs_config.#:             "0" => "0"
      instance_group.3499703317.instance_count:           "2" => "0"
      instance_group.3499703317.instance_role:            "TASK" => ""
      instance_group.3499703317.instance_type:            "m3.xlarge" => "" (forces new resource)
      instance_group.3499703317.name:                     "CoreInstanceGroup" => "" (forces new resource)
      keep_job_flow_alive_when_no_steps:                  "" => <computed>
      log_uri:                                            "s3n://handmade-s3-emr/" => "s3n://handmade-s3-emr/"
      master_public_dns:                                  "ip-172-21-60-101.eu-central-1.compute.internal" => <computed>
      name:                                               "testclustersetup" => "testclustersetup"
      release_label:                                      "emr-5.13.0" => "emr-5.13.0"
      scale_down_behavior:                                "TERMINATE_AT_TASK_COMPLETION" => "TERMINATE_AT_TASK_COMPLETION"
      service_role:                                       "emrStagingServiceRole" => "emrStagingServiceRole"
      step.#:                                             "1" => "1"
      step.0.action_on_failure:                           "TERMINATE_CLUSTER" => "TERMINATE_CLUSTER"
      step.0.hadoop_jar_step.#:                           "1" => "1"
      step.0.hadoop_jar_step.0.args.#:                    "1" => "1"
      step.0.hadoop_jar_step.0.args.0:                    "state-pusher-script" => "state-pusher-script"
      step.0.hadoop_jar_step.0.jar:                       "command-runner.jar" => "command-runner.jar"
      step.0.name:                                        "Setup Hadoop Debugging" => "Setup Hadoop Debugging"
      tags.%:                                             "3" => "3"
      tags.Environment:                                   "Staging" => "Staging"
      tags.Name:                                          "testclustersetup" => "testclustersetup"
      tags.Project:                                       "Test" => "Test"
      termination_protection:                             "" => <computed>
      visible_to_all_users:                               "true" => "true"

  ~ module.emr.aws_security_group.emr_service_access
      egress.#:                                           "2" => "1"
      egress.1243700508.cidr_blocks.#:                    "0" => "0"
      egress.1243700508.description:                      "" => ""
      egress.1243700508.from_port:                        "8443" => "0"
      egress.1243700508.ipv6_cidr_blocks.#:               "0" => "0"
      egress.1243700508.prefix_list_ids.#:                "0" => "0"
      egress.1243700508.protocol:                         "tcp" => ""
      egress.1243700508.security_groups.#:                "2" => "0"
      egress.1243700508.security_groups.3921408127:       "sg-97cf9efa" => ""
      egress.1243700508.security_groups.933318784:        "sg-20f1a04d" => ""
      egress.1243700508.self:                             "false" => "false"
      egress.1243700508.to_port:                          "8443" => "0"
      egress.482069346.cidr_blocks.#:                     "1" => "1"
      egress.482069346.cidr_blocks.0:                     "0.0.0.0/0" => "0.0.0.0/0"
      egress.482069346.description:                       "" => ""
      egress.482069346.from_port:                         "0" => "0"
      egress.482069346.ipv6_cidr_blocks.#:                "0" => "0"
      egress.482069346.prefix_list_ids.#:                 "0" => "0"
      egress.482069346.protocol:                          "-1" => "-1"
      egress.482069346.security_groups.#:                 "0" => "0"
      egress.482069346.self:                              "false" => "false"
      egress.482069346.to_port:                           "0" => "0"

  ~ module.emr.aws_vpc_endpoint.s3
      policy:                                             "{\"Statement\":[{\"Action\":\"*\",\"Effect\":\"Allow\",\"Principal\":\"*\",\"Resource\":\"*\"}],\"Version\":\"2008-10-17\"}" => "{\"Statement\":[{\"Action\":\"*\",\"Effect\":\"Allow\",\"Principal\":\"*\",\"Resource\":\"*\"}]}"


Plan: 1 to add, 2 to change, 1 to destroy.

Ответы [ 2 ]

0 голосов
/ 03 мая 2018

Это проблема с Terraform. Возможно, вы недавно изменили отступ в своем коде Terraform, который Terraform распознает как изменение. Например,

instance_group.3047466039.instance_type: "" => "m3.xlarge" (forces new resource)

Я уверен, что вы не меняли тип экземпляра. Чтобы это исправить, перейдите в GitHub, скопируйте код Terraform (из той версии, когда он работал нормально) из предыдущей копии файла в истории (History) и замените им текущий ресурс в коде Terraform. Затем сохраните, закоммитьте изменения и повторно запустите terraform plan / terraform apply. Это должно сработать.

0 голосов
/ 03 мая 2018

Если вы посмотрите на документацию CloudFormation по EMR здесь , вы увидите, что изменение CoreInstanceGroup требует замены ресурса, о которой вы можете прочитать подробнее здесь . Хотя это не документация Terraform, если CloudFormation требует замены, то её требует и Terraform.

CloudFormation создаёт новый ресурс перед удалением старого. В Terraform использование параметра жизненного цикла create_before_destroy может помочь минимизировать время простоя.

...