AWS Lambda теряет соединение с SQS в VPC с NAT-шлюзом и EIP примерно через 12 часов - PullRequest
0 голосов
/ 10 мая 2018

Я настроил VPC, включающий 3 подсети, 1 интернет-шлюз, 1 NAT-шлюз, 1 RDS-кластер, SQS-очереди и еще несколько вещей (полная конфигурация приведена ниже).

Сразу после развертывания все работает нормально. Мои Lambda-функции могут без проблем получать доступ к RDS, SQS и публиковать в SNS. Однако через несколько часов (думаю, примерно через 12) все перестает работать!

После этого отключения вызов той же лямбды, которая работала несколько часов назад, приводит к таймауту. Это тот же результат, который вы получаете при удалении шлюза NAT (поскольку Lambda больше не будет иметь доступа к SQS или SNS в VPC).

Вот мой шаблон CloudFormation / Serverless:

# VPC (10.0.0.0/16) that the subnets, gateways and security groups below
# are created in.
ServerlessVPC:
  Type: AWS::EC2::VPC
  Properties:
    CidrBlock: '10.0.0.0/16'
    # Both DNS resolution and DNS hostnames are switched on.
    EnableDnsHostnames: true
    EnableDnsSupport: true
    Tags:
      - Key: Name
        Value: 'pulse-${self:provider.stage}'

# Elastic IP whose allocation id is consumed by NATGateway.
ElasticIP:
  Type: AWS::EC2::EIP
  # Allocate only after the internet gateway has been attached to the VPC.
  DependsOn: AttachInternetGateway
  Properties:
    Domain: vpc

# Internet gateway; AttachInternetGateway binds it to ServerlessVPC.
InternetGateway:
  Type: AWS::EC2::InternetGateway
  Properties:
    Tags:
      - Key: Name
        Value: 'pulse-${self:provider.stage}'

# NAT gateway placed in the public subnet (SubnetAPublic) and bound to the
# ElasticIP allocation. NATRoute sends the Lambda route table's default
# traffic through it.
NATGateway:
  Type: AWS::EC2::NatGateway
  Properties:
    AllocationId:
      # Long-form Fn::GetAtt takes a [logicalId, attribute] list, matching
      # the other Fn::GetAtt usages in this template.
      Fn::GetAtt: [ElasticIP, AllocationId]
    SubnetId:
      Ref: SubnetAPublic
    Tags:
      - Key: Name
        Value: pulse-${self:provider.stage}

# Attaches InternetGateway to ServerlessVPC.
AttachInternetGateway:
  Type: AWS::EC2::VPCGatewayAttachment
  DependsOn: [InternetGateway, ServerlessVPC]
  Properties:
    InternetGatewayId: {Ref: InternetGateway}
    VpcId: {Ref: ServerlessVPC}

# Route table associated with the public subnet; InternetRoute adds its
# default route to the internet gateway.
RouteTableEC2Bastion:
  Type: AWS::EC2::RouteTable
  Properties:
    VpcId: {Ref: ServerlessVPC}
    Tags:
      - Key: Name
        Value: 'pulse-${self:provider.stage}-bastion'

# Route table associated with the two private subnets; NATRoute adds its
# default route to the NAT gateway.
RouteTableLambda:
  Type: AWS::EC2::RouteTable
  Properties:
    VpcId: {Ref: ServerlessVPC}
    Tags:
      - Key: Name
        Value: 'pulse-${self:provider.stage}-lambda'

# Default route (0.0.0.0/0) of the bastion route table, pointing at the
# internet gateway.
InternetRoute:
  Type: AWS::EC2::Route
  # A route that targets an internet gateway must wait for the gateway to be
  # attached to the VPC, not merely for the gateway resource to exist.
  DependsOn:
    - RouteTableEC2Bastion
    - AttachInternetGateway
  Properties:
    RouteTableId:
      Ref: RouteTableEC2Bastion
    DestinationCidrBlock: '0.0.0.0/0'
    GatewayId:
      Ref: InternetGateway

# Default route (0.0.0.0/0) of the Lambda route table, pointing at the
# NAT gateway.
NATRoute:
  Type: AWS::EC2::Route
  # Depend on the resources this route actually references: the route table
  # and the NAT gateway (the internet gateway is not used by this route).
  DependsOn:
    - RouteTableLambda
    - NATGateway
  Properties:
    RouteTableId:
      Ref: RouteTableLambda
    DestinationCidrBlock: '0.0.0.0/0'
    NatGatewayId:
      Ref: NATGateway

# Binds the public subnet to the bastion route table.
PublicSubnetRouteTableAssociation:
  Type: AWS::EC2::SubnetRouteTableAssociation
  DependsOn: [SubnetAPublic, RouteTableEC2Bastion]
  Properties:
    RouteTableId: {Ref: RouteTableEC2Bastion}
    SubnetId: {Ref: SubnetAPublic}

# Binds private subnet B to the Lambda route table.
LambdaSubnetRouteTableAssociationA:
  Type: AWS::EC2::SubnetRouteTableAssociation
  DependsOn: [SubnetBPrivate, RouteTableLambda]
  Properties:
    RouteTableId: {Ref: RouteTableLambda}
    SubnetId: {Ref: SubnetBPrivate}

# Binds private subnet C to the Lambda route table.
LambdaSubnetRouteTableAssociationB:
  Type: AWS::EC2::SubnetRouteTableAssociation
  DependsOn: [SubnetCPrivate, RouteTableLambda]
  Properties:
    RouteTableId: {Ref: RouteTableLambda}
    SubnetId: {Ref: SubnetCPrivate}

# Public subnet 10.0.0.0/24 in availability zone "a" of the provider region;
# the NAT gateway and the bastion host are placed here.
SubnetAPublic:
  Type: AWS::EC2::Subnet
  DependsOn: ServerlessVPC
  Properties:
    VpcId: {Ref: ServerlessVPC}
    CidrBlock: '10.0.0.0/24'
    AvailabilityZone: '${self:provider.region}a'
    Tags:
      - Key: Name
        Value: 'pulse-${self:provider.stage}-publicA'

# Private subnet 10.0.1.0/24 in availability zone "b" of the provider region.
SubnetBPrivate:
  Type: AWS::EC2::Subnet
  DependsOn: ServerlessVPC
  Properties:
    VpcId: {Ref: ServerlessVPC}
    CidrBlock: '10.0.1.0/24'
    AvailabilityZone: '${self:provider.region}b'
    Tags:
      - Key: Name
        Value: 'pulse-${self:provider.stage}-privateB'

# Private subnet 10.0.2.0/24 in availability zone "c" of the provider region.
SubnetCPrivate:
  Type: AWS::EC2::Subnet
  DependsOn: ServerlessVPC
  Properties:
    VpcId: {Ref: ServerlessVPC}
    CidrBlock: '10.0.2.0/24'
    AvailabilityZone: '${self:provider.region}c'
    Tags:
      - Key: Name
        Value: 'pulse-${self:provider.stage}-privateC'

# Bastion EC2 instance in the public subnet, reachable through a public IP
# and guarded by BastionSecurityGroup.
BastionServer:
  Type: AWS::EC2::Instance
  # An instance that receives a public IP needs the internet gateway to be
  # attached to the VPC first, so wait for the attachment explicitly.
  DependsOn: AttachInternetGateway
  Properties:
    ImageId: ami-3bfab942
    InstanceType: t2.nano
    KeyName: remente-pulse-${self:provider.stage}-bastion
    NetworkInterfaces:
      - AssociatePublicIpAddress: true
        DeleteOnTermination: true
        # DeviceIndex is documented as a string property.
        DeviceIndex: '0'
        GroupSet:
          - Ref: BastionSecurityGroup
        SubnetId:
          Ref: SubnetAPublic
    Tags:
      - Key: Name
        Value: pulse-${self:provider.stage}-bastion

# Security group for the serverless functions. It declares no ingress rules
# of its own; RDSSecurityGroup references it as an allowed traffic source.
ServerlessSecurityGroup:
  Type: AWS::EC2::SecurityGroup
  DependsOn: ServerlessVPC
  Properties:
    VpcId: {Ref: ServerlessVPC}
    GroupDescription: Serverless Functions security group

# Security group for the bastion host: allows inbound SSH (tcp/22).
BastionSecurityGroup:
  Type: AWS::EC2::SecurityGroup
  DependsOn: ServerlessVPC
  Properties:
    VpcId: {Ref: ServerlessVPC}
    GroupDescription: Bastion EC2 instance security group
    SecurityGroupIngress:
      # NOTE(review): SSH is open to every IPv4 address (0.0.0.0/0);
      # consider narrowing this to known source ranges.
      - CidrIp: '0.0.0.0/0'
        IpProtocol: tcp
        FromPort: 22
        ToPort: 22

# Security group for the database: allows tcp/3306 only from the serverless
# functions' security group and from the bastion's security group.
RDSSecurityGroup:
  Type: AWS::EC2::SecurityGroup
  DependsOn: ServerlessVPC
  Properties:
    VpcId: {Ref: ServerlessVPC}
    GroupDescription: Ingress for RDS Instance
    SecurityGroupIngress:
      - Description: Lambda access
        SourceSecurityGroupId: {Ref: ServerlessSecurityGroup}
        IpProtocol: tcp
        FromPort: 3306
        ToPort: 3306
      - Description: Bastion access
        SourceSecurityGroupId: {Ref: BastionSecurityGroup}
        IpProtocol: tcp
        FromPort: 3306
        ToPort: 3306

# DB subnet group spanning the two private subnets; used by
# ServerlessRDSCluster.
ServerlessRDSSubnetGroup:
  Type: AWS::RDS::DBSubnetGroup
  Properties:
    DBSubnetGroupDescription: RDS Subnet Group
    # Sequence items are indented to match every other list in this template.
    SubnetIds:
      - Ref: SubnetBPrivate
      - Ref: SubnetCPrivate

# Database instance. Despite the logical name, the resource type is a single
# AWS::RDS::DBInstance. Engine, credentials and sizing all come from
# Serverless variables.
ServerlessRDSCluster:
  Type: AWS::RDS::DBInstance
  DependsOn: [ServerlessVPC, RDSSecurityGroup]
  Properties:
    Engine: '${self:custom.dbClient}'
    DBName: '${self:custom.dbName}'
    MasterUsername: '${self:custom.dbUser}'
    MasterUserPassword: '${self:custom.dbPassword}'
    # Instance class and storage size are looked up per stage in
    # serverless.env.yml.
    DBInstanceClass: '${file(./serverless.env.yml):${self:provider.stage}.db.instance}'
    AllocatedStorage: '${file(./serverless.env.yml):${self:provider.stage}.db.storage}'
    DBSubnetGroupName: {Ref: ServerlessRDSSubnetGroup}
    VPCSecurityGroups:
      - Ref: RDSSecurityGroup

# -----------------------------------------------------------------
# Users & Groups

# IAM user with no inline properties; AddUserToPublishTopicGroup places it
# in PublishTopicGroup.
PublishTopicUser:
  Type: AWS::IAM::User

# IAM group whose single inline policy allows sns:Publish on SendSurveyTopic.
PublishTopicGroup:
  Type: AWS::IAM::Group
  Properties:
    Policies:
      - PolicyName: '${self:custom.iamSurveySend}'
        PolicyDocument:
          Version: '2012-10-17'
          Statement:
            - Effect: Allow
              Action: [sns:Publish]
              Resource: {Ref: SendSurveyTopic}

# Adds PublishTopicUser to PublishTopicGroup.
AddUserToPublishTopicGroup:
  Type: AWS::IAM::UserToGroupAddition
  Properties:
    GroupName: {Ref: PublishTopicGroup}
    Users:
      - {Ref: PublishTopicUser}

# Queue policy applied to both survey queues: permits sqs:SendMessage when
# the request's source ARN equals SendSurveyTopic.
QueuePolicy:
  Type: AWS::SQS::QueuePolicy
  Properties:
    Queues:
      - {Ref: SendSurveyEmailQueue}
      - {Ref: SendSurveySMSQueue}
    PolicyDocument:
      Version: '2012-10-17'
      Id: sqs-policy
      Statement:
        - Sid: Allow-SendMessage-to-queues-from-SNS-Topic
          Effect: Allow
          # Principal and Resource are wildcards; the Condition below is
          # what narrows the grant to the SNS topic.
          Principal: '*'
          Action: [sqs:SendMessage]
          Resource: '*'
          Condition:
            ArnEquals:
              aws:SourceArn: {Ref: SendSurveyTopic}

# -----------------------------------------------------------------
# SNS Topics & SQS Queues

# SNS topic with two SQS subscriptions: the email queue and the SMS queue.
SendSurveyTopic:
  Type: AWS::SNS::Topic
  Properties:
    TopicName: '${self:custom.snsSendSurveyTopic}'
    Subscription:
      - Protocol: sqs
        Endpoint:
          Fn::GetAtt:
            - SendSurveyEmailQueue
            - Arn
      - Protocol: sqs
        Endpoint:
          Fn::GetAtt:
            - SendSurveySMSQueue
            - Arn

# Email survey queue: 300 s retention, 60 s visibility timeout. After 10
# receives a message is redirected to SendSurveyEmailQueueDead.
SendSurveyEmailQueue:
  Type: AWS::SQS::Queue
  Properties:
    QueueName: '${self:custom.sqsSurveySendEmail}'
    VisibilityTimeout: 60
    MessageRetentionPeriod: 300
    RedrivePolicy:
      maxReceiveCount: 10
      deadLetterTargetArn:
        Fn::GetAtt:
          - SendSurveyEmailQueueDead
          - Arn

# Dead-letter target of SendSurveyEmailQueue; named after the email queue
# with a "-dead" suffix. Messages are retained for 300 s.
SendSurveyEmailQueueDead:
  Type: AWS::SQS::Queue
  Properties:
    MessageRetentionPeriod: 300
    QueueName: '${self:custom.sqsSurveySendEmail}-dead'

# SMS survey queue: 300 s retention, 60 s visibility timeout. After 10
# receives a message is redirected to the SMS dead-letter queue.
SendSurveySMSQueue:
  Type: AWS::SQS::Queue
  Properties:
    QueueName: ${self:custom.sqsSurveySendSMS}
    MessageRetentionPeriod: 300
    VisibilityTimeout: 60
    RedrivePolicy:
      deadLetterTargetArn:
        # Target this queue's own dead-letter queue; it previously pointed
        # at the email dead-letter queue.
        Fn::GetAtt: [SendSurveySMSQueueDead, Arn]
      maxReceiveCount: 10

# Queue named after the SMS queue with a "-dead" suffix. Messages are
# retained for 300 s.
SendSurveySMSQueueDead:
  Type: AWS::SQS::Queue
  Properties:
    MessageRetentionPeriod: 300
    QueueName: '${self:custom.sqsSurveySendSMS}-dead'

Мое единственное предположение — это как-то связано с Elastic IP, но я не представляю, как избежать этого обрыва соединения.

...