Questions
Build a CI/CD pipeline with CodePipeline that deploys to ECS with blue-green deployments.
The Scenario
Your deployment process is problematic:
Current State:
├── Manual deployments via SSH
├── Deployment time: 45 minutes
├── Rollback: Manual, takes 1+ hour
├── Downtime: 5-10 minutes per deployment
├── Testing: Manual, often skipped
└── Last failed deployment: 3 days recovery
The Challenge
Build a fully automated CI/CD pipeline using AWS CodePipeline, CodeBuild, and CodeDeploy with blue-green deployments to ECS for zero-downtime releases.
A junior engineer might deploy directly to production, skip testing stages, use rolling deployments without health checks, or have no rollback strategy. These approaches cause production outages, release bugs, create extended downtime, and make recovery difficult.
A senior engineer implements multi-stage pipelines with proper testing, uses blue-green deployments for instant rollback, configures deployment health checks, and automates everything from commit to production.
Step 1: Pipeline Architecture
CI/CD Pipeline Architecture:
┌────────────┐    ┌────────────┐    ┌────────────┐    ┌────────────┐
│   Source   │───►│   Build    │───►│    Test    │───►│   Deploy   │
│            │    │            │    │            │    │ (Staging)  │
│   GitHub   │    │ CodeBuild  │    │ CodeBuild  │    │    ECS     │
│  Webhook   │    │ + ECR Push │    │ + Reports  │    │            │
└────────────┘    └────────────┘    └────────────┘    └─────┬──────┘
                                                            │
                                                     ┌──────▼──────┐
                                                     │   Manual    │
                                                     │  Approval   │
                                                     └──────┬──────┘
                                                            │
                                    ┌────────────┐    ┌─────▼──────┐
                                    │  Rollback  │◄───│   Deploy   │
                                    │   (Auto)   │    │   (Prod)   │
                                    └────────────┘    │ Blue-Green │
                                                      └────────────┘

Step 2: CodePipeline Configuration
# CodePipeline for orders-service. Stages execute in declaration order:
# Source -> Build -> Test -> DeployStaging -> Approval -> DeployProduction.
resource "aws_codepipeline" "main" {
  name     = "orders-service-pipeline"
  role_arn = aws_iam_role.codepipeline.arn

  # Artifacts handed between stages are kept in S3, encrypted with aws_kms_key.artifacts.
  artifact_store {
    type     = "S3"
    location = aws_s3_bucket.artifacts.bucket

    encryption_key {
      type = "KMS"
      id   = aws_kms_key.artifacts.arn
    }
  }

  # --- Source: main branch of myorg/orders-service via the GitHub connection ---
  stage {
    name = "Source"

    action {
      name             = "Source"
      category         = "Source"
      owner            = "AWS"
      provider         = "CodeStarSourceConnection"
      version          = "1"
      output_artifacts = ["source_output"]

      configuration = {
        ConnectionArn    = aws_codestarconnections_connection.github.arn
        FullRepositoryId = "myorg/orders-service"
        BranchName       = "main"
      }
    }
  }

  # --- Build: runs the "build" CodeBuild project and emits build_output ---
  stage {
    name = "Build"

    action {
      name             = "Build"
      category         = "Build"
      owner            = "AWS"
      provider         = "CodeBuild"
      version          = "1"
      input_artifacts  = ["source_output"]
      output_artifacts = ["build_output"]

      configuration = {
        ProjectName = aws_codebuild_project.build.name
      }
    }
  }

  # --- Test: unit tests first (run_order 1), then integration tests (run_order 2) ---
  stage {
    name = "Test"

    action {
      name             = "UnitTests"
      category         = "Build"
      owner            = "AWS"
      provider         = "CodeBuild"
      version          = "1"
      run_order        = 1
      input_artifacts  = ["source_output"]
      output_artifacts = ["test_output"]

      configuration = {
        ProjectName = aws_codebuild_project.test.name
      }
    }

    action {
      name            = "IntegrationTests"
      category        = "Build"
      owner           = "AWS"
      provider        = "CodeBuild"
      version         = "1"
      run_order       = 2
      input_artifacts = ["source_output"]

      configuration = {
        # NOTE(review): aws_codebuild_project.integration_test is not declared in the
        # CodeBuild section shown alongside this pipeline - confirm it exists elsewhere.
        ProjectName = aws_codebuild_project.integration_test.name
      }
    }
  }

  # --- Staging: standard ECS deploy driven by imagedefinitions.json from build_output ---
  stage {
    name = "DeployStaging"

    action {
      name            = "Deploy"
      category        = "Deploy"
      owner           = "AWS"
      provider        = "ECS"
      version         = "1"
      input_artifacts = ["build_output"]

      configuration = {
        ClusterName = aws_ecs_cluster.staging.name
        ServiceName = aws_ecs_service.staging.name
        FileName    = "imagedefinitions.json"
      }
    }
  }

  # --- Approval: manual gate; the request is published to the approvals SNS topic ---
  stage {
    name = "Approval"

    action {
      name     = "ManualApproval"
      category = "Approval"
      owner    = "AWS"
      provider = "Manual"
      version  = "1"

      configuration = {
        NotificationArn    = aws_sns_topic.approvals.arn
        CustomData         = "Please review staging deployment before production"
        ExternalEntityLink = "https://staging.example.com"
      }
    }
  }

  # --- Production: blue-green deploy through CodeDeploy ---
  # Both the task definition and the AppSpec are read from build_output.
  stage {
    name = "DeployProduction"

    action {
      name            = "Deploy"
      category        = "Deploy"
      owner           = "AWS"
      provider        = "CodeDeployToECS"
      version         = "1"
      input_artifacts = ["build_output"]

      configuration = {
        ApplicationName                = aws_codedeploy_app.main.name
        DeploymentGroupName            = aws_codedeploy_deployment_group.production.deployment_group_name
        TaskDefinitionTemplateArtifact = "build_output"
        TaskDefinitionTemplatePath     = "taskdef.json"
        AppSpecTemplateArtifact        = "build_output"
        AppSpecTemplatePath            = "appspec.yaml"
      }
    }
  }
}

Step 3: CodeBuild Projects
# CodeBuild project that builds the Docker image and pushes it to ECR.
# Source and artifacts are both supplied by CodePipeline.
resource "aws_codebuild_project" "build" {
  name         = "orders-service-build"
  description  = "Build Docker image and push to ECR"
  service_role = aws_iam_role.codebuild.arn

  source {
    type      = "CODEPIPELINE"
    buildspec = "buildspec.yml"
  }

  artifacts {
    type = "CODEPIPELINE"
  }

  environment {
    type                        = "LINUX_CONTAINER"
    image                       = "aws/codebuild/amazonlinux2-x86_64-standard:4.0"
    compute_type                = "BUILD_GENERAL1_MEDIUM"
    image_pull_credentials_type = "CODEBUILD"
    privileged_mode             = true # Required for Docker builds

    # Values consumed by buildspec.yml for the ECR login and image tagging.
    environment_variable {
      name  = "AWS_ACCOUNT_ID"
      value = data.aws_caller_identity.current.account_id
    }

    environment_variable {
      name  = "ECR_REPO"
      value = aws_ecr_repository.main.repository_url
    }

    environment_variable {
      name  = "AWS_DEFAULT_REGION"
      value = var.region
    }
  }

  # Build cache lives under the cache/ prefix of the artifacts bucket.
  cache {
    type     = "S3"
    location = "${aws_s3_bucket.artifacts.bucket}/cache"
  }

  logs_config {
    cloudwatch_logs {
      group_name  = "/codebuild/orders-service"
      stream_name = "build"
    }
  }
}
# CodeBuild project that runs the test buildspec (buildspec-test.yml).
# Source and artifacts are both supplied by CodePipeline.
resource "aws_codebuild_project" "test" {
  name         = "orders-service-test"
  description  = "Run unit and integration tests"
  service_role = aws_iam_role.codebuild.arn

  artifacts {
    type = "CODEPIPELINE"
  }

  environment {
    compute_type = "BUILD_GENERAL1_MEDIUM"
    # buildspec-test.yml requests `nodejs: 18` under runtime-versions. The
    # standard:4.0 (Amazon Linux 2) image does not offer that runtime, so the
    # install phase would fail; standard:5.0 provides Node.js 18.
    image = "aws/codebuild/amazonlinux2-x86_64-standard:5.0"
    type  = "LINUX_CONTAINER"
  }

  source {
    type      = "CODEPIPELINE"
    buildspec = "buildspec-test.yml"
  }

  # Only logging is configured here; the test/coverage report groups are
  # declared in the `reports` section of buildspec-test.yml.
  logs_config {
    cloudwatch_logs {
      group_name  = "/codebuild/orders-service"
      stream_name = "test"
    }
  }
}

Step 4: Buildspec Files
# buildspec.yml - Build and push Docker image
#
# Expects AWS_ACCOUNT_ID, ECR_REPO and AWS_DEFAULT_REGION in the build
# environment. Produces imagedefinitions.json, taskdef.json and appspec.yaml
# as pipeline artifacts.
version: 0.2

env:
  variables:
    DOCKER_BUILDKIT: "1"

phases:
  pre_build:
    commands:
      - echo Logging in to Amazon ECR...
      - aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com
      # Short commit hash is used as the image tag; falls back to "latest" when empty.
      - COMMIT_HASH=$(echo $CODEBUILD_RESOLVED_SOURCE_VERSION | cut -c 1-7)
      # Exported (not merely assigned) so that child processes - in particular
      # envsubst in post_build - can see it. envsubst only substitutes variables
      # present in its environment; an unexported shell variable would render as
      # an empty string in taskdef.json.
      # NOTE(review): taskdef-template.json is not shown here; this assumes it references $IMAGE_TAG.
      - export IMAGE_TAG=${COMMIT_HASH:=latest}
  build:
    commands:
      - echo Building Docker image...
      - docker build -t $ECR_REPO:$IMAGE_TAG -t $ECR_REPO:latest .
  post_build:
    commands:
      - echo Pushing Docker image...
      - docker push $ECR_REPO:$IMAGE_TAG
      - docker push $ECR_REPO:latest
      - echo Writing image definitions...
      # Consumed by the pipeline's ECS deploy action (FileName = imagedefinitions.json).
      - printf '[{"name":"api","imageUri":"%s"}]' $ECR_REPO:$IMAGE_TAG > imagedefinitions.json
      - echo Writing task definition...
      # Render the task definition consumed by the CodeDeployToECS action.
      - envsubst < taskdef-template.json > taskdef.json

artifacts:
  files:
    - imagedefinitions.json
    - taskdef.json
    - appspec.yaml

cache:
  paths:
    - '/root/.cache/**/*'

# buildspec-test.yml - Run tests
version: 0.2

phases:
  # Select the Node.js runtime and install dependencies from the lockfile.
  install:
    runtime-versions:
      nodejs: 18
    commands:
      - npm ci

  # Run the test suite with coverage enabled.
  build:
    commands:
      - echo Running tests...
      - npm run test:coverage

  # Lint after the tests have run.
  post_build:
    commands:
      - echo Running linting...
      - npm run lint

# Report groups: Clover coverage data and JUnit-format test results.
reports:
  jest-reports:
    files:
      - 'coverage/clover.xml'
    file-format: CLOVERXML
  junit-reports:
    files:
      - 'junit.xml'
    file-format: JUNITXML

# Everything under coverage/ is published as the action's output artifact.
artifacts:
  files:
    - coverage/**/*
  base-directory: '.'

Step 5: Blue-Green Deployment with CodeDeploy
# CodeDeploy application for the orders service, targeting the ECS compute platform.
resource "aws_codedeploy_app" "main" {
  name             = "orders-service"
  compute_platform = "ECS"
}
# Blue-green deployment group for the production ECS service.
# Traffic is shifted all at once between the blue and green target groups,
# with automatic rollback on deployment failure or when a listed alarm fires.
resource "aws_codedeploy_deployment_group" "production" {
  app_name               = aws_codedeploy_app.main.name
  deployment_group_name  = "production"
  deployment_config_name = "CodeDeployDefault.ECSAllAtOnce"
  service_role_arn       = aws_iam_role.codedeploy.arn

  ecs_service {
    cluster_name = aws_ecs_cluster.production.name
    service_name = aws_ecs_service.production.name
  }

  deployment_style {
    deployment_option = "WITH_TRAFFIC_CONTROL"
    deployment_type   = "BLUE_GREEN"
  }

  blue_green_deployment_config {
    deployment_ready_option {
      # Reroute traffic as soon as the green task set is ready.
      # wait_time_in_minutes is intentionally not set: it only applies to
      # action_on_timeout = "STOP_DEPLOYMENT" (the window for a manual
      # reroute) and has no meaning with CONTINUE_DEPLOYMENT.
      action_on_timeout = "CONTINUE_DEPLOYMENT"
    }

    # Keep the old (blue) task set for 60 minutes after success so a
    # rollback can switch straight back to it.
    terminate_blue_instances_on_deployment_success {
      action                           = "TERMINATE"
      termination_wait_time_in_minutes = 60
    }
  }

  load_balancer_info {
    target_group_pair_info {
      # Listener that carries production traffic.
      prod_traffic_route {
        listener_arns = [aws_lb_listener.https.arn]
      }

      # Listener used to reach the replacement task set before the switch.
      test_traffic_route {
        listener_arns = [aws_lb_listener.test.arn]
      }

      target_group {
        name = aws_lb_target_group.blue.name
      }

      target_group {
        name = aws_lb_target_group.green.name
      }
    }
  }

  auto_rollback_configuration {
    enabled = true
    events  = ["DEPLOYMENT_FAILURE", "DEPLOYMENT_STOP_ON_ALARM"]
  }

  # Alarms that stop the deployment (and, via the rollback events above, roll it back).
  alarm_configuration {
    enabled = true
    alarms = [
      aws_cloudwatch_metric_alarm.error_rate.alarm_name,
      aws_cloudwatch_metric_alarm.latency.alarm_name
    ]
  }
}

Step 6: AppSpec for ECS
# appspec.yaml
# AppSpec for the ECS blue-green deployment. The pipeline's CodeDeployToECS
# action reads this file from the build_output artifact.
version: 0.0

Resources:
  - TargetService:
      Type: AWS::ECS::Service
      Properties:
        # Placeholder; left as-is in the file checked into the repository.
        TaskDefinition: <TASK_DEFINITION>
        # Container and port that the load balancer routes to.
        LoadBalancerInfo:
          ContainerName: "api"
          ContainerPort: 3000
        PlatformVersion: "LATEST"
        NetworkConfiguration:
          AwsvpcConfiguration:
            # Placeholder subnet and security-group IDs; replace with real values.
            Subnets:
              - "subnet-xxx"
              - "subnet-yyy"
            SecurityGroups:
              - "sg-xxx"
            AssignPublicIp: "DISABLED"

# Each hook names the Lambda function to invoke at that lifecycle event.
Hooks:
  - BeforeInstall: "LambdaFunctionToValidateBeforeInstall"
  - AfterInstall: "LambdaFunctionToValidateAfterInstall"
  - BeforeAllowTraffic: "LambdaFunctionToValidateBeforeTraffic"
  - AfterAllowTraffic: "LambdaFunctionToValidateAfterTraffic"

Step 7: Deployment Hooks (Lambda)
import json
import urllib.error
import urllib.request

import boto3
# CodeDeploy client created once at import time; both hook handlers below use it
# to report their lifecycle-hook result.
codedeploy = boto3.client('codedeploy')
def before_allow_traffic(event, context):
    """Validate the green environment before traffic is switched to it.

    Performs an HTTP GET against the green environment's health endpoint and
    reports the outcome to CodeDeploy for this lifecycle hook execution.

    Args:
        event: Hook invocation payload. Must contain 'DeploymentId' and
            'LifecycleEventHookExecutionId'.
        context: Lambda context object (unused).

    Returns:
        None. The result is communicated through
        put_lifecycle_event_hook_execution_status ('Succeeded' or 'Failed').

    Raises:
        KeyError: If either required key is missing from ``event``.
    """
    deployment_id = event['DeploymentId']
    lifecycle_event_hook_execution_id = event['LifecycleEventHookExecutionId']

    try:
        # Run health checks against green environment.
        # Standard-library urllib is used because this file never imports
        # `requests`; the previous `requests.get` call raised NameError, which
        # the broad except below turned into a permanent 'Failed' status.
        try:
            with urllib.request.urlopen(
                'http://internal-green-alb/health',
                timeout=10
            ) as response:
                status_code = response.status
        except urllib.error.HTTPError as http_error:
            # urlopen raises on 4xx/5xx; recover the code so the failure
            # message below stays "Health check failed: <code>".
            status_code = http_error.code

        if status_code == 200:
            # Health check passed
            codedeploy.put_lifecycle_event_hook_execution_status(
                deploymentId=deployment_id,
                lifecycleEventHookExecutionId=lifecycle_event_hook_execution_id,
                status='Succeeded'
            )
        else:
            raise Exception(f"Health check failed: {status_code}")
    except Exception as e:
        # Any failure (timeout, connection error, non-200 status, or an error
        # while reporting success) is reported to CodeDeploy as a failed hook.
        print(f"Validation failed: {e}")
        codedeploy.put_lifecycle_event_hook_execution_status(
            deploymentId=deployment_id,
            lifecycleEventHookExecutionId=lifecycle_event_hook_execution_id,
            status='Failed'
        )
def after_allow_traffic(event, context):
"""Validate deployment after switching traffic."""
deployment_id = event['DeploymentId']
lifecycle_event_hook_execution_id = event['LifecycleEventHookExecutionId']
try:
# Run smoke tests against production
run_smoke_tests()
codedeploy.put_lifecycle_event_hook_execution_status(
deploymentId=deployment_id,
lifecycleEventHookExecutionId=lifecycle_event_hook_execution_id,
status='Succeeded'
)
except Exception as e:
print(f"Smoke tests failed: {e}")
# This will trigger automatic rollback
codedeploy.put_lifecycle_event_hook_execution_status(
deploymentId=deployment_id,
lifecycleEventHookExecutionId=lifecycle_event_hook_execution_id,
status='Failed'
)Step 8: Pipeline Notifications
# Notification rule on the pipeline: execution started / failed / succeeded and
# manual-approval-needed events are delivered to the pipeline_notifications SNS topic.
resource "aws_codestarnotifications_notification_rule" "pipeline" {
  name        = "orders-pipeline-notifications"
  resource    = aws_codepipeline.main.arn
  detail_type = "FULL"

  event_type_ids = [
    "codepipeline-pipeline-pipeline-execution-started",
    "codepipeline-pipeline-pipeline-execution-failed",
    "codepipeline-pipeline-pipeline-execution-succeeded",
    "codepipeline-pipeline-manual-approval-needed"
  ]

  target {
    address = aws_sns_topic.pipeline_notifications.arn
  }
}
# Slack integration via Lambda: the slack_notifier function is subscribed to
# the pipeline notifications topic.
# NOTE(review): SNS also needs permission to invoke the function (e.g. an
# aws_lambda_permission); none is shown here - confirm it is declared elsewhere.
resource "aws_sns_topic_subscription" "slack" {
  protocol  = "lambda"
  topic_arn = aws_sns_topic.pipeline_notifications.arn
  endpoint  = aws_lambda_function.slack_notifier.arn
}

CI/CD Best Practices
| Stage | Implementation | Purpose |
|---|---|---|
| Source | GitHub with webhooks | Trigger on push |
| Build | Docker multi-stage | Smaller images |
| Test | Unit + Integration | Catch bugs early |
| Staging | Full environment | Validate changes |
| Approval | Manual gate | Human oversight |
| Production | Blue-green | Zero downtime |
| Rollback | Automatic on alarm | Fast recovery |
Practice Question
Why should you use blue-green deployment instead of rolling deployment for production?