freundcloud

Amazon S3 (Simple Storage Service)

Overview

Amazon S3 is an object storage service offering industry-leading scalability, data availability, security, and performance. As of May 2025, S3 is one of the most fundamental AWS services, allowing you to store and retrieve any amount of data from anywhere on the web.

S3 is designed for 99.999999999% (11 nines) durability and can scale to handle practically unlimited storage needs.

Key Concepts

Core Components

  • Buckets: Containers for storing objects
  • Objects: Files and any associated metadata stored in S3
  • Keys: Unique identifiers for objects within a bucket
  • Regions: Geographic locations where S3 buckets are physically stored
  • Endpoints: URLs through which S3 is accessed

Storage Classes

  • Standard: General-purpose storage for frequently accessed data
  • Intelligent-Tiering: Automatic cost optimization for data with unknown or changing access patterns
  • Standard-IA: For less frequently accessed data with rapid access when needed
  • One Zone-IA: For less frequently accessed data that doesn’t require multi-AZ resilience
  • Glacier Instant Retrieval: For archived data that needs immediate access
  • Glacier Flexible Retrieval: Low-cost storage for archived data with retrieval times from minutes to hours
  • Glacier Deep Archive: Lowest-cost storage for long-term data archiving, with retrieval times of up to 12 hours (standard) or up to 48 hours (bulk)
  • Outposts: Store S3 data on-premises using AWS Outposts

Deploying S3 with Terraform

Basic S3 Bucket

# Default AWS provider configuration; the region is set to us-west-2.
provider "aws" {
  region = "us-west-2"
}

# General-purpose bucket that the other resources in this example attach to.
# S3 bucket names are globally unique, so pick your own name before applying.
resource "aws_s3_bucket" "example_bucket" {
  bucket = "my-example-bucket-unique-name-2025"

  tags = {
    Environment = "Dev"
    Name        = "My Example Bucket"
  }
}

# Configure bucket versioning: sets the versioning status of the example
# bucket to "Enabled".
resource "aws_s3_bucket_versioning" "versioning_example" {
  bucket = aws_s3_bucket.example_bucket.id
  versioning_configuration {
    status = "Enabled"
  }
}

# Configure bucket server-side encryption: objects written to the example
# bucket are encrypted by default with the "AES256" algorithm.
resource "aws_s3_bucket_server_side_encryption_configuration" "example" {
  bucket = aws_s3_bucket.example_bucket.id

  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "AES256"
    }
  }
}

# Turn on all four Block Public Access settings for the example bucket.
resource "aws_s3_bucket_public_access_block" "example" {
  bucket = aws_s3_bucket.example_bucket.id

  # ACL-based public access
  block_public_acls  = true
  ignore_public_acls = true

  # Policy-based public access
  block_public_policy     = true
  restrict_public_buckets = true
}

# Bucket policy: allow requests that arrive through one specific VPC endpoint.
# NOTE(review): the statement grants every S3 action ("s3:*") to any principal
# that uses the endpoint - confirm this breadth is intended before reusing it.
resource "aws_s3_bucket_policy" "allow_access_from_specific_vpc_endpoint" {
  bucket = aws_s3_bucket.example_bucket.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid       = "AllowAccessFromVPCEndpoint"
        Effect    = "Allow"
        Principal = "*"
        Action    = "s3:*"

        # Covers the bucket itself and every object inside it.
        Resource = [
          aws_s3_bucket.example_bucket.arn,
          "${aws_s3_bucket.example_bucket.arn}/*",
        ]

        # Only requests whose source VPC endpoint matches are allowed.
        Condition = {
          StringEquals = {
            "aws:SourceVpce" = "vpce-12345678"
          }
        }
      }
    ]
  })
}

S3 Website Hosting

# Bucket that holds the static website's files.
resource "aws_s3_bucket" "website_bucket" {
  bucket = "my-static-website-bucket-2025"

  tags = {
    Environment = "Production"
    Name        = "Static Website Bucket"
  }
}

# Static website hosting settings for the website bucket.
resource "aws_s3_bucket_website_configuration" "website_config" {
  bucket = aws_s3_bucket.website_bucket.id

  # Index document suffix.
  index_document { suffix = "index.html" }

  # Object returned as the error page.
  error_document { key = "error.html" }

  # Redirect keys that start with "docs/" to the same path under "documents/".
  routing_rule {
    condition { key_prefix_equals = "docs/" }
    redirect { replace_key_prefix_with = "documents/" }
  }
}

# Bucket policy granting anonymous read access to the website's objects.
#
# s3:GetObject is an object-level action, so the statement targets only the
# objects ("<bucket-arn>/*"); listing the bucket ARN as well has no effect.
#
# depends_on: new buckets have Block Public Access enabled, and S3 rejects a
# public policy with AccessDenied while block_public_policy is still on.
# Nothing else links this resource to the public access block, so without the
# explicit dependency Terraform is free to apply them in either order.
resource "aws_s3_bucket_policy" "allow_public_read" {
  depends_on = [aws_s3_bucket_public_access_block.website_public_access]

  bucket = aws_s3_bucket.website_bucket.id
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid       = "PublicReadGetObject"
        Effect    = "Allow"
        Principal = "*"
        Action    = "s3:GetObject"
        Resource = [
          "${aws_s3_bucket.website_bucket.arn}/*",
        ]
      }
    ]
  })
}

# Switch off all four Block Public Access settings so the website bucket can
# be served publicly.
resource "aws_s3_bucket_public_access_block" "website_public_access" {
  bucket = aws_s3_bucket.website_bucket.id

  # ACL-based public access
  block_public_acls  = false
  ignore_public_acls = false

  # Policy-based public access
  block_public_policy     = false
  restrict_public_buckets = false
}

# Upload the site's index page from the module's website/ directory.
resource "aws_s3_object" "index_html" {
  bucket = aws_s3_bucket.website_bucket.id
  key    = "index.html"

  content_type = "text/html"
  source       = "${path.module}/website/index.html"

  # MD5 of the local file; it changes whenever the file content changes.
  etag = filemd5("${path.module}/website/index.html")
}

# Upload the site's error page from the module's website/ directory.
resource "aws_s3_object" "error_html" {
  bucket = aws_s3_bucket.website_bucket.id
  key    = "error.html"

  content_type = "text/html"
  source       = "${path.module}/website/error.html"

  # MD5 of the local file; it changes whenever the file content changes.
  etag = filemd5("${path.module}/website/error.html")
}

# Expose the website endpoint reported by the website configuration resource.
output "website_endpoint" {
  value = aws_s3_bucket_website_configuration.website_config.website_endpoint
}

S3 Replication

# Aliased provider configurations for the replication example. The resources
# in this section select a region with `provider = aws.<alias>`, and Terraform
# rejects a reference to an alias that has no matching provider block.
provider "aws" {
  alias  = "us-west-2"
  region = "us-west-2"
}

provider "aws" {
  alias  = "us-east-1"
  region = "us-east-1"
}

# Source bucket in us-west-2
resource "aws_s3_bucket" "source" {
  provider = aws.us-west-2
  bucket   = "source-bucket-us-west-2-2025"
}

# Versioning for the source bucket (status "Enabled").
resource "aws_s3_bucket_versioning" "source_versioning" {
  provider = aws.us-west-2
  bucket   = aws_s3_bucket.source.id

  versioning_configuration { status = "Enabled" }
}

# Destination bucket in us-east-1; created through the aliased provider
# aws.us-east-1 and used as the replication target.
resource "aws_s3_bucket" "destination" {
  provider = aws.us-east-1
  bucket   = "destination-bucket-us-east-1-2025"
}

# Versioning for the destination bucket (status "Enabled").
resource "aws_s3_bucket_versioning" "destination_versioning" {
  provider = aws.us-east-1
  bucket   = aws_s3_bucket.destination.id

  versioning_configuration { status = "Enabled" }
}

# IAM role used for S3 replication.
resource "aws_iam_role" "replication" {
  name = "s3-bucket-replication-role"

  # Trust policy: the S3 service principal may assume this role.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect    = "Allow"
        Principal = { Service = "s3.amazonaws.com" }
        Action    = "sts:AssumeRole"
      }
    ]
  })
}

# Permissions policy for replication, split into three statements:
# source bucket, source objects, destination objects.
resource "aws_iam_policy" "replication" {
  name = "s3-bucket-replication-policy"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      # 1. Read the replication configuration and list the source bucket.
      {
        Effect   = "Allow"
        Resource = [aws_s3_bucket.source.arn]
        Action = [
          "s3:GetReplicationConfiguration",
          "s3:ListBucket",
        ]
      },

      # 2. Read object versions, their ACLs and their tags from the source.
      {
        Effect   = "Allow"
        Resource = ["${aws_s3_bucket.source.arn}/*"]
        Action = [
          "s3:GetObjectVersionForReplication",
          "s3:GetObjectVersionAcl",
          "s3:GetObjectVersionTagging",
        ]
      },

      # 3. Write replicated objects, deletes and tags into the destination.
      {
        Effect   = "Allow"
        Resource = ["${aws_s3_bucket.destination.arn}/*"]
        Action = [
          "s3:ReplicateObject",
          "s3:ReplicateDelete",
          "s3:ReplicateTags",
        ]
      },
    ]
  })
}

# Attach policy to IAM role: links the replication policy (by ARN) to the
# replication role (by name).
resource "aws_iam_role_policy_attachment" "replication" {
  role       = aws_iam_role.replication.name
  policy_arn = aws_iam_policy.replication.arn
}

# Configure replication of the whole source bucket into the destination bucket.
resource "aws_s3_bucket_replication_configuration" "replication" {
  provider = aws.us-west-2

  # S3 only accepts a replication configuration when versioning is already
  # enabled on BOTH buckets. The arguments below reference the bucket
  # resources, not the versioning resources, so both versioning resources
  # must be listed here; otherwise Terraform may create this resource before
  # destination versioning is enabled and the API call is rejected.
  depends_on = [
    aws_s3_bucket_versioning.source_versioning,
    aws_s3_bucket_versioning.destination_versioning,
  ]

  role   = aws_iam_role.replication.arn
  bucket = aws_s3_bucket.source.id

  rule {
    id     = "EntireBucketReplication"
    status = "Enabled"

    destination {
      bucket        = aws_s3_bucket.destination.arn
      storage_class = "STANDARD"
    }
  }
}

Using S3 with AWS CLI

Basic S3 Operations

# Create (make) a bucket
aws s3 mb s3://my-bucket-name-2025

# List buckets
aws s3 ls

# Upload a single file to the root of the bucket
aws s3 cp myfile.txt s3://my-bucket-name-2025/

# Upload a directory and everything below it (--recursive)
aws s3 cp my-directory/ s3://my-bucket-name-2025/my-directory/ --recursive

# Download a single file into the current directory
aws s3 cp s3://my-bucket-name-2025/myfile.txt ./

# Download a directory and everything below it (--recursive)
aws s3 cp s3://my-bucket-name-2025/my-directory/ ./my-directory/ --recursive

# List objects in a bucket
aws s3 ls s3://my-bucket-name-2025/

# Delete a single object
aws s3 rm s3://my-bucket-name-2025/myfile.txt

# Delete every object under a prefix (--recursive)
aws s3 rm s3://my-bucket-name-2025/my-directory/ --recursive

# Delete (remove) a bucket; the bucket must be empty
aws s3 rb s3://my-bucket-name-2025

# Force delete a bucket and all its contents
# NOTE(review): on a versioned bucket, older object versions may survive
# --force and block the removal - confirm before relying on this.
aws s3 rb s3://my-bucket-name-2025 --force

Sync Commands

# Sync local directory to S3 bucket (local -> S3)
aws s3 sync my-directory/ s3://my-bucket-name-2025/my-directory/

# Sync S3 bucket to local directory (S3 -> local)
aws s3 sync s3://my-bucket-name-2025/my-directory/ my-directory/

# Sync with deletion: --delete also removes files in the destination that
# don't exist in the source, so double-check the direction before running
aws s3 sync my-directory/ s3://my-bucket-name-2025/my-directory/ --delete

Managing Bucket Policies

# Show the policy attached to the bucket
aws s3api get-bucket-policy \
  --bucket my-bucket-name-2025

# Set the bucket policy from the JSON document in policy.json
aws s3api put-bucket-policy \
  --bucket my-bucket-name-2025 \
  --policy file://policy.json

# Remove the bucket policy
aws s3api delete-bucket-policy \
  --bucket my-bucket-name-2025

Managing Bucket Lifecycle Rules

# Show the bucket's lifecycle configuration
aws s3api get-bucket-lifecycle-configuration \
  --bucket my-bucket-name-2025

# Set the lifecycle configuration from the JSON document in lifecycle.json
aws s3api put-bucket-lifecycle-configuration \
  --bucket my-bucket-name-2025 \
  --lifecycle-configuration file://lifecycle.json

Managing Bucket Versioning

# Turn versioning on for the bucket
aws s3api put-bucket-versioning \
  --bucket my-bucket-name-2025 \
  --versioning-configuration Status=Enabled

# Show the bucket's versioning status
aws s3api get-bucket-versioning \
  --bucket my-bucket-name-2025

# List object versions in the bucket
aws s3api list-object-versions \
  --bucket my-bucket-name-2025

# Delete one specific version of an object
# ("versionId" is a placeholder for a real version ID)
aws s3api delete-object \
  --bucket my-bucket-name-2025 \
  --key myfile.txt \
  --version-id versionId

Website Configuration

# Turn on static website hosting with an index and an error document
aws s3api put-bucket-website \
  --bucket my-bucket-name-2025 \
  --website-configuration '{
  "IndexDocument": {
    "Suffix": "index.html"
  },
  "ErrorDocument": {
    "Key": "error.html"
  }
}'

# Show the bucket's website configuration
aws s3api get-bucket-website \
  --bucket my-bucket-name-2025

# Remove the bucket's website configuration
aws s3api delete-bucket-website \
  --bucket my-bucket-name-2025

Access Control

# Block public access to a bucket: sets all four Block Public Access flags
# (BlockPublicAcls, IgnorePublicAcls, BlockPublicPolicy, RestrictPublicBuckets)
# to true
aws s3api put-public-access-block \
  --bucket my-bucket-name-2025 \
  --public-access-block-configuration "BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=true,RestrictPublicBuckets=true"

# Get public access block configuration
aws s3api get-public-access-block --bucket my-bucket-name-2025

Advanced Operations

# Create a presigned URL for one object; --expires-in is given in seconds
# (3600 = 1 hour)
aws s3 presign s3://my-bucket-name-2025/myfile.txt --expires-in 3600

# Enable server access logging: logs for my-bucket-name-2025 are written to
# log-bucket-2025 under the my-bucket-logs/ prefix
# NOTE(review): the target bucket presumably has to exist and permit log
# delivery first - confirm
aws s3api put-bucket-logging \
  --bucket my-bucket-name-2025 \
  --bucket-logging-status '{
    "LoggingEnabled": {
      "TargetBucket": "log-bucket-2025",
      "TargetPrefix": "my-bucket-logs/"
    }
  }'

# Enable server-side encryption: sets the bucket's default encryption
# algorithm to AES256
aws s3api put-bucket-encryption \
  --bucket my-bucket-name-2025 \
  --server-side-encryption-configuration '{
    "Rules": [
      {
        "ApplyServerSideEncryptionByDefault": {
          "SSEAlgorithm": "AES256"
        }
      }
    ]
  }'

Best Practices for S3

  1. Security
    • Use bucket policies and IAM policies to control access
    • Enable default encryption for all buckets
    • Block all public access unless explicitly required
    • Use VPC endpoints to access S3 from within your VPC
    • Enable access logging to track bucket access
  2. Cost Optimization
    • Use lifecycle policies to transition objects to cheaper storage classes
    • Implement Intelligent-Tiering for objects with unknown access patterns
    • Enable requester pays for shared data sets
    • Monitor and set budget alerts for S3 usage
    • Add a lifecycle rule that aborts incomplete multipart uploads, since their uploaded parts continue to incur storage charges
  3. Performance
    • Use multi-region access points for global distributions
    • Implement Transfer Acceleration for faster uploads
    • Spread high-request-rate workloads across multiple key prefixes, since S3 request-rate limits apply per prefix
    • Consider S3 batch operations for large-scale changes
    • Use byte-range fetches for large objects
  4. Data Management
    • Enable versioning for critical data
    • Configure Cross-Region Replication (CRR) for disaster recovery
    • Implement appropriate backup strategies
    • Use S3 Object Lock for WORM (Write Once Read Many) compliance
    • Set up S3 Inventory for asset management
  5. Monitoring and Analytics
    • Enable S3 Storage Lens for visibility across your organization
    • Use CloudWatch metrics to monitor bucket operations
    • Configure event notifications for critical operations
    • Apply S3 analytics to optimize storage class transitions
    • Implement tagging for resource allocation tracking