AWS

Help! We’ve run into a Docker Hub rate limit!

About

Yes, it is still happening. In 2025! Here you will find:

Podman Docker Hub Mirror Configuration

~/.config/containers/registries.conf.d/dockerhub-mirror.conf:

[[registry]]
prefix = "docker.io"
insecure = false
blocked = false
location = "public.ecr.aws/docker"

[[registry.mirror]]
location = "mirror.gcr.io"

[[registry.mirror]]
location = "gitlab.com/acme-org/dependency_proxy/containers"

[[registry.mirror]]
location = "registry-1.docker.io"                                                              

[[registry.mirror]]
location = "123456789012.dkr.ecr.us-east-1.amazonaws.com/docker-io"

I hope you are using ecr-login for your ECR registries ;)

export REGISTRY_AUTH_FILE=$HOME/.config/containers/auth.json

~/.config/containers/auth.json:

{
  "auths": {
    "docker.io": {
      "auth": "eGw4ZGVwXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXem40VQ=="
    },
    "gitlab.com": {
      "auth": "cmVXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXSYQ=="
    },
    "registry.gitlab.com": {
      "auth": "cmVXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXSYQ=="
    }
  },
  "credHelpers": {
    "*": "",
    "123456789012.dkr.ecr.us-east-1.amazonaws.com": "ecr-login",
    "345678901234.dkr.ecr.us-east-1.amazonaws.com": "ecr-login"
  }
}
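
The ecr-login entries above require the docker-credential-ecr-login helper from awslabs/amazon-ecr-credential-helper to be on your PATH. A quick way to get it (package names vary by distro; the Go install path is the one from the project README):

$ sudo dnf install -y amazon-ecr-credential-helper
# or, with a Go toolchain:
$ go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@latest
$ command -v docker-credential-ecr-login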

K8s Quickfix: Rewriting Existing K8s Resources


$ cd $(mktemp -d)

$ (
  kubectl get pods --field-selector=status.phase=Pending -A -ojson | jq -c '.items[]';
  kubectl get deployments -ojson -A | jq -c '.items[]';
  kubectl get replicasets -ojson -A | jq -c '.items[]';
  kubectl get daemonsets -ojson -A | jq -c '.items[]';
) > /tmp/cluster.jsonl

$ cat /tmp/cluster.jsonl \
  | jq -r '
    def parse_into_parts:
      . as $i
      |capture(
        "^((?<host>[a-zA-Z0-9-]+\\.[a-zA-Z0-9.-]+)(:(?<port>[0-9]+))?/)?"
        + "((?<path>[a-zA-Z0-9-._/]+)/)?"
        + "(?<image>[a-zA-Z0-9-._]+)"
        + "((:(?<tag>[a-z0-9_.-]+))|(@(?<digest>sha256:[a-z0-9]+)))?$"
      ) // error("could not parse \($i)");

    def qualify_oci_image:
      if (.host==null) then .host="docker.io" else . end
      |if (.path==null and .host=="docker.io") then .path="library" else . end
      # |if (.tag==null and .digest==null) then .tag="latest" end
      ;

    def glue_parts:
      [
        if (.host) then .host else "" end,
        if (.port) then ":\(.port)" else "" end,
        if (.host) then "/" else "" end,
        if (.path) then "\(.path)/" else "" end,
        .image,
        if (.digest) then "@\(.digest)" elif (.tag) then ":\(.tag)" else "" end
      ]|join("")
      ;

    def fix_oci_image:
      . as $i
      |parse_into_parts
      |qualify_oci_image
      |if (.path=="bitnami") then .path="bitnamilegacy" else . end
      |if (.host=="docker.io") then (.host="123456780123.dkr.ecr.us-east-1.amazonaws.com"|.path="docker-io/\(.path)") else . end
      |glue_parts;
    
    [
      ..|objects|(.initContainers[]?,.containers[]?)
      |(.image|fix_oci_image) as $newImage
      |select(.image!=$newImage)
      |"\(.name)=\($newImage)"
    ] as $p
    |select($p|length > 0)
    |"kubectl set image \(.kind) -n \(.metadata.namespace) \(.metadata.name) \($p|join(" "))"
    

Permanent Mirror Configuration for containerd

(
	# drop a config fragment into /etc/containerd/config.d/ so that docker.io pulls go through the mirrors automatically

	containerd_config_version="$(grep -oP '^\s*version\s*=\s*\K\d+' /etc/containerd/config.toml)"
	p=""
	case "$containerd_config_version" in
		2) p="io.containerd.grpc.v1.cri";;
		3) p="io.containerd.cri.v1.images";;
		*) echo "unsupported"; return;;
	esac
	mkdir -p /etc/containerd/config.d
	cat <<-EOM >> /etc/containerd/config.d/dockerhub-mirrors.toml
[plugins]

  [plugins."$p".registry]

    [plugins."$p".registry.mirrors]

      [plugins."$p".registry.mirrors."docker.io"]
        endpoint = [
          "public.ecr.aws/docker",
          "mirror.gcr.io",
          "gitlab.com/acme-org/dependency_proxy/containers",
          "123456789012.dkr.ecr.us-east-1.amazonaws.com/docker-io",
          "docker.io",
        ]

    [plugins."$p".registry.configs]
      [plugins."io.containerd.grpc.v1.cri".registry.configs."gitlab.com".auth]
      	# https://gitlab.com/groups/acme-org/-/settings/access_tokens?page=1
        username = "dependency-proxy"
        password = "glpat-XXXXXXXXXXXXXXXXXXXX"

      [plugins."$p".registry.configs."docker.io".auth]
        username = "acme-org"
        password = "dckr_pat_3Xi_XXXXXXXXXXXXXXXXXXXXXXX"
        auth = "dckr_pat_3Xi_XXXXXXXXXXXXXXXXXXXXXXX"
EOM		
	fi
)

if ! containerd config dump 1>/dev/null; then
   echo "exiting since containerd config is bad" >&2
   exit 1
fi
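
containerd only picks up config.d fragments that are imported, and it needs a restart to reload them. A minimal follow-up, assuming /etc/containerd/config.toml contains imports = ["/etc/containerd/config.d/*.toml"]:

systemctl restart containerd
# Smoke test: this pull should now be served by one of the mirrors
crictl pull docker.io/library/alpine:latest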

How to get AWS CLI v2 down from 127M to 67M

Follow these steps:

FROM alpine:3.12

# Pin the sgerrand glibc compatibility release used below
# (assumption: 2.31-r0 matches alpine:3.12; adjust as needed)
ENV GLIBC_VER=2.31-r0

# 1. Install helper tools (curl and jq are used below) and glibc compatibility for Alpine
RUN apk --no-cache add binutils curl jq \
    && echo "Getting libc libraries" \
    && curl -sL https://alpine-pkgs.sgerrand.com/sgerrand.rsa.pub -o /etc/apk/keys/sgerrand.rsa.pub \
    && curl -sLO https://github.com/sgerrand/alpine-pkg-glibc/releases/download/${GLIBC_VER}/glibc-${GLIBC_VER}.apk \
    && curl -sLO https://github.com/sgerrand/alpine-pkg-glibc/releases/download/${GLIBC_VER}/glibc-bin-${GLIBC_VER}.apk \
    && echo "Installing libc libraries" \
    && apk add --no-cache \
        glibc-${GLIBC_VER}.apk \
        glibc-bin-${GLIBC_VER}.apk

# 2. Install rush parallel runner (temporary)
# This is used for optimizing the botocore data later.
RUN echo "Installing rush parallel runner (temporary)" \
    && curl -sSfL https://github.com/shenwei356/rush/releases/download/v0.4.2/rush_linux_amd64.tar.gz -o - \
    | tar -C /tmp/ -zxf -

# 3. Install AWS CLI v2
RUN echo "Installing AWS CLI" \
    && curl -sL https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip -o awscliv2.zip \
    && unzip awscliv2.zip \
        -x 'aws/dist/awscli/examples/**' 'aws/dist/docutils/**' \
    && aws/install

# 4. Clean and Slim the Installation
RUN echo "Cleaning and slimming AWS CLI installation" \
    # Remove installation files and unnecessary components
    && rm -rvf \
        awscliv2.zip \
        aws \
        /usr/local/aws-cli/v2/*/dist/aws_completer \
        /usr/local/aws-cli/v2/*/dist/awscli/data/ac.index \
        /usr/local/aws-cli/v2/*/dist/awscli/examples \
    # Remove example JSON files from botocore data
    && find /usr/local/aws-cli/v2/current/dist/botocore/data/ -name "*.json" -name "examples*" -exec rm {} \; \
    # Optimize remaining JSON files by removing 'documentation' field using jq/rush
    && find /usr/local/aws-cli/v2/current/dist/botocore/data/ -name "*.json" \
        | /tmp/rush 'echo "optimizing {}"; jq -cer "del(.. | .documentation?)" "{}" > "{}.tmp" && mv "{}.tmp" "{}"'

# 5. Final Cleanup
RUN echo "Removing artifacts" \
    && rm /tmp/rush \
    && apk --no-cache del \
        binutils curl jq \
    && rm glibc-${GLIBC_VER}.apk \
    && rm glibc-bin-${GLIBC_VER}.apk \
    && rm -rf /var/cache/apk/*

Notes

I do think there is a lot more that could be improved in botocore (like compressing its data assets), or the AWS CLI could simply be rewritten in Go.

AWS CLI v2 has been generally available since February 2020. It is distributed as a binary package (built on Python with PyInstaller and bundled native libraries), but this new distribution method comes with a few caveats.

Anyway, they have some nice features now, and it seems they packed aws-shell right into the CLI: https://aws.amazon.com/blogs/developer/aws-cli-v2-is-now-generally-available/ https://www.youtube.com/watch?v=U5y7JI_mHk8
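
If you want the aws-shell feel without installing anything extra, v2's auto-prompt mode is the built-in equivalent:

$ aws --cli-auto-prompt
# or turn it on permanently:
$ export AWS_CLI_AUTO_PROMPT=on-partial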

Infojunk November 2018

This is a collection of interesting links and resources I came across in November 2018, covering topics such as security, Linux, AWS, and development.

Hacking / MITM-API-Testing

Linux

Windows

Python

KataCode

Spectre/Meltdown

Project

Security

Tools

AWS

Development

Other

Infojunk October 2018

This is a collection of interesting links and resources I came across in October 2018, covering a wide range of topics including browser extensions, collaborative coding, Linux, AWS, and more.

Browser Extensions

Collaborative Coding

Focusing on IDEs. Web-based solutions are mostly ignored.

Linux

NodeJS

DevOps

AI/MachineLearning

AWS

JMESPath is not as powerful as jq, but AWS probably chose it because it might be faster and its query selectors are a bit more sophisticated (?).
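
For a feel of the difference, here is the same extraction with the CLI's built-in JMESPath --query and with jq (the EC2 call is just an example; region/profile flags omitted):

# JMESPath, applied by the AWS CLI itself
aws ec2 describe-instances \
  --query 'Reservations[].Instances[].{id: InstanceId, state: State.Name}'
# The jq equivalent
aws ec2 describe-instances \
  | jq '[.Reservations[].Instances[] | {id: .InstanceId, state: .State.Name}]'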

AWS S3 Sync is Not Reliable and Slow!

This article explores reliability issues with AWS CLI’s S3 sync functionality and provides alternative solutions for better file synchronization.

While migrating from s3cmd to the AWS S3 CLI, I noticed that files did not sync reliably when using the AWS CLI.

I tested with different versions, and they all exhibited the same behavior:

Test Setup

  1. Set up the AWS CLI utility and configure your credentials
  2. Create a testing S3 bucket
  3. Set up some random files:
# Create 10 random files of 10MB each
mkdir multi
for i in {1..10}; do dd if=/dev/urandom of=multi/part-$i.out bs=1MB count=10; done;
# Then copy the first 5 files over unchanged
mkdir multi-changed
cp multi/part-{1,2,3,4,5}.out multi-changed
# And replace the content of the remaining 5 files (6-10)
for i in {6..10}; do dd if=/dev/urandom of=multi-changed/part-$i.out bs=1MB count=10; done;

Testing S3 Sync with AWS CLI

Cleanup

$ aws s3 rm s3://testbucket/multi --recursive 

Initial Sync

$ aws s3 sync multi s3://testbucket/multi
upload: multi/part-1.out to s3://testbucket/multi/part-1.out         
upload: multi/part-3.out to s3://testbucket/multi/part-3.out      
upload: multi/part-2.out to s3://testbucket/multi/part-2.out      
upload: multi/part-4.out to s3://testbucket/multi/part-4.out      
upload: multi/part-10.out to s3://testbucket/multi/part-10.out    
upload: multi/part-5.out to s3://testbucket/multi/part-5.out      
upload: multi/part-6.out to s3://testbucket/multi/part-6.out      
upload: multi/part-8.out to s3://testbucket/multi/part-8.out      
upload: multi/part-7.out to s3://testbucket/multi/part-7.out      
upload: multi/part-9.out to s3://testbucket/multi/part-9.out  

Update Files

Only the 5 files with new content (part-6 through part-10) should now need uploading; note that the timestamps of all 10 files have changed while the sizes are identical.
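
The follow-up run against the same bucket is simply:

$ aws s3 sync multi-changed s3://testbucket/multi

Whether this picks up exactly the 5 changed files is the crux: aws s3 sync compares file size and modification time, not content, so same-size files with shifted timestamps are precisely where it goes wrong.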

AWS sync is not reliable!
