From 519554013c476659771ad808256b1b4a044fbf7c Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Wed, 22 Dec 2021 22:36:51 -0800 Subject: [PATCH 1/2] Add custom www-gitlab-com image with patched gsutil rsync command Currently the `gsutil rsync` command does not support the `-z` or `-Z` options available in `gsutil cp` to compress files locally via gzip before uploading (https://github.com/GoogleCloudPlatform/gsutil/issues/579). As https://cloud.google.com/storage/docs/gsutil/commands/cp states: When you specify the -z option, the data from your files is compressed before it is uploaded, but your actual files are left uncompressed on the local disk. The uploaded objects retain the Content-Type and name of the original files, but have their Content-Encoding metadata set to gzip to indicate that the object data stored are compressed on the Cloud Storage servers and have their Cache-Control metadata set to no-transform. about.gitlab.com is currently serving uncompressed HTML files because `Cache-Control: max-age=0` is set (see https://gitlab.com/gitlab-com/www-gitlab-com/-/merge_requests/87045), and Fastly has a custom rule to skip HTML files and therefore won't cache them. This patches the `rsync.py` using https://github.com/GoogleCloudPlatform/gsutil/pull/1430 to support these command-line options so local gzip compression can be performed. 
Relates to https://gitlab.com/gitlab-com/gl-infra/infrastructure/-/issues/14852 --- .gitlab/ci/custom.images.yml | 2 + Dockerfile.www-gitlab-com-3.0-patched-gsutil | 18 ++++ patches/gsutil/rsync_gzip.patch | 100 +++++++++++++++++++ scripts/install-www-gitlab-com | 12 ++- 4 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 Dockerfile.www-gitlab-com-3.0-patched-gsutil create mode 100644 patches/gsutil/rsync_gzip.patch diff --git a/.gitlab/ci/custom.images.yml b/.gitlab/ci/custom.images.yml index f3b7f49..94ec58f 100644 --- a/.gitlab/ci/custom.images.yml +++ b/.gitlab/ci/custom.images.yml @@ -49,6 +49,7 @@ release-tools test: *test_build sitespeed-gitlab test: *test_build ubi-release test: *test_build www-gitlab-com-3.0 test: *test_build +www-gitlab-com-3.0-patched-gsutil test: *test_build build-git: *test_build # Used by GitLab: https://gitlab.com/gitlab-org/gitlab/-/blob/13-8-stable-ee/lib/gitlab/ci/templates/Terraform.gitlab-ci.yml terraform test: *test_build @@ -94,6 +95,7 @@ release-tools push: *build_and_deploy sitespeed-gitlab push: *build_and_deploy ubi-release push: *build_and_deploy www-gitlab-com-3.0 push: *build_and_deploy +www-gitlab-com-3.0-patched-gsutil push: *build_and_deploy build-git push: *build_and_deploy # Used by GitLab: https://gitlab.com/gitlab-org/gitlab/-/blob/13-8-stable-ee/lib/gitlab/ci/templates/Terraform.gitlab-ci.yml terraform push: *build_and_deploy diff --git a/Dockerfile.www-gitlab-com-3.0-patched-gsutil b/Dockerfile.www-gitlab-com-3.0-patched-gsutil new file mode 100644 index 0000000..26749ba --- /dev/null +++ b/Dockerfile.www-gitlab-com-3.0-patched-gsutil @@ -0,0 +1,18 @@ +FROM gcr.io/google.com/cloudsdktool/cloud-sdk as gcloud-sdk + +FROM ruby:3.0.3-slim-buster + +# Install Google Cloud SDK for deploys via rsync +COPY --from=gcloud-sdk /usr/lib/google-cloud-sdk /usr/lib/google-cloud-sdk +COPY --from=gcloud-sdk /usr/share/google-cloud-sdk /usr/share/google-cloud-sdk +RUN cd /usr/bin && find 
../lib/google-cloud-sdk/bin -type f -executable -exec ln -s {} \;; cd - + +ADD /scripts/ /scripts/ +ADD /patches /patches/ +RUN /scripts/install-www-gitlab-com + +# Set UTF-8 http://jaredmarkell.com/docker-and-locales/ +# Must be set after install-essentials is run +ENV LANG C.UTF-8 +ENV LANGUAGE C +ENV LC_ALL C.UTF-8 diff --git a/patches/gsutil/rsync_gzip.patch b/patches/gsutil/rsync_gzip.patch new file mode 100644 index 0000000..d5a27a5 --- /dev/null +++ b/patches/gsutil/rsync_gzip.patch @@ -0,0 +1,100 @@ +diff --git a/gslib/commands/rsync.py b/gslib/commands/rsync.py +index 734a8dbf0..0897d74a5 100644 +--- a/gslib/commands/rsync.py ++++ b/gslib/commands/rsync.py +@@ -527,6 +527,58 @@ + use ^ as an escape character instead of \\ and escape the | + character. When using Windows PowerShell, use ' instead of " + and surround the | character with ". ++ ++ -z Applies gzip content-encoding to any file upload whose ++ extension matches the ``-z`` extension list. This is useful when ++ uploading files with compressible content such as .js, .css, ++ or .html files, because it reduces network bandwidth and storage ++ sizes. This can both improve performance and reduce costs. ++ ++ When you specify the ``-z`` option, the data from your files is ++ compressed before it is uploaded, but your actual files are ++ left uncompressed on the local disk. The uploaded objects ++ retain the ``Content-Type`` and name of the original files, but ++ have their ``Content-Encoding`` metadata set to ``gzip`` to ++ indicate that the object data stored are compressed on the ++ Cloud Storage servers and have their ``Cache-Control`` metadata ++ set to ``no-transform``. ++ ++ For example, suppose the directory ``cattypes`` contains the ++ following files: ++ ++ - ``cattypes.html`` ++ - ``tabby.jpeg`` ++ ++ The following command: ++ ++ gsutil rsync -z html cattypes/ gs://mycats ++ ++ does the following: ++ ++ - The ``rsync`` command uploads the directory ``cattypes`` ++ to the bucket ``gs://mycats``.
++ - Based on the file extensions, gsutil sets the ``Content-Type`` ++ of ``cattypes.html`` to ``text/html`` and ``tabby.jpeg`` to ++ ``image/jpeg``. ++ - The ``-z`` option compresses the data in the file ``cattypes.html``. ++ - The ``-z`` option also sets the ``Content-Encoding`` for ++ ``cattypes.html`` to ``gzip`` and the ``Cache-Control`` for ++ ``cattypes.html`` to ``no-transform``. ++ ++ Because the ``-z/-Z`` options compress data prior to upload, they ++ are not subject to the same compression buffer bottleneck that ++ can affect the ``-j/-J`` options. ++ ++ Note that if you download an object with ``Content-Encoding:gzip``, ++ gsutil decompresses the content before writing the local file. ++ ++ -Z Applies gzip content-encoding to file uploads. This option ++ works like the ``-z`` option described above, but it applies to ++ all uploaded files, regardless of extension. ++ ++ CAUTION: If some of the source files don't compress well, such ++ as binary data, using this option may result in files taking up ++ more space in the cloud than they would if left uncompressed. + """) + # pylint: enable=anomalous-backslash-in-string + +@@ -1580,7 +1632,7 @@ class RsyncCommand(Command): + usage_synopsis=_SYNOPSIS, + min_args=2, + max_args=2, +- supported_sub_args='a:cCdenpPriRuUx:j:J', ++ supported_sub_args='a:cCdenpPriRuUx:z:Zj:J', + file_url_ok=True, + provider_url_ok=False, + urls_start_arg=0, +@@ -1732,6 +1784,7 @@ def _ParseOpts(self): + # The gzip_encoded flag marks if the files should be compressed during + # the upload.
+ gzip_encoded = False ++ gzip_local = False + gzip_arg_exts = None + gzip_arg_all = None + if self.sub_opts: +@@ -1779,10 +1832,19 @@ def _ParseOpts(self): + self.exclude_pattern = re.compile(a) + except re.error: + raise CommandException('Invalid exclude filter (%s)' % a) ++ elif o == '-z': ++ gzip_local = True ++ gzip_arg_exts = [x.strip() for x in a.split(',')] ++ elif o == '-Z': ++ gzip_local = True ++ gzip_arg_all = GZIP_ALL_FILES + + if self.preserve_acl and canned_acl: + raise CommandException( + 'Specifying both the -p and -a options together is invalid.') ++ if gzip_encoded and gzip_local: ++ raise CommandException( ++ 'Specifying both the -j/-J and -z/-Z options together is invalid.') + if gzip_arg_exts and gzip_arg_all: + raise CommandException( + 'Specifying both the -j and -J options together is invalid.') diff --git a/scripts/install-www-gitlab-com b/scripts/install-www-gitlab-com index 3d1d55a..3b55226 100755 --- a/scripts/install-www-gitlab-com +++ b/scripts/install-www-gitlab-com @@ -13,7 +13,8 @@ apt-get install -yq --no-install-recommends \ rsync git-core \ ed file curl gnupg2 \ unzip \ - python3 python3-pip python3-crcmod python-minimal + python3 python3-pip python3-crcmod python-minimal \ + patch # Install Imagemagick for cropping the pictures on the team page apt-get install -yq --no-install-recommends imagemagick @@ -35,6 +36,15 @@ curl -O -J -L https://gitlab-ci-multi-runner-downloads.s3.amazonaws.com/latest/b mv gitlab-ci-multi-runner-linux-amd64 /usr/bin/gitlab-runner-helper chmod +x /usr/bin/gitlab-runner-helper +# Patch gsutil to support gzip compression with rsync command: +# https://github.com/GoogleCloudPlatform/gsutil/pull/1430 +if [[ -d "/patches/gsutil" ]]; then + for i in /patches/gsutil/*.patch; do + echo "$i..." 
+ patch -d /usr/lib/google-cloud-sdk/platform/gsutil -p1 -i "$i" + done +fi + # Set UTF-8 echo "C.UTF-8 UTF-8" > /etc/locale.gen locale-gen From 0e7a55eb8af6ee1be611a8d04335c164c65f7e83 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Wed, 5 Jan 2022 08:35:44 -0800 Subject: [PATCH 2/2] Fold patched www-gitlab-com-3.0.patched-gsutil into www-gitlab-com-3.0 This eliminates an extra build. --- .gitlab/ci/custom.images.yml | 2 -- Dockerfile.www-gitlab-com-3.0 | 7 ++++--- Dockerfile.www-gitlab-com-3.0-patched-gsutil | 18 ------------------ 3 files changed, 4 insertions(+), 23 deletions(-) delete mode 100644 Dockerfile.www-gitlab-com-3.0-patched-gsutil diff --git a/.gitlab/ci/custom.images.yml b/.gitlab/ci/custom.images.yml index 94ec58f..f3b7f49 100644 --- a/.gitlab/ci/custom.images.yml +++ b/.gitlab/ci/custom.images.yml @@ -49,7 +49,6 @@ release-tools test: *test_build sitespeed-gitlab test: *test_build ubi-release test: *test_build www-gitlab-com-3.0 test: *test_build -www-gitlab-com-3.0-patched-gsutil test: *test_build build-git: *test_build # Used by GitLab: https://gitlab.com/gitlab-org/gitlab/-/blob/13-8-stable-ee/lib/gitlab/ci/templates/Terraform.gitlab-ci.yml terraform test: *test_build @@ -95,7 +94,6 @@ release-tools push: *build_and_deploy sitespeed-gitlab push: *build_and_deploy ubi-release push: *build_and_deploy www-gitlab-com-3.0 push: *build_and_deploy -www-gitlab-com-3.0-patched-gsutil push: *build_and_deploy build-git push: *build_and_deploy # Used by GitLab: https://gitlab.com/gitlab-org/gitlab/-/blob/13-8-stable-ee/lib/gitlab/ci/templates/Terraform.gitlab-ci.yml terraform push: *build_and_deploy diff --git a/Dockerfile.www-gitlab-com-3.0 b/Dockerfile.www-gitlab-com-3.0 index 2f82bd4..26749ba 100644 --- a/Dockerfile.www-gitlab-com-3.0 +++ b/Dockerfile.www-gitlab-com-3.0 @@ -2,14 +2,15 @@ FROM gcr.io/google.com/cloudsdktool/cloud-sdk as gcloud-sdk FROM ruby:3.0.3-slim-buster -ADD /scripts/ /scripts/ -RUN /scripts/install-www-gitlab-com - # Install Google 
Cloud SDK for deploys via rsync COPY --from=gcloud-sdk /usr/lib/google-cloud-sdk /usr/lib/google-cloud-sdk COPY --from=gcloud-sdk /usr/share/google-cloud-sdk /usr/share/google-cloud-sdk RUN cd /usr/bin && find ../lib/google-cloud-sdk/bin -type f -executable -exec ln -s {} \;; cd - +ADD /scripts/ /scripts/ +ADD /patches /patches/ +RUN /scripts/install-www-gitlab-com + # Set UTF-8 http://jaredmarkell.com/docker-and-locales/ # Must be set after install-essentials is run ENV LANG C.UTF-8 diff --git a/Dockerfile.www-gitlab-com-3.0-patched-gsutil b/Dockerfile.www-gitlab-com-3.0-patched-gsutil deleted file mode 100644 index 26749ba..0000000 --- a/Dockerfile.www-gitlab-com-3.0-patched-gsutil +++ /dev/null @@ -1,18 +0,0 @@ -FROM gcr.io/google.com/cloudsdktool/cloud-sdk as gcloud-sdk - -FROM ruby:3.0.3-slim-buster - -# Install Google Cloud SDK for deploys via rsync -COPY --from=gcloud-sdk /usr/lib/google-cloud-sdk /usr/lib/google-cloud-sdk -COPY --from=gcloud-sdk /usr/share/google-cloud-sdk /usr/share/google-cloud-sdk -RUN cd /usr/bin && find ../lib/google-cloud-sdk/bin -type f -executable -exec ln -s {} \;; cd - - -ADD /scripts/ /scripts/ -ADD /patches /patches/ -RUN /scripts/install-www-gitlab-com - -# Set UTF-8 http://jaredmarkell.com/docker-and-locales/ -# Must be set after install-essentials is run -ENV LANG C.UTF-8 -ENV LANGUAGE C -ENV LC_ALL C.UTF-8