mirror of
https://ops.gitlab.net/gitlab-org/gitlab-build-images.git
synced 2025-12-09 10:02:56 +01:00
Add custom www-gitlab-com image with patched gsutil rsync command
Currently the `gsutil rsync` command does not support the `-z` or `-Z` options available in `gsutil cp` to compress files locally via gzip before uploading (https://github.com/GoogleCloudPlatform/gsutil/issues/579). As https://cloud.google.com/storage/docs/gsutil/commands/cp states: When you specify the -z option, the data from your files is compressed before it is uploaded, but your actual files are left uncompressed on the local disk. The uploaded objects retain the Content-Type and name of the original files, but have their Content-Encoding metadata set to gzip to indicate that the object data stored are compressed on the Cloud Storage servers and have their Cache-Control metadata set to no-transform. about.gitlab.com is currently serving uncompressed HTML files because `Cache-Control: max-age=0` is set (see https://gitlab.com/gitlab-com/www-gitlab-com/-/merge_requests/87045), and Fastly has a custom rule to skip HTML files and therefore won't cache them. This patches the `rsync.py` using https://github.com/GoogleCloudPlatform/gsutil/pull/1430 to support these command-line options so local gzip compression can be performed. Relates to https://gitlab.com/gitlab-com/gl-infra/infrastructure/-/issues/14852
This commit is contained in:
parent
58a310f6ce
commit
519554013c
4 changed files with 131 additions and 1 deletions
100
patches/gsutil/rsync_gzip.patch
Normal file
100
patches/gsutil/rsync_gzip.patch
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
diff --git a/gslib/commands/rsync.py b/gslib/commands/rsync.py
|
||||
index 734a8dbf0..0897d74a5 100644
|
||||
--- a/gslib/commands/rsync.py
|
||||
+++ b/gslib/commands/rsync.py
|
||||
@@ -527,6 +527,58 @@
|
||||
use ^ as an escape character instead of \\ and escape the |
|
||||
character. When using Windows PowerShell, use ' instead of "
|
||||
and surround the | character with ".
|
||||
+
|
||||
+ -z <ext,...> Applies gzip content-encoding to any file upload whose
|
||||
+ extension matches the ``-z`` extension list. This is useful when
|
||||
+ uploading files with compressible content such as .js, .css,
|
||||
+ or .html files, because it reduces network bandwidth and storage
|
||||
+ sizes. This can both improve performance and reduce costs.
|
||||
+
|
||||
+ When you specify the ``-z`` option, the data from your files is
|
||||
+ compressed before it is uploaded, but your actual files are
|
||||
+ left uncompressed on the local disk. The uploaded objects
|
||||
+ retain the ``Content-Type`` and name of the original files, but
|
||||
+ have their ``Content-Encoding`` metadata set to ``gzip`` to
|
||||
+ indicate that the object data stored are compressed on the
|
||||
+ Cloud Storage servers and have their ``Cache-Control`` metadata
|
||||
+ set to ``no-transform``.
|
||||
+
|
||||
+ For example, suppose the directory ``cattypes`` contains the
|
||||
+ following files:
|
||||
+
|
||||
+ - ``cattypes.html``
|
||||
+ - ``tabby.jpeg`
|
||||
+
|
||||
+ The following command:
|
||||
+
|
||||
+ gsutil rsync -z html cattypes/ gs://mycats
|
||||
+
|
||||
+ does the following:
|
||||
+
|
||||
+ - The ``rsync`` command uploads the directory `cattypes`
|
||||
+ to the bucket ``gs://mycats``.
|
||||
+ - Based on the file extensions, gsutil sets the ``Content-Type``
|
||||
+ of ``cattypes.html`` to ``text/html`` and ``tabby.jpeg`` to
|
||||
+ ``image/jpeg``.
|
||||
+ - The ``-z`` option compresses the data in the file ``cattypes.html``.
|
||||
+ - The ``-z`` option also sets the ``Content-Encoding`` for
|
||||
+ ``cattypes.html`` to ``gzip`` and the ``Cache-Control`` for
|
||||
+ ``cattypes.html`` to ``no-transform``.
|
||||
+
|
||||
+ Because the ``-z/-Z`` options compress data prior to upload, they
|
||||
+ are not subject to the same compression buffer bottleneck that
|
||||
+ can affect the ``-j/-J`` options.
|
||||
+
|
||||
+ Note that if you download an object with ``Content-Encoding:gzip``,
|
||||
+ gsutil decompresses the content before writing the local file.
|
||||
+
|
||||
+ -Z Applies gzip content-encoding to file uploads. This option
|
||||
+ works like the ``-z`` option described above, but it applies to
|
||||
+ all uploaded files, regardless of extension.
|
||||
+
|
||||
+ CAUTION: If some of the source files don't compress well, such
|
||||
+ as binary data, using this option may result in files taking up
|
||||
+ more space in the cloud than they would if left uncompressed.
|
||||
""")
|
||||
# pylint: enable=anomalous-backslash-in-string
|
||||
|
||||
@@ -1580,7 +1632,7 @@ class RsyncCommand(Command):
|
||||
usage_synopsis=_SYNOPSIS,
|
||||
min_args=2,
|
||||
max_args=2,
|
||||
- supported_sub_args='a:cCdenpPriRuUx:j:J',
|
||||
+ supported_sub_args='a:cCdenpPriRuUxz:Zj:J',
|
||||
file_url_ok=True,
|
||||
provider_url_ok=False,
|
||||
urls_start_arg=0,
|
||||
@@ -1732,6 +1784,7 @@ def _ParseOpts(self):
|
||||
# The gzip_encoded flag marks if the files should be compressed during
|
||||
# the upload.
|
||||
gzip_encoded = False
|
||||
+ gzip_local = False
|
||||
gzip_arg_exts = None
|
||||
gzip_arg_all = None
|
||||
if self.sub_opts:
|
||||
@@ -1779,10 +1832,19 @@ def _ParseOpts(self):
|
||||
self.exclude_pattern = re.compile(a)
|
||||
except re.error:
|
||||
raise CommandException('Invalid exclude filter (%s)' % a)
|
||||
+ elif o == '-z':
|
||||
+ gzip_local = True
|
||||
+ gzip_arg_exts = [x.strip() for x in a.split(',')]
|
||||
+ elif o == '-Z':
|
||||
+ gzip_local = True
|
||||
+ gzip_arg_all = GZIP_ALL_FILES
|
||||
|
||||
if self.preserve_acl and canned_acl:
|
||||
raise CommandException(
|
||||
'Specifying both the -p and -a options together is invalid.')
|
||||
+ if gzip_encoded and gzip_local:
|
||||
+ raise CommandException(
|
||||
+ 'Specifying both the -j/-J and -z/-Z options together is invalid.')
|
||||
if gzip_arg_exts and gzip_arg_all:
|
||||
raise CommandException(
|
||||
'Specifying both the -j and -J options together is invalid.')
|
||||
Loading…
Add table
Add a link
Reference in a new issue