diff --git a/conf/nginx.conf.template b/conf/nginx.conf.template
index b935943..67c5137 100644
--- a/conf/nginx.conf.template
+++ b/conf/nginx.conf.template
@@ -764,6 +764,241 @@ http {
             proxy_pass __S3_ENDPOINT_PROTO__$aws_tgt_bucket/$encoded_key;
         }
 
+        # Return a presigned S3 PUT URL for a single-file upload.
+        # GET /presign-upload/<build>/<path>
+        # Responds 200 with the presigned URL as plain text so the client can PUT
+        # the file body directly to S3, bypassing this proxy entirely.
+        #
+        location ~ ^/presign-upload/([^/]+/.*[^/])$ {
+            if ($request_method != GET) {
+                return 400;
+            }
+
+            set_by_lua_file $canonical_path /etc/nginx/canonicalize_path.ljbc;
+
+            if ($canonical_path !~ '^[__SUPPORTED_CHARSET__]+$') {
+                return 400;
+            }
+
+            if ($canonical_path !~ '^([^/]+\:staging\-([0-9]{10}\.|)[0-9a-f]+\.[^./]+\.[0-9]+(\.[0-9]+|)|\.md_staging)/') {
+                return 400;
+            }
+
+            set $signature_mode "PRESIGN_PUT";
+            set $redirect_endpoint "__S3_ENDPOINT_PROTO____S3_ENDPOINT_HOST__:__S3_ENDPOINT_PORT__";
+            set $aws_access_key "";
+            set $aws_tgt_bucket "";
+            set $encoded_key "";
+            set $presign_query_string "";
+            rewrite_by_lua_file /etc/nginx/compute_aws_s3_signature.ljbc;
+        }
+
+        # Return a presigned S3 PUT URL for one multipart part.
+        # GET /presign-upload-part/<build>/<path>?partNumber=N&uploadId=X
+        # Responds 200 with the presigned URL as plain text.
+        #
+        location ~ ^/presign-upload-part/([^/]+/.*[^/])$ {
+            if ($request_method != GET) {
+                return 400;
+            }
+
+            set_by_lua_file $canonical_path /etc/nginx/canonicalize_path.ljbc;
+
+            if ($canonical_path !~ '^[__SUPPORTED_CHARSET__]+$') {
+                return 400;
+            }
+
+            if ($canonical_path !~ '^([^/]+\:staging\-([0-9]{10}\.|)[0-9a-f]+\.[^./]+\.[0-9]+(\.[0-9]+|)|\.md_staging)/') {
+                return 400;
+            }
+
+            set $signature_mode "PRESIGN_PART";
+            set $redirect_endpoint "__S3_ENDPOINT_PROTO____S3_ENDPOINT_HOST__:__S3_ENDPOINT_PORT__";
+            set $aws_access_key "";
+            set $aws_tgt_bucket "";
+            set $encoded_key "";
+            set $presign_query_string "";
+            rewrite_by_lua_file /etc/nginx/compute_aws_s3_signature.ljbc;
+        }
+
+        # Multipart upload: initiate.
+        # POST /upload-multipart/initiate/<build>/<path>
+        # Proxies POST /<key>?uploads to S3; returns XML with uploadId.
+        #
+        location ~ ^/upload-multipart/initiate/([^/]+/.*[^/])$ {
+            if ($request_method != POST) {
+                return 400;
+            }
+
+            set_by_lua_block $canonical_path {
+                local tmp = ngx.var.request_uri
+                tmp = tmp:gsub('?.*$', '', 1)
+                tmp = tmp:gsub('^/[^/]+/[^/]+/', '', 1)
+                tmp = tmp:gsub('+', '%%2B')
+                tmp = tmp:gsub('&', '%%26')
+                tmp = ngx.unescape_uri(tmp)
+                return tmp
+            }
+
+            if ($canonical_path !~ '^[__SUPPORTED_CHARSET__]+$') {
+                return 400;
+            }
+
+            if ($canonical_path !~ '^([^/]+\:staging\-([0-9]{10}\.|)[0-9a-f]+\.[^./]+\.[0-9]+(\.[0-9]+|)|\.md_staging)/') {
+                return 400;
+            }
+
+            set $signature_mode "MULTIPART_INITIATE";
+            set $aws_access_key "";
+            set $aws_signature "";
+            set $x_amz_date "";
+            set $x_amz_acl "";
+            set $aws_tgt_bucket "${AWS_BUCKET_PREFIX}-staging";
+            set $encoded_key "";
+            rewrite_by_lua_file /etc/nginx/compute_aws_s3_signature.ljbc;
+
+            proxy_set_header Authorization "AWS $aws_access_key:$aws_signature";
+            proxy_set_header x-amz-date $x_amz_date;
+            proxy_set_header x-amz-acl $x_amz_acl;
+            proxy_set_header Host $aws_tgt_bucket.__S3_ENDPOINT_HOST__;
+            proxy_set_header Connection "keep-alive";
+
+            proxy_pass __S3_ENDPOINT_PROTO__$aws_tgt_bucket/$encoded_key?uploads;
+        }
+
+        # Multipart upload: upload one part.
+        # PUT /upload-multipart/part/<build>/<path>?partNumber=N&uploadId=X
+        # Proxies PUT /<key>?partNumber=N&uploadId=X to S3; returns ETag header.
+        #
+        location ~ ^/upload-multipart/part/([^/]+/.*[^/])$ {
+            if ($request_method != PUT) {
+                return 400;
+            }
+
+            set_by_lua_block $canonical_path {
+                local tmp = ngx.var.request_uri
+                tmp = tmp:gsub('?.*$', '', 1)
+                tmp = tmp:gsub('^/[^/]+/[^/]+/', '', 1)
+                tmp = tmp:gsub('+', '%%2B')
+                tmp = tmp:gsub('&', '%%26')
+                tmp = ngx.unescape_uri(tmp)
+                return tmp
+            }
+
+            if ($canonical_path !~ '^[__SUPPORTED_CHARSET__]+$') {
+                return 400;
+            }
+
+            if ($canonical_path !~ '^([^/]+\:staging\-([0-9]{10}\.|)[0-9a-f]+\.[^./]+\.[0-9]+(\.[0-9]+|)|\.md_staging)/') {
+                return 400;
+            }
+
+            set $signature_mode "MULTIPART_UPLOAD_PART";
+            set $aws_access_key "";
+            set $aws_signature "";
+            set $x_amz_date "";
+            set $x_amz_acl "";
+            set $aws_tgt_bucket "${AWS_BUCKET_PREFIX}-staging";
+            set $encoded_key "";
+            rewrite_by_lua_file /etc/nginx/compute_aws_s3_signature.ljbc;
+
+            proxy_set_header Authorization "AWS $aws_access_key:$aws_signature";
+            proxy_set_header x-amz-date $x_amz_date;
+            proxy_set_header Host $aws_tgt_bucket.__S3_ENDPOINT_HOST__;
+            proxy_set_header Connection "keep-alive";
+
+            proxy_pass __S3_ENDPOINT_PROTO__$aws_tgt_bucket/$encoded_key?partNumber=$arg_partNumber&uploadId=$arg_uploadId;
+        }
+
+        # Multipart upload: complete.
+        # POST /upload-multipart/complete/<build>/<path>?uploadId=X
+        # Proxies POST /<key>?uploadId=X with XML body (part ETags) to S3.
+        #
+        location ~ ^/upload-multipart/complete/([^/]+/.*[^/])$ {
+            if ($request_method != POST) {
+                return 400;
+            }
+
+            set_by_lua_block $canonical_path {
+                local tmp = ngx.var.request_uri
+                tmp = tmp:gsub('?.*$', '', 1)
+                tmp = tmp:gsub('^/[^/]+/[^/]+/', '', 1)
+                tmp = tmp:gsub('+', '%%2B')
+                tmp = tmp:gsub('&', '%%26')
+                tmp = ngx.unescape_uri(tmp)
+                return tmp
+            }
+
+            if ($canonical_path !~ '^[__SUPPORTED_CHARSET__]+$') {
+                return 400;
+            }
+
+            if ($canonical_path !~ '^([^/]+\:staging\-([0-9]{10}\.|)[0-9a-f]+\.[^./]+\.[0-9]+(\.[0-9]+|)|\.md_staging)/') {
+                return 400;
+            }
+
+            set $signature_mode "MULTIPART_COMPLETE";
+            set $aws_access_key "";
+            set $aws_signature "";
+            set $x_amz_date "";
+            set $x_amz_acl "";
+            set $aws_tgt_bucket "${AWS_BUCKET_PREFIX}-staging";
+            set $encoded_key "";
+            rewrite_by_lua_file /etc/nginx/compute_aws_s3_signature.ljbc;
+
+            proxy_set_header Authorization "AWS $aws_access_key:$aws_signature";
+            proxy_set_header x-amz-date $x_amz_date;
+            proxy_set_header Host $aws_tgt_bucket.__S3_ENDPOINT_HOST__;
+            proxy_set_header Connection "keep-alive";
+            proxy_set_header Content-Type "application/xml";
+
+            proxy_pass __S3_ENDPOINT_PROTO__$aws_tgt_bucket/$encoded_key?uploadId=$arg_uploadId;
+        }
+
+        # Multipart upload: abort.
+        # DELETE /upload-multipart/abort/<build>/<path>?uploadId=X
+        # Cancels an in-progress multipart upload and frees stored parts.
+        #
+        location ~ ^/upload-multipart/abort/([^/]+/.*[^/])$ {
+            if ($request_method != DELETE) {
+                return 400;
+            }
+
+            set_by_lua_block $canonical_path {
+                local tmp = ngx.var.request_uri
+                tmp = tmp:gsub('?.*$', '', 1)
+                tmp = tmp:gsub('^/[^/]+/[^/]+/', '', 1)
+                tmp = tmp:gsub('+', '%%2B')
+                tmp = tmp:gsub('&', '%%26')
+                tmp = ngx.unescape_uri(tmp)
+                return tmp
+            }
+
+            if ($canonical_path !~ '^[__SUPPORTED_CHARSET__]+$') {
+                return 400;
+            }
+
+            if ($canonical_path !~ '^([^/]+\:staging\-([0-9]{10}\.|)[0-9a-f]+\.[^./]+\.[0-9]+(\.[0-9]+|)|\.md_staging)/') {
+                return 400;
+            }
+
+            set $signature_mode "MULTIPART_ABORT";
+            set $aws_access_key "";
+            set $aws_signature "";
+            set $x_amz_date "";
+            set $x_amz_acl "";
+            set $aws_tgt_bucket "${AWS_BUCKET_PREFIX}-staging";
+            set $encoded_key "";
+            rewrite_by_lua_file /etc/nginx/compute_aws_s3_signature.ljbc;
+
+            proxy_set_header Authorization "AWS $aws_access_key:$aws_signature";
+            proxy_set_header x-amz-date $x_amz_date;
+            proxy_set_header Host $aws_tgt_bucket.__S3_ENDPOINT_HOST__;
+            proxy_set_header Connection "keep-alive";
+
+            proxy_pass __S3_ENDPOINT_PROTO__$aws_tgt_bucket/$encoded_key?uploadId=$arg_uploadId;
+        }
+
         # Add metadata for a build with the following syntax:
         #
         # /github/scality/$(repo)/$(workflow_name)/$(created_at)/$(artifacts_name)?key1=value1&key1=value2&key2=value3
diff --git a/lua/compute_aws_s3_signature.lua b/lua/compute_aws_s3_signature.lua
index 7419d9e..0117e27 100644
--- a/lua/compute_aws_s3_signature.lua
+++ b/lua/compute_aws_s3_signature.lua
@@ -81,10 +81,10 @@ local function empty_if_nil (str)
 end
 
 
--- Compute AWS S3 signature.
+-- Compute AWS S3 signature with an explicit canonicalized resource.
+-- Used by multipart modes that need to append subresource query params.
 --
-local function compute_S3_signature (canonicalized_amz_headers)
-    local canonicalized_resource = "/" .. ngx.var.aws_tgt_bucket .. "/" .. ngx.var.encoded_key
+local function compute_S3_signature_with_resource (canonicalized_amz_headers, canonicalized_resource)
     local aws_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY')
     local http_content_md5 = empty_if_nil(ngx.var.http_content_md5)
     local http_content_type = empty_if_nil(ngx.var.http_content_type)
@@ -94,6 +94,14 @@ local function compute_S3_signature (canonicalized_amz_headers)
 end
 
 
+-- Compute AWS S3 signature.
+--
+local function compute_S3_signature (canonicalized_amz_headers)
+    local canonicalized_resource = "/" .. ngx.var.aws_tgt_bucket .. "/" .. ngx.var.encoded_key
+    compute_S3_signature_with_resource(canonicalized_amz_headers, canonicalized_resource)
+end
+
+
 -- Compute AWS S3 presignature.
 --
 local function compute_S3_presignature (expires)
@@ -253,6 +261,122 @@ elseif signature_mode == "PRESIGN" then
     --
     return ngx.redirect(ngx.var.redirect_endpoint .. "/" .. ngx.var.aws_tgt_bucket .. "/" .. ngx.var.encoded_key .. "?" .. ngx.var.presign_query_string, ngx.HTTP_MOVED_TEMPORARILY);
 
+elseif signature_mode == "MULTIPART_INITIATE" then
+
+    -- MULTIPART_INITIATE: POST /{key}?uploads
+    -- Initiates a multipart upload; S3 returns an uploadId.
+    --
+    ngx.var.encoded_key = get_encoded_key(ngx.var.canonical_path)
+    if ngx.var.aws_tgt_bucket == "" then
+        ngx.var.aws_tgt_bucket = aws_bucket_prefix .. "-staging"
+    end
+    ngx.var.x_amz_acl = "private"
+    compute_S3_signature_with_resource(
+        "x-amz-acl:" .. ngx.var.x_amz_acl .. "\nx-amz-date:" .. ngx.var.x_amz_date,
+        "/" .. ngx.var.aws_tgt_bucket .. "/" .. ngx.var.encoded_key .. "?uploads"
+    )
+
+elseif signature_mode == "MULTIPART_UPLOAD_PART" then
+
+    -- MULTIPART_UPLOAD_PART: PUT /{key}?partNumber=N&uploadId=X
+    -- Uploads one chunk; S3 returns an ETag for the part.
+    -- partNumber < uploadId alphabetically → correct V2 sort order.
+    --
+    ngx.var.encoded_key = get_encoded_key(ngx.var.canonical_path)
+    if ngx.var.aws_tgt_bucket == "" then
+        ngx.var.aws_tgt_bucket = aws_bucket_prefix .. "-staging"
+    end
+    local part_number = ngx.var.arg_partNumber or ""
+    local upload_id = ngx.var.arg_uploadId or ""
+    compute_S3_signature_with_resource(
+        "x-amz-date:" .. ngx.var.x_amz_date,
+        "/" .. ngx.var.aws_tgt_bucket .. "/" .. ngx.var.encoded_key .. "?partNumber=" .. part_number .. "&uploadId=" .. upload_id
+    )
+
+elseif signature_mode == "MULTIPART_COMPLETE" then
+
+    -- MULTIPART_COMPLETE: POST /{key}?uploadId=X
+    -- Sends XML body with part ETags; S3 assembles the final object.
+    --
+    ngx.var.encoded_key = get_encoded_key(ngx.var.canonical_path)
+    if ngx.var.aws_tgt_bucket == "" then
+        ngx.var.aws_tgt_bucket = aws_bucket_prefix .. "-staging"
+    end
+    local upload_id = ngx.var.arg_uploadId or ""
+    compute_S3_signature_with_resource(
+        "x-amz-date:" .. ngx.var.x_amz_date,
+        "/" .. ngx.var.aws_tgt_bucket .. "/" .. ngx.var.encoded_key .. "?uploadId=" .. upload_id
+    )
+
+elseif signature_mode == "MULTIPART_ABORT" then
+
+    -- MULTIPART_ABORT: DELETE /{key}?uploadId=X
+    -- Cancels an in-progress multipart upload and frees stored parts.
+    --
+    ngx.var.encoded_key = get_encoded_key(ngx.var.canonical_path)
+    if ngx.var.aws_tgt_bucket == "" then
+        ngx.var.aws_tgt_bucket = aws_bucket_prefix .. "-staging"
+    end
+    local upload_id = ngx.var.arg_uploadId or ""
+    compute_S3_signature_with_resource(
+        "x-amz-date:" .. ngx.var.x_amz_date,
+        "/" .. ngx.var.aws_tgt_bucket .. "/" .. ngx.var.encoded_key .. "?uploadId=" .. upload_id
+    )
+
+elseif signature_mode == "PRESIGN_PUT" then
+
+    -- PRESIGN_PUT: return a presigned S3 PUT URL for a single-file upload.
+    -- The client (GitHub Actions runner) GETs this endpoint to obtain a
+    -- presigned URL, then PUTs the file body directly to S3, bypassing nginx.
+    --
+    ngx.var.encoded_key = get_encoded_key(ngx.var.canonical_path)
+    local build_tgt = ngx.var.canonical_path:match("^[^/]+")
+    scan_tgt_buckets(build_tgt)
+
+    local expires = ngx.time() + 3600
+    local aws_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY')
+    local canonicalized_resource = "/" .. ngx.var.aws_tgt_bucket .. "/" .. ngx.var.encoded_key
+    -- Sign as PUT with no Content-MD5 and no Content-Type (presigned, not proxied).
+    local string_to_sign = "PUT\n\n\n" .. expires .. "\n" .. canonicalized_resource
+    local aws_signature = ngx.encode_base64(ngx.hmac_sha1(aws_secret_key, string_to_sign))
+    local presigned_url = ngx.var.redirect_endpoint .. canonicalized_resource .. "?" ..
+        ngx.encode_args({AWSAccessKeyId = ngx.var.aws_access_key, Expires = expires, Signature = aws_signature})
+
+    ngx.header.content_type = "text/plain"
+    ngx.say(presigned_url)
+    return ngx.exit(ngx.HTTP_OK)
+
+elseif signature_mode == "PRESIGN_PART" then
+
+    -- PRESIGN_PART: return a presigned S3 PUT URL for one multipart part.
+    -- The client GETs this endpoint with ?partNumber=N&uploadId=X to obtain a presigned URL,
+    -- then PUTs the part body directly to S3.
+    --
+    ngx.var.encoded_key = get_encoded_key(ngx.var.canonical_path)
+    if ngx.var.aws_tgt_bucket == "" then
+        ngx.var.aws_tgt_bucket = aws_bucket_prefix .. "-staging"
+    end
+
+    local part_number = ngx.var.arg_partNumber or ""
+    local upload_id = ngx.var.arg_uploadId or ""
+    local expires = ngx.time() + 3600
+    local aws_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY')
+    -- subresources must appear in canonical resource (alphabetical: partNumber < uploadId)
+    local canonicalized_resource = "/" .. ngx.var.aws_tgt_bucket .. "/" .. ngx.var.encoded_key ..
+        "?partNumber=" .. part_number .. "&uploadId=" .. upload_id
+    local string_to_sign = "PUT\n\n\n" .. expires .. "\n" .. canonicalized_resource
+    local aws_signature = ngx.encode_base64(ngx.hmac_sha1(aws_secret_key, string_to_sign))
+    local presigned_url = ngx.var.redirect_endpoint .. "/" .. ngx.var.aws_tgt_bucket .. "/" .. ngx.var.encoded_key ..
+        "?partNumber=" .. part_number ..
+        "&uploadId=" .. ngx.escape_uri(upload_id) ..
+        "&AWSAccessKeyId=" .. ngx.escape_uri(ngx.var.aws_access_key) ..
+        "&Expires=" .. expires ..
+        "&Signature=" .. ngx.escape_uri(aws_signature)
+
+    ngx.header.content_type = "text/plain"
+    ngx.say(presigned_url)
+    return ngx.exit(ngx.HTTP_OK)
+
 else
 
     --
diff --git a/lua/github_access.lua b/lua/github_access.lua
index bbd6098..beaf5b6 100644
--- a/lua/github_access.lua
+++ b/lua/github_access.lua
@@ -8,7 +8,7 @@ local github_api_company = os.getenv('GITHUB_API_COMPANY')
 local env_github_restriction_upload = os.getenv('GITHUB_USER_ALLOWED_UPLOAD')
 local github_auth_cache_dir = "/data/nginx/artifacts_github_auth_cache"
 local github_restriction_users = {}
-local github_restriction_paths = { "/upload/", "/copy/", "/version/", "/add_metadata/" }
+local github_restriction_paths = { "/upload/", "/upload-multipart/", "/copy/", "/version/", "/add_metadata/" }
 local bot_username = os.getenv('BOT_USERNAME')
 local bot_token = os.getenv('BOT_TOKEN')
 local local_bot_creds_enabled = false
diff --git a/tests/end2end/test_multipart_upload.py b/tests/end2end/test_multipart_upload.py
new file mode 100644
index 0000000..304b13c
--- /dev/null
+++ b/tests/end2end/test_multipart_upload.py
@@ -0,0 +1,156 @@
+"""Tests for the multipart upload endpoints."""
+
+import hashlib
+import re
+import xml.etree.ElementTree as ET
+
+import pytest
+
+from constants import STAGING_BUILD
+
+
+def _initiate(session, artifacts_url, build, path):
+    """POST /upload-multipart/initiate/{build}/{path} → uploadId."""
+    resp = session.post(
+        f'{artifacts_url}/upload-multipart/initiate/{build}/{path}',
+        headers={'Content-Length': '0'},
+    )
+    assert resp.status_code == 200, f'initiate failed: {resp.status_code} {resp.text}'
+    match = re.search(r'<UploadId>([^<]+)</UploadId>', resp.text)
+    assert match, f'No UploadId in initiate response: {resp.text}'
+    return match.group(1)
+
+
+def _upload_part(session, artifacts_url, build, path, upload_id, part_number, data):
+    """PUT /upload-multipart/part/{build}/{path}?partNumber=N&uploadId=X → ETag."""
+    resp = session.put(
+        f'{artifacts_url}/upload-multipart/part/{build}/{path}',
+        params={'partNumber': part_number, 'uploadId': upload_id},
+        data=data,
+        headers={'Content-Length': str(len(data))},
+    )
+    assert resp.status_code == 200, f'part {part_number} failed: {resp.status_code} {resp.text}'
+    etag = resp.headers.get('ETag', '')
+    assert etag, f'No ETag returned for part {part_number}'
+    return etag
+
+
+def _complete(session, artifacts_url, build, path, upload_id, parts):
+    """POST /upload-multipart/complete/{build}/{path}?uploadId=X with XML body."""
+    xml_parts = ''.join(
+        f'<Part><PartNumber>{pn}</PartNumber><ETag>{etag}</ETag></Part>'
+        for pn, etag in sorted(parts)
+    )
+    xml_body = f'<CompleteMultipartUpload>{xml_parts}</CompleteMultipartUpload>'
+    resp = session.post(
+        f'{artifacts_url}/upload-multipart/complete/{build}/{path}',
+        params={'uploadId': upload_id},
+        data=xml_body.encode(),
+        headers={'Content-Type': 'application/xml'},
+    )
+    assert resp.status_code == 200, f'complete failed: {resp.status_code} {resp.text}'
+    return resp
+
+
+def _abort(session, artifacts_url, build, path, upload_id):
+    """DELETE /upload-multipart/abort/{build}/{path}?uploadId=X."""
+    return session.delete(
+        f'{artifacts_url}/upload-multipart/abort/{build}/{path}',
+        params={'uploadId': upload_id},
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+def test_multipart_upload_single_part(session, artifacts_url):
+    """A single-part multipart upload round-trips correctly."""
+    build = STAGING_BUILD
+    path = 'multipart/single.bin'
+    data = b'hello multipart world'
+
+    upload_id = _initiate(session, artifacts_url, build, path)
+    etag = _upload_part(session, artifacts_url, build, path, upload_id, 1, data)
+    _complete(session, artifacts_url, build, path, upload_id, [(1, etag)])
+
+    dl = session.get(
+        f'{artifacts_url}/download/{build}/{path}',
+        headers={'ForceCacheUpdate': 'yes'},
+    )
+    assert dl.status_code == 200
+    assert dl.content == data
+
+
+def test_multipart_upload_multiple_parts(session, artifacts_url):
+    """Two-part multipart upload assembles into the correct byte stream."""
+    build = STAGING_BUILD
+    path = 'multipart/two-parts.bin'
+    part1 = b'A' * (6 * 1024 * 1024)  # 6 MB (S3 minimum part size)
+    part2 = b'B' * (1 * 1024 * 1024)  # 1 MB last part
+
+    upload_id = _initiate(session, artifacts_url, build, path)
+    etag1 = _upload_part(session, artifacts_url, build, path, upload_id, 1, part1)
+    etag2 = _upload_part(session, artifacts_url, build, path, upload_id, 2, part2)
+    _complete(session, artifacts_url, build, path, upload_id, [(1, etag1), (2, etag2)])
+
+    dl = session.get(
+        f'{artifacts_url}/download/{build}/{path}',
+        headers={'ForceCacheUpdate': 'yes'},
+    )
+    assert dl.status_code == 200
+    expected = part1 + part2
+    assert len(dl.content) == len(expected)
+    assert hashlib.sha256(dl.content).hexdigest() == hashlib.sha256(expected).hexdigest()
+
+
+def test_multipart_upload_abort(session, artifacts_url):
+    """Aborting a multipart upload cleans it up; the object is not stored."""
+    build = STAGING_BUILD
+    path = 'multipart/aborted.bin'
+    data = b'this should never land'
+
+    upload_id = _initiate(session, artifacts_url, build, path)
+    _upload_part(session, artifacts_url, build, path, upload_id, 1, data)
+
+    abort_resp = _abort(session, artifacts_url, build, path, upload_id)
+    assert abort_resp.status_code in (200, 204), \
+        f'abort returned: {abort_resp.status_code} {abort_resp.text}'
+
+    # Object must not be present after abort
+    dl = session.get(f'{artifacts_url}/download/{build}/{path}')
+    assert dl.status_code == 404
+
+
+def test_multipart_initiate_rejected_for_non_staging(session, artifacts_url):
+    """Initiating a multipart upload for a non-staging build is rejected."""
+    promoted = 'githost:owner:repo:promoted-abc123.rel.1'
+    resp = session.post(
+        f'{artifacts_url}/upload-multipart/initiate/{promoted}/file.bin',
+        headers={'Content-Length': '0'},
+    )
+    assert resp.status_code == 400
+
+
+def test_multipart_upload_parts_out_of_order(session, artifacts_url):
+    """Parts submitted out of order are correctly assembled after complete."""
+    build = STAGING_BUILD
+    path = 'multipart/out-of-order.bin'
+    # Non-last parts must meet S3's 5 MB minimum size requirement.
+    part1 = b'A' * (6 * 1024 * 1024)
+    part2 = b'B' * (1 * 1024 * 1024)  # last part may be smaller
+
+    upload_id = _initiate(session, artifacts_url, build, path)
+    # Upload part 2 before part 1 intentionally
+    etag2 = _upload_part(session, artifacts_url, build, path, upload_id, 2, part2)
+    etag1 = _upload_part(session, artifacts_url, build, path, upload_id, 1, part1)
+    # Complete with parts in correct numerical order in the XML
+    _complete(session, artifacts_url, build, path, upload_id, [(1, etag1), (2, etag2)])
+
+    dl = session.get(
+        f'{artifacts_url}/download/{build}/{path}',
+        headers={'ForceCacheUpdate': 'yes'},
+    )
+    assert dl.status_code == 200
+    expected = part1 + part2
+    assert hashlib.sha256(dl.content).hexdigest() == hashlib.sha256(expected).hexdigest()
diff --git a/tests/end2end/test_presign_upload.py b/tests/end2end/test_presign_upload.py
new file mode 100644
index 0000000..195cffc
--- /dev/null
+++ b/tests/end2end/test_presign_upload.py
@@ -0,0 +1,188 @@
+"""Tests for the presigned upload endpoints.
+
+/presign-upload/<build>/<path> → presigned S3 PUT URL for a single file
+/presign-upload-part/<build>/<path> → presigned S3 PUT URL for one multipart part
+
+The presigned URL is returned as plain text (200). The client then PUTs the
+file body (or part body) directly to S3 without going through nginx.
+"""
+
+import hashlib
+import re
+
+import requests
+
+from constants import STAGING_BUILD, PROMOTED_BUILD
+
+
+# ---------------------------------------------------------------------------
+# Helpers shared with multipart flow
+# ---------------------------------------------------------------------------
+
+def _initiate(session, artifacts_url, build, path):
+    """POST /upload-multipart/initiate/{build}/{path} → uploadId."""
+    resp = session.post(
+        f'{artifacts_url}/upload-multipart/initiate/{build}/{path}',
+        headers={'Content-Length': '0'},
+    )
+    assert resp.status_code == 200, f'initiate failed: {resp.status_code} {resp.text}'
+    match = re.search(r'<UploadId>([^<]+)</UploadId>', resp.text)
+    assert match, f'No UploadId in response: {resp.text}'
+    return match.group(1)
+
+
+def _complete(session, artifacts_url, build, path, upload_id, parts):
+    """POST /upload-multipart/complete/{build}/{path}?uploadId=X with XML body."""
+    xml_parts = ''.join(
+        f'<Part><PartNumber>{pn}</PartNumber><ETag>{etag}</ETag></Part>'
+        for pn, etag in sorted(parts)
+    )
+    xml_body = f'<CompleteMultipartUpload>{xml_parts}</CompleteMultipartUpload>'
+    resp = session.post(
+        f'{artifacts_url}/upload-multipart/complete/{build}/{path}',
+        params={'uploadId': upload_id},
+        data=xml_body.encode(),
+        headers={'Content-Type': 'application/xml'},
+    )
+    assert resp.status_code == 200, f'complete failed: {resp.status_code} {resp.text}'
+    return resp
+
+
+# ---------------------------------------------------------------------------
+# /presign-upload/ — single-file presigned PUT
+# ---------------------------------------------------------------------------
+
+def test_presign_upload_returns_url(session, artifacts_url):
+    """GET /presign-upload/ returns 200 with a non-empty URL."""
+    resp = session.get(f'{artifacts_url}/presign-upload/{STAGING_BUILD}/file.txt')
+    assert resp.status_code == 200, f'{resp.status_code} {resp.text}'
+    url = resp.text.strip()
+    assert url.startswith('http'), f'Expected a URL, got: {url!r}'
+
+
+def test_presign_upload_file_reachable_after_direct_put(session, artifacts_url):
+    """Presigned URL PUT bypasses nginx; the file is then downloadable."""
+    data = b'direct-to-s3 content'
+
+    # 1. Get presigned URL from nginx
+    presign_resp = session.get(
+        f'{artifacts_url}/presign-upload/{STAGING_BUILD}/direct.txt'
+    )
+    assert presign_resp.status_code == 200
+    s3_url = presign_resp.text.strip()
+
+    # 2. PUT directly to S3 (no nginx auth headers)
+    put_resp = requests.put(
+        s3_url,
+        data=data,
+        headers={'Content-Length': str(len(data))},
+    )
+    assert put_resp.status_code == 200, f'S3 PUT failed: {put_resp.status_code} {put_resp.text}'
+
+    # 3. File must be downloadable through nginx
+    dl = session.get(
+        f'{artifacts_url}/download/{STAGING_BUILD}/direct.txt',
+        headers={'ForceCacheUpdate': 'yes'},
+    )
+    assert dl.status_code == 200
+    assert dl.content == data
+
+
+def test_presign_upload_rejects_non_get(session, artifacts_url):
+    """Only GET is allowed on /presign-upload/; other methods return 400."""
+    url = f'{artifacts_url}/presign-upload/{STAGING_BUILD}/file.txt'
+    assert session.post(url).status_code == 400
+    assert session.put(url, data=b'x').status_code == 400
+
+
+def test_presign_upload_rejects_non_staging_build(session, artifacts_url):
+    """Non-staging build names are rejected with 400."""
+    resp = session.get(
+        f'{artifacts_url}/presign-upload/{PROMOTED_BUILD}/file.txt'
+    )
+    assert resp.status_code == 400
+
+
+# ---------------------------------------------------------------------------
+# /presign-upload-part/ — presigned multipart part PUT
+# ---------------------------------------------------------------------------
+
+def test_presign_upload_part_returns_url(session, artifacts_url):
+    """GET /presign-upload-part/ with partNumber+uploadId returns a URL."""
+    upload_id = _initiate(session, artifacts_url, STAGING_BUILD, 'presign/part-url.bin')
+    try:
+        resp = session.get(
+            f'{artifacts_url}/presign-upload-part/{STAGING_BUILD}/presign/part-url.bin',
+            params={'partNumber': 1, 'uploadId': upload_id},
+        )
+        assert resp.status_code == 200, f'{resp.status_code} {resp.text}'
+        url = resp.text.strip()
+        assert url.startswith('http'), f'Expected a URL, got: {url!r}'
+    finally:
+        session.delete(
+            f'{artifacts_url}/upload-multipart/abort/{STAGING_BUILD}/presign/part-url.bin',
+            params={'uploadId': upload_id},
+        )
+
+
+def test_presign_multipart_full_round_trip(session, artifacts_url):
+    """Initiate via nginx, upload parts directly to S3, complete via nginx."""
+    build = STAGING_BUILD
+    path = 'presign/multipart.bin'
+    part1 = b'A' * (6 * 1024 * 1024)  # 6 MB (S3 minimum non-last part)
+    part2 = b'B' * (1 * 1024 * 1024)  # 1 MB (last part, may be smaller)
+
+    # 1. Initiate through nginx
+    upload_id = _initiate(session, artifacts_url, build, path)
+
+    etags = []
+    try:
+        for part_number, data in [(1, part1), (2, part2)]:
+            # 2. Get presigned part URL from nginx
+            presign_resp = session.get(
+                f'{artifacts_url}/presign-upload-part/{build}/{path}',
+                params={'partNumber': part_number, 'uploadId': upload_id},
+            )
+            assert presign_resp.status_code == 200, \
+                f'presign-part {part_number} failed: {presign_resp.status_code} {presign_resp.text}'
+            s3_url = presign_resp.text.strip()
+
+            # 3. PUT part directly to S3
+            put_resp = requests.put(
+                s3_url,
+                data=data,
+                headers={'Content-Length': str(len(data))},
+            )
+            assert put_resp.status_code == 200, \
+                f'S3 part {part_number} PUT failed: {put_resp.status_code} {put_resp.text}'
+            etag = put_resp.headers.get('ETag', '')
+            assert etag, f'No ETag for part {part_number}'
+            etags.append((part_number, etag))
+    except Exception:
+        session.delete(
+            f'{artifacts_url}/upload-multipart/abort/{build}/{path}',
+            params={'uploadId': upload_id},
+        )
+        raise
+
+    # 4. Complete through nginx
+    _complete(session, artifacts_url, build, path, upload_id, etags)
+
+    # 5. Verify the assembled object
+    dl = session.get(
+        f'{artifacts_url}/download/{build}/{path}',
+        headers={'ForceCacheUpdate': 'yes'},
+    )
+    assert dl.status_code == 200
+    expected = part1 + part2
+    assert len(dl.content) == len(expected)
+    assert hashlib.sha256(dl.content).hexdigest() == hashlib.sha256(expected).hexdigest()
+
+
+def test_presign_upload_part_rejects_non_staging(session, artifacts_url):
+    """Non-staging build names are rejected on /presign-upload-part/ too."""
+    resp = session.get(
+        f'{artifacts_url}/presign-upload-part/{PROMOTED_BUILD}/file.bin',
+        params={'partNumber': 1, 'uploadId': 'fake-id'},
+    )
+    assert resp.status_code == 400