From b11e6625682ae1f9e405750926ae356b0069f0b5 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Mon, 8 Jun 2026 09:44:17 -0400 Subject: [PATCH 1/4] Run Docker images with reduced privileges Signed-off-by: Juan Cruz Viotti --- Dockerfile | 29 +++++++++++++++++++++++++++++ docker/wrapper-server.sh | 11 +++++++++-- enterprise/Dockerfile | 29 ++++++++++++++++++++++++++++- 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7fea4186e..f24ad86cb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -52,6 +52,25 @@ RUN ctest --test-dir /build --build-config ${SOURCEMETA_ONE_BUILD_TYPE} \ FROM debian:trixie-slim +# Install `gosu` so the runtime entrypoint can drop from root to the +# unprivileged service account. We deliberately don't set `USER` on +# this image so build-time `RUN` instructions (including those in +# downstream consumer Dockerfiles) keep executing as root, matching +# the well-trodden Postgres/Redis/MySQL pattern. +RUN apt-get --yes update \ + && apt-get install --yes --no-install-recommends gosu \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +# Create the unprivileged service account that the entrypoint will +# `gosu` into. The UID sits above the typical 1000-range dev user +# (so host-mounted volumes can be mapped unambiguously) and outside +# the Debian system-user range (1-999) reserved for daemons. 
+ARG SOURCEMETA_ONE_UID=10001 +RUN groupadd --system --gid "${SOURCEMETA_ONE_UID}" sourcemeta \ + && useradd --system --uid "${SOURCEMETA_ONE_UID}" \ + --gid "${SOURCEMETA_ONE_UID}" \ + --no-create-home --shell /usr/sbin/nologin sourcemeta + # See https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys LABEL org.opencontainers.image.url="https://one.sourcemeta.com" LABEL org.opencontainers.image.documentation="https://one.sourcemeta.com" @@ -90,6 +109,16 @@ COPY docker/wrapper-index.sh /usr/bin/sourcemeta COPY docker/wrapper-server.sh /usr/bin/sourcemeta-server COPY docker/transaction-overlayfs.sh /usr/bin/sourcemeta-transaction-overlayfs +# Pre-create the output directory and hand it to the service account +# so the runtime server can read through it (and the optional +# transactional re-index path can write through it) without +# elevation. The chown is non-recursive to keep blast-radius bounded +# if a build-arg override ever pointed the path at the rootfs root. +RUN test -n "${SOURCEMETA_ONE_OUTPUT}" \ + && test "${SOURCEMETA_ONE_OUTPUT}" != "/" \ + && mkdir -p "${SOURCEMETA_ONE_OUTPUT}" \ + && chown sourcemeta:sourcemeta "${SOURCEMETA_ONE_OUTPUT}" + ENV SOURCEMETA_ONE_PORT=8000 HEALTHCHECK --interval=1s --timeout=2s --start-period=1s --retries=10 CMD grep -qE \ "^\s*[0-9]+:\s+[0-9A-F]+:$(printf '%04X' $SOURCEMETA_ONE_PORT)\s+[0-9A-F:]+\s+0A\s" \ diff --git a/docker/wrapper-server.sh b/docker/wrapper-server.sh index 2e954b83d..b7d0c2556 100755 --- a/docker/wrapper-server.sh +++ b/docker/wrapper-server.sh @@ -3,8 +3,15 @@ set -o errexit set -o nounset -# For better shell expansion in the Dockerfile +# For better shell expansion in the Dockerfile. +# +# `gosu` drops privileges from root (the image's default execution +# identity, kept that way so build-time `RUN` instructions in +# consumer Dockerfiles are unsurprising) to the unprivileged service +# account before the long-running server starts. 
The signal-handling +# behaviour of `gosu` is the same as `exec`, so SIGTERM and friends +# reach the server process directly. -exec /usr/bin/sourcemeta-one-server \ +exec /usr/sbin/gosu sourcemeta /usr/bin/sourcemeta-one-server \ "$SOURCEMETA_ONE_OUTPUT" \ "$SOURCEMETA_ONE_PORT" diff --git a/enterprise/Dockerfile b/enterprise/Dockerfile index 42dfbea93..d0fde36e6 100644 --- a/enterprise/Dockerfile +++ b/enterprise/Dockerfile @@ -76,11 +76,28 @@ RUN mkdir -p /usr/share/sourcemeta/one \ FROM debian:trixie-slim +# `gosu` lets the runtime entrypoint drop from root to the +# unprivileged service account. We deliberately don't set `USER` on +# this image so build-time `RUN` instructions (including those in +# downstream consumer Dockerfiles) keep executing as root, matching +# the well-trodden Postgres/Redis/MySQL pattern. RUN apt-get --yes update && apt-get install --yes --no-install-recommends \ - openssl-provider-fips \ + gosu openssl-provider-fips \ && apt-get clean && rm -rf /var/lib/apt/lists/* COPY --from=builder /etc/ssl/openssl.cnf /etc/ssl/openssl.cnf +# Create the unprivileged service account before the package purge +# below removes `passwd` (and with it `useradd`). The account lives +# in `/etc/passwd` after this point and survives the purge. The UID +# sits above the typical 1000-range dev user (so host-mounted volumes +# can be mapped unambiguously) and outside the Debian system-user +# range (1-999) reserved for daemons. 
+ARG SOURCEMETA_ONE_UID=10001 +RUN groupadd --system --gid "${SOURCEMETA_ONE_UID}" sourcemeta \ + && useradd --system --uid "${SOURCEMETA_ONE_UID}" \ + --gid "${SOURCEMETA_ONE_UID}" \ + --no-create-home --shell /usr/sbin/nologin sourcemeta + # Commercial editions require a paid license # See https://one.sourcemeta.com/commercial/ @@ -142,6 +159,16 @@ COPY docker/wrapper-index.sh /usr/bin/sourcemeta COPY docker/wrapper-server.sh /usr/bin/sourcemeta-server COPY docker/transaction-overlayfs.sh /usr/bin/sourcemeta-transaction-overlayfs +# Pre-create the output directory and hand it to the service account +# so the runtime server can read through it (and the optional +# transactional re-index path can write through it) without +# elevation. The chown is non-recursive to keep blast-radius bounded +# if a build-arg override ever pointed the path at the rootfs root. +RUN test -n "${SOURCEMETA_ONE_OUTPUT}" \ + && test "${SOURCEMETA_ONE_OUTPUT}" != "/" \ + && mkdir -p "${SOURCEMETA_ONE_OUTPUT}" \ + && chown sourcemeta:sourcemeta "${SOURCEMETA_ONE_OUTPUT}" + ENV SOURCEMETA_ONE_PORT=8000 HEALTHCHECK --interval=1s --timeout=2s --start-period=1s --retries=10 CMD grep -qE \ "^\s*[0-9]+:\s+[0-9A-F]+:$(printf '%04X' $SOURCEMETA_ONE_PORT)\s+[0-9A-F:]+\s+0A\s" \ From c3a0dd8ca300d3989cacb51391333d5d7dc88dd5 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Mon, 8 Jun 2026 10:13:09 -0400 Subject: [PATCH 2/4] More Signed-off-by: Juan Cruz Viotti --- Dockerfile | 23 +++++++++++++++++++---- docker/wrapper-server.sh | 31 ++++++++++++++++++++++++------- enterprise/Dockerfile | 23 +++++++++++++++++++---- 3 files changed, 62 insertions(+), 15 deletions(-) diff --git a/Dockerfile b/Dockerfile index f24ad86cb..6f05c017e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -57,8 +57,13 @@ FROM debian:trixie-slim # this image so build-time `RUN` instructions (including those in # downstream consumer Dockerfiles) keep executing as root, matching # the well-trodden Postgres/Redis/MySQL pattern. 
+# +# `libcap2-bin` provides `setcap`, used below to grant the server +# binary `CAP_NET_BIND_SERVICE` so the dropped service account can +# bind privileged ports (below 1024) if the operator chooses to run +# without a separate TLS terminator in front of the image. RUN apt-get --yes update \ - && apt-get install --yes --no-install-recommends gosu \ + && apt-get install --yes --no-install-recommends gosu libcap2-bin \ && apt-get clean && rm -rf /var/lib/apt/lists/* # Create the unprivileged service account that the entrypoint will @@ -96,6 +101,13 @@ COPY --from=builder /usr/share/sourcemeta/one \ RUN ldd /usr/bin/sourcemeta-one-index RUN ldd /usr/bin/sourcemeta-one-server +# File capabilities are filesystem extended attributes that do not +# survive a cross-stage `COPY --from=builder`, so the capability has +# to be applied here in the runtime stage after the binary lands. +# `+ep` puts the capability in both the permitted and effective sets +# of the process at exec time. +RUN setcap cap_net_bind_service+ep /usr/bin/sourcemeta-one-server + # We expect images that extend this one to use this directory ARG SOURCEMETA_ONE_WORKDIR=/source ENV SOURCEMETA_ONE_WORKDIR=${SOURCEMETA_ONE_WORKDIR} @@ -113,11 +125,14 @@ COPY docker/transaction-overlayfs.sh /usr/bin/sourcemeta-transaction-overlayfs # so the runtime server can read through it (and the optional # transactional re-index path can write through it) without # elevation. The chown is non-recursive to keep blast-radius bounded -# if a build-arg override ever pointed the path at the rootfs root. +# if a build-arg override ever pointed the path at the rootfs root, +# and we normalize via `realpath` so values like `/..`, `/foo/..`, or +# symlinks that resolve to `/` are rejected too. 
RUN test -n "${SOURCEMETA_ONE_OUTPUT}" \ - && test "${SOURCEMETA_ONE_OUTPUT}" != "/" \ && mkdir -p "${SOURCEMETA_ONE_OUTPUT}" \ - && chown sourcemeta:sourcemeta "${SOURCEMETA_ONE_OUTPUT}" + && resolved_output="$(realpath "${SOURCEMETA_ONE_OUTPUT}")" \ + && test "${resolved_output}" != "/" \ + && chown sourcemeta:sourcemeta "${resolved_output}" ENV SOURCEMETA_ONE_PORT=8000 HEALTHCHECK --interval=1s --timeout=2s --start-period=1s --retries=10 CMD grep -qE \ diff --git a/docker/wrapper-server.sh b/docker/wrapper-server.sh index b7d0c2556..6a4bfb67b 100755 --- a/docker/wrapper-server.sh +++ b/docker/wrapper-server.sh @@ -5,13 +5,30 @@ set -o nounset # For better shell expansion in the Dockerfile. # -# `gosu` drops privileges from root (the image's default execution -# identity, kept that way so build-time `RUN` instructions in -# consumer Dockerfiles are unsurprising) to the unprivileged service -# account before the long-running server starts. The signal-handling -# behaviour of `gosu` is the same as `exec`, so SIGTERM and friends -# reach the server process directly. +# When the entrypoint runs as root (the image's default execution +# identity, kept that way so build-time `RUN` instructions in consumer +# Dockerfiles are unsurprising), `gosu` drops privileges to the +# unprivileged service account before the long-running server starts. +# When the entrypoint is already running as a non-root account (e.g. +# `docker run --user 1234`, or an orchestrator like OpenShift that +# assigns an arbitrary UID via its restricted SCC), `gosu` would fail +# because privilege drop requires root, so we just exec the server +# directly and trust the caller's choice. Either path uses `exec` +# (and `gosu` itself uses `execve(2)`), so SIGTERM and friends reach +# the server process directly without an intermediate shell. 
+# +# Binding `SOURCEMETA_ONE_PORT` to a privileged port (below 1024) +# works even after the privilege drop because the server binary +# carries `CAP_NET_BIND_SERVICE` as a file capability, granted at +# image build time. + +if [ "$(id -u)" -eq 0 ] +then + exec /usr/sbin/gosu sourcemeta /usr/bin/sourcemeta-one-server \ + "$SOURCEMETA_ONE_OUTPUT" \ + "$SOURCEMETA_ONE_PORT" +fi -exec /usr/sbin/gosu sourcemeta /usr/bin/sourcemeta-one-server \ +exec /usr/bin/sourcemeta-one-server \ "$SOURCEMETA_ONE_OUTPUT" \ "$SOURCEMETA_ONE_PORT" diff --git a/enterprise/Dockerfile b/enterprise/Dockerfile index d0fde36e6..4f1caef75 100644 --- a/enterprise/Dockerfile +++ b/enterprise/Dockerfile @@ -81,8 +81,13 @@ FROM debian:trixie-slim # this image so build-time `RUN` instructions (including those in # downstream consumer Dockerfiles) keep executing as root, matching # the well-trodden Postgres/Redis/MySQL pattern. +# +# `libcap2-bin` provides `setcap`, used below to grant the server +# binary `CAP_NET_BIND_SERVICE` so the dropped service account can +# bind privileged ports (below 1024) if the operator chooses to run +# without a separate TLS terminator in front of the image. RUN apt-get --yes update && apt-get install --yes --no-install-recommends \ - gosu openssl-provider-fips \ + gosu libcap2-bin openssl-provider-fips \ && apt-get clean && rm -rf /var/lib/apt/lists/* COPY --from=builder /etc/ssl/openssl.cnf /etc/ssl/openssl.cnf @@ -126,6 +131,13 @@ COPY --from=builder /usr/share/sourcemeta/one \ RUN ldd /usr/bin/sourcemeta-one-index RUN ldd /usr/bin/sourcemeta-one-server +# File capabilities are filesystem extended attributes that do not +# survive a cross-stage `COPY --from=builder`, so the capability has +# to be applied here in the runtime stage after the binary lands. +# `+ep` puts the capability in both the permitted and effective sets +# of the process at exec time. 
+RUN setcap cap_net_bind_service+ep /usr/bin/sourcemeta-one-server + # Verify that the index binary uses system OpenSSL for cryptography RUN ldd /usr/bin/sourcemeta-one-index | grep libcrypto # Verify that the OpenSSL FIPS provider is configured and present @@ -163,11 +175,14 @@ COPY docker/transaction-overlayfs.sh /usr/bin/sourcemeta-transaction-overlayfs # so the runtime server can read through it (and the optional # transactional re-index path can write through it) without # elevation. The chown is non-recursive to keep blast-radius bounded -# if a build-arg override ever pointed the path at the rootfs root. +# if a build-arg override ever pointed the path at the rootfs root, +# and we normalize via `realpath` so values like `/..`, `/foo/..`, or +# symlinks that resolve to `/` are rejected too. RUN test -n "${SOURCEMETA_ONE_OUTPUT}" \ - && test "${SOURCEMETA_ONE_OUTPUT}" != "/" \ && mkdir -p "${SOURCEMETA_ONE_OUTPUT}" \ - && chown sourcemeta:sourcemeta "${SOURCEMETA_ONE_OUTPUT}" + && resolved_output="$(realpath "${SOURCEMETA_ONE_OUTPUT}")" \ + && test "${resolved_output}" != "/" \ + && chown sourcemeta:sourcemeta "${resolved_output}" ENV SOURCEMETA_ONE_PORT=8000 HEALTHCHECK --interval=1s --timeout=2s --start-period=1s --retries=10 CMD grep -qE \ From 5f34aa91c8bb7619e201d35ffb499a1d7e7e9659 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Mon, 8 Jun 2026 10:27:49 -0400 Subject: [PATCH 3/4] More Signed-off-by: Juan Cruz Viotti --- Dockerfile | 3 ++- docker/wrapper-server.sh | 8 ++++++++ enterprise/Dockerfile | 3 ++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6f05c017e..a971b13ae 100644 --- a/Dockerfile +++ b/Dockerfile @@ -132,7 +132,8 @@ RUN test -n "${SOURCEMETA_ONE_OUTPUT}" \ && mkdir -p "${SOURCEMETA_ONE_OUTPUT}" \ && resolved_output="$(realpath "${SOURCEMETA_ONE_OUTPUT}")" \ && test "${resolved_output}" != "/" \ - && chown sourcemeta:sourcemeta "${resolved_output}" + && chown sourcemeta:sourcemeta 
"${resolved_output}" \ + && chmod 775 "${resolved_output}" ENV SOURCEMETA_ONE_PORT=8000 HEALTHCHECK --interval=1s --timeout=2s --start-period=1s --retries=10 CMD grep -qE \ diff --git a/docker/wrapper-server.sh b/docker/wrapper-server.sh index 6a4bfb67b..ec1bcf2bc 100755 --- a/docker/wrapper-server.sh +++ b/docker/wrapper-server.sh @@ -17,6 +17,14 @@ set -o nounset # (and `gosu` itself uses `execve(2)`), so SIGTERM and friends reach # the server process directly without an intermediate shell. # +# The output directory is mode 775 owned by the `sourcemeta` group at +# build time, so the default server (which only reads from it) works +# at any UID. Operators that need write access from an arbitrary UID +# (e.g. the optional transactional re-index path) should run as +# `--user :10001` so the process inherits the `sourcemeta` group +# and picks up the directory's group-write bit, or bind-mount their +# own writable volume over the output path. +# # Binding `SOURCEMETA_ONE_PORT` to a privileged port (below 1024) # works even after the privilege drop because the server binary # carries `CAP_NET_BIND_SERVICE` as a file capability, granted at diff --git a/enterprise/Dockerfile b/enterprise/Dockerfile index 4f1caef75..e5602e57e 100644 --- a/enterprise/Dockerfile +++ b/enterprise/Dockerfile @@ -182,7 +182,8 @@ RUN test -n "${SOURCEMETA_ONE_OUTPUT}" \ && mkdir -p "${SOURCEMETA_ONE_OUTPUT}" \ && resolved_output="$(realpath "${SOURCEMETA_ONE_OUTPUT}")" \ && test "${resolved_output}" != "/" \ - && chown sourcemeta:sourcemeta "${resolved_output}" + && chown sourcemeta:sourcemeta "${resolved_output}" \ + && chmod 775 "${resolved_output}" ENV SOURCEMETA_ONE_PORT=8000 HEALTHCHECK --interval=1s --timeout=2s --start-period=1s --retries=10 CMD grep -qE \ From fe36653f72d91fa1b5c3c0ebbf0d474eb539af2b Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Mon, 8 Jun 2026 10:45:24 -0400 Subject: [PATCH 4/4] Fix Signed-off-by: Juan Cruz Viotti --- docker/wrapper-server.sh | 8 +++++--- 1 
file changed, 5 insertions(+), 3 deletions(-) diff --git a/docker/wrapper-server.sh b/docker/wrapper-server.sh index ec1bcf2bc..21d69834f 100755 --- a/docker/wrapper-server.sh +++ b/docker/wrapper-server.sh @@ -21,9 +21,11 @@ set -o nounset # build time, so the default server (which only reads from it) works # at any UID. Operators that need write access from an arbitrary UID # (e.g. the optional transactional re-index path) should run as -# `--user :10001` so the process inherits the `sourcemeta` group -# and picks up the directory's group-write bit, or bind-mount their -# own writable volume over the output path. +# `--user <uid>:<gid>` (the GID matches the +# `SOURCEMETA_ONE_UID` build arg, default `10001`) so the process +# inherits the `sourcemeta` group and picks up the directory's +# group-write bit, or bind-mount their own writable volume over the +# output path. # # Binding `SOURCEMETA_ONE_PORT` to a privileged port (below 1024) # works even after the privilege drop because the server binary