From 37d382f91941570fa810d20794bd715e77a2d598 Mon Sep 17 00:00:00 2001
From: kmontemayor <kmontemayor@snapchat.com>
Date: Mon, 18 May 2026 18:59:59 +0000
Subject: [PATCH 1/4] Expose gigl vars as env vars for the custom launcher

---
 gigl/env/custom_launcher.py                   |  16 +++
 gigl/src/common/custom_launcher.py            |  71 ++++++++---
 tests/unit/src/common/custom_launcher_test.py | 111 ++++++++++++++++++
 3 files changed, 180 insertions(+), 18 deletions(-)
 create mode 100644 gigl/env/custom_launcher.py

diff --git a/gigl/env/custom_launcher.py b/gigl/env/custom_launcher.py
new file mode 100644
index 000000000..21eb4c223
--- /dev/null
+++ b/gigl/env/custom_launcher.py
@@ -0,0 +1,16 @@
+"""Environment-variable keys exported by ``launch_custom``.
+
+These keys are set on the subprocess env (never on the parent
+``os.environ``) by ``gigl.src.common.custom_launcher.launch_custom`` so
+that receiving CLIs can ``os.environ.get(...)`` their runtime context.
+"""
+
+from typing import Final
+
+GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY: Final[str] = "GIGL_APPLIED_TASK_IDENTIFIER"
+GIGL_TASK_CONFIG_URI_ENV_KEY: Final[str] = "GIGL_TASK_CONFIG_URI"
+GIGL_RESOURCE_CONFIG_URI_ENV_KEY: Final[str] = "GIGL_RESOURCE_CONFIG_URI"
+GIGL_PROCESS_COMMAND_ENV_KEY: Final[str] = "GIGL_PROCESS_COMMAND"
+GIGL_CPU_DOCKER_URI_ENV_KEY: Final[str] = "GIGL_CPU_DOCKER_URI"
+GIGL_CUDA_DOCKER_URI_ENV_KEY: Final[str] = "GIGL_CUDA_DOCKER_URI"
+GIGL_COMPONENT_ENV_KEY: Final[str] = "GIGL_COMPONENT"
diff --git a/gigl/src/common/custom_launcher.py b/gigl/src/common/custom_launcher.py
index 10c3115e9..f264782d6 100644
--- a/gigl/src/common/custom_launcher.py
+++ b/gigl/src/common/custom_launcher.py
@@ -12,8 +12,16 @@
 dynamic content (runtime URIs, image refs, etc.) is the caller's
 responsibility — typically resolved at YAML-load time before the
 proto reaches this module.
+
+The dispatcher exports its context args as ``GIGL_*`` environment
+variables on the subprocess env (see ``gigl.env.custom_launcher``) so
+receiving CLIs can ``os.environ.get(...)`` whatever runtime context
+they need. The parent process's ``os.environ`` is never mutated; the
+``GIGL_*`` keys live only in the per-call env passed to
+``subprocess.run``.
 """
 
+import os
 import shlex
 import subprocess
 from collections.abc import Mapping
@@ -21,6 +29,15 @@
 
 from gigl.common import Uri
 from gigl.common.logger import Logger
+from gigl.env.custom_launcher import (
+    GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY,
+    GIGL_COMPONENT_ENV_KEY,
+    GIGL_CPU_DOCKER_URI_ENV_KEY,
+    GIGL_CUDA_DOCKER_URI_ENV_KEY,
+    GIGL_PROCESS_COMMAND_ENV_KEY,
+    GIGL_RESOURCE_CONFIG_URI_ENV_KEY,
+    GIGL_TASK_CONFIG_URI_ENV_KEY,
+)
 from gigl.src.common.constants.components import GiGLComponents
 from snapchat.research.gbml.gigl_resource_config_pb2 import CustomLauncherConfig
 
@@ -46,7 +63,7 @@ def launch_custom(
 
     Composes a shell line as ``command`` followed by each ``args[]``
     element passed through ``shlex.quote``, then invokes
-    ``subprocess.run(shell_line, shell=True, check=True)``.
+    ``subprocess.run(shell_line, shell=True, check=True, env=env)``.
 
     The dispatcher takes ``command`` and ``args[]`` verbatim — no
     template substitution of any kind. Any placeholder text in those
@@ -54,28 +71,35 @@ def launch_custom(
     substitution should resolve it at YAML-load time before the proto
     reaches this module.
 
-    ``applied_task_identifier``, ``task_config_uri``,
-    ``resource_config_uri``, ``process_command``,
-    ``process_runtime_args``, ``cpu_docker_uri``, and ``cuda_docker_uri``
-    are accepted for API symmetry with the GLT-side Vertex AI launchers
-    but are intentionally not plumbed into the subprocess — the
-    receiving CLI is expected to source whatever context it needs from
-    the resource config it gets handed (or from env vars inherited from
-    the parent process).
+    The subprocess env is built per-call from ``os.environ.copy()`` plus
+    the ``GIGL_*`` keys defined in :mod:`gigl.env.custom_launcher`. The
+    parent process's ``os.environ`` is never mutated. Optional URI args
+    (``cpu_docker_uri``, ``cuda_docker_uri``) are omitted from the env
+    when ``None`` so the receiver's ``os.environ.get(KEY)`` returns
+    ``None`` and preserves the original ``Optional[str]`` semantics.
 
     Args:
         custom_launcher_config: Proto whose ``command`` is the shell
             snippet to execute and whose ``args`` are positional
             arguments appended verbatim.
-        applied_task_identifier: Accepted for back-compat; ignored.
-        task_config_uri: Accepted for back-compat; ignored.
-        resource_config_uri: Accepted for back-compat; ignored.
-        process_command: Accepted for back-compat; ignored.
-        process_runtime_args: Accepted for back-compat; ignored.
-        cpu_docker_uri: Accepted for back-compat; ignored.
-        cuda_docker_uri: Accepted for back-compat; ignored.
+        applied_task_identifier: Exported to the subprocess as
+            ``GIGL_APPLIED_TASK_IDENTIFIER``.
+        task_config_uri: Exported to the subprocess as
+            ``GIGL_TASK_CONFIG_URI`` (stringified).
+        resource_config_uri: Exported to the subprocess as
+            ``GIGL_RESOURCE_CONFIG_URI`` (stringified).
+        process_command: Exported to the subprocess as
+            ``GIGL_PROCESS_COMMAND``.
+        process_runtime_args: Accepted for API symmetry with the
+            GLT-side Vertex AI launchers but not currently exported —
+            there is no clean single-env-var encoding for a dict.
+        cpu_docker_uri: Exported as ``GIGL_CPU_DOCKER_URI`` when set;
+            the env var is omitted entirely when ``None``.
+        cuda_docker_uri: Exported as ``GIGL_CUDA_DOCKER_URI`` when set;
+            the env var is omitted entirely when ``None``.
         component: Which GiGL component is being launched. Must be in
-            ``_LAUNCHABLE_COMPONENTS``.
+            ``_LAUNCHABLE_COMPONENTS``. Exported as ``GIGL_COMPONENT``
+            using ``component.name`` (e.g. ``"Trainer"``).
 
     Raises:
         ValueError: If ``component`` is not Trainer or Inferencer, or if
@@ -91,6 +115,17 @@ def launch_custom(
     command: str = custom_launcher_config.command
     args: list[str] = list(custom_launcher_config.args)
 
+    env: dict[str, str] = os.environ.copy()
+    env[GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY] = applied_task_identifier
+    env[GIGL_TASK_CONFIG_URI_ENV_KEY] = str(task_config_uri)
+    env[GIGL_RESOURCE_CONFIG_URI_ENV_KEY] = str(resource_config_uri)
+    env[GIGL_PROCESS_COMMAND_ENV_KEY] = process_command
+    env[GIGL_COMPONENT_ENV_KEY] = component.name
+    if cpu_docker_uri is not None:
+        env[GIGL_CPU_DOCKER_URI_ENV_KEY] = cpu_docker_uri
+    if cuda_docker_uri is not None:
+        env[GIGL_CUDA_DOCKER_URI_ENV_KEY] = cuda_docker_uri
+
     shell_line = " ".join([command, *(shlex.quote(a) for a in args)])
     logger.info(f"Launching {component.name} via subprocess: {shell_line!r}")
-    subprocess.run(shell_line, shell=True, check=True)
+    subprocess.run(shell_line, shell=True, check=True, env=env)
diff --git a/tests/unit/src/common/custom_launcher_test.py b/tests/unit/src/common/custom_launcher_test.py
index 6a8765352..94b3a2100 100644
--- a/tests/unit/src/common/custom_launcher_test.py
+++ b/tests/unit/src/common/custom_launcher_test.py
@@ -1,10 +1,20 @@
 """Unit tests for ``gigl.src.common.custom_launcher``."""
 
+import os
 from unittest.mock import MagicMock, patch
 
 from absl.testing import absltest
 
 from gigl.common import Uri
+from gigl.env.custom_launcher import (
+    GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY,
+    GIGL_COMPONENT_ENV_KEY,
+    GIGL_CPU_DOCKER_URI_ENV_KEY,
+    GIGL_CUDA_DOCKER_URI_ENV_KEY,
+    GIGL_PROCESS_COMMAND_ENV_KEY,
+    GIGL_RESOURCE_CONFIG_URI_ENV_KEY,
+    GIGL_TASK_CONFIG_URI_ENV_KEY,
+)
 from gigl.src.common.constants.components import GiGLComponents
 from gigl.src.common.custom_launcher import launch_custom
 from snapchat.research.gbml import gigl_resource_config_pb2
@@ -113,6 +123,107 @@ def test_args_with_spaces_are_shell_quoted(self, mock_run: MagicMock) -> None:
         self.assertIn("'a b c'", shell_line)
         self.assertIn("'--name=with space'", shell_line)
 
+    @patch("gigl.src.common.custom_launcher.subprocess.run")
+    def test_dispatch_sets_gigl_env_vars(self, mock_run: MagicMock) -> None:
+        config = self._build_config(command="python -m my.cli")
+        launch_custom(
+            custom_launcher_config=config,
+            applied_task_identifier="job-42",
+            task_config_uri=Uri("gs://bucket/task.yaml"),
+            resource_config_uri=Uri("gs://bucket/resource.yaml"),
+            process_command="python -m my.cli",
+            process_runtime_args={},
+            cpu_docker_uri="gcr.io/p/cpu:tag",
+            cuda_docker_uri="gcr.io/p/cuda:tag",
+            component=GiGLComponents.Trainer,
+        )
+        env = mock_run.call_args.kwargs["env"]
+        self.assertEqual(env[GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY], "job-42")
+        self.assertEqual(env[GIGL_TASK_CONFIG_URI_ENV_KEY], "gs://bucket/task.yaml")
+        self.assertEqual(
+            env[GIGL_RESOURCE_CONFIG_URI_ENV_KEY], "gs://bucket/resource.yaml"
+        )
+        self.assertEqual(env[GIGL_PROCESS_COMMAND_ENV_KEY], "python -m my.cli")
+        self.assertEqual(env[GIGL_CPU_DOCKER_URI_ENV_KEY], "gcr.io/p/cpu:tag")
+        self.assertEqual(env[GIGL_CUDA_DOCKER_URI_ENV_KEY], "gcr.io/p/cuda:tag")
+        # component is exported via .name (the enum member identifier).
+        self.assertEqual(env[GIGL_COMPONENT_ENV_KEY], "Trainer")
+
+    @patch("gigl.src.common.custom_launcher.subprocess.run")
+    def test_dispatch_omits_optional_uris_when_none(self, mock_run: MagicMock) -> None:
+        config = self._build_config(command="echo")
+        launch_custom(
+            custom_launcher_config=config,
+            applied_task_identifier="job",
+            task_config_uri=Uri("gs://bucket/task.yaml"),
+            resource_config_uri=Uri("gs://bucket/resource.yaml"),
+            process_command="echo",
+            process_runtime_args={},
+            cpu_docker_uri=None,
+            cuda_docker_uri=None,
+            component=GiGLComponents.Inferencer,
+        )
+        env = mock_run.call_args.kwargs["env"]
+        # Optional URIs must be omitted entirely (not stringified to "None"
+        # nor set to ""), so receivers see env.get(KEY) is None.
+        self.assertNotIn(GIGL_CPU_DOCKER_URI_ENV_KEY, env)
+        self.assertNotIn(GIGL_CUDA_DOCKER_URI_ENV_KEY, env)
+        # Required keys are still present.
+        self.assertEqual(env[GIGL_COMPONENT_ENV_KEY], "Inferencer")
+
+    @patch("gigl.src.common.custom_launcher.subprocess.run")
+    def test_dispatch_does_not_mutate_parent_os_environ(
+        self, mock_run: MagicMock
+    ) -> None:
+        # Pre-condition: none of the GIGL_* keys leak into the parent.
+        snapshot = dict(os.environ)
+        config = self._build_config(command="echo")
+        launch_custom(
+            custom_launcher_config=config,
+            applied_task_identifier="job",
+            task_config_uri=Uri("gs://bucket/task.yaml"),
+            resource_config_uri=Uri("gs://bucket/resource.yaml"),
+            process_command="echo",
+            process_runtime_args={},
+            cpu_docker_uri="gcr.io/p/cpu:tag",
+            cuda_docker_uri="gcr.io/p/cuda:tag",
+            component=GiGLComponents.Trainer,
+        )
+        self.assertEqual(dict(os.environ), snapshot)
+        for key in (
+            GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY,
+            GIGL_TASK_CONFIG_URI_ENV_KEY,
+            GIGL_RESOURCE_CONFIG_URI_ENV_KEY,
+            GIGL_PROCESS_COMMAND_ENV_KEY,
+            GIGL_CPU_DOCKER_URI_ENV_KEY,
+            GIGL_CUDA_DOCKER_URI_ENV_KEY,
+            GIGL_COMPONENT_ENV_KEY,
+        ):
+            self.assertNotIn(key, os.environ)
+
+    @patch("gigl.src.common.custom_launcher.subprocess.run")
+    def test_dispatch_preserves_inherited_env(self, mock_run: MagicMock) -> None:
+        sentinel_key = "GIGL_TEST_PARENT_ENV_SENTINEL"
+        sentinel_value = "preserved-value"
+        try:
+            os.environ[sentinel_key] = sentinel_value
+            config = self._build_config(command="echo")
+            launch_custom(
+                custom_launcher_config=config,
+                applied_task_identifier="job",
+                task_config_uri=Uri("gs://bucket/task.yaml"),
+                resource_config_uri=Uri("gs://bucket/resource.yaml"),
+                process_command="echo",
+                process_runtime_args={},
+                cpu_docker_uri=None,
+                cuda_docker_uri=None,
+                component=GiGLComponents.Trainer,
+            )
+            env = mock_run.call_args.kwargs["env"]
+            self.assertEqual(env.get(sentinel_key), sentinel_value)
+        finally:
+            os.environ.pop(sentinel_key, None)
+
 
 if __name__ == "__main__":
     absltest.main()

From 8c1a09fb4fcca1ee2e18d0d51b49fd7826624b23 Mon Sep 17 00:00:00 2001
From: kmontemayor <kyle.e.montemayor@gmail.com>
Date: Tue, 19 May 2026 20:32:35 +0000
Subject: [PATCH 2/4] PR comments

---
 gigl/env/{custom_launcher.py => constants.py} |   0
 gigl/src/common/custom_launcher.py            |  39 ++-
 gigl/utils/dev/__init__.py                    |   5 +
 gigl/utils/dev/submit_smoke_job.py            | 258 ++++++++++++++++++
 gigl/utils/dev/tb_smoke_main.py               |  72 +++++
 tests/unit/src/common/custom_launcher_test.py |  73 +++--
 6 files changed, 394 insertions(+), 53 deletions(-)
 rename gigl/env/{custom_launcher.py => constants.py} (100%)
 create mode 100644 gigl/utils/dev/__init__.py
 create mode 100644 gigl/utils/dev/submit_smoke_job.py
 create mode 100644 gigl/utils/dev/tb_smoke_main.py

diff --git a/gigl/env/custom_launcher.py b/gigl/env/constants.py
similarity index 100%
rename from gigl/env/custom_launcher.py
rename to gigl/env/constants.py
diff --git a/gigl/src/common/custom_launcher.py b/gigl/src/common/custom_launcher.py
index f264782d6..08770cd77 100644
--- a/gigl/src/common/custom_launcher.py
+++ b/gigl/src/common/custom_launcher.py
@@ -14,7 +14,7 @@
 proto reaches this module.
 
 The dispatcher exports its context args as ``GIGL_*`` environment
-variables on the subprocess env (see ``gigl.env.custom_launcher``) so
+variables on the subprocess env (see ``gigl.env.constants``) so
 receiving CLIs can ``os.environ.get(...)`` whatever runtime context
 they need. The parent process's ``os.environ`` is never mutated; the
 ``GIGL_*`` keys live only in the per-call env passed to
@@ -28,8 +28,12 @@
 from typing import Optional
 
 from gigl.common import Uri
+from gigl.common.constants import (
+    DEFAULT_GIGL_RELEASE_SRC_IMAGE_CPU,
+    DEFAULT_GIGL_RELEASE_SRC_IMAGE_CUDA,
+)
 from gigl.common.logger import Logger
-from gigl.env.custom_launcher import (
+from gigl.env.constants import (
     GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY,
     GIGL_COMPONENT_ENV_KEY,
     GIGL_CPU_DOCKER_URI_ENV_KEY,
@@ -72,11 +76,13 @@ def launch_custom(
     reaches this module.
 
     The subprocess env is built per-call from ``os.environ.copy()`` plus
-    the ``GIGL_*`` keys defined in :mod:`gigl.env.custom_launcher`. The
-    parent process's ``os.environ`` is never mutated. Optional URI args
-    (``cpu_docker_uri``, ``cuda_docker_uri``) are omitted from the env
-    when ``None`` so the receiver's ``os.environ.get(KEY)`` returns
-    ``None`` and preserves the original ``Optional[str]`` semantics.
+    the ``GIGL_*`` keys defined in :mod:`gigl.env.constants`. The
+    parent process's ``os.environ`` is never mutated. When ``None`` is
+    passed for ``cpu_docker_uri`` / ``cuda_docker_uri``, the
+    corresponding env var falls back to
+    :data:`gigl.common.constants.DEFAULT_GIGL_RELEASE_SRC_IMAGE_CPU` /
+    :data:`gigl.common.constants.DEFAULT_GIGL_RELEASE_SRC_IMAGE_CUDA`
+    so receivers always observe a usable image URI.
 
     Args:
         custom_launcher_config: Proto whose ``command`` is the shell
@@ -93,10 +99,11 @@ def launch_custom(
         process_runtime_args: Accepted for API symmetry with the
             GLT-side Vertex AI launchers but not currently exported —
             there is no clean single-env-var encoding for a dict.
-        cpu_docker_uri: Exported as ``GIGL_CPU_DOCKER_URI`` when set;
-            the env var is omitted entirely when ``None``.
-        cuda_docker_uri: Exported as ``GIGL_CUDA_DOCKER_URI`` when set;
-            the env var is omitted entirely when ``None``.
+        cpu_docker_uri: Exported as ``GIGL_CPU_DOCKER_URI``. Falls back
+            to ``DEFAULT_GIGL_RELEASE_SRC_IMAGE_CPU`` when ``None``.
+        cuda_docker_uri: Exported as ``GIGL_CUDA_DOCKER_URI``. Falls
+            back to ``DEFAULT_GIGL_RELEASE_SRC_IMAGE_CUDA`` when
+            ``None``.
         component: Which GiGL component is being launched. Must be in
             ``_LAUNCHABLE_COMPONENTS``. Exported as ``GIGL_COMPONENT``
             using ``component.name`` (e.g. ``"Trainer"``).
@@ -121,10 +128,12 @@ def launch_custom(
     env[GIGL_RESOURCE_CONFIG_URI_ENV_KEY] = str(resource_config_uri)
     env[GIGL_PROCESS_COMMAND_ENV_KEY] = process_command
     env[GIGL_COMPONENT_ENV_KEY] = component.name
-    if cpu_docker_uri is not None:
-        env[GIGL_CPU_DOCKER_URI_ENV_KEY] = cpu_docker_uri
-    if cuda_docker_uri is not None:
-        env[GIGL_CUDA_DOCKER_URI_ENV_KEY] = cuda_docker_uri
+    env[GIGL_CPU_DOCKER_URI_ENV_KEY] = (
+        cpu_docker_uri or DEFAULT_GIGL_RELEASE_SRC_IMAGE_CPU
+    )
+    env[GIGL_CUDA_DOCKER_URI_ENV_KEY] = (
+        cuda_docker_uri or DEFAULT_GIGL_RELEASE_SRC_IMAGE_CUDA
+    )
 
     shell_line = " ".join([command, *(shlex.quote(a) for a in args)])
     logger.info(f"Launching {component.name} via subprocess: {shell_line!r}")
diff --git a/gigl/utils/dev/__init__.py b/gigl/utils/dev/__init__.py
new file mode 100644
index 000000000..9c1bf25ab
--- /dev/null
+++ b/gigl/utils/dev/__init__.py
@@ -0,0 +1,5 @@
+"""Developer utilities (smoke entrypoints, ad-hoc test helpers).
+
+Modules under this package are intended for short, ad-hoc test jobs and
+developer iteration. They are NOT part of GiGL's stable public API.
+"""
diff --git a/gigl/utils/dev/submit_smoke_job.py b/gigl/utils/dev/submit_smoke_job.py
new file mode 100644
index 000000000..e1eab73ef
--- /dev/null
+++ b/gigl/utils/dev/submit_smoke_job.py
@@ -0,0 +1,258 @@
+"""Submit a tiny Vertex AI CustomJob that exercises GiGL's TensorBoard wiring.
+
+Goal: <2 min from "I changed launcher / writer code" to "I see whether TB
+shows up." Bypasses ConfigPopulator and the full pipeline; uses the
+production launcher path (``launch_single_pool_job``) so the same submit
+logic runs as in real training.
+
+Required CLI flags:
+    --project              GCP project (e.g. ``external-snap-ci-github-gigl``).
+    --region               Vertex AI region (e.g. ``us-central1``).
+    --service-account      Service account email used by the CustomJob.
+    --staging-bucket       Regional GCS bucket Vertex stages artifacts under.
+    --tensorboard          Full TensorBoard resource name
+                           (``projects/.../locations/.../tensorboards/...``).
+    --experiment-name      Vertex AI ``TensorboardExperiment`` name. The
+                           tb_smoke_main entry point will pass this and the
+                           --tensorboard value to ``TensorBoardWriter.create``.
+    --container-uri        Container image to use. REQUIRED — must contain the
+                           branch under test.
+
+Optional:
+    --job-name             CustomJob display name. Defaults to a timestamped
+                           ``gigl-tb-smoke-...``.
+    --dry-run              Print the constructed submission parameters and
+                           exit without submitting.
+
+Verification:
+    After the CustomJob completes the script polls the TensorBoard API
+    surface and asserts the user-named ``TensorboardExperiment`` exists
+    with at least one ``TensorboardRun`` containing time series data.
+
+    The TB UI URL is printed for manual inspection.
+"""
+
+from __future__ import annotations
+
+import argparse
+import datetime
+import re
+import sys
+import time
+
+from google.cloud import aiplatform
+
+from gigl.common import Uri
+from gigl.common.logger import Logger
+from gigl.src.common.constants.components import GiGLComponents
+from gigl.src.common.types.pb_wrappers.gigl_resource_config import (
+    GiglResourceConfigWrapper,
+)
+from gigl.src.common.vertex_ai_launcher import launch_single_pool_job
+from snapchat.research.gbml import gigl_resource_config_pb2
+
+logger = Logger()
+
+_TENSORBOARD_RESOURCE_NAME_PATTERN = re.compile(
+    r"^projects/(?P<project>[^/]+)"
+    r"/locations/(?P<location>[^/]+)"
+    r"/tensorboards/(?P<tensorboard_id>[^/]+)$"
+)
+
+
+def _parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--project", required=True)
+    parser.add_argument("--region", required=True)
+    parser.add_argument("--service-account", required=True)
+    parser.add_argument(
+        "--staging-bucket",
+        required=True,
+        help="Regional GCS bucket (e.g. gs://gigl-cicd-temp).",
+    )
+    parser.add_argument(
+        "--tensorboard",
+        required=True,
+        help="Full TensorBoard resource name.",
+    )
+    parser.add_argument(
+        "--experiment-name",
+        required=True,
+        help=(
+            "TensorboardExperiment name. Passed to tb_smoke_main, which "
+            "creates the run under this experiment."
+        ),
+    )
+    parser.add_argument(
+        "--container-uri",
+        required=True,
+        help=(
+            "Container image with the branch code. Required; pointing at a "
+            "released image would test stale code."
+        ),
+    )
+    parser.add_argument("--job-name", default=None)
+    parser.add_argument("--dry-run", action="store_true")
+    return parser.parse_args()
+
+
+def _build_resource_config(
+    *,
+    project: str,
+    region: str,
+    service_account: str,
+    staging_bucket: str,
+) -> gigl_resource_config_pb2.GiglResourceConfig:
+    """Minimal GiglResourceConfig wired for a 1-replica CPU CustomJob."""
+    common = gigl_resource_config_pb2.SharedResourceConfig.CommonComputeConfig(
+        project=project,
+        region=region,
+        temp_regional_assets_bucket=staging_bucket,
+        temp_assets_bucket=staging_bucket,
+        perm_assets_bucket=staging_bucket,
+        temp_assets_bq_dataset_name="not_used_by_smoke",
+        embedding_bq_dataset_name="not_used_by_smoke",
+        gcp_service_account_email=service_account,
+        dataflow_runner="DataflowRunner",
+    )
+    shared = gigl_resource_config_pb2.SharedResourceConfig(
+        common_compute_config=common,
+        resource_labels={"cost_resource_group": "gigl_dev_smoke"},
+    )
+    trainer = gigl_resource_config_pb2.VertexAiResourceConfig(
+        # n1-standard-2 is rejected by Vertex AI; n1-standard-16 is the
+        # smallest spec we've confirmed accepted in dev.
+        machine_type="n1-standard-16",
+        gpu_type="ACCELERATOR_TYPE_UNSPECIFIED",
+        gpu_limit=0,
+        num_replicas=1,
+        timeout=600,
+    )
+    return gigl_resource_config_pb2.GiglResourceConfig(
+        shared_resource_config=shared,
+        trainer_resource_config=gigl_resource_config_pb2.TrainerResourceConfig(
+            vertex_ai_trainer_config=trainer,
+        ),
+    )
+
+
+def _verify_named_experiment(
+    *,
+    tensorboard_resource_name: str,
+    experiment_name: str,
+) -> None:
+    """Confirm the chief-rank writer ingested events into the named experiment."""
+    experiment_resource_name = (
+        f"{tensorboard_resource_name}/experiments/{experiment_name}"
+    )
+    runs = aiplatform.TensorboardRun.list(
+        tensorboard_experiment_name=experiment_resource_name,
+    )
+    if not runs:
+        raise RuntimeError(
+            f"Named TensorboardExperiment {experiment_resource_name} has no "
+            "TensorboardRuns; the writer did not ingest events."
+        )
+    for run in runs:
+        time_series = aiplatform.TensorboardTimeSeries.list(
+            tensorboard_run_name=run.resource_name,
+        )
+        if not time_series:
+            raise RuntimeError(
+                f"Run {run.resource_name} has no TensorboardTimeSeries; "
+                "events did not reach the API."
+            )
+    run_names = sorted(r.display_name for r in runs)
+    logger.info(
+        f"Named experiment OK: {len(runs)} run(s) under {experiment_resource_name}: "
+        f"{run_names}"
+    )
+
+
+def _print_tb_url(
+    *,
+    region: str,
+    project: str,
+    tensorboard_id: str,
+    experiment_name: str,
+) -> None:
+    base = f"https://{region}.tensorboard.googleusercontent.com/experiment"
+    qualifier = f"projects+{project}+locations+{region}+tensorboards+{tensorboard_id}"
+    named = f"{base}/{qualifier}+experiments+{experiment_name}"
+    logger.info(f"Named TB URL: {named}")
+
+
+def main() -> int:
+    args = _parse_args()
+
+    tb_match = _TENSORBOARD_RESOURCE_NAME_PATTERN.match(args.tensorboard)
+    if not tb_match:
+        logger.error(
+            f"--tensorboard must be projects/.../locations/.../tensorboards/...; "
+            f"got {args.tensorboard!r}."
+        )
+        return 2
+
+    timestamp = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
+    job_name = args.job_name or f"gigl-tb-smoke-{timestamp}"
+
+    resource_config = _build_resource_config(
+        project=args.project,
+        region=args.region,
+        service_account=args.service_account,
+        staging_bucket=args.staging_bucket,
+    )
+    resource_wrapper = GiglResourceConfigWrapper(resource_config=resource_config)
+
+    process_runtime_args = {
+        "tensorboard_resource_name": args.tensorboard,
+        "tensorboard_experiment_name": args.experiment_name,
+    }
+
+    if args.dry_run:
+        logger.info(
+            "Dry run — would submit a CustomJob with:\n"
+            f"  job_name              = {job_name}\n"
+            f"  container_uri         = {args.container_uri}\n"
+            f"  tensorboard_resource  = {args.tensorboard}\n"
+            f"  experiment_name       = {args.experiment_name!r}\n"
+            f"  process_runtime_args  = {process_runtime_args}\n"
+        )
+        return 0
+
+    aiplatform.init(project=args.project, location=args.region)
+    launch_single_pool_job(
+        vertex_ai_resource_config=resource_config.trainer_resource_config.vertex_ai_trainer_config,
+        job_name=job_name,
+        task_config_uri=Uri("gs://unused/by/smoke.yaml"),
+        resource_config_uri=Uri("gs://unused/by/smoke.yaml"),
+        process_command="python -m gigl.utils.dev.tb_smoke_main",
+        process_runtime_args=process_runtime_args,
+        resource_config_wrapper=resource_wrapper,
+        cpu_docker_uri=args.container_uri,
+        cuda_docker_uri=args.container_uri,
+        component=GiGLComponents.Trainer,
+        vertex_ai_region=args.region,
+    )
+    logger.info(f"Submitted CustomJob: {job_name}")
+
+    # CustomJob.submit blocks until completion inside launch_single_pool_job
+    # (see VertexAIService._submit_job: job.wait_for_completion). Give the
+    # backing TensorboardExperiment a short grace period for any final RPCs.
+    time.sleep(5)
+
+    _verify_named_experiment(
+        tensorboard_resource_name=args.tensorboard,
+        experiment_name=args.experiment_name,
+    )
+    _print_tb_url(
+        region=args.region,
+        project=args.project,
+        tensorboard_id=tb_match["tensorboard_id"],
+        experiment_name=args.experiment_name,
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/gigl/utils/dev/tb_smoke_main.py b/gigl/utils/dev/tb_smoke_main.py
new file mode 100644
index 000000000..2d4a35807
--- /dev/null
+++ b/gigl/utils/dev/tb_smoke_main.py
@@ -0,0 +1,72 @@
+"""Tiny smoke-test entrypoint that exercises GiGL's TensorBoard pipeline.
+
+Submitted as the container command by ``submit_smoke_job.py``. Constructs a
+``TensorBoardWriter`` with ``enabled=True`` (single-process smoke = always
+chief), writes a few scalar events, and exits.
+
+Configuration is plumbed via CLI flags injected by the launcher from the
+smoke script's ``process_runtime_args`` map. All three are required:
+
+    --job_name=<used as the TensorboardRun ID>
+    --tensorboard_resource_name=<full Vertex AI Tensorboard resource name>
+    --tensorboard_experiment_name=<TensorboardExperiment ID under that resource>
+
+This entrypoint deliberately mirrors the production trainer/inferencer call
+sites in ``examples/link_prediction/`` so the smoke test exercises the same
+``TensorBoardWriter.create()`` code path.
+"""
+
+from __future__ import annotations
+
+import argparse
+
+from gigl.common.logger import Logger
+from gigl.utils.tensorboard_writer import TensorBoardWriter
+
+logger = Logger()
+
+_NUM_STEPS = 3
+
+
+def _parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--job_name",
+        required=True,
+        help="Used as the TensorboardRun ID (must be unique per launch).",
+    )
+    parser.add_argument(
+        "--tensorboard_resource_name",
+        required=True,
+        help="Full Vertex AI Tensorboard resource name.",
+    )
+    parser.add_argument(
+        "--tensorboard_experiment_name",
+        required=True,
+        help="TensorboardExperiment ID under the resource above.",
+    )
+    # The launcher's _build_job_config always appends --task_config_uri,
+    # --resource_config_uri, and (on GPU) --use_cuda. The smoke entrypoint
+    # doesn't need them; use parse_known_args so they don't blow up argparse.
+    args, _unrecognized = parser.parse_known_args()
+    return args
+
+
+def main() -> None:
+    """Write a handful of scalar events and exit."""
+    args = _parse_args()
+    logger.info(f"Starting tb_smoke_main; job_name={args.job_name!r}")
+    with TensorBoardWriter.create(
+        resource_name=args.tensorboard_resource_name,
+        experiment_name=args.tensorboard_experiment_name,
+        experiment_run_name=args.job_name,
+        enabled=True,
+    ) as writer:
+        for step in range(_NUM_STEPS):
+            writer.log({"smoke/value": float(step)}, step=step)
+            logger.info(f"Wrote smoke/value={step} at step {step}")
+    logger.info("tb_smoke_main complete")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/unit/src/common/custom_launcher_test.py b/tests/unit/src/common/custom_launcher_test.py
index 94b3a2100..3d325cbb5 100644
--- a/tests/unit/src/common/custom_launcher_test.py
+++ b/tests/unit/src/common/custom_launcher_test.py
@@ -6,7 +6,11 @@
 from absl.testing import absltest
 
 from gigl.common import Uri
-from gigl.env.custom_launcher import (
+from gigl.common.constants import (
+    DEFAULT_GIGL_RELEASE_SRC_IMAGE_CPU,
+    DEFAULT_GIGL_RELEASE_SRC_IMAGE_CUDA,
+)
+from gigl.env.constants import (
     GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY,
     GIGL_COMPONENT_ENV_KEY,
     GIGL_CPU_DOCKER_URI_ENV_KEY,
@@ -150,7 +154,9 @@ def test_dispatch_sets_gigl_env_vars(self, mock_run: MagicMock) -> None:
         self.assertEqual(env[GIGL_COMPONENT_ENV_KEY], "Trainer")
 
     @patch("gigl.src.common.custom_launcher.subprocess.run")
-    def test_dispatch_omits_optional_uris_when_none(self, mock_run: MagicMock) -> None:
+    def test_dispatch_defaults_optional_uris_to_release_images(
+        self, mock_run: MagicMock
+    ) -> None:
         config = self._build_config(command="echo")
         launch_custom(
             custom_launcher_config=config,
@@ -164,49 +170,26 @@ def test_dispatch_omits_optional_uris_when_none(self, mock_run: MagicMock) -> No
             component=GiGLComponents.Inferencer,
         )
         env = mock_run.call_args.kwargs["env"]
-        # Optional URIs must be omitted entirely (not stringified to "None"
-        # nor set to ""), so receivers see env.get(KEY) is None.
-        self.assertNotIn(GIGL_CPU_DOCKER_URI_ENV_KEY, env)
-        self.assertNotIn(GIGL_CUDA_DOCKER_URI_ENV_KEY, env)
-        # Required keys are still present.
+        # When the caller passes None for a docker URI, the env var
+        # falls back to the public release image so receivers always
+        # see a usable URI.
+        self.assertEqual(
+            env[GIGL_CPU_DOCKER_URI_ENV_KEY], DEFAULT_GIGL_RELEASE_SRC_IMAGE_CPU
+        )
+        self.assertEqual(
+            env[GIGL_CUDA_DOCKER_URI_ENV_KEY], DEFAULT_GIGL_RELEASE_SRC_IMAGE_CUDA
+        )
         self.assertEqual(env[GIGL_COMPONENT_ENV_KEY], "Inferencer")
 
     @patch("gigl.src.common.custom_launcher.subprocess.run")
-    def test_dispatch_does_not_mutate_parent_os_environ(
+    def test_dispatch_isolates_subprocess_env_from_parent(
         self, mock_run: MagicMock
     ) -> None:
-        # Pre-condition: none of the GIGL_* keys leak into the parent.
-        snapshot = dict(os.environ)
-        config = self._build_config(command="echo")
-        launch_custom(
-            custom_launcher_config=config,
-            applied_task_identifier="job",
-            task_config_uri=Uri("gs://bucket/task.yaml"),
-            resource_config_uri=Uri("gs://bucket/resource.yaml"),
-            process_command="echo",
-            process_runtime_args={},
-            cpu_docker_uri="gcr.io/p/cpu:tag",
-            cuda_docker_uri="gcr.io/p/cuda:tag",
-            component=GiGLComponents.Trainer,
-        )
-        self.assertEqual(dict(os.environ), snapshot)
-        for key in (
-            GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY,
-            GIGL_TASK_CONFIG_URI_ENV_KEY,
-            GIGL_RESOURCE_CONFIG_URI_ENV_KEY,
-            GIGL_PROCESS_COMMAND_ENV_KEY,
-            GIGL_CPU_DOCKER_URI_ENV_KEY,
-            GIGL_CUDA_DOCKER_URI_ENV_KEY,
-            GIGL_COMPONENT_ENV_KEY,
-        ):
-            self.assertNotIn(key, os.environ)
-
-    @patch("gigl.src.common.custom_launcher.subprocess.run")
-    def test_dispatch_preserves_inherited_env(self, mock_run: MagicMock) -> None:
         sentinel_key = "GIGL_TEST_PARENT_ENV_SENTINEL"
         sentinel_value = "preserved-value"
         try:
             os.environ[sentinel_key] = sentinel_value
+            snapshot = dict(os.environ)
             config = self._build_config(command="echo")
             launch_custom(
                 custom_launcher_config=config,
@@ -215,10 +198,24 @@ def test_dispatch_preserves_inherited_env(self, mock_run: MagicMock) -> None:
                 resource_config_uri=Uri("gs://bucket/resource.yaml"),
                 process_command="echo",
                 process_runtime_args={},
-                cpu_docker_uri=None,
-                cuda_docker_uri=None,
+                cpu_docker_uri="gcr.io/p/cpu:tag",
+                cuda_docker_uri="gcr.io/p/cuda:tag",
                 component=GiGLComponents.Trainer,
             )
+            # Parent os.environ is untouched; none of the GIGL_* keys
+            # leak into it.
+            self.assertEqual(dict(os.environ), snapshot)
+            for key in (
+                GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY,
+                GIGL_TASK_CONFIG_URI_ENV_KEY,
+                GIGL_RESOURCE_CONFIG_URI_ENV_KEY,
+                GIGL_PROCESS_COMMAND_ENV_KEY,
+                GIGL_CPU_DOCKER_URI_ENV_KEY,
+                GIGL_CUDA_DOCKER_URI_ENV_KEY,
+                GIGL_COMPONENT_ENV_KEY,
+            ):
+                self.assertNotIn(key, os.environ)
+            # Inherited parent env entries reach the subprocess env.
             env = mock_run.call_args.kwargs["env"]
             self.assertEqual(env.get(sentinel_key), sentinel_value)
         finally:

From 6591d5255bad3a815031c461a30c64317273b40e Mon Sep 17 00:00:00 2001
From: kmontemayor <kyle.e.montemayor@gmail.com>
Date: Tue, 19 May 2026 20:42:33 +0000
Subject: [PATCH 3/4] Remove gigl/utils/dev smoke-test utilities

---
 gigl/utils/dev/__init__.py         |   5 -
 gigl/utils/dev/submit_smoke_job.py | 258 -----------------------------
 gigl/utils/dev/tb_smoke_main.py    |  72 --------
 3 files changed, 335 deletions(-)
 delete mode 100644 gigl/utils/dev/__init__.py
 delete mode 100644 gigl/utils/dev/submit_smoke_job.py
 delete mode 100644 gigl/utils/dev/tb_smoke_main.py

diff --git a/gigl/utils/dev/__init__.py b/gigl/utils/dev/__init__.py
deleted file mode 100644
index 9c1bf25ab..000000000
--- a/gigl/utils/dev/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""Developer utilities (smoke entrypoints, ad-hoc test helpers).
-
-Modules under this package are intended for short, ad-hoc test jobs and
-developer iteration. They are NOT part of GiGL's stable public API.
-"""
diff --git a/gigl/utils/dev/submit_smoke_job.py b/gigl/utils/dev/submit_smoke_job.py
deleted file mode 100644
index e1eab73ef..000000000
--- a/gigl/utils/dev/submit_smoke_job.py
+++ /dev/null
@@ -1,258 +0,0 @@
-"""Submit a tiny Vertex AI CustomJob that exercises GiGL's TensorBoard wiring.
-
-Goal: <2 min from "I changed launcher / writer code" to "I see whether TB
-shows up." Bypasses ConfigPopulator and the full pipeline; uses the
-production launcher path (``launch_single_pool_job``) so the same submit
-logic runs as in real training.
-
-Required CLI flags:
-    --project              GCP project (e.g. ``external-snap-ci-github-gigl``).
-    --region               Vertex AI region (e.g. ``us-central1``).
-    --service-account      Service account email used by the CustomJob.
-    --staging-bucket       Regional GCS bucket Vertex stages artifacts under.
-    --tensorboard          Full TensorBoard resource name
-                           (``projects/.../locations/.../tensorboards/...``).
-    --experiment-name      Vertex AI ``TensorboardExperiment`` name. The
-                           tb_smoke_main entry point will pass this and the
-                           --tensorboard value to ``TensorBoardWriter.create``.
-    --container-uri        Container image to use. REQUIRED — must contain the
-                           branch under test.
-
-Optional:
-    --job-name             CustomJob display name. Defaults to a timestamped
-                           ``gigl-tb-smoke-...``.
-    --dry-run              Print the constructed submission parameters and
-                           exit without submitting.
-
-Verification:
-    After the CustomJob completes the script polls the TensorBoard API
-    surface and asserts the user-named ``TensorboardExperiment`` exists
-    with at least one ``TensorboardRun`` containing time series data.
-
-    The TB UI URL is printed for manual inspection.
-"""
-
-from __future__ import annotations
-
-import argparse
-import datetime
-import re
-import sys
-import time
-
-from google.cloud import aiplatform
-
-from gigl.common import Uri
-from gigl.common.logger import Logger
-from gigl.src.common.constants.components import GiGLComponents
-from gigl.src.common.types.pb_wrappers.gigl_resource_config import (
-    GiglResourceConfigWrapper,
-)
-from gigl.src.common.vertex_ai_launcher import launch_single_pool_job
-from snapchat.research.gbml import gigl_resource_config_pb2
-
-logger = Logger()
-
-_TENSORBOARD_RESOURCE_NAME_PATTERN = re.compile(
-    r"^projects/(?P<project>[^/]+)"
-    r"/locations/(?P<location>[^/]+)"
-    r"/tensorboards/(?P<tensorboard_id>[^/]+)$"
-)
-
-
-def _parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument("--project", required=True)
-    parser.add_argument("--region", required=True)
-    parser.add_argument("--service-account", required=True)
-    parser.add_argument(
-        "--staging-bucket",
-        required=True,
-        help="Regional GCS bucket (e.g. gs://gigl-cicd-temp).",
-    )
-    parser.add_argument(
-        "--tensorboard",
-        required=True,
-        help="Full TensorBoard resource name.",
-    )
-    parser.add_argument(
-        "--experiment-name",
-        required=True,
-        help=(
-            "TensorboardExperiment name. Passed to tb_smoke_main, which "
-            "creates the run under this experiment."
-        ),
-    )
-    parser.add_argument(
-        "--container-uri",
-        required=True,
-        help=(
-            "Container image with the branch code. Required; pointing at a "
-            "released image would test stale code."
-        ),
-    )
-    parser.add_argument("--job-name", default=None)
-    parser.add_argument("--dry-run", action="store_true")
-    return parser.parse_args()
-
-
-def _build_resource_config(
-    *,
-    project: str,
-    region: str,
-    service_account: str,
-    staging_bucket: str,
-) -> gigl_resource_config_pb2.GiglResourceConfig:
-    """Minimal GiglResourceConfig wired for a 1-replica CPU CustomJob."""
-    common = gigl_resource_config_pb2.SharedResourceConfig.CommonComputeConfig(
-        project=project,
-        region=region,
-        temp_regional_assets_bucket=staging_bucket,
-        temp_assets_bucket=staging_bucket,
-        perm_assets_bucket=staging_bucket,
-        temp_assets_bq_dataset_name="not_used_by_smoke",
-        embedding_bq_dataset_name="not_used_by_smoke",
-        gcp_service_account_email=service_account,
-        dataflow_runner="DataflowRunner",
-    )
-    shared = gigl_resource_config_pb2.SharedResourceConfig(
-        common_compute_config=common,
-        resource_labels={"cost_resource_group": "gigl_dev_smoke"},
-    )
-    trainer = gigl_resource_config_pb2.VertexAiResourceConfig(
-        # n1-standard-2 is rejected by Vertex AI; n1-standard-16 is the
-        # smallest spec we've confirmed accepted in dev.
-        machine_type="n1-standard-16",
-        gpu_type="ACCELERATOR_TYPE_UNSPECIFIED",
-        gpu_limit=0,
-        num_replicas=1,
-        timeout=600,
-    )
-    return gigl_resource_config_pb2.GiglResourceConfig(
-        shared_resource_config=shared,
-        trainer_resource_config=gigl_resource_config_pb2.TrainerResourceConfig(
-            vertex_ai_trainer_config=trainer,
-        ),
-    )
-
-
-def _verify_named_experiment(
-    *,
-    tensorboard_resource_name: str,
-    experiment_name: str,
-) -> None:
-    """Confirm the chief-rank writer ingested events into the named experiment."""
-    experiment_resource_name = (
-        f"{tensorboard_resource_name}/experiments/{experiment_name}"
-    )
-    runs = aiplatform.TensorboardRun.list(
-        tensorboard_experiment_name=experiment_resource_name,
-    )
-    if not runs:
-        raise RuntimeError(
-            f"Named TensorboardExperiment {experiment_resource_name} has no "
-            "TensorboardRuns; the writer did not ingest events."
-        )
-    for run in runs:
-        time_series = aiplatform.TensorboardTimeSeries.list(
-            tensorboard_run_name=run.resource_name,
-        )
-        if not time_series:
-            raise RuntimeError(
-                f"Run {run.resource_name} has no TensorboardTimeSeries; "
-                "events did not reach the API."
-            )
-    run_names = sorted(r.display_name for r in runs)
-    logger.info(
-        f"Named experiment OK: {len(runs)} run(s) under {experiment_resource_name}: "
-        f"{run_names}"
-    )
-
-
-def _print_tb_url(
-    *,
-    region: str,
-    project: str,
-    tensorboard_id: str,
-    experiment_name: str,
-) -> None:
-    base = f"https://{region}.tensorboard.googleusercontent.com/experiment"
-    qualifier = f"projects+{project}+locations+{region}+tensorboards+{tensorboard_id}"
-    named = f"{base}/{qualifier}+experiments+{experiment_name}"
-    logger.info(f"Named TB URL: {named}")
-
-
-def main() -> int:
-    args = _parse_args()
-
-    tb_match = _TENSORBOARD_RESOURCE_NAME_PATTERN.match(args.tensorboard)
-    if not tb_match:
-        logger.error(
-            f"--tensorboard must be projects/.../locations/.../tensorboards/...; "
-            f"got {args.tensorboard!r}."
-        )
-        return 2
-
-    timestamp = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
-    job_name = args.job_name or f"gigl-tb-smoke-{timestamp}"
-
-    resource_config = _build_resource_config(
-        project=args.project,
-        region=args.region,
-        service_account=args.service_account,
-        staging_bucket=args.staging_bucket,
-    )
-    resource_wrapper = GiglResourceConfigWrapper(resource_config=resource_config)
-
-    process_runtime_args = {
-        "tensorboard_resource_name": args.tensorboard,
-        "tensorboard_experiment_name": args.experiment_name,
-    }
-
-    if args.dry_run:
-        logger.info(
-            "Dry run — would submit a CustomJob with:\n"
-            f"  job_name              = {job_name}\n"
-            f"  container_uri         = {args.container_uri}\n"
-            f"  tensorboard_resource  = {args.tensorboard}\n"
-            f"  experiment_name       = {args.experiment_name!r}\n"
-            f"  process_runtime_args  = {process_runtime_args}\n"
-        )
-        return 0
-
-    aiplatform.init(project=args.project, location=args.region)
-    launch_single_pool_job(
-        vertex_ai_resource_config=resource_config.trainer_resource_config.vertex_ai_trainer_config,
-        job_name=job_name,
-        task_config_uri=Uri("gs://unused/by/smoke.yaml"),
-        resource_config_uri=Uri("gs://unused/by/smoke.yaml"),
-        process_command="python -m gigl.utils.dev.tb_smoke_main",
-        process_runtime_args=process_runtime_args,
-        resource_config_wrapper=resource_wrapper,
-        cpu_docker_uri=args.container_uri,
-        cuda_docker_uri=args.container_uri,
-        component=GiGLComponents.Trainer,
-        vertex_ai_region=args.region,
-    )
-    logger.info(f"Submitted CustomJob: {job_name}")
-
-    # CustomJob.submit blocks until completion inside launch_single_pool_job
-    # (see VertexAIService._submit_job: job.wait_for_completion). Give the
-    # backing TensorboardExperiment a short grace period for any final RPCs.
-    time.sleep(5)
-
-    _verify_named_experiment(
-        tensorboard_resource_name=args.tensorboard,
-        experiment_name=args.experiment_name,
-    )
-    _print_tb_url(
-        region=args.region,
-        project=args.project,
-        tensorboard_id=tb_match["tensorboard_id"],
-        experiment_name=args.experiment_name,
-    )
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/gigl/utils/dev/tb_smoke_main.py b/gigl/utils/dev/tb_smoke_main.py
deleted file mode 100644
index 2d4a35807..000000000
--- a/gigl/utils/dev/tb_smoke_main.py
+++ /dev/null
@@ -1,72 +0,0 @@
-"""Tiny smoke-test entrypoint that exercises GiGL's TensorBoard pipeline.
-
-Submitted as the container command by ``submit_smoke_job.py``. Constructs a
-``TensorBoardWriter`` with ``enabled=True`` (single-process smoke = always
-chief), writes a few scalar events, and exits.
-
-Configuration is plumbed via CLI flags injected by the launcher from the
-smoke script's ``process_runtime_args`` map. All three are required:
-
-    --job_name=<used as the TensorboardRun ID>
-    --tensorboard_resource_name=<full Vertex AI Tensorboard resource name>
-    --tensorboard_experiment_name=<TensorboardExperiment ID under that resource>
-
-This entrypoint deliberately mirrors the production trainer/inferencer call
-sites in ``examples/link_prediction/`` so the smoke test exercises the same
-``TensorBoardWriter.create()`` code path.
-"""
-
-from __future__ import annotations
-
-import argparse
-
-from gigl.common.logger import Logger
-from gigl.utils.tensorboard_writer import TensorBoardWriter
-
-logger = Logger()
-
-_NUM_STEPS = 3
-
-
-def _parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument(
-        "--job_name",
-        required=True,
-        help="Used as the TensorboardRun ID (must be unique per launch).",
-    )
-    parser.add_argument(
-        "--tensorboard_resource_name",
-        required=True,
-        help="Full Vertex AI Tensorboard resource name.",
-    )
-    parser.add_argument(
-        "--tensorboard_experiment_name",
-        required=True,
-        help="TensorboardExperiment ID under the resource above.",
-    )
-    # The launcher's _build_job_config always appends --task_config_uri,
-    # --resource_config_uri, and (on GPU) --use_cuda. The smoke entrypoint
-    # doesn't need them; use parse_known_args so they don't blow up argparse.
-    args, _unrecognized = parser.parse_known_args()
-    return args
-
-
-def main() -> None:
-    """Write a handful of scalar events and exit."""
-    args = _parse_args()
-    logger.info(f"Starting tb_smoke_main; job_name={args.job_name!r}")
-    with TensorBoardWriter.create(
-        resource_name=args.tensorboard_resource_name,
-        experiment_name=args.tensorboard_experiment_name,
-        experiment_run_name=args.job_name,
-        enabled=True,
-    ) as writer:
-        for step in range(_NUM_STEPS):
-            writer.log({"smoke/value": float(step)}, step=step)
-            logger.info(f"Wrote smoke/value={step} at step {step}")
-    logger.info("tb_smoke_main complete")
-
-
-if __name__ == "__main__":
-    main()

From ab83373cde16dd8b74f4d897ec0ff4a13c8b6d3d Mon Sep 17 00:00:00 2001
From: kmontemayor <kyle.e.montemayor@gmail.com>
Date: Tue, 19 May 2026 22:05:33 +0000
Subject: [PATCH 4/4] Drop process_command and process_runtime_args from launch_custom

---
 gigl/env/constants.py                         |  1 -
 gigl/src/common/custom_launcher.py            | 10 ----------
 tests/unit/src/common/custom_launcher_test.py | 17 -----------------
 3 files changed, 28 deletions(-)

diff --git a/gigl/env/constants.py b/gigl/env/constants.py
index 21eb4c223..13686c64d 100644
--- a/gigl/env/constants.py
+++ b/gigl/env/constants.py
@@ -10,7 +10,6 @@
 GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY: Final[str] = "GIGL_APPLIED_TASK_IDENTIFIER"
 GIGL_TASK_CONFIG_URI_ENV_KEY: Final[str] = "GIGL_TASK_CONFIG_URI"
 GIGL_RESOURCE_CONFIG_URI_ENV_KEY: Final[str] = "GIGL_RESOURCE_CONFIG_URI"
-GIGL_PROCESS_COMMAND_ENV_KEY: Final[str] = "GIGL_PROCESS_COMMAND"
 GIGL_CPU_DOCKER_URI_ENV_KEY: Final[str] = "GIGL_CPU_DOCKER_URI"
 GIGL_CUDA_DOCKER_URI_ENV_KEY: Final[str] = "GIGL_CUDA_DOCKER_URI"
 GIGL_COMPONENT_ENV_KEY: Final[str] = "GIGL_COMPONENT"
diff --git a/gigl/src/common/custom_launcher.py b/gigl/src/common/custom_launcher.py
index 08770cd77..ebb79b4bd 100644
--- a/gigl/src/common/custom_launcher.py
+++ b/gigl/src/common/custom_launcher.py
@@ -24,7 +24,6 @@
 import os
 import shlex
 import subprocess
-from collections.abc import Mapping
 from typing import Optional
 
 from gigl.common import Uri
@@ -38,7 +37,6 @@
     GIGL_COMPONENT_ENV_KEY,
     GIGL_CPU_DOCKER_URI_ENV_KEY,
     GIGL_CUDA_DOCKER_URI_ENV_KEY,
-    GIGL_PROCESS_COMMAND_ENV_KEY,
     GIGL_RESOURCE_CONFIG_URI_ENV_KEY,
     GIGL_TASK_CONFIG_URI_ENV_KEY,
 )
@@ -57,8 +55,6 @@ def launch_custom(
     applied_task_identifier: str,
     task_config_uri: Uri,
     resource_config_uri: Uri,
-    process_command: str,
-    process_runtime_args: Mapping[str, str],
     cpu_docker_uri: Optional[str],
     cuda_docker_uri: Optional[str],
     component: GiGLComponents,
@@ -94,11 +90,6 @@ def launch_custom(
             ``GIGL_TASK_CONFIG_URI`` (stringified).
         resource_config_uri: Exported to the subprocess as
             ``GIGL_RESOURCE_CONFIG_URI`` (stringified).
-        process_command: Exported to the subprocess as
-            ``GIGL_PROCESS_COMMAND``.
-        process_runtime_args: Accepted for API symmetry with the
-            GLT-side Vertex AI launchers but not currently exported —
-            there is no clean single-env-var encoding for a dict.
         cpu_docker_uri: Exported as ``GIGL_CPU_DOCKER_URI``. Falls back
             to ``DEFAULT_GIGL_RELEASE_SRC_IMAGE_CPU`` when ``None``.
         cuda_docker_uri: Exported as ``GIGL_CUDA_DOCKER_URI``. Falls
@@ -126,7 +117,6 @@ def launch_custom(
     env[GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY] = applied_task_identifier
     env[GIGL_TASK_CONFIG_URI_ENV_KEY] = str(task_config_uri)
     env[GIGL_RESOURCE_CONFIG_URI_ENV_KEY] = str(resource_config_uri)
-    env[GIGL_PROCESS_COMMAND_ENV_KEY] = process_command
     env[GIGL_COMPONENT_ENV_KEY] = component.name
     env[GIGL_CPU_DOCKER_URI_ENV_KEY] = (
         cpu_docker_uri or DEFAULT_GIGL_RELEASE_SRC_IMAGE_CPU
diff --git a/tests/unit/src/common/custom_launcher_test.py b/tests/unit/src/common/custom_launcher_test.py
index 3d325cbb5..05aae14b7 100644
--- a/tests/unit/src/common/custom_launcher_test.py
+++ b/tests/unit/src/common/custom_launcher_test.py
@@ -15,7 +15,6 @@
     GIGL_COMPONENT_ENV_KEY,
     GIGL_CPU_DOCKER_URI_ENV_KEY,
     GIGL_CUDA_DOCKER_URI_ENV_KEY,
-    GIGL_PROCESS_COMMAND_ENV_KEY,
     GIGL_RESOURCE_CONFIG_URI_ENV_KEY,
     GIGL_TASK_CONFIG_URI_ENV_KEY,
 )
@@ -57,8 +56,6 @@ def test_dispatches_subprocess_with_literal_command_and_args(
             applied_task_identifier="job-42",
             task_config_uri=Uri("gs://bucket/task.yaml"),
             resource_config_uri=Uri("gs://bucket/resource.yaml"),
-            process_command="ignored",
-            process_runtime_args={"ignored": "v"},
             cpu_docker_uri="gcr.io/p/cpu:tag",
             cuda_docker_uri="gcr.io/p/cuda:tag",
             component=GiGLComponents.Trainer,
@@ -82,8 +79,6 @@ def test_empty_command_raises_value_error(self, mock_run: MagicMock) -> None:
                 applied_task_identifier="job",
                 task_config_uri=Uri("gs://bucket/task.yaml"),
                 resource_config_uri=Uri("gs://bucket/resource.yaml"),
-                process_command="",
-                process_runtime_args={},
                 cpu_docker_uri=None,
                 cuda_docker_uri=None,
                 component=GiGLComponents.Trainer,
@@ -99,8 +94,6 @@ def test_invalid_component_raises_value_error(self, mock_run: MagicMock) -> None
                 applied_task_identifier="job",
                 task_config_uri=Uri("gs://bucket/task.yaml"),
                 resource_config_uri=Uri("gs://bucket/resource.yaml"),
-                process_command="echo 'hello, world!",
-                process_runtime_args={},
                 cpu_docker_uri=None,
                 cuda_docker_uri=None,
                 component=GiGLComponents.DataPreprocessor,
@@ -115,8 +108,6 @@ def test_args_with_spaces_are_shell_quoted(self, mock_run: MagicMock) -> None:
             applied_task_identifier="job",
             task_config_uri=Uri("gs://bucket/task.yaml"),
             resource_config_uri=Uri("gs://bucket/resource.yaml"),
-            process_command="",
-            process_runtime_args={},
             cpu_docker_uri=None,
             cuda_docker_uri=None,
             component=GiGLComponents.Trainer,
@@ -135,8 +126,6 @@ def test_dispatch_sets_gigl_env_vars(self, mock_run: MagicMock) -> None:
             applied_task_identifier="job-42",
             task_config_uri=Uri("gs://bucket/task.yaml"),
             resource_config_uri=Uri("gs://bucket/resource.yaml"),
-            process_command="python -m my.cli",
-            process_runtime_args={},
             cpu_docker_uri="gcr.io/p/cpu:tag",
             cuda_docker_uri="gcr.io/p/cuda:tag",
             component=GiGLComponents.Trainer,
@@ -147,7 +136,6 @@ def test_dispatch_sets_gigl_env_vars(self, mock_run: MagicMock) -> None:
         self.assertEqual(
             env[GIGL_RESOURCE_CONFIG_URI_ENV_KEY], "gs://bucket/resource.yaml"
         )
-        self.assertEqual(env[GIGL_PROCESS_COMMAND_ENV_KEY], "python -m my.cli")
         self.assertEqual(env[GIGL_CPU_DOCKER_URI_ENV_KEY], "gcr.io/p/cpu:tag")
         self.assertEqual(env[GIGL_CUDA_DOCKER_URI_ENV_KEY], "gcr.io/p/cuda:tag")
         # component is exported via .name (the enum member identifier).
@@ -163,8 +151,6 @@ def test_dispatch_defaults_optional_uris_to_release_images(
             applied_task_identifier="job",
             task_config_uri=Uri("gs://bucket/task.yaml"),
             resource_config_uri=Uri("gs://bucket/resource.yaml"),
-            process_command="echo",
-            process_runtime_args={},
             cpu_docker_uri=None,
             cuda_docker_uri=None,
             component=GiGLComponents.Inferencer,
@@ -196,8 +182,6 @@ def test_dispatch_isolates_subprocess_env_from_parent(
                 applied_task_identifier="job",
                 task_config_uri=Uri("gs://bucket/task.yaml"),
                 resource_config_uri=Uri("gs://bucket/resource.yaml"),
-                process_command="echo",
-                process_runtime_args={},
                 cpu_docker_uri="gcr.io/p/cpu:tag",
                 cuda_docker_uri="gcr.io/p/cuda:tag",
                 component=GiGLComponents.Trainer,
@@ -209,7 +193,6 @@ def test_dispatch_isolates_subprocess_env_from_parent(
                 GIGL_APPLIED_TASK_IDENTIFIER_ENV_KEY,
                 GIGL_TASK_CONFIG_URI_ENV_KEY,
                 GIGL_RESOURCE_CONFIG_URI_ENV_KEY,
-                GIGL_PROCESS_COMMAND_ENV_KEY,
                 GIGL_CPU_DOCKER_URI_ENV_KEY,
                 GIGL_CUDA_DOCKER_URI_ENV_KEY,
                 GIGL_COMPONENT_ENV_KEY,