Snapchat · kmontemayor2-sc · May 6, 2026 · May 7, 2026 · May 8, 2026 · May 6, 2026
@@ -16,13 +16,15 @@
     assert_trained_model_exists,
 )
 from gigl.src.validation_check.libs.gbml_and_resource_config_compatibility_checks import (
+    check_custom_launcher_config_requires_glt_backend,
     check_inferencer_graph_store_compatibility,
     check_trainer_graph_store_compatibility,
 )
 from gigl.src.validation_check.libs.name_checks import (
     check_if_kfp_pipeline_job_name_valid,
 )
 from gigl.src.validation_check.libs.resource_config_checks import (
+    check_custom_launcher_config_shape,
     check_if_inferencer_resource_config_valid,
     check_if_preprocessor_resource_config_valid,
     check_if_shared_resource_config_valid,
@@ -202,25 +204,31 @@
     GiGLComponents.ConfigPopulator.value: [
         check_trainer_graph_store_compatibility,
         check_inferencer_graph_store_compatibility,
+        check_custom_launcher_config_requires_glt_backend,
     ],
     GiGLComponents.DataPreprocessor.value: [
         check_trainer_graph_store_compatibility,
         check_inferencer_graph_store_compatibility,
+        check_custom_launcher_config_requires_glt_backend,
     ],
     GiGLComponents.SubgraphSampler.value: [
         check_trainer_graph_store_compatibility,
         check_inferencer_graph_store_compatibility,
+        check_custom_launcher_config_requires_glt_backend,
     ],
     GiGLComponents.SplitGenerator.value: [
         check_trainer_graph_store_compatibility,
         check_inferencer_graph_store_compatibility,
+        check_custom_launcher_config_requires_glt_backend,
     ],
     GiGLComponents.Trainer.value: [
         check_trainer_graph_store_compatibility,
         check_inferencer_graph_store_compatibility,
+        check_custom_launcher_config_requires_glt_backend,
     ],
     GiGLComponents.Inferencer.value: [
         check_inferencer_graph_store_compatibility,
+        check_custom_launcher_config_requires_glt_backend,
     ],
     # PostProcessor doesn't need graph store compatibility checks
 }
@@ -347,6 +355,15 @@ def kfp_validation_checks(
         resource_config_wrapper=resource_config_wrapper,
     )
 
+    # Validate any populated CustomLauncherConfig has a non-empty command.
+    # Unconditional — the check is shape-only and does not call out to any
+    # external service.
+    for component in (GiGLComponents.Trainer, GiGLComponents.Inferencer):
+        check_custom_launcher_config_shape(
+            resource_config_pb=resource_config_pb,
+            component=component,
+        )
+
     # check if trained model file exist when skipping training
     if gbml_config_pb.shared_config.should_skip_training == True:
         assert_trained_model_exists(gbml_config_pb=gbml_config_pb)

@@ -135,3 +135,63 @@ def check_inferencer_graph_store_compatibility(
         raise AssertionError(
             f"If one of GbmlConfig.inferencer_config.graph_store_storage_config or GiglResourceConfig.inferencer_resource_config is set, the other must also be set. GbmlConfig.inferencer_config.graph_store_storage_config is set: {gbml_has_graph_store}, GiglResourceConfig.inferencer_resource_config is set: {resource_has_graph_store}."
         )
+
+
+def check_custom_launcher_config_requires_glt_backend(
+    gbml_config_pb_wrapper: GbmlConfigPbWrapper,
+    resource_config_wrapper: GiglResourceConfigWrapper,
+) -> None:
+    """Enforce that ``CustomLauncherConfig`` is only used with the GLT (v2) backend.
+
+    The v1 trainer/inferencer dispatchers never consult the
+    ``custom_trainer_config`` / ``custom_inferencer_config`` oneof, so pairing
+    a ``CustomLauncherConfig`` with a task config that has
+    ``should_use_glt_backend=False`` would silently fall through the v1 path
+    and fail at runtime. Catch it up-front here so the failure is loud and
+    actionable at validation time.
+
+    Note on naming: the wrapper exposes ``should_use_glt_backend`` (bool) but
+    the raw YAML key users set is ``feature_flags.should_run_glt_backend``.
+    The wrapper translates one into the other; this check always reads the
+    wrapper property and never the raw map.
+
+    Args:
+        gbml_config_pb_wrapper: The GbmlConfig wrapper (template config).
+        resource_config_wrapper: The GiglResourceConfig wrapper (resource config).
+
+    Raises:
+        ValueError: If either the trainer or inferencer resource config is a
+            ``CustomLauncherConfig`` and ``should_use_glt_backend`` is False.
+    """
+    logger.info(
+        "Config validation check: CustomLauncherConfig requires GLT (v2) backend."
+    )
+    trainer_is_custom = isinstance(
+        resource_config_wrapper.trainer_config,
+        gigl_resource_config_pb2.CustomLauncherConfig,
+    )
+    inferencer_is_custom = isinstance(
+        resource_config_wrapper.inferencer_config,
+        gigl_resource_config_pb2.CustomLauncherConfig,
+    )
+    if not (trainer_is_custom or inferencer_is_custom):
+        return
+
+    if not gbml_config_pb_wrapper.should_use_glt_backend:
+        offending: list[str] = []
+        if trainer_is_custom:
+            offending.append("trainer_resource_config.custom_trainer_config")
+        if inferencer_is_custom:
+            offending.append("inferencer_resource_config.custom_inferencer_config")
+        raise ValueError(
+            "CustomLauncherConfig is only wired into the GLT (v2) dispatchers "
+            "(glt_trainer.py / glt_inferencer.py); the v1 trainer/inferencer "
+            "never consult the custom oneof and would fall through to an "
+            "'Unsupported resource config' error at runtime. The following "
+            f"custom resource configs were set: {offending}, but the task "
+            "config has should_use_glt_backend=False (raw YAML key: "
+            "feature_flags.should_run_glt_backend). Either set "
+            "feature_flags.should_run_glt_backend='True' in the task config, "
+            "or replace the CustomLauncherConfig with a built-in resource "
+            "config."
+        )
@@ -3,6 +3,7 @@
 from google.cloud.aiplatform_v1.types.accelerator_type import AcceleratorType
 
 from gigl.common.logger import Logger
+from gigl.src.common.constants.components import GiGLComponents
 from gigl.src.common.types.pb_wrappers.gbml_config import GbmlConfigPbWrapper
 from gigl.src.common.types.pb_wrappers.gigl_resource_config import (
     GiglResourceConfigWrapper,
@@ -312,6 +313,57 @@ def _validate_machine_config(
         )
 
 
+def check_custom_launcher_config_shape(
+    resource_config_pb: gigl_resource_config_pb2.GiglResourceConfig,
+    component: GiGLComponents,
+) -> None:
+    """Assert the trainer / inferencer's ``CustomLauncherConfig`` is well-shaped.
+
+    Resolves the component's resource config through the wrapper; if it is not
+    a ``CustomLauncherConfig`` this helper is a no-op. Otherwise it asserts
+    ``command`` is non-empty so the launcher's runtime guard does not fail
+    on a typo in the YAML.
+
+    Args:
+        resource_config_pb: The resource config to inspect. The trainer or
+            inferencer oneof (depending on ``component``) is pulled out of the
+            wrapper.
+        component: Which GiGL component to check. Must be Trainer or
+            Inferencer; other components never carry a ``CustomLauncherConfig``.
+
+    Raises:
+        ValueError: If ``component`` is not Trainer or Inferencer, or if a
+            populated ``CustomLauncherConfig`` has an empty ``command``.
+    """
+    if component not in {GiGLComponents.Trainer, GiGLComponents.Inferencer}:
+        raise ValueError(
+            f"check_custom_launcher_config_shape only supports "
+            f"Trainer and Inferencer components; got {component}."
+        )
+
+    wrapper = GiglResourceConfigWrapper(resource_config=resource_config_pb)
+    component_config: Union[
+        gigl_resource_config_pb2.LocalResourceConfig,
+        gigl_resource_config_pb2.VertexAiResourceConfig,
+        gigl_resource_config_pb2.KFPResourceConfig,
+        gigl_resource_config_pb2.VertexAiGraphStoreConfig,
+        gigl_resource_config_pb2.DataflowResourceConfig,
+        gigl_resource_config_pb2.CustomLauncherConfig,
+    ]
+    if component == GiGLComponents.Trainer:
+        component_config = wrapper.trainer_config
+    else:
+        component_config = wrapper.inferencer_config
+
+    if not isinstance(component_config, gigl_resource_config_pb2.CustomLauncherConfig):
+        return
+
+    if not component_config.command:
+        raise ValueError(
+            f"CustomLauncherConfig.command must be set for {component.value}."
+        )
+
+
 def check_if_trainer_graph_store_storage_command_valid(
     gbml_config_pb_wrapper: GbmlConfigPbWrapper,
 ) -> None:

@@ -352,5 +352,61 @@ def test_resource_config_validation_failure_with_mock_configs(
             )
 
 
+class TestCustomLauncherConfigInValidationChain(TestCase):
+    """Confirm the new CustomLauncherConfig gates are wired into the chain.
+
+    The shape check (``check_custom_launcher_config_shape``) runs
+    unconditionally inside ``kfp_validation_checks``. A resource config
+    with a ``CustomLauncherConfig`` trainer that has an empty ``command``
+    must surface a ``ValueError`` from the chain, not a launcher-side
+    runtime failure.
+    """
+
+    def setUp(self):
+        gigl.env.pipelines_config._resource_config = None
+        self._temp_dir = tempfile.mkdtemp()
+        self._proto_utils = ProtoUtils()
+
+    def tearDown(self):
+        shutil.rmtree(self._temp_dir, ignore_errors=True)
+        gigl.env.pipelines_config._resource_config = None
+
+    def _write_proto_to_file(
+        self, proto: google.protobuf.message.Message, filename: str
+    ) -> Uri:
+        filepath = os.path.join(self._temp_dir, filename)
+        uri = UriFactory.create_uri(filepath)
+        self._proto_utils.write_proto_to_yaml(proto, uri)
+        return uri
+
+    def test_empty_custom_trainer_command_raises_via_chain(self) -> None:
+        # Build a live-SGS task config (so the chain runs through
+        # CustomLauncherConfig-aware paths).
+        task_config_uri = self._write_proto_to_file(
+            _create_valid_live_subgraph_sampling_task_config(),
+            "live_task_config.yaml",
+        )
+
+        # Resource config with a CustomLauncherConfig trainer whose
+        # command is empty — the new shape check must catch it.
+        resource_config = _create_valid_live_subgraph_sampling_resource_config()
+        resource_config.trainer_resource_config.ClearField("trainer_config")
+        resource_config.trainer_resource_config.custom_trainer_config.command = ""
+        resource_config_uri = self._write_proto_to_file(
+            resource_config, "live_custom_empty_resource_config.yaml"
+        )
+
+        with self.assertRaises(ValueError) as ctx:
+            kfp_validation_checks(
+                job_name="custom_launcher_config_chain_test",
+                task_config_uri=task_config_uri,
+                start_at="config_populator",
+                resource_config_uri=resource_config_uri,
+            )
+        # Error message must come from check_custom_launcher_config_shape,
+        # not from a generic shape check elsewhere.
+        self.assertIn("CustomLauncherConfig.command", str(ctx.exception))
+
+
 if __name__ == "__main__":
     absltest.main()
@@ -5,12 +5,18 @@
     GiglResourceConfigWrapper,
 )
 from gigl.src.validation_check.libs.gbml_and_resource_config_compatibility_checks import (
+    check_custom_launcher_config_requires_glt_backend,
     check_inferencer_graph_store_compatibility,
     check_trainer_graph_store_compatibility,
 )
 from snapchat.research.gbml import gbml_config_pb2, gigl_resource_config_pb2
 from tests.test_assets.test_case import TestCase
 
+# Placeholder shell snippet used by CustomLauncherConfig fixtures in this
+# module — these tests only exercise type-of-config dispatch, not actual
+# subprocess execution.
+_FAKE_COMMAND = "echo fake"
+
 # Helper functions for creating VertexAiGraphStoreConfig
 
 
@@ -214,5 +220,107 @@ def test_resource_has_inferencer_graph_store_template_does_not(self):
             )
 
 
+# Helper functions for custom + glt-backend compatibility tests
+
+
+def _create_gbml_config_with_glt_flag(value: str) -> GbmlConfigPbWrapper:
+    """Create a GbmlConfig whose feature_flags.should_run_glt_backend is set.
+
+    Note the raw YAML key is ``should_run_glt_backend`` (not
+    ``should_use_glt_backend``). The wrapper's ``should_use_glt_backend``
+    property reads this key from the ``feature_flags`` map and converts it to
+    a bool.
+    """
+    gbml_config = gbml_config_pb2.GbmlConfig()
+    gbml_config.feature_flags["should_run_glt_backend"] = value
+    return GbmlConfigPbWrapper(gbml_config_pb=gbml_config)
+
+
+def _create_resource_config_with_custom_trainer() -> GiglResourceConfigWrapper:
+    """Create a GiglResourceConfig whose trainer is a CustomLauncherConfig."""
+    config = gigl_resource_config_pb2.GiglResourceConfig()
+    _create_shared_resource_config(config)
+    config.trainer_resource_config.custom_trainer_config.command = _FAKE_COMMAND
+    # Inferencer uses a built-in config so only the trainer path is custom.
+    config.inferencer_resource_config.vertex_ai_inferencer_config.CopyFrom(
+        _create_vertex_ai_resource_config()
+    )
+    return GiglResourceConfigWrapper(resource_config=config)
+
+
+def _create_resource_config_with_custom_inferencer() -> GiglResourceConfigWrapper:
+    """Create a GiglResourceConfig whose inferencer is a CustomLauncherConfig."""
+    config = gigl_resource_config_pb2.GiglResourceConfig()
+    _create_shared_resource_config(config)
+    config.trainer_resource_config.vertex_ai_trainer_config.CopyFrom(
+        _create_vertex_ai_resource_config()
+    )
+    config.inferencer_resource_config.custom_inferencer_config.command = _FAKE_COMMAND
+    return GiglResourceConfigWrapper(resource_config=config)
+
+
+class TestCustomLauncherConfigRequiresGltBackend(TestCase):
+    """Test suite for the CustomLauncherConfig + GLT-backend compatibility guard.
+
+    Because v1 trainer/inferencer dispatchers don't consult the custom oneof,
+    pairing a ``CustomLauncherConfig`` with
+    ``feature_flags.should_run_glt_backend = "False"`` must be caught at
+    validation time rather than at runtime.
+    """
+
+    def test_custom_trainer_without_glt_raises(self):
+        """CustomLauncherConfig trainer + glt=False raises a clear ValueError."""
+        gbml_config = _create_gbml_config_with_glt_flag("False")
+        resource_config = _create_resource_config_with_custom_trainer()
+        with self.assertRaises(ValueError) as ctx:
+            check_custom_launcher_config_requires_glt_backend(
+                gbml_config_pb_wrapper=gbml_config,
+                resource_config_wrapper=resource_config,
+            )
+        self.assertIn("should_run_glt_backend", str(ctx.exception))
+        self.assertIn("custom_trainer_config", str(ctx.exception))
+
+    def test_custom_inferencer_without_glt_raises(self):
+        """CustomLauncherConfig inferencer + glt=False raises a clear ValueError."""
+        gbml_config = _create_gbml_config_with_glt_flag("False")
+        resource_config = _create_resource_config_with_custom_inferencer()
+        with self.assertRaises(ValueError) as ctx:
+            check_custom_launcher_config_requires_glt_backend(
+                gbml_config_pb_wrapper=gbml_config,
+                resource_config_wrapper=resource_config,
+            )
+        self.assertIn("custom_inferencer_config", str(ctx.exception))
+
+    def test_custom_trainer_with_glt_passes(self):
+        """CustomLauncherConfig trainer + glt=True passes validation."""
+        gbml_config = _create_gbml_config_with_glt_flag("True")
+        resource_config = _create_resource_config_with_custom_trainer()
+        # Should not raise any exception
+        check_custom_launcher_config_requires_glt_backend(
+            gbml_config_pb_wrapper=gbml_config,
+            resource_config_wrapper=resource_config,
+        )
+
+    def test_custom_inferencer_with_glt_passes(self):
+        """CustomLauncherConfig inferencer + glt=True passes validation."""
+        gbml_config = _create_gbml_config_with_glt_flag("True")
+        resource_config = _create_resource_config_with_custom_inferencer()
+        # Should not raise any exception
+        check_custom_launcher_config_requires_glt_backend(
+            gbml_config_pb_wrapper=gbml_config,
+            resource_config_wrapper=resource_config,
+        )
+
+    def test_no_custom_config_passes_without_glt(self):
+        """No CustomLauncherConfig at all passes regardless of glt flag."""
+        gbml_config = _create_gbml_config_with_glt_flag("False")
+        resource_config = _create_resource_config_without_graph_stores()
+        # Should not raise any exception: no custom oneof means nothing to enforce.
+        check_custom_launcher_config_requires_glt_backend(
+            gbml_config_pb_wrapper=gbml_config,
+            resource_config_wrapper=resource_config,
+        )
+
+
 if __name__ == "__main__":
     absltest.main()