From 2c5dcc4f22ccb2852dde342ac9b3bd068115c51c Mon Sep 17 00:00:00 2001 From: Olatunji Ruwase Date: Mon, 15 Jun 2026 15:50:49 +0000 Subject: [PATCH 1/2] Default gradient_clipping to 1.0 Change GRADIENT_CLIPPING_DEFAULT from 0. (disabled) to 1.0 so configs that omit the key clip at 1.0 by default, matching common RL/LLM training and the FSDP2 reference. Explicit "gradient_clipping": 0.0 still disables clipping. Signed-off-by: Olatunji Ruwase Co-authored-by: Cursor --- deepspeed/runtime/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepspeed/runtime/constants.py b/deepspeed/runtime/constants.py index 9e73bad73376..d9f3ba1639e4 100755 --- a/deepspeed/runtime/constants.py +++ b/deepspeed/runtime/constants.py @@ -251,7 +251,7 @@ "gradient_clipping": 1.0 ''' GRADIENT_CLIPPING = 'gradient_clipping' -GRADIENT_CLIPPING_DEFAULT = 0. +GRADIENT_CLIPPING_DEFAULT = 1.0 ######################################### # Capture graph for short kernels sequences From 535a296266ce158a13d3eb72fd84a82bf626569d Mon Sep 17 00:00:00 2001 From: Olatunji Ruwase Date: Tue, 23 Jun 2026 12:14:45 +0000 Subject: [PATCH 2/2] Fix docs --- deepspeed/runtime/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepspeed/runtime/constants.py b/deepspeed/runtime/constants.py index d9f3ba1639e4..d7b4858893c7 100755 --- a/deepspeed/runtime/constants.py +++ b/deepspeed/runtime/constants.py @@ -244,8 +244,8 @@ ######################################### # Gradient clipping ######################################### -# Gradient clipping. By default, this feature is not enabled. -# Users can configure in ds_config.json as below example: +# Gradient clipping. By default, this feature is enabled with a value of 1.0. +# Users can configure it in ds_config.json as in the example below (set to 0.0 to disable): GRADIENT_CLIPPING_FORMAT = ''' Gradient clipping should be enabled as: "gradient_clipping": 1.0