diff --git a/lightllm/models/gpt_oss/layer_infer/transformer_layer_infer.py b/lightllm/models/gpt_oss/layer_infer/transformer_layer_infer.py index 4c457fd99..0219495ec 100644 --- a/lightllm/models/gpt_oss/layer_infer/transformer_layer_infer.py +++ b/lightllm/models/gpt_oss/layer_infer/transformer_layer_infer.py @@ -65,7 +65,7 @@ def _context_attention_kernel( out=None, ): if self.network_config_["layer_types"][self.layer_num_] == "sliding_attention": - window_size = (self.sliding_window - 1, self.sliding_window - 1) + window_size = (self.sliding_window - 1, 0) use_sliding_window = True else: window_size = (-1, -1)