diff --git a/graph_net/sample_pass/fusible_subgraph_ranges_generator.py b/graph_net/sample_pass/fusible_subgraph_ranges_generator.py
index 676f6a2bc..ad3fe8c78 100644
--- a/graph_net/sample_pass/fusible_subgraph_ranges_generator.py
+++ b/graph_net/sample_pass/fusible_subgraph_ranges_generator.py
@@ -189,11 +189,12 @@ def valid_fused_ops(num_ops_list: list[int]):
             if is_a_range(num_ops_list)
             if valid_fused_ops(num_ops_list)
         ]
+        offset = self.start_offset_in_original_graph
         fusible_subgraph_ranges = [
             (start, end)
             for num_ops_list in num_ops_lists
-            for start in [num_ops_list[0] - 1]
-            for end in [num_ops_list[-1]]
+            for start in [num_ops_list[0] - 1 + offset]
+            for end in [num_ops_list[-1] + offset]
         ]
 
         # sorted by `start`
diff --git a/graph_net/sample_pass/group_fusible_subgraph_ranges.py b/graph_net/sample_pass/group_fusible_subgraph_ranges.py
new file mode 100644
index 000000000..42222ebc5
--- /dev/null
+++ b/graph_net/sample_pass/group_fusible_subgraph_ranges.py
@@ -0,0 +1,144 @@
+from graph_net.sample_pass.sample_pass import SamplePass
+from pathlib import Path
+import json
+
+
+class GroupFusibleSubgraphRanges(SamplePass):
+    """Merge per-subgraph fusible ranges into one file per original graph.
+
+    Every call reads the ranges JSON of one subgraph sample and accumulates
+    its ranges, together with the subgraph path that produced each range,
+    under the original model path. ``END`` then writes one JSON file per
+    original model with the ranges sorted by their start index.
+    """
+
+    def __init__(self, config=None):
+        super().__init__(config)
+        # original model rel path -> accumulated (start, end) ranges
+        self.original_graph_rel_model_path2ranges: dict[
+            str, list[tuple[int, int]]
+        ] = {}
+        # original model rel path -> source subgraph rel path of each range,
+        # kept index-aligned with the ranges list above
+        self.original_graph_rel_model_path2subgraph_rel_model_paths: dict[
+            str, list[str]
+        ] = {}
+
+    def declare_config(
+        self,
+        subgraph_model_path_prefix: str,
+        output_dir: str,
+        input_json_file_name: str = "fusible_subgraph_ranges.json",
+        output_json_file_name: str = "grouped_fusible_subgraph_ranges.json",
+        output_json_key: str = "subgraph_ranges",
+        output_json_subgraph_rel_model_path_key: str = "fusible_subgraph_relative_model_paths",
+    ):
+        """Config keys and defaults used by this pass; intentionally a no-op.
+
+        NOTE(review): presumably the SamplePass base class inspects this
+        signature to validate/fill ``self.config`` -- confirm.
+        """
+        pass
+
+    def __call__(self, subgraph_rel_model_path: str):
+        """Accumulate the ranges of one subgraph under its original model."""
+        input_json_path = (
+            Path(self.config["subgraph_model_path_prefix"])
+            / subgraph_rel_model_path
+            / self.config["input_json_file_name"]
+        )
+        # Use a context manager so the handle is closed deterministically
+        # instead of leaking until garbage collection.
+        with open(input_json_path) as f:
+            subgraph_sources = json.load(f)
+        # The input file is read with the same key that is used on output.
+        subgraph_ranges = subgraph_sources.get(self.config["output_json_key"], [])
+        original_graph_rel_model_path = self._extract_original_model_path(
+            subgraph_rel_model_path
+        )
+        self._collect_original_graph_rel_model_path2ranges(
+            original_graph_rel_model_path, subgraph_ranges
+        )
+        # One path entry per range keeps both lists index-aligned.
+        self._collect_original_graph_rel_model_path2subgraph_rel_model_path(
+            original_graph_rel_model_path,
+            [subgraph_rel_model_path] * len(subgraph_ranges),
+        )
+
+    def _extract_original_model_path(self, rel_model_path: str) -> str:
+        """Return the path prefix before a ``_decomposed`` component.
+
+        Paths without such a component are returned unchanged.
+        """
+        path_parts = Path(rel_model_path).parts
+        if "_decomposed" in path_parts:
+            decomposed_idx = path_parts.index("_decomposed")
+            return str(Path(*path_parts[:decomposed_idx]))
+        return rel_model_path
+
+    def _collect_original_graph_rel_model_path2subgraph_rel_model_path(
+        self,
+        original_graph_rel_model_path: str,
+        subgraph_rel_model_paths: list[str],
+    ):
+        """Append subgraph paths to the list kept for the original model."""
+        self.original_graph_rel_model_path2subgraph_rel_model_paths.setdefault(
+            original_graph_rel_model_path, []
+        ).extend(subgraph_rel_model_paths)
+
+    def _collect_original_graph_rel_model_path2ranges(
+        self, original_graph_rel_model_path, subgraph_ranges
+    ):
+        """Append ranges to the list kept for the original model."""
+        self.original_graph_rel_model_path2ranges.setdefault(
+            original_graph_rel_model_path, []
+        ).extend(subgraph_ranges)
+
+    def END(self, rel_model_paths: list[str]):
+        """Write one grouped JSON file per original model collected so far."""
+        for (
+            original_graph_rel_model_path,
+            subgraph_ranges,
+        ) in self.original_graph_rel_model_path2ranges.items():
+            subgraph_rel_model_paths = (
+                self.original_graph_rel_model_path2subgraph_rel_model_paths[
+                    original_graph_rel_model_path
+                ]
+            )
+            self._save_json(
+                original_graph_rel_model_path, subgraph_ranges, subgraph_rel_model_paths
+            )
+
+    def _save_json(
+        self, original_graph_rel_model_path, subgraph_ranges, subgraph_rel_model_paths
+    ):
+        """Sort ranges by start index and write them with their paths."""
+        model_dir = Path(self.config["output_dir"]) / original_graph_rel_model_path
+        model_dir.mkdir(parents=True, exist_ok=True)
+
+        # Sort (range, path) pairs together so each path stays with its range.
+        sorted_pairs = sorted(
+            zip(subgraph_ranges, subgraph_rel_model_paths),
+            key=lambda pair: pair[0][0],
+        )
+        sorted_ranges = [subgraph_range for subgraph_range, _ in sorted_pairs]
+        sorted_paths = [rel_path for _, rel_path in sorted_pairs]
+
+        json_obj = {
+            **self._get_ranges_json(sorted_ranges),
+            **self._get_paths_json(sorted_paths),
+        }
+        json_str = json.dumps(json_obj, indent=4)
+        (model_dir / self.config["output_json_file_name"]).write_text(json_str)
+
+    def _get_paths_json(self, subgraph_rel_model_paths: list[str]):
+        """Return the paths wrapped under the configured paths key."""
+        return {
+            self.config[
+                "output_json_subgraph_rel_model_path_key"
+            ]: subgraph_rel_model_paths
+        }
+
+    def _get_ranges_json(self, subgraph_ranges: list[tuple[int, int]]):
+        """Return the ranges wrapped under the configured ranges key."""
+        return {self.config["output_json_key"]: subgraph_ranges}
diff --git a/graph_net/test/group_fusible_subgraph_ranges_test.sh b/graph_net/test/group_fusible_subgraph_ranges_test.sh
new file mode 100644
index 000000000..a8b2fa6ea
--- /dev/null
+++ b/graph_net/test/group_fusible_subgraph_ranges_test.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Smoke test for the GroupFusibleSubgraphRanges sample pass: runs it over the
+# checked-in fixture workspace and writes the grouped ranges under /tmp.
+
+# Abort on the first failure so a broken graph_net import does not leave
+# GRAPH_NET_ROOT empty and silently point every path below at "/graph_net/...".
+set -euo pipefail
+
+GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(os.path.dirname(os.path.dirname(graph_net.__file__)))")
+
+# The pass config is handed over as a single base64-encoded JSON argument.
+python3 -m graph_net.apply_sample_pass \
+    --model-path-list "$GRAPH_NET_ROOT/graph_net/test/workspace_group_fusible_subgraph_ranges/sample_list.txt" \
+    --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/sample_pass/group_fusible_subgraph_ranges.py" \
+    --sample-pass-class-name "GroupFusibleSubgraphRanges" \
+    --sample-pass-config "$(base64 -w 0 <<EOF
+{
+    "subgraph_model_path_prefix": "$GRAPH_NET_ROOT/graph_net/test/workspace_group_fusible_subgraph_ranges",
+    "output_dir": "/tmp/workspace_group_fusible_subgraph_ranges",
+    "input_json_file_name": "fusible_subgraph_ranges.json",
+    "output_json_file_name": "grouped_fusible_subgraph_ranges.json",
+    "output_json_key": "subgraph_ranges"
+}
+EOF
+)"
\ No newline at end of file
diff --git a/graph_net/test/workspace_group_fusible_subgraph_ranges/sample_list.txt b/graph_net/test/workspace_group_fusible_subgraph_ranges/sample_list.txt
new file mode 100644
index 000000000..41df1caf3
--- /dev/null
+++ b/graph_net/test/workspace_group_fusible_subgraph_ranges/sample_list.txt
@@ -0,0 +1,10 @@
+samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start167_end183_2
+samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start230_end288_4
+samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start186_end207_3
+samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start72_end130_1
+samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start5_end63_0
+samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start643_end700_11
+samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start126_end183_2
+samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start297_end354_5
+samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start65_end122_1
+samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start4_end61_0
diff --git a/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start126_end183_2/fusible_subgraph_ranges.json b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start126_end183_2/fusible_subgraph_ranges.json
new file mode 100644
index 000000000..2985af0b9
--- /dev/null
+++ b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start126_end183_2/fusible_subgraph_ranges.json
@@ -0,0 +1,40 @@
+{
+    "subgraph_ranges": [
+        [
+            126,
+            129
+        ],
+        [
+            130,
+            132
+        ],
+        [
+            133,
+            138
+        ],
+        [
+            145,
+            148
+        ],
+        [
+            149,
+            151
+        ],
+        [
+            152,
+            157
+        ],
+        [
+            164,
+            167
+        ],
+        [
+            168,
+            170
+        ],
+        [
+            171,
+            176
+        ]
+    ]
+}
\ No newline at end of file
diff --git a/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start297_end354_5/fusible_subgraph_ranges.json b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start297_end354_5/fusible_subgraph_ranges.json
new file mode 100644
index 000000000..b5494ba48
--- /dev/null
+++ b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start297_end354_5/fusible_subgraph_ranges.json
@@ -0,0 +1,40 @@
+{
+    "subgraph_ranges": [
+        [
+            297,
+            300
+        ],
+        [
+            301,
+            303
+        ],
+        [
+            304,
+            309
+        ],
+        [
+            316,
+            319
+        ],
+        [
+            320,
+            322
+        ],
+        [
+            323,
+            328
+        ],
+        [
+            335,
+            338
+        ],
+        [
+            339,
+            341
+        ],
+        [
+            342,
+            347
+        ]
+    ]
+}
\ No newline at end of file
diff --git a/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start4_end61_0/fusible_subgraph_ranges.json b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start4_end61_0/fusible_subgraph_ranges.json
new file mode 100644
index 000000000..1a869890f
--- /dev/null
+++ b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start4_end61_0/fusible_subgraph_ranges.json
@@ -0,0 +1,40 @@
+{
+    "subgraph_ranges": [
+        [
+            4,
+            7
+        ],
+        [
+            8,
+            10
+        ],
+        [
+            11,
+            16
+        ],
+        [
+            23,
+            26
+        ],
+        [
+            27,
+            29
+        ],
+        [
+            30,
+            35
+        ],
+        [
+            42,
+            45
+        ],
+        [
+            46,
+            48
+        ],
+        [
+            49,
+            54
+        ]
+    ]
+}
\ No newline at end of file
diff --git a/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start643_end700_11/fusible_subgraph_ranges.json b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start643_end700_11/fusible_subgraph_ranges.json
new file mode 100644
index 000000000..ba2e7bbd8
--- /dev/null
+++ b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start643_end700_11/fusible_subgraph_ranges.json
@@ -0,0 +1,64 @@
+{
+    "subgraph_ranges": [
+        [
+            643,
+            646
+        ],
+        [
+            647,
+            649
+        ],
+        [
+            650,
+            655
+        ],
+        [
+            655,
+            657
+        ],
+        [
+            658,
+            661
+        ],
+        [
+            662,
+            665
+        ],
+        [
+            666,
+            668
+        ],
+        [
+            669,
+            674
+        ],
+        [
+            674,
+            676
+        ],
+        [
+            677,
+            680
+        ],
+        [
+            681,
+            684
+        ],
+        [
+            685,
+            687
+        ],
+        [
+            688,
+            693
+        ],
+        [
+            693,
+            695
+        ],
+        [
+            696,
+            699
+        ]
+    ]
+}
\ No newline at end of file
diff --git a/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start65_end122_1/fusible_subgraph_ranges.json b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start65_end122_1/fusible_subgraph_ranges.json
new file mode 100644
index 000000000..8094bb491
--- /dev/null
+++ b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/timm/convnextv2_base.fcmae_ft_in1k/_decomposed/convnextv2_base.fcmae_ft_in1k_start65_end122_1/fusible_subgraph_ranges.json
@@ -0,0 +1,40 @@
+{
+    "subgraph_ranges": [
+        [
+            65,
+            68
+        ],
+        [
+            69,
+            71
+        ],
+        [
+            72,
+            77
+        ],
+        [
+            84,
+            87
+        ],
+        [
+            88,
+            90
+        ],
+        [
+            91,
+            96
+        ],
+        [
+            103,
+            106
+        ],
+        [
+            107,
+            109
+        ],
+        [
+            110,
+            115
+        ]
+    ]
+}
\ No newline at end of file
diff --git a/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start167_end183_2/fusible_subgraph_ranges.json b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start167_end183_2/fusible_subgraph_ranges.json
new file mode 100644
index 000000000..4d39f6101
--- /dev/null
+++ b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start167_end183_2/fusible_subgraph_ranges.json
@@ -0,0 +1,20 @@
+{
+    "subgraph_ranges": [
+        [
+            167,
+            170
+        ],
+        [
+            171,
+            173
+        ],
+        [
+            174,
+            177
+        ],
+        [
+            179,
+            182
+        ]
+    ]
+}
\ No newline at end of file
diff --git a/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start186_end207_3/fusible_subgraph_ranges.json b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start186_end207_3/fusible_subgraph_ranges.json
new file mode 100644
index 000000000..7b8d6b881
--- /dev/null
+++ b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start186_end207_3/fusible_subgraph_ranges.json
@@ -0,0 +1,24 @@
+{
+    "subgraph_ranges": [
+        [
+            186,
+            198
+        ],
+        [
+            198,
+            200
+        ],
+        [
+            200,
+            202
+        ],
+        [
+            202,
+            204
+        ],
+        [
+            204,
+            207
+        ]
+    ]
+}
\ No newline at end of file
diff --git a/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start230_end288_4/fusible_subgraph_ranges.json b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start230_end288_4/fusible_subgraph_ranges.json
new file mode 100644
index 000000000..2e8798739
--- /dev/null
+++ b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start230_end288_4/fusible_subgraph_ranges.json
@@ -0,0 +1,52 @@
+{
+    "subgraph_ranges": [
+        [
+            231,
+            237
+        ],
+        [
+            237,
+            240
+        ],
+        [
+            240,
+            243
+        ],
+        [
+            243,
+            246
+        ],
+        [
+            250,
+            253
+        ],
+        [
+            255,
+            258
+        ],
+        [
+            268,
+            270
+        ],
+        [
+            270,
+            272
+        ],
+        [
+            272,
+            277
+        ],
+        [
+            277,
+            282
+        ],
+        [
+            284,
+            286
+        ],
+        [
+            286,
+            288
+        ]
+    ]
+}
\ No newline at end of file
diff --git a/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start5_end63_0/fusible_subgraph_ranges.json b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start5_end63_0/fusible_subgraph_ranges.json
new file mode 100644
index 000000000..63f5e4a99
--- /dev/null
+++ b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start5_end63_0/fusible_subgraph_ranges.json
@@ -0,0 +1,52 @@
+{
+    "subgraph_ranges": [
+        [
+            6,
+            12
+        ],
+        [
+            12,
+            15
+        ],
+        [
+            15,
+            18
+        ],
+        [
+            18,
+            21
+        ],
+        [
+            25,
+            28
+        ],
+        [
+            30,
+            33
+        ],
+        [
+            43,
+            45
+        ],
+        [
+            45,
+            47
+        ],
+        [
+            47,
+            52
+        ],
+        [
+            52,
+            57
+        ],
+        [
+            59,
+            61
+        ],
+        [
+            61,
+            63
+        ]
+    ]
+}
\ No newline at end of file
diff --git a/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start72_end130_1/fusible_subgraph_ranges.json b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start72_end130_1/fusible_subgraph_ranges.json
new file mode 100644
index 000000000..bf9d663d9
--- /dev/null
+++ b/graph_net/test/workspace_group_fusible_subgraph_ranges/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification/_decomposed/hf-tiny-model-private_tiny-random-Swinv2ForImageClassification_start72_end130_1/fusible_subgraph_ranges.json
@@ -0,0 +1,52 @@
+{
+    "subgraph_ranges": [
+        [
+            73,
+            79
+        ],
+        [
+            79,
+            82
+        ],
+        [
+            82,
+            85
+        ],
+        [
+            85,
+            88
+        ],
+        [
+            92,
+            95
+        ],
+        [
+            97,
+            100
+        ],
+        [
+            110,
+            112
+        ],
+        [
+            112,
+            114
+        ],
+        [
+            114,
+            119
+        ],
+        [
+            119,
+            124
+        ],
+        [
+            126,
+            128
+        ],
+        [
+            128,
+            130
+        ]
+    ]
+}
\ No newline at end of file
diff --git a/graph_net/tools/generate_subgraph_dataset.sh b/graph_net/tools/generate_subgraph_dataset.sh
index a56ecaaf6..3670c304b 100755
--- a/graph_net/tools/generate_subgraph_dataset.sh
+++ b/graph_net/tools/generate_subgraph_dataset.sh
@@ -13,13 +13,15 @@ GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(os.path.dirname(
 RESUME="true"
 
 DECOMPOSE_WORKSPACE=/tmp/subgraph_dataset_workspace
+DEVICE_REWRITED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/device_rewrited
 OP_NAMES_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/sample_op_names
+SPLIT_POINTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/split_points
 RANGE_DECOMPOSE_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/range_decompose
 GRAPH_VAR_RENAME_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/graph_var_renamed
 DEDUPLICATED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/deduplicated
-DEVICE_REWRITED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/device_rewrited
 CUMSUM_NUM_KERNELS_DIR=$DECOMPOSE_WORKSPACE/cumsum_num_kernels
 FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/fusible_subgraph_ranges
+GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/grouped_fusible_subgraph_ranges
 FUSIBLE_SUBGRAPH_SAMPLES_DIR=$DECOMPOSE_WORKSPACE/fusible_subgraph_samples
 RENAMED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/renamed_fusible_subgraphs
 DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/deduplicated_fusible_subgraphs
@@ -47,8 +49,28 @@ function generate_subgraph_list() {
         | tee $sample_list
 }
 
+function rewrite_device() {  # runs DeviceRewriteSamplePass (device: cuda) over the samples listed in $model_list
+    echo ">>> [1] Rewrite devices for samples in ${model_list}."
+    echo ">>>"
+    python3 -m graph_net.model_path_handler \
+        --model-path-list "$model_list" \
+        --handler-config=$(base64 -w 0 <<EOF
+{
+    "handler_path": "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/device_rewrite_sample_pass.py",
+    "handler_class_name": "DeviceRewriteSamplePass",
+    "handler_config": {
+        "device": "cuda",
+        "resume": ${RESUME},
+        "model_path_prefix": "$GRAPH_NET_ROOT",
+        "output_dir": "${DEVICE_REWRITED_OUTPUT_DIR}"
+    }
+}
+EOF
+)
+}
+
 function generate_op_names() {
-    echo ">>> [1] Generate op_names.txt for samples in ${model_list}."
+    echo ">>> [2] Generate op_names.txt for samples in ${model_list}."
     echo ">>>"
     python3 -m graph_net.model_path_handler \
         --model-path-list $model_list \
@@ -58,7 +80,7 @@ function generate_op_names() {
     "handler_class_name": "OpNamesExtractor",
     "handler_config": {
         "resume": ${RESUME},
-        "model_path_prefix": "$GRAPH_NET_ROOT",
+        "model_path_prefix": "${DEVICE_REWRITED_OUTPUT_DIR}",
         "output_dir": "${OP_NAMES_OUTPUT_DIR}"
     }
 }
@@ -67,7 +89,7 @@ EOF
 }
 
 function generate_split_point() {
-    echo ">>> [2] Generate split points for samples in ${model_list}."
+    echo ">>> [3] Generate split points for samples in ${model_list}."
     echo ">>>   MIN_SEQ_OPS: ${MIN_SEQ_OPS}, MAX_SEQ_OPS: ${MAX_SEQ_OPS}"
     echo ">>>"
     python3 -m graph_net.apply_sample_pass \
@@ -77,7 +99,7 @@ function generate_split_point() {
         --sample-pass-config=$(base64 -w 0 <<EOF
 {
         "model_path_prefix": "$GRAPH_NET_ROOT",
-        "output_dir": "$DECOMPOSE_WORKSPACE", 
+        "output_dir": "$SPLIT_POINTS_OUTPUT_DIR", 
         "op_names_path_prefix": "${OP_NAMES_OUTPUT_DIR}",
         "device": "cuda",
         "window_size": 64,
@@ -94,7 +116,7 @@ EOF
 }
 
 function range_decompose() {
-    echo ">>> [3] Decompose according to split_results.json for samples in ${model_list}."
+    echo ">>> [4] Decompose according to split_results.json for samples in ${model_list}."
     echo ">>>"
     python3 -m graph_net.model_path_handler \
         --model-path-list "$model_list" \
@@ -106,7 +128,7 @@ function range_decompose() {
         "resume": ${RESUME},
         "model_path_prefix": "$GRAPH_NET_ROOT",
         "output_dir": "${RANGE_DECOMPOSE_OUTPUT_DIR}",
-        "subgraph_ranges_json_root": "$DECOMPOSE_WORKSPACE",
+        "subgraph_ranges_json_root": "$SPLIT_POINTS_OUTPUT_DIR",
         "subgraph_ranges_json_file_name": "typical_subgraph_ranges.json",
         "group_head_and_tail": false,
         "chain_style": false
@@ -117,7 +139,7 @@ EOF
 }
 
 function rename_decomposed_subgraph() {
-    echo ">>> [4] Rename subgraph samples under ${RANGE_DECOMPOSE_OUTPUT_DIR}."
+    echo ">>> [5] Rename subgraph samples under ${RANGE_DECOMPOSE_OUTPUT_DIR}."
     echo ">>>"
     python3 -m graph_net.model_path_handler \
         --model-path-list ${range_decomposed_subgraph_list} \
@@ -142,46 +164,27 @@ EOF
 }
 
 function remove_duplicate_renamed_graphs() {
-    echo ">>> [5] Remove duplicated subgraph samples under ${GRAPH_VAR_RENAME_OUTPUT_DIR}."
+    echo ">>> [6] Remove duplicated subgraph samples under ${GRAPH_VAR_RENAME_OUTPUT_DIR}."
     echo ">>>"
     python3 -m graph_net.tools.deduplicated \
         --samples-dir ${GRAPH_VAR_RENAME_OUTPUT_DIR} \
         --target-dir ${DEDUPLICATED_OUTPUT_DIR}
 }
 
-function rewrite_device() {
-    echo ">>> [6] Rewrite devices for subgraph samples under ${DEDUPLICATED_OUTPUT_DIR}."
-    echo ">>>"
-    python3 -m graph_net.model_path_handler \
-        --model-path-list ${deduplicated_subgraph_list} \
-        --handler-config=$(base64 -w 0 <<EOF
-{
-    "handler_path": "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/device_rewrite_sample_pass.py",
-    "handler_class_name": "DeviceRewriteSamplePass",
-    "handler_config": {
-        "device": "cuda",
-        "resume": ${RESUME},
-        "model_path_prefix": "${DEDUPLICATED_OUTPUT_DIR}",
-        "output_dir": "${DEVICE_REWRITED_OUTPUT_DIR}"
-    }
-}
-EOF
-)
-}
 
 function gen_fusible_subgraphs() {
-    echo ">>> [7] Generate fusible subgraphs for subgraph samples under ${DEVICE_REWRITED_OUTPUT_DIR}."
+    echo ">>> [7] Generate fusible subgraphs for subgraph samples under ${DEDUPLICATED_OUTPUT_DIR}."
     echo ">>>"
     python3 -m graph_net.model_path_handler \
         --use-subprocess    \
-        --model-path-list "$device_rewrited_subgraph_list" \
+        --model-path-list "$deduplicated_subgraph_list" \
         --handler-config $(base64 -w 0 <<EOF
 {
     "handler_path": "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/cumsum_num_kernels_generator.py",
     "handler_class_name": "CumSumNumKernelsGenerator",
     "handler_config": {
         "output_json_file_name": "cumsum_num_kernels.json",
-        "model_path_prefix": "${DEVICE_REWRITED_OUTPUT_DIR}",
+        "model_path_prefix": "${DEDUPLICATED_OUTPUT_DIR}",
         "output_dir": "$CUMSUM_NUM_KERNELS_DIR",
         "device": "cuda",
         "resume": ${RESUME}
@@ -191,7 +194,7 @@ EOF
 )
 
     python3 -m graph_net.model_path_handler \
-        --model-path-list "$device_rewrited_subgraph_list" \
+        --model-path-list "$deduplicated_subgraph_list" \
         --handler-config $(base64 -w 0 <<EOF
 {
     "handler_path": "$GRAPH_NET_ROOT/graph_net/sample_pass/fusible_subgraph_ranges_generator.py",
@@ -205,19 +208,34 @@ EOF
     }
 }
 EOF
+)
+
+    python3 -m graph_net.apply_sample_pass \
+        --model-path-list "$deduplicated_subgraph_list" \
+        --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/sample_pass/group_fusible_subgraph_ranges.py" \
+        --sample-pass-class-name "GroupFusibleSubgraphRanges" \
+        --sample-pass-config $(base64 -w 0 <<EOF
+{
+    "subgraph_model_path_prefix": "$FUSIBLE_SUBGRAPH_RANGES_DIR",
+    "output_dir": "$GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR",
+    "input_json_file_name": "fusible_subgraph_ranges.json",
+    "output_json_file_name": "grouped_fusible_subgraph_ranges.json",
+    "output_json_key": "subgraph_ranges"
+}
+EOF
 )
 
     python3 -m graph_net.model_path_handler \
-        --model-path-list "$device_rewrited_subgraph_list" \
+        --model-path-list "$model_list" \
         --handler-config $(base64 -w 0 <<EOF
 {
     "handler_path": "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/subgraph_generator.py",
     "handler_class_name": "SubgraphGenerator",
     "handler_config": {
-        "model_path_prefix": "${DEVICE_REWRITED_OUTPUT_DIR}",
+        "model_path_prefix": "$GRAPH_NET_ROOT",
         "output_dir": "$FUSIBLE_SUBGRAPH_SAMPLES_DIR",
-        "subgraph_ranges_json_root": "$FUSIBLE_SUBGRAPH_RANGES_DIR",
-        "subgraph_ranges_json_file_name": "fusible_subgraph_ranges.json",
+        "subgraph_ranges_json_root": "$GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR",
+        "subgraph_ranges_json_file_name": "grouped_fusible_subgraph_ranges.json",
         "device": "cuda",
         "resume": ${RESUME}
     }
@@ -226,6 +244,7 @@ EOF
 )
 }
 
+
 function rename_fusible_subgraph() {
     echo ">>> [8] Rename subgraph samples under ${FUSIBLE_SUBGRAPH_SAMPLES_DIR}."
     echo ">>>"
@@ -287,7 +306,8 @@ EOF
 main() {
     timestamp=`date +%Y%m%d_%H%M`
     suffix="${OP_RANGE}ops_${timestamp}"
-    
+
+    rewrite_device 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rewrite_device_${suffix}.txt
     generate_op_names 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_op_names_${suffix}.txt
     generate_split_point 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_split_point_${suffix}.txt
     range_decompose 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_range_decompose_${suffix}.txt
@@ -297,9 +317,6 @@ main() {
     remove_duplicate_renamed_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_renamed_graphs_${suffix}.txt
 
     generate_subgraph_list ${DEDUPLICATED_OUTPUT_DIR} ${deduplicated_subgraph_list}
-    rewrite_device 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rewrite_device_${suffix}.txt
-
-    generate_subgraph_list ${DEVICE_REWRITED_OUTPUT_DIR} ${device_rewrited_subgraph_list}
     gen_fusible_subgraphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_fusible_subgraphs_${suffix}.txt
 
     generate_subgraph_list ${FUSIBLE_SUBGRAPH_SAMPLES_DIR} ${fusible_subgraph_list}
diff --git a/graph_net/torch/sample_pass/cumsum_num_kernels_generator.py b/graph_net/torch/sample_pass/cumsum_num_kernels_generator.py
index a21a915a1..6454ea5de 100644
--- a/graph_net/torch/sample_pass/cumsum_num_kernels_generator.py
+++ b/graph_net/torch/sample_pass/cumsum_num_kernels_generator.py
@@ -41,7 +41,7 @@ def sample_handled(self, rel_model_path: str) -> bool:
     def resume(self, rel_model_path: str):
         model_path = Path(self.config["model_path_prefix"]) / rel_model_path
         device = self._choose_device(self.config["device"])
-        start_offset_in_original_graph = self.config["start_offset_in_original_graph"]
+        start_offset_in_original_graph = self._resolve_start_offset(model_path)
         analyzer = CumsumNumKernelsAnalyzer(
             model_path, device, start_offset_in_original_graph
         )
@@ -52,6 +52,19 @@ def resume(self, rel_model_path: str):
         output_file_path = output_dir_path / self.config["output_json_file_name"]
         output_file_path.write_text(cumsum_num_kernels_json)
 
+    def _resolve_start_offset(self, model_path: Path) -> int:
+        """Return the first start index listed in subgraph_sources.json, or 0 if none."""
+        subgraph_sources_json_file = model_path / "subgraph_sources.json"
+        if not subgraph_sources_json_file.exists():
+            return 0  # no sources file under model_path: use offset 0
+        sources_data = json.loads(subgraph_sources_json_file.read_text())
+        if not sources_data:
+            raise ValueError(f"No sources found in {subgraph_sources_json_file}")
+        for ranges in sources_data.values():  # keys are not needed here
+            if ranges:  # first non-empty range list decides the offset
+                return ranges[0][0]
+        return 0
+
     def _choose_device(self, device) -> str:
         if device in ["cpu", "cuda"]:
             return device