diff --git a/pyproject.toml b/pyproject.toml
index 478104f..064d34f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,11 @@ dependencies = [
     "openai>=1.108.1",
     "tiktoken",
     "jinja2",
+    # HuggingFace's Rust BPE library. ``_get_offset_tokenizer`` uses
+    # ``tokenizers.Tokenizer.from_pretrained`` for offset-aware encoding
+    # (body/scaffold attribution on the render path) — keeps the heavy
+    # ``transformers`` framework off the offset path for most models.
+    "tokenizers>=0.20",
     "transformers>=4.50.0",
     # Used by GptOssRenderer to render and parse harmony tokens. Vendoring
     # OpenAI's reference implementation keeps us byte-identical with vLLM
diff --git a/renderers/base.py b/renderers/base.py
index 45768de..7928d93 100644
--- a/renderers/base.py
+++ b/renderers/base.py
@@ -1635,42 +1635,80 @@ def trim_to_turn_close(
     return previous_ids
 
 
-# Per-model offset-aware tokenizer cache. ``attribute_text_segments``
-# uses the fast HuggingFace tokenizer's ``offset_mapping`` to attribute
-# each token to its source text segment under one BPE pass. Fastokens
-# (the Rust BPE we patch in by default for ~10x faster encode) does not
-# track character offsets — the patched tokenizer's
-# ``return_offsets_mapping=True`` raises ``NotImplementedError``. So we
-# keep a parallel vanilla tokenizer per model purely for offset queries.
-# Memory cost is one extra tokenizer per *unique* model name across all
-# pools / renderers (the cache is process-global), independent of pool
-# size.
-_offset_tokenizers: dict[str, Any] = {}
+# Per-model offset-aware ``tokenizers.Tokenizer`` cache. Renderers whose
+# ``emit_text_segments`` block has mixed ``is_content`` labels (and
+# minimax_m2's ``emit_token_overlap_body``) need character offsets to
+# attribute each joined-encode token back to its source segment. We use
+# the ``tokenizers`` package directly — its native ``Encoding.offsets``
+# is exactly what we want and ``transformers`` is not required.
+# ``tokenizers.Tokenizer.from_pretrained`` just downloads
+# ``tokenizer.json`` via ``huggingface_hub``; no model config build,
+# no remote-code execution. Fastokens (the Rust BPE that
+# ``load_tokenizer`` patches in by default) substitutes a non-offset
+# backend into ``PreTrainedTokenizerFast``, so when we detect a
+# fastokens-patched (or other non-offset) backend we fall back to
+# loading a separate vanilla ``tokenizers.Tokenizer`` keyed by
+# ``name_or_path``. Cache memory cost is one extra tokenizer per
+# *unique* model name across all pools / renderers (process-global),
+# independent of pool size.
+_offset_tokenizers: "dict[str, Any]" = {}
 _offset_tokenizers_lock = threading.Lock()
 
 
-def _get_offset_tokenizer(tokenizer):
-    """Return a tokenizer that supports ``return_offsets_mapping=True``.
+_OFFSET_PROBE_TEXT = "Hello, world.\n\n# Test"
+"""Probe string used to verify a loaded ``tokenizers.Tokenizer`` matches
+the user's tokenizer. Spans the ``.\\n\\n`` boundary because some models
+(MiniMax-M2.5's ``GPT2Tokenizer`` wrapper) ship a ``tokenizer.json``
+whose pre_tokenizer disagrees with what ``transformers.AutoTokenizer``
+applies at construction — that's where they diverge."""
+
 
-    If ``tokenizer`` itself supports offsets, returns it unchanged.
-    Otherwise loads a vanilla (non-fastokens) tokenizer from
-    ``tokenizer.name_or_path`` and caches it. Raises if the tokenizer
-    has no usable ``name_or_path`` — hand-coded renderers always pass
-    a tokenizer loaded via ``load_tokenizer`` which does set it.
+def _get_offset_tokenizer(tokenizer):
+    """Return a ``tokenizers.Tokenizer`` for offset-aware encoding.
+
+    Resolution order:
+
+    1. If ``tokenizer`` is already a ``tokenizers.Tokenizer``, return
+       it as-is (BYO offset-capable tokenizer — no extra load).
+    2. If ``tokenizer.backend_tokenizer`` is a vanilla
+       ``tokenizers.Tokenizer`` (vanilla ``PreTrainedTokenizerFast``,
+       not fastokens-patched), use it directly — no extra load.
+    3. Load via ``tokenizers.Tokenizer.from_pretrained(name_or_path)``
+       and verify it encodes a probe string to the same ids as the
+       user's tokenizer. If they match, cache and use it. Pinned
+       ``TRUSTED_REVISIONS`` are honoured.
+    4. If the bare load diverges from the user's tokenizer, fall back
+       to ``transformers.AutoTokenizer`` and pull out *its* backend —
+       some models (MiniMax-M2.5) ship a ``tokenizer.json`` whose
+       pre_tokenizer disagrees with the AutoTokenizer-applied backend
+       mutations, so the bare load is incorrect for them. This is the
+       only branch that needs ``transformers``; we surface a clear
+       ``[transformers]`` extra hint if it's not installed.
+
+    Most models resolve by path 3 at the latest, so ``transformers`` is
+    only needed for the path 4 fallback.
     """
-    # Cheap probe: does this tokenizer already provide offsets?
-    try:
-        tokenizer("a", add_special_tokens=False, return_offsets_mapping=True)
+    from tokenizers import Tokenizer as RustTokenizer
+
+    # Path 1: already a tokenizers.Tokenizer.
+    if isinstance(tokenizer, RustTokenizer):
         return tokenizer
-    except (NotImplementedError, ValueError, TypeError):
-        pass
+
+    # Path 2: vanilla PreTrainedTokenizerFast exposes its underlying
+    # tokenizers.Tokenizer via ``backend_tokenizer``. Fastokens
+    # replaces that with a shim that fails the isinstance check below,
+    # so such tokenizers fall through to the load paths.
+    backend = getattr(tokenizer, "backend_tokenizer", None)
+    if isinstance(backend, RustTokenizer):
+        return backend
 
     name_or_path = getattr(tokenizer, "name_or_path", "")
     if not name_or_path:
         raise RuntimeError(
             "Cannot construct an offset-aware tokenizer: the supplied "
             "tokenizer has no ``name_or_path`` to fall back on. Pass a "
-            "tokenizer loaded via ``renderers.base.load_tokenizer``."
+            "tokenizer loaded via ``renderers.base.load_tokenizer`` or "
+            "a ``tokenizers.Tokenizer`` directly."
         )
 
     with _offset_tokenizers_lock:
@@ -1678,26 +1716,62 @@ def _get_offset_tokenizer(tokenizer):
         if cached is not None:
             return cached
 
-        kwargs: dict[str, Any] = {}
         revision = TRUSTED_REVISIONS.get(name_or_path)
+
+        # Path 3: bare ``tokenizers.Tokenizer.from_pretrained`` — works
+        # for almost all supported models, no ``transformers`` needed.
         if revision is not None:
-            kwargs = {"trust_remote_code": True, "revision": revision}
+            candidate = RustTokenizer.from_pretrained(name_or_path, revision=revision)
         else:
-            kwargs = {"trust_remote_code": False}
-        # Explicitly vanilla — we want HF's Rust tokenizer with offset
-        # tracking, not the fastokens shim. ``load_tokenizer`` would
-        # patch fastokens in by default; routing through
-        # ``_load_tokenizer_via_auto`` keeps the fastokens patch out
-        # of this code path while still applying the config-build
-        # fallback (RoPE-validation failures on nested
-        # ``rope_parameters``, etc.).
-        offset_tok = _load_tokenizer_via_auto(name_or_path, **kwargs)
-        if not getattr(offset_tok, "is_fast", False):
+            candidate = RustTokenizer.from_pretrained(name_or_path)
+
+        # Verify equivalence with the user's tokenizer on a probe that
+        # spans known boundary cases.
+        try:
+            user_ids = list(
+                tokenizer.encode(_OFFSET_PROBE_TEXT, add_special_tokens=False)
+            )
+            candidate_ids = list(
+                candidate.encode(_OFFSET_PROBE_TEXT, add_special_tokens=False).ids
+            )
+        except Exception:
+            user_ids = None
+            candidate_ids = None
+
+        if user_ids is not None and user_ids == candidate_ids:
+            _offset_tokenizers[name_or_path] = candidate
+            return candidate
+
+        # Path 4: bare load diverges from the user's tokenizer.
+        # ``AutoTokenizer`` mutates the backend at construction (e.g.
+        # substituting a ByteLevel pre_tokenizer); replicate by routing
+        # through it and pulling out the now-correct backend. Requires
+        # the optional ``transformers`` extra.
+        try:
+            from transformers import AutoTokenizer
+        except ImportError as exc:
+            raise ImportError(
+                f"Loading an offset-aware tokenizer for {name_or_path!r} via "
+                f"the bare ``tokenizers`` library produced a token stream "
+                f"that doesn't match the user's tokenizer (this happens for "
+                f"models whose ``AutoTokenizer`` mutates the backend at "
+                f"load, e.g. MiniMax). Install the optional ``transformers`` "
+                f"extra to enable the AutoTokenizer fallback: "
+                f"``pip install renderers[transformers]``."
+            ) from exc
+
+        kwargs: "dict[str, Any]" = (
+            {"trust_remote_code": True, "revision": revision}
+            if revision is not None
+            else {"trust_remote_code": False}
+        )
+        hf_tok = AutoTokenizer.from_pretrained(name_or_path, **kwargs)
+        offset_tok = getattr(hf_tok, "backend_tokenizer", None)
+        if not isinstance(offset_tok, RustTokenizer):
             raise RuntimeError(
-                f"Vanilla tokenizer for {name_or_path!r} is not a fast "
-                "tokenizer; offset_mapping is unavailable. Hand-coded "
-                "renderers require a fast tokenizer for body/scaffold "
-                "attribution."
+                f"AutoTokenizer.from_pretrained({name_or_path!r}) did not "
+                f"expose a ``tokenizers.Tokenizer`` backend; offset-aware "
+                f"encoding is unavailable for this model."
             )
         _offset_tokenizers[name_or_path] = offset_tok
         return offset_tok
@@ -1715,23 +1789,19 @@ def attribute_text_segments(
     (content, True)]`` for a user message. Concatenation is done before
     encoding to preserve BPE merges across the wrap/body boundary; the
     resulting tokens are then attributed back to their source segment
-    via the fast tokenizer's ``offset_mapping``.
+    via ``tokenizers.Encoding.offsets``.
 
     A token is attributed to the segment containing its first source
-    character (``offset_mapping[k][0]``). Tokens whose first character
-    falls exactly on a segment boundary are attributed to the segment
-    that *starts* at that offset (the "later" segment). Zero-length
-    tokens (rare; usually pre-tokenizer artefacts) are attributed to
-    the most recently entered segment.
-
-    Requires a HuggingFace fast tokenizer with offset tracking. The
-    ``fastokens`` patch ``load_tokenizer`` applies by default does
-    **not** track offsets — when that's the case we transparently load
-    a vanilla offset-capable tokenizer for the same model and cache it
-    (see :func:`_get_offset_tokenizer`). Hand-coded renderers are only
-    registered for model families that ship a fast tokenizer, so a
-    silent slow-tokenizer fallback isn't supported — BPE drift at the
-    wrap/body boundary would defeat the whole point.
+    character (``offsets[k][0]``). Tokens whose first character falls
+    exactly on a segment boundary go to the segment that *starts* at
+    that offset (the "later" segment). Zero-length tokens (rare;
+    pre-tokenizer artefacts) are attributed to the most recently
+    entered segment.
+
+    Uses ``tokenizers`` directly via :func:`_get_offset_tokenizer`;
+    ``transformers`` is only needed for that helper's fallback path.
+    Hand-coded renderers register only for model families that ship a
+    fast Rust ``tokenizer.json``, so the offset lookup should succeed.
 
     Empty input or empty joined text returns an empty list.
     """
@@ -1742,47 +1812,34 @@ def attribute_text_segments(
         return []
 
     offset_tokenizer = _get_offset_tokenizer(tokenizer)
-    encoding = offset_tokenizer(
-        full_text,
-        add_special_tokens=False,
-        return_offsets_mapping=True,
-    )
-    token_ids = list(encoding["input_ids"])
-    offsets = list(encoding["offset_mapping"])
+    encoding = offset_tokenizer.encode(full_text, add_special_tokens=False)
+    token_ids = list(encoding.ids)
+    offsets = list(encoding.offsets)
 
     # Build segment char-span lookup. Track the half-open span
     # [seg_start, seg_end) of each segment and its is_content bit.
-    spans: list[tuple[int, int, bool]] = []
+    spans: "list[tuple[int, int, bool]]" = []
     pos = 0
     for text, is_content in segments:
         spans.append((pos, pos + len(text), is_content))
         pos += len(text)
     total_len = pos
 
-    out: list[tuple[int, bool]] = []
+    out: "list[tuple[int, bool]]" = []
     last_is_content = spans[-1][2] if spans else False
     for tok_id, (start, _end) in zip(token_ids, offsets):
         if start >= total_len:
-            # Token's character offset is past every segment (shouldn't
-            # normally happen for add_special_tokens=False, but defensive
-            # against tokenizer-specific edge cases).
+            # Token's char offset is past every segment (shouldn't
+            # normally happen for add_special_tokens=False, but defensive).
             out.append((tok_id, last_is_content))
             continue
-        # Find the segment that contains `start`. Segments are
-        # contiguous and ordered, so a linear scan is fine — the inner
-        # loop runs at most len(segments) times per token and segments
+        # Find the segment containing `start`. Linear scan; len(segments)
         # is typically 2-3 in practice.
         is_content = last_is_content
         for seg_start, seg_end, seg_is_content in spans:
             if seg_start <= start < seg_end:
                 is_content = seg_is_content
                 break
-        else:
-            # start == total_len handled above; the remaining case is
-            # an empty segment in the middle. Empty segments emit no
-            # characters, so no token can land in them; fall through to
-            # the last non-empty segment's bit.
-            pass
         out.append((tok_id, is_content))
     return out
 
diff --git a/renderers/deepseek_v3.py b/renderers/deepseek_v3.py
index 7bec3de..77e5837 100644
--- a/renderers/deepseek_v3.py
+++ b/renderers/deepseek_v3.py
@@ -17,11 +17,11 @@
 from transformers.tokenization_utils import PreTrainedTokenizer
 
 from renderers.base import (
+    attribute_text_segments,
     Message,
     ParsedResponse,
     RenderedTokens,
     ToolSpec,
-    attribute_text_segments,
     extract_message_tool_names,
     reject_assistant_in_extension,
     trim_to_turn_close,
@@ -148,8 +148,25 @@ def emit_text(
         def emit_text_segments(
             segments: list[tuple[str, bool]], msg_idx: int, *, is_sampled: bool
         ) -> None:
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries (e.g., ``.\n\n``).
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 tokens.append(tok_id)
                 indices.append(msg_idx)
diff --git a/renderers/glm45.py b/renderers/glm45.py
index 7af9259..73ff601 100644
--- a/renderers/glm45.py
+++ b/renderers/glm45.py
@@ -16,11 +16,11 @@
 from transformers.tokenization_utils import PreTrainedTokenizer
 
 from renderers.base import (
+    attribute_text_segments,
     Message,
     ParsedResponse,
     RenderedTokens,
     ToolSpec,
-    attribute_text_segments,
     extract_message_tool_names,
     reject_assistant_in_extension,
     should_preserve_past_thinking,
@@ -146,15 +146,25 @@ def emit_text(
         def emit_text_segments(
             segments: list[tuple[str, bool]], msg_idx: int, *, is_sampled: bool
         ) -> None:
-            """Tokenize concatenated segments as one BPE pass; per-token
-            ``is_content`` follows each token's source segment.
-
-            Lets call sites express "this wrap + this body, joined the
-            same way as the chat template, but attributed separately"
-            without splitting the encode call (which could shift BPE
-            merges at the boundary)."""
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries (e.g., ``.\n\n``).
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 tokens.append(tok_id)
                 indices.append(msg_idx)
@@ -377,8 +387,25 @@ def emit_text_segments(
             *,
             is_sampled: bool = False,
         ) -> None:
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries.
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 ext.append(tok_id)
                 ext_indices.append(msg_idx)
diff --git a/renderers/glm5.py b/renderers/glm5.py
index 924d754..bd344e7 100644
--- a/renderers/glm5.py
+++ b/renderers/glm5.py
@@ -17,11 +17,11 @@
 from transformers.tokenization_utils import PreTrainedTokenizer
 
 from renderers.base import (
+    attribute_text_segments,
     Message,
     ParsedResponse,
     RenderedTokens,
     ToolSpec,
-    attribute_text_segments,
     extract_message_tool_names,
     reject_assistant_in_extension,
     should_preserve_past_thinking,
@@ -166,15 +166,25 @@ def emit_text(
         def emit_text_segments(
             segments: list[tuple[str, bool]], msg_idx: int, *, is_sampled: bool
         ) -> None:
-            """Tokenize concatenated segments as one BPE pass; per-token
-            ``is_content`` follows each token's source segment.
-
-            Lets call sites express "this wrap + this body, joined the
-            same way as the chat template, but attributed separately"
-            without splitting the encode call (which could shift BPE
-            merges at the boundary)."""
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries (e.g., ``.\n\n``).
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 tokens.append(tok_id)
                 indices.append(msg_idx)
@@ -397,8 +407,25 @@ def emit_text_segments(
             *,
             is_sampled: bool = False,
         ) -> None:
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries.
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 ext.append(tok_id)
                 ext_indices.append(msg_idx)
diff --git a/renderers/laguna_xs2.py b/renderers/laguna_xs2.py
index bd6b64f..583b7aa 100644
--- a/renderers/laguna_xs2.py
+++ b/renderers/laguna_xs2.py
@@ -30,12 +30,12 @@
 from transformers.tokenization_utils import PreTrainedTokenizer
 
 from renderers.base import (
+    attribute_text_segments,
     Content,
     Message,
     ParsedResponse,
     RenderedTokens,
     ToolSpec,
-    attribute_text_segments,
     extract_message_tool_names,
     reject_assistant_in_extension,
 )
@@ -169,8 +169,25 @@ def emit_text(
         def emit_text_segments(
             segments: list[tuple[str, bool]], msg_idx: int, *, is_sampled: bool
         ) -> None:
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries (e.g., ``.\n\n``).
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 tokens.append(tok_id)
                 indices.append(msg_idx)
@@ -382,8 +399,25 @@ def emit_text_segments(
             *,
             is_sampled: bool = False,
         ) -> None:
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries.
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 ext.append(tok_id)
                 ext_indices.append(msg_idx)
diff --git a/renderers/minimax_m2.py b/renderers/minimax_m2.py
index f990274..d690c70 100644
--- a/renderers/minimax_m2.py
+++ b/renderers/minimax_m2.py
@@ -17,11 +17,11 @@
 from transformers.tokenization_utils import PreTrainedTokenizer
 
 from renderers.base import (
+    attribute_text_segments,
     Message,
     ParsedResponse,
     RenderedTokens,
     ToolSpec,
-    attribute_text_segments,
     extract_message_tool_names,
     reject_assistant_in_extension,
     should_preserve_past_thinking,
@@ -133,8 +133,25 @@ def emit_text(
         def emit_text_segments(
             segments: list[tuple[str, bool]], msg_idx: int, *, is_sampled: bool
         ) -> None:
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries (e.g., ``.\n\n``).
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 tokens.append(tok_id)
                 indices.append(msg_idx)
@@ -152,23 +169,22 @@ def emit_token_overlap_body(
             """Tokenize ``full_text`` and mark tokens that overlap the body
             char span as ``is_content=True``.
 
-            Differs from :func:`attribute_text_segments` only in the
-            boundary-token rule: a token straddling scaffold→body gets
-            ``True`` if any of its bytes are body bytes (overlap rule),
-            rather than being attributed to whichever segment its first
-            char belongs to. The body's first byte is preserved even when
-            BPE merges it with the wrap's trailing byte (``>The`` →
-            single token).
+            Uses an "intersects body span" rule: a token straddling
+            scaffold→body gets ``True`` if any of its bytes are body
+            bytes, rather than being attributed to whichever segment its
+            first char belongs to. The body's first byte is preserved
+            even when BPE merges it with the wrap's trailing byte
+            (``>The`` → single token). The other renderers don't need
+            this because their scaffolds break at characters BPE
+            doesn't merge across (``\\n``, special tokens); the
+            ``<response>...`` template here glues scaffold and body
+            with no separator.
             """
             from renderers.base import _get_offset_tokenizer
 
             offset_tok = _get_offset_tokenizer(self._tokenizer)
-            encoding = offset_tok(
-                full_text, add_special_tokens=False, return_offsets_mapping=True
-            )
-            for tok_id, (start, end) in zip(
-                encoding["input_ids"], encoding["offset_mapping"]
-            ):
+            encoding = offset_tok.encode(full_text, add_special_tokens=False)
+            for tok_id, (start, end) in zip(encoding.ids, encoding.offsets):
                 overlaps = start < body_end and end > body_start
                 tokens.append(tok_id)
                 indices.append(msg_idx)
@@ -381,8 +397,25 @@ def emit_text_segments(
             *,
             is_sampled: bool = False,
         ) -> None:
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries.
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 ext.append(tok_id)
                 ext_indices.append(msg_idx)
@@ -627,15 +660,13 @@ def _render_tool(
 
         # ``<response>`` is plain text with no separator between the
         # closing ``>`` and ``content``'s first byte, so BPE can merge
-        # them into a single token (e.g., ``>The``). The shared
-        # ``attribute_text_segments`` helper picks the segment of a
-        # boundary-spanning token by its *first* char (here scaffold),
-        # which would drop the body's leading letter out of the body
-        # run. We instead use an "intersects body" rule: any token whose
-        # ``[start, end)`` char range overlaps the body span gets
+        # them into a single token (e.g., ``>The``). A "first char
+        # wins" rule would drop the body's leading letter out of the
+        # body run. We instead use an "intersects body" rule: any token
+        # whose ``[start, end)`` char range overlaps the body span gets
         # ``is_content=True``. A few scaffold bytes (the leading ``>``
-        # or trailing ``<``) bleed into the body run, but body bytes are
-        # recoverable as a substring of the decoded body span.
+        # or trailing ``<``) bleed into the body run, but body bytes
+        # are recoverable as a substring of the decoded body span.
         body_text = prefix + "<response>" + content + "</response>" + suffix
         body_start = len(prefix) + len("<response>")
         body_end = body_start + len(content)
diff --git a/renderers/nemotron3.py b/renderers/nemotron3.py
index e6398b5..6664a42 100644
--- a/renderers/nemotron3.py
+++ b/renderers/nemotron3.py
@@ -20,11 +20,11 @@
 from transformers.tokenization_utils import PreTrainedTokenizer
 
 from renderers.base import (
+    attribute_text_segments,
     Message,
     ParsedResponse,
     RenderedTokens,
     ToolSpec,
-    attribute_text_segments,
     extract_message_tool_names,
     reject_assistant_in_extension,
     should_preserve_past_thinking,
@@ -268,8 +268,25 @@ def emit_text(
         def emit_text_segments(
             segments: list[tuple[str, bool]], msg_idx: int, *, is_sampled: bool
         ) -> None:
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries (e.g., ``.\n\n``).
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 tokens.append(tok_id)
                 indices.append(msg_idx)
@@ -523,8 +540,25 @@ def emit_text_segments(
             *,
             is_sampled: bool = False,
         ) -> None:
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries.
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 ext.append(tok_id)
                 ext_indices.append(msg_idx)
diff --git a/renderers/qwen3.py b/renderers/qwen3.py
index f744b8c..358765a 100644
--- a/renderers/qwen3.py
+++ b/renderers/qwen3.py
@@ -14,11 +14,11 @@
 from transformers.tokenization_utils import PreTrainedTokenizer
 
 from renderers.base import (
+    attribute_text_segments,
     Message,
     ParsedResponse,
     RenderedTokens,
     ToolSpec,
-    attribute_text_segments,
     extract_message_tool_names,
     reject_assistant_in_extension,
     should_preserve_past_thinking,
@@ -126,15 +126,25 @@ def emit_text(
         def emit_text_segments(
             segments: list[tuple[str, bool]], msg_idx: int, *, is_sampled: bool
         ) -> None:
-            """Tokenize concatenated segments as one BPE pass; per-token
-            ``is_content`` follows each token's source segment.
-
-            Lets call sites express "this wrap + this body, joined the
-            same way as the chat template, but attributed separately"
-            without splitting the encode call (which could shift BPE
-            merges at the boundary)."""
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries (e.g., ``.\n\n``).
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 tokens.append(tok_id)
                 indices.append(msg_idx)
@@ -349,8 +359,25 @@ def emit_text_segments(
             *,
             is_sampled: bool = False,
         ) -> None:
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries.
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 ext.append(tok_id)
                 ext_indices.append(msg_idx)
diff --git a/renderers/qwen35.py b/renderers/qwen35.py
index cdb8ee1..5d95478 100644
--- a/renderers/qwen35.py
+++ b/renderers/qwen35.py
@@ -20,13 +20,13 @@
 from transformers.tokenization_utils import PreTrainedTokenizer
 
 from renderers.base import (
+    attribute_text_segments,
     Message,
     MultiModalData,
     ParsedResponse,
     PlaceholderRange,
     RenderedTokens,
     ToolSpec,
-    attribute_text_segments,
     extract_message_tool_names,
     reject_assistant_in_extension,
     should_preserve_past_thinking,
@@ -341,15 +341,25 @@ def emit_text(
         def emit_text_segments(
             segments: list[tuple[str, bool]], msg_idx: int, *, is_sampled: bool
         ) -> None:
-            """Tokenize concatenated segments as one BPE pass; per-token
-            ``is_content`` follows each token's source segment.
-
-            Lets call sites express "this wrap + this body, joined the
-            same way as the chat template, but attributed separately"
-            without splitting the encode call (which could shift BPE
-            merges at the boundary)."""
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries (e.g., ``.\n\n``).
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 tokens.append(tok_id)
                 indices.append(msg_idx)
@@ -706,8 +716,25 @@ def emit_text_segments(
             *,
             is_sampled: bool = False,
         ) -> None:
+            collapsed: list[tuple[str, bool]] = []
+            for text, label in segments:
+                if not text:
+                    continue
+                if collapsed and collapsed[-1][1] == label:
+                    collapsed[-1] = (collapsed[-1][0] + text, label)
+                else:
+                    collapsed.append((text, label))
+            if not collapsed:
+                return
+            if len(collapsed) == 1:
+                # Homogeneous — single joined encode preserves all BPE merges.
+                text, label = collapsed[0]
+                emit_text(text, msg_idx, is_sampled=is_sampled, is_content=label)
+                return
+            # Mixed labels remain — joined encode + offset attribution handles
+            # BPE merges across label-transition boundaries (e.g., ``.\n\n``).
             for tok_id, is_content in attribute_text_segments(
-                self._tokenizer, segments
+                self._tokenizer, collapsed
             ):
                 tokens.append(tok_id)
                 indices.append(msg_idx)
diff --git a/renderers/qwen3_vl.py b/renderers/qwen3_vl.py
index 9a4ffde..184d5fe 100644
--- a/renderers/qwen3_vl.py
+++ b/renderers/qwen3_vl.py
@@ -36,13 +36,13 @@
 from transformers.tokenization_utils import PreTrainedTokenizer
 
 from renderers.base import (
+    attribute_text_segments,
     Message,
     MultiModalData,
     ParsedResponse,
     PlaceholderRange,
     RenderedTokens,
     ToolSpec,
-    attribute_text_segments,
     extract_message_tool_names,
     reject_assistant_in_extension,
     trim_to_turn_close,
@@ -223,11 +223,10 @@ def text(self, text: str, *, is_sampled: bool, is_content: bool) -> None:
         if not text:
             return
         # Adjacent text under different msg_idx or is_sampled is rare in
-        # this template — but flush at those boundaries so attribution
-        # and the sampled signal stay accurate. is_content boundaries do
-        # NOT force a flush: they're carried through the joined BPE pass
-        # via :func:`attribute_text_segments`, preserving merges across
-        # the wrap/body boundary.
+        # this template — but flush at those boundaries so the sampled
+        # signal stays accurate. is_content boundaries do NOT force a
+        # flush: a mixed-is_content flush is encoded as one joined pass
+        # and attributed per token via offsets (see ``_flush``).
         if self._segments and (
             self._buf_idx != self.msg_idx or self._buf_sampled != is_sampled
         ):
@@ -274,13 +273,11 @@ def _flush(self) -> None:
             self.sampled.extend([self._buf_sampled] * len(ids))
             self.is_content.extend([first_ic] * len(ids))
             return
-        # Mixed body/scaffold flush — encode once and attribute back to
-        # each segment via the fast tokenizer's offset_mapping. Requires
-        # a tokenizer (not just the encode fn) to look up offsets.
-        assert self._tokenizer is not None, (
-            "_Emitter mixed-is_content flush requires a tokenizer; "
-            "pass one to the constructor."
-        )
+        # Mixed body/scaffold flush — joined encode + offset attribution
+        # preserves BPE merges across the label-transition boundary
+        # (e.g., ``"user\n"`` scaffold ↔ caller body, where a trailing
+        # char of the body could merge with the leading scaffold byte
+        # of the next segment).
         for tok_id, is_content in attribute_text_segments(self._tokenizer, segments):
             self.token_ids.append(tok_id)
             self.message_indices.append(self._buf_idx)