diff --git a/Cargo.lock b/Cargo.lock index 1e48ab7..95a7b90 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -253,6 +253,7 @@ dependencies = [ "glow", "lru", "rustc-hash", + "rustybuzz", "thiserror 2.0.18", "unicode-width", ] @@ -2267,6 +2268,24 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "rustybuzz" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd3c7c96f8a08ee34eff8857b11b49b07d71d1c3f4e88f8a88d4c9e9f90b1702" +dependencies = [ + "bitflags 2.11.0", + "bytemuck", + "core_maths", + "log", + "smallvec", + "ttf-parser", + "unicode-bidi-mirroring", + "unicode-ccc", + "unicode-properties", + "unicode-script", +] + [[package]] name = "ryu" version = "1.0.23" @@ -2836,12 +2855,36 @@ dependencies = [ "core_maths", ] +[[package]] +name = "unicode-bidi-mirroring" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfa6e8c60bb66d49db113e0125ee8711b7647b5579dc7f5f19c42357ed039fe" + +[[package]] +name = "unicode-ccc" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce61d488bcdc9bc8b5d1772c404828b17fc481c0a582b5581e95fb233aef503e" + [[package]] name = "unicode-ident" version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unicode-properties" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" + +[[package]] +name = "unicode-script" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "383ad40bb927465ec0ce7720e033cb4ca06912855fc35db31b5755d0de75b1ee" + [[package]] name = "unicode-segmentation" version = "1.13.2" diff 
--git a/beamterm-core/Cargo.toml b/beamterm-core/Cargo.toml index 031e4ea..b83591b 100644 --- a/beamterm-core/Cargo.toml +++ b/beamterm-core/Cargo.toml @@ -23,6 +23,9 @@ keywords.workspace = true [features] default = [] native-dynamic-atlas = ["dep:beamterm-rasterizer"] +# Ligature-aware text shaping (GSUB) via rustybuzz. Enables multi-cell ligature +# glyphs when the atlas is given font bytes. +ligatures = ["dep:rustybuzz"] [dependencies] bitflags.workspace = true @@ -35,6 +38,7 @@ unicode-width.workspace = true glow.workspace = true lru = "0.16" rustc-hash = "2.1.1" +rustybuzz = { version = "0.20", optional = true } [dev-dependencies] diff --git a/beamterm-core/src/gl/atlas.rs b/beamterm-core/src/gl/atlas.rs index 7446d3e..f2b6082 100644 --- a/beamterm-core/src/gl/atlas.rs +++ b/beamterm-core/src/gl/atlas.rs @@ -100,6 +100,47 @@ pub trait Atlas: sealed::Sealed { /// rendering to populate the texture. fn resolve_glyph_slot(&mut self, key: &str, style_bits: u16) -> Option; + /// Segments a horizontal text run into ligature-aware spans. + /// + /// Returns `None` when the atlas has no ligature shaper configured (the + /// caller should then render the run grapheme-by-grapheme as usual). When + /// `Some`, the returned segments cover the run left-to-right with no gaps; + /// segments with `cells >= 2` are ligatures. + /// + /// The default implementation returns `None`. + fn segment_run(&self, _text: &str) -> Option> { + None + } + + /// Resolves a multi-cell ligature glyph spanning `cells` cells. + /// + /// Used for ligatures of three or more cells; two-cell ligatures resolve via + /// [`resolve_glyph_slot`](Self::resolve_glyph_slot) (the wide path). Returns + /// `None` when the atlas does not support ligatures. + /// + /// The default implementation returns `None`. + fn resolve_ligature_slot( + &mut self, + _key: &str, + _style_bits: u16, + _cells: u8, + ) -> Option { + None + } + + /// Configures ligature shaping from raw sfnt (TrueType/OpenType) font bytes. 
+ /// + /// Enables [`segment_run`](Self::segment_run) when the font advertises + /// ligatures. The bytes must match the font the atlas rasterizes with. + /// + /// The default implementation is a no-op (atlases without ligature support). + /// + /// # Errors + /// Returns an error if the bytes cannot be parsed as a font face. + fn set_font_shaper_bytes(&mut self, _bytes: &[u8]) -> Result<(), Error> { + Ok(()) + } + /// Returns the bit position used for emoji detection in the fragment shader. /// /// The glyph ID encodes the base slot index (bits 0-12, masked by `0x1FFF`) @@ -257,6 +298,31 @@ impl FontAtlas { self.inner.resolve_glyph_slot(key, style_bits) } + /// Segments a text run into ligature-aware spans, or `None` if unsupported. + #[must_use] + pub fn segment_run(&self, text: &str) -> Option> { + self.inner.segment_run(text) + } + + /// Resolves a multi-cell ligature glyph spanning `cells` cells. + pub fn resolve_ligature_slot( + &mut self, + key: &str, + style_bits: u16, + cells: u8, + ) -> Option { + self.inner + .resolve_ligature_slot(key, style_bits, cells) + } + + /// Configures ligature shaping from raw sfnt font bytes. + /// + /// # Errors + /// Returns an error if the bytes cannot be parsed as a font face. + pub fn set_font_shaper_bytes(&mut self, bytes: &[u8]) -> Result<(), Error> { + self.inner.set_font_shaper_bytes(bytes) + } + /// Flushes pending glyph data to the GPU texture. /// /// # Errors @@ -316,6 +382,11 @@ pub enum GlyphSlot { Wide(SlotId), /// Emoji glyph slot (occupies two consecutive texture slots). Emoji(SlotId), + /// Ligature glyph spanning three or more cells (e.g. `===`, `<==>`). + /// + /// Occupies `cells` consecutive texture slots (`id`, `id + 1`, … , + /// `id + cells - 1`). Two-cell ligatures use [`Wide`](Self::Wide) instead. 
+ Ligature(SlotId, u8), } impl GlyphSlot { @@ -323,7 +394,10 @@ impl GlyphSlot { #[must_use] pub fn slot_id(&self) -> SlotId { match *self { - GlyphSlot::Normal(id) | GlyphSlot::Wide(id) | GlyphSlot::Emoji(id) => id, + GlyphSlot::Normal(id) + | GlyphSlot::Wide(id) + | GlyphSlot::Emoji(id) + | GlyphSlot::Ligature(id, _) => id, } } @@ -335,6 +409,7 @@ impl GlyphSlot { Normal(id) => Normal(id | style_bits), Wide(id) => Wide(id | style_bits), Emoji(id) => Emoji(id | style_bits), + Ligature(id, cells) => Ligature(id | style_bits, cells), } } @@ -343,6 +418,33 @@ impl GlyphSlot { pub fn is_double_width(&self) -> bool { matches!(self, GlyphSlot::Wide(_) | GlyphSlot::Emoji(_)) } + + /// Returns the number of terminal cells this glyph spans. + /// + /// `Normal` spans one cell, `Wide`/`Emoji` span two, and `Ligature` spans + /// its stored cell count. + #[must_use] + pub fn cell_span(&self) -> u8 { + match *self { + GlyphSlot::Normal(_) => 1, + GlyphSlot::Wide(_) | GlyphSlot::Emoji(_) => 2, + GlyphSlot::Ligature(_, cells) => cells, + } + } +} + +/// A ligature-aware segment of a text run produced by [`Atlas::segment_run`]. +/// +/// `start`/`len` are byte offsets into the run. A segment spanning `cells >= 2` +/// cells is a ligature that should be rendered as one multi-cell glyph. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct ShapedSegment { + /// Byte offset into the run where this segment starts. + pub start: usize, + /// Byte length of this segment. + pub len: usize, + /// Number of terminal cells this segment spans. + pub cells: u8, } /// Tracks glyphs that were requested but not found in the font atlas. 
diff --git a/beamterm-core/src/gl/dynamic_atlas.rs b/beamterm-core/src/gl/dynamic_atlas.rs index 8c2872d..22c0014 100644 --- a/beamterm-core/src/gl/dynamic_atlas.rs +++ b/beamterm-core/src/gl/dynamic_atlas.rs @@ -5,7 +5,9 @@ use compact_str::{CompactString, ToCompactString}; use super::{ atlas::{self, Atlas, GlyphSlot, GlyphTracker}, - glyph_cache::{ASCII_SLOTS, DYNAMIC_EMOJI_FLAG, GlyphCache, NORMAL_CAPACITY, WIDE_CAPACITY}, + glyph_cache::{ + ASCII_SLOTS, DYNAMIC_EMOJI_FLAG, GlyphCache, NORMAL_CAPACITY, TOTAL_SLOTS, WIDE_CAPACITY, + }, glyph_rasterizer::GlyphRasterizer, texture::{RasterizedGlyph, Texture}, }; @@ -13,10 +15,12 @@ use crate::Error; /// Glyphs per layer (1x32 vertical grid) const GLYPHS_PER_LAYER: usize = 32; -/// Total number of glyph slots (2048 normal + 4096 wide = 2048 double-width glyphs) -const TOTAL_SLOTS: usize = 6144; -/// Number of texture layers in the atlas -const NUM_LAYERS: i32 = (TOTAL_SLOTS / GLYPHS_PER_LAYER) as i32; // 192 layers +/// Number of texture layers in the atlas. +/// +/// Sized to cover every slot region (normal + wide + ligature pools), rounded +/// up to a whole number of layers. [`TOTAL_SLOTS`] is derived from the cache +/// region layout, so the texture grows automatically if the pools change. +const NUM_LAYERS: i32 = (TOTAL_SLOTS as usize).div_ceil(GLYPHS_PER_LAYER) as i32; /// A dynamic texture atlas that rasterizes font glyphs on demand. /// @@ -42,6 +46,10 @@ pub struct DynamicFontAtlas { debug_space_pattern: Option, base_font_size: f32, pixel_ratio: f32, + /// Optional ligature shaper; when present (and the font ligates) text runs + /// are segmented into multi-cell ligature glyphs. + #[cfg(feature = "ligatures")] + shaper: Option, } impl DynamicFontAtlas { @@ -98,12 +106,24 @@ impl DynamicFontAtlas { debug_space_pattern, base_font_size, pixel_ratio, + #[cfg(feature = "ligatures")] + shaper: None, }; atlas.upload_ascii_glyphs(gl)?; Ok(atlas) } + /// Sets (or clears) the ligature shaper used to segment text runs. 
+ /// + /// When a shaper is configured and its font advertises ligatures, + /// [`segment_run`](Atlas::segment_run) groups adjacent characters into + /// multi-cell ligature glyphs. + #[cfg(feature = "ligatures")] + pub fn set_shaper(&mut self, shaper: Option) { + self.shaper = shaper; + } + fn upload_ascii_glyphs(&mut self, gl: &glow::Context) -> Result<(), Error> { let all_pending: Vec = (0x20u8..=0x7Eu8) .map(|b| PendingGlyph { @@ -161,20 +181,34 @@ impl DynamicFontAtlas { std::borrow::Cow::Borrowed(glyph_data) }; - if pending_glyph.slot.is_double_width() { - let (left, right) = split_double_width_glyph(&glyph_data, cell_w, cell_h); - let slot_id = pending_glyph.slot.slot_id() & DYNAMIC_EMOJI_FLAG.not(); - self.texture - .upload_glyph(gl, slot_id, padded_cell_size, &left)?; - self.texture - .upload_glyph(gl, slot_id + 1, padded_cell_size, &right)?; - } else { - self.texture.upload_glyph( - gl, - pending_glyph.slot.slot_id(), - padded_cell_size, - &glyph_data, - )?; + match pending_glyph.slot { + GlyphSlot::Wide(_) | GlyphSlot::Emoji(_) => { + let (left, right) = split_double_width_glyph(&glyph_data, cell_w, cell_h); + let slot_id = pending_glyph.slot.slot_id() & DYNAMIC_EMOJI_FLAG.not(); + self.texture + .upload_glyph(gl, slot_id, padded_cell_size, &left)?; + self.texture + .upload_glyph(gl, slot_id + 1, padded_cell_size, &right)?; + }, + GlyphSlot::Ligature(slot_id, cells) => { + let pieces = split_glyph_n(&glyph_data, cell_w, cell_h, cells); + for (i, piece) in pieces.iter().enumerate() { + self.texture.upload_glyph( + gl, + slot_id + i as u16, + padded_cell_size, + piece, + )?; + } + }, + GlyphSlot::Normal(_) => { + self.texture.upload_glyph( + gl, + pending_glyph.slot.slot_id(), + padded_cell_size, + &glyph_data, + )?; + }, } } @@ -303,6 +337,65 @@ impl Atlas for DynamicFontAtlas { Some(slot.with_styling(styling)) } + #[cfg(feature = "ligatures")] + fn segment_run(&self, text: &str) -> Option> { + let shaper = self.shaper.as_ref()?; + if 
!shaper.has_ligatures() { + return None; + } + // only worth segmenting if at least one ligature forms + let segments = shaper.segment(text); + if !segments.iter().any(|s| s.cells >= 2) { + return None; + } + Some( + segments + .into_iter() + .map(|s| atlas::ShapedSegment { start: s.start, len: s.len, cells: s.cells }) + .collect(), + ) + } + + fn resolve_ligature_slot( + &mut self, + key: &str, + style_bits: u16, + cells: u8, + ) -> Option { + // two-cell ligatures resolve via the regular wide path + if cells < 3 { + return self.resolve_glyph_slot(key, style_bits); + } + + let font_variant = FontStyle::from_u16(style_bits & FontStyle::MASK).ok()?; + let styling = style_bits & (Glyph::STRIKETHROUGH_FLAG | Glyph::UNDERLINE_FLAG); + + if let Some(slot) = self.cache.get_ligature(key, font_variant, cells) { + return Some(slot.with_styling(styling)); + } + + let (slot, _evicted) = self + .cache + .insert_ligature(key, font_variant, cells)?; + self.symbol_lookup + .insert(slot.slot_id(), CompactString::new(key)); + self.glyphs_pending_upload.add(PendingGlyph { + slot, + key: CompactString::new(key), + style: font_variant, + }); + + Some(slot.with_styling(styling)) + } + + #[cfg(feature = "ligatures")] + fn set_font_shaper_bytes(&mut self, bytes: &[u8]) -> Result<(), Error> { + let shaper = + super::shaper::Shaper::from_bytes(bytes).map_err(|e| Error::Resource(e.to_string()))?; + self.shaper = Some(shaper); + Ok(()) + } + fn emoji_bit(&self) -> u32 { 15 } @@ -379,7 +472,11 @@ impl PendingUploads { fn add(&mut self, glyph: PendingGlyph) { match glyph.slot { GlyphSlot::Normal(_) => self.normal.push(glyph), - GlyphSlot::Wide(_) | GlyphSlot::Emoji(_) => self.wide.push(glyph), + // multi-cell glyphs (wide CJK/emoji and N-cell ligatures) share the + // wide upload queue; each is split into its cell pieces on upload + GlyphSlot::Wide(_) | GlyphSlot::Emoji(_) | GlyphSlot::Ligature(..) 
=> { + self.wide.push(glyph); + }, } } @@ -529,6 +626,95 @@ fn split_double_width_glyph( ) } +/// Splits a glyph spanning `cells` cells into `cells` consecutive `cell_w` × `cell_h` +/// pieces. +/// +/// Generalizes [`split_double_width_glyph`] to N cells: the source content +/// (`glyph.width - 2 * padding`) is divided into `cells` equal parts. The first +/// piece keeps the source's left padding, the last keeps the right padding, and +/// every split edge between cells is zero-padded — matching the two-cell scheme +/// so the shader's per-cell sampling reproduces a seamless glyph. +fn split_glyph_n( + glyph: &RasterizedGlyph, + cell_w: u32, + cell_h: u32, + cells: u8, +) -> Vec { + let cells = cells as usize; + let bytes_per_pixel = 4usize; + let padding = FontAtlasData::PADDING as usize; + let dst_content_w = (cell_w as usize).saturating_sub(2 * padding); + + let src_row_stride = glyph.width as usize * bytes_per_pixel; + let dst_row_stride = cell_w as usize * bytes_per_pixel; + let src_content_start = padding; + let src_content_width = (glyph.width as usize).saturating_sub(2 * padding); + + // partition the source content into `cells` parts (remainder spread over the + // first parts so the totals match the source width exactly) + let base = src_content_width / cells; + let extra = src_content_width % cells; + let part_width = |i: usize| base + usize::from(i < extra); + let part_offset = |i: usize| base * i + extra.min(i); + + let copy_px = |dst: &mut [u8], dst_idx: usize, src_idx: usize| { + if src_idx + 4 <= glyph.pixels.len() && dst_idx + 4 <= dst.len() { + dst[dst_idx..dst_idx + 4].copy_from_slice(&glyph.pixels[src_idx..src_idx + 4]); + } + }; + + let mut pieces = Vec::with_capacity(cells); + for i in 0..cells { + let mut dst_pixels = vec![0u8; (cell_w * cell_h) as usize * bytes_per_pixel]; + let content_width = part_width(i).min(dst_content_w); + let content_off = part_offset(i); + + for row in 0..cell_h.min(glyph.height) as usize { + let src_row = row * 
src_row_stride; + let dst_row = row * dst_row_stride; + + // leftmost piece preserves the source's left padding + if i == 0 { + for col in 0..padding { + copy_px( + &mut dst_pixels, + dst_row + col * bytes_per_pixel, + src_row + col * bytes_per_pixel, + ); + } + } + + // content for this cell, placed after the destination's left padding + for col in 0..content_width { + let src_col = src_content_start + content_off + col; + let dst_col = padding + col; + copy_px( + &mut dst_pixels, + dst_row + dst_col * bytes_per_pixel, + src_row + src_col * bytes_per_pixel, + ); + } + + // rightmost piece preserves the source's right padding + if i == cells - 1 { + for col in 0..padding { + let src_col = glyph.width as usize - padding + col; + let dst_col = cell_w as usize - padding + col; + copy_px( + &mut dst_pixels, + dst_row + dst_col * bytes_per_pixel, + src_row + src_col * bytes_per_pixel, + ); + } + } + } + + pieces.push(RasterizedGlyph::new(dst_pixels, cell_w, cell_h)); + } + + pieces +} + #[cfg(test)] mod tests { use super::*; diff --git a/beamterm-core/src/gl/glyph_cache.rs b/beamterm-core/src/gl/glyph_cache.rs index 4bc43b4..44db6c3 100644 --- a/beamterm-core/src/gl/glyph_cache.rs +++ b/beamterm-core/src/gl/glyph_cache.rs @@ -10,7 +10,7 @@ use lru::LruCache; use unicode_width::UnicodeWidthStr; use crate::{ - gl::atlas::{GlyphSlot, SlotId}, + gl::atlas::{GLYPH_SLOT_MASK, GlyphSlot, SlotId}, is_emoji, }; @@ -23,6 +23,40 @@ pub(crate) const NORMAL_CAPACITY: usize = 2048; pub(crate) const WIDE_CAPACITY: usize = 2048; const WIDE_BASE: SlotId = NORMAL_CAPACITY as SlotId; +/// Smallest ligature width handled by the dedicated ligature pools. +/// +/// Two-cell ligatures reuse the [wide region](GlyphCache::wide) (same stride), +/// so the pools only cover widths 3..=[`MAX_LIGATURE_CELLS`]. +pub(crate) const MIN_LIGATURE_CELLS: u8 = 3; +/// Largest ligature width that can be stored as a single multi-cell glyph. 
+pub const MAX_LIGATURE_CELLS: u8 = 8; +/// Number of size-classed ligature pools (one per width 3..=8). +const NUM_LIGATURE_POOLS: usize = (MAX_LIGATURE_CELLS - MIN_LIGATURE_CELLS + 1) as usize; + +/// First slot of the ligature region (immediately after the wide region). +const LIGATURE_BASE: SlotId = (NORMAL_CAPACITY + WIDE_CAPACITY * 2) as SlotId; +/// Glyph capacity per ligature pool, indexed by `width - MIN_LIGATURE_CELLS`. +/// +/// Width-3 ligatures are the most common (`===`, `!==`, `>>=`, `...`); wider +/// ones are rarer, so capacity tapers off. Total slots consumed: +/// `sum(cap[w] * width[w])` must stay within the 13-bit (8192-slot) address space. +const LIGATURE_POOL_GLYPHS: [SlotId; NUM_LIGATURE_POOLS] = [96, 64, 48, 40, 32, 24]; + +/// One-past-the-last texture slot used by any region. +/// +/// The dynamic atlas texture must allocate enough layers to cover this many +/// slots. Derived from the region layout so the texture and the cache cannot +/// drift apart. +pub(crate) const TOTAL_SLOTS: SlotId = { + let mut total = LIGATURE_BASE; + let mut i = 0; + while i < NUM_LIGATURE_POOLS { + total += LIGATURE_POOL_GLYPHS[i] * (i as SlotId + MIN_LIGATURE_CELLS as SlotId); + i += 1; + } + total +}; + /// Emoji flag for the dynamic atlas (bit 15). /// /// Unlike the static atlas which uses `Glyph::EMOJI_FLAG` (bit 12) as part of @@ -42,19 +76,100 @@ pub(crate) struct GlyphCache { normal: LruCache, /// LRU for double-width glyphs wide: LruCache, + /// Size-classed LRU pools for ligatures spanning 3..=8 cells. + ligature: [LruCache; NUM_LIGATURE_POOLS], /// Next slot in normal region (0-2047) normal_next: SlotId, /// Next index in wide region (starts at 2048) wide_next: SlotId, + /// Next slot in each ligature pool. + ligature_next: [SlotId; NUM_LIGATURE_POOLS], + /// First slot of each ligature pool. + ligature_base: [SlotId; NUM_LIGATURE_POOLS], + /// One-past-the-last slot of each ligature pool. 
+ ligature_end: [SlotId; NUM_LIGATURE_POOLS], } impl GlyphCache { pub(crate) fn new() -> Self { + let mut ligature_base = [0; NUM_LIGATURE_POOLS]; + let mut ligature_end = [0; NUM_LIGATURE_POOLS]; + let mut base = LIGATURE_BASE; + for pool in 0..NUM_LIGATURE_POOLS { + let width = pool as SlotId + MIN_LIGATURE_CELLS as SlotId; + ligature_base[pool] = base; + base += LIGATURE_POOL_GLYPHS[pool] * width; + ligature_end[pool] = base; + } + debug_assert!( + u32::from(base) <= GLYPH_SLOT_MASK + 1, + "ligature region overflows slot address space" + ); + Self { normal: LruCache::unbounded(), wide: LruCache::unbounded(), + ligature: std::array::from_fn(|_| LruCache::unbounded()), normal_next: ASCII_SLOTS, wide_next: WIDE_BASE, + ligature_next: ligature_base, + ligature_base, + ligature_end, + } + } + + /// Gets the slot for a ligature glyph of the given cell width, marking it + /// recently used. `cells` must be in `MIN_LIGATURE_CELLS..=MAX_LIGATURE_CELLS`. + pub(crate) fn get_ligature( + &mut self, + key: &str, + style: FontStyle, + cells: u8, + ) -> Option { + let pool = Self::ligature_pool(cells)?; + let cache_key = (CompactString::new(key), style); + self.ligature[pool].get(&cache_key).copied() + } + + /// Inserts a ligature glyph spanning `cells` cells, returning its slot and + /// the evicted key (if any). Allocates `cells` consecutive slots. 
+ pub(crate) fn insert_ligature( + &mut self, + key: &str, + style: FontStyle, + cells: u8, + ) -> Option<(GlyphSlot, Option)> { + let pool = Self::ligature_pool(cells)?; + let cache_key = (CompactString::new(key), style); + + if let Some(&slot) = self.ligature[pool].get(&cache_key) { + return Some((slot, None)); + } + + let width = cells as SlotId; + let (idx, evicted) = if self.ligature_next[pool] + width <= self.ligature_end[pool] { + let idx = self.ligature_next[pool]; + self.ligature_next[pool] += width; + (idx, None) + } else { + let (evicted_key, evicted_slot) = self.ligature[pool] + .pop_lru() + .expect("ligature pool should not be empty when full"); + (evicted_slot.slot_id(), Some(evicted_key)) + }; + + let slot = GlyphSlot::Ligature(idx, cells); + self.ligature[pool].put(cache_key, slot); + Some((slot, evicted)) + } + + /// Returns the pool index for a ligature of the given width, or `None` if + /// the width is outside the supported ligature range. + fn ligature_pool(cells: u8) -> Option { + if (MIN_LIGATURE_CELLS..=MAX_LIGATURE_CELLS).contains(&cells) { + Some((cells - MIN_LIGATURE_CELLS) as usize) + } else { + None } } @@ -167,16 +282,26 @@ impl GlyphCache { /// Returns total number of cached glyphs. pub(crate) fn len(&self) -> usize { - self.normal.len() + self.wide.len() + self.normal.len() + + self.wide.len() + + self + .ligature + .iter() + .map(LruCache::len) + .sum::() } /// Clears all cached glyphs. 
pub(crate) fn clear(&mut self) { self.normal.clear(); self.wide.clear(); + for pool in &mut self.ligature { + pool.clear(); + } self.normal_next = ASCII_SLOTS; self.wide_next = WIDE_BASE; + self.ligature_next = self.ligature_base; } } @@ -329,6 +454,55 @@ mod tests { ); } + #[test] + fn test_ligature_pools_are_width_classed() { + let mut cache = GlyphCache::new(); + + let (s3, _) = cache.insert_ligature("===", S, 3).unwrap(); + let (s3b, _) = cache.insert_ligature("!==", S, 3).unwrap(); + let (s4, _) = cache.insert_ligature("<==>", S, 4).unwrap(); + + // width-3 pool: consecutive entries are `width` slots apart + assert!(matches!(s3, GlyphSlot::Ligature(_, 3))); + assert_eq!(s3b.slot_id(), s3.slot_id() + 3); + // width-4 lives in a different pool, after the width-3 region + assert!(matches!(s4, GlyphSlot::Ligature(_, 4))); + assert!(s4.slot_id() >= LIGATURE_BASE); + + // lookups round-trip + assert_eq!(cache.get_ligature("===", S, 3), Some(s3)); + assert_eq!(cache.get_ligature("<==>", S, 4), Some(s4)); + // wrong width class doesn't find it + assert_eq!(cache.get_ligature("===", S, 4), None); + } + + #[test] + fn test_ligature_width_out_of_range() { + let mut cache = GlyphCache::new(); + // width 2 is handled by the wide region, not the ligature pools + assert!(cache.insert_ligature("=>", S, 2).is_none()); + // width 9 exceeds MAX_LIGATURE_CELLS + assert!(cache.insert_ligature("=========", S, 9).is_none()); + } + + #[test] + fn test_ligature_eviction_reuses_slots() { + let mut cache = GlyphCache::new(); + let cap = LIGATURE_POOL_GLYPHS[0] as usize; // width-3 pool capacity + + // fill the width-3 pool exactly + for i in 0..cap { + let (_, evicted) = cache + .insert_ligature(&format!("l3-{i}"), S, 3) + .unwrap(); + assert!(evicted.is_none(), "no eviction while filling"); + } + // next insert must evict the LRU entry and reuse its slot + let (slot, evicted) = cache.insert_ligature("overflow", S, 3).unwrap(); + assert_eq!(evicted, Some((CompactString::new("l3-0"), 
S))); + assert!(matches!(slot, GlyphSlot::Ligature(_, 3))); + } + #[test] fn test_reinsert_existing() { let mut cache = GlyphCache::new(); diff --git a/beamterm-core/src/gl/mod.rs b/beamterm-core/src/gl/mod.rs index 05003c8..e723f1c 100644 --- a/beamterm-core/src/gl/mod.rs +++ b/beamterm-core/src/gl/mod.rs @@ -11,6 +11,8 @@ mod native_dynamic_atlas; mod program; pub(crate) mod renderer; pub(crate) mod selection; +#[cfg(feature = "ligatures")] +pub(crate) mod shaper; pub(crate) mod static_atlas; pub(crate) mod terminal_grid; pub(crate) mod texture; @@ -18,7 +20,7 @@ mod ubo; // Primary API re-exports // Re-exports for sibling crates (beamterm-renderer) -pub use atlas::{Atlas, FontAtlas, GlyphSlot, GlyphTracker, sealed}; +pub use atlas::{Atlas, FontAtlas, GlyphSlot, GlyphTracker, ShapedSegment, sealed}; // Crate-internal re-exports use buffer::*; pub use cell_query::{CellIterator, CellQuery, SelectionMode, select}; @@ -28,6 +30,7 @@ pub use context::GlState; /// [`NativeDynamicAtlas`] type alias instead. #[doc(hidden)] pub use dynamic_atlas::DynamicFontAtlas; +pub use glyph_cache::MAX_LIGATURE_CELLS; /// Internal trait — not covered by semver guarantees. /// Use pre-built implementations ([`NativeGlyphRasterizer`] or `CanvasRasterizer`) instead. #[doc(hidden)] @@ -37,6 +40,8 @@ pub use native_dynamic_atlas::{NativeDynamicAtlas, NativeGlyphRasterizer}; pub(crate) use program::*; pub use renderer::{Drawable, RenderContext}; pub use selection::SelectionTracker; +#[cfg(feature = "ligatures")] +pub use shaper::{Shaper, ShaperError}; pub use static_atlas::StaticFontAtlas; pub use terminal_grid::{CellData, CellDynamic, TerminalGrid}; /// internal type; not covered by semver guarantees. diff --git a/beamterm-core/src/gl/shaper.rs b/beamterm-core/src/gl/shaper.rs new file mode 100644 index 0000000..77fc134 --- /dev/null +++ b/beamterm-core/src/gl/shaper.rs @@ -0,0 +1,379 @@ +//! Ligature-aware text shaping via [`rustybuzz`] (a pure-Rust HarfBuzz port). +//! +//! 
This module is used purely to *detect* ligature clusters so the renderer can +//! treat a multi-character ligature (e.g. `=>`, `===`, `<==>`) as a single glyph +//! spanning multiple terminal cells. The actual pixel rasterization is still +//! performed by the platform rasterizer (the browser canvas in the WASM path), +//! which re-shapes the same byte sequence using the same font and therefore +//! produces the matching ligature glyph. +//! +//! Programming ligatures in fonts such as Fira Code, JetBrains Mono, Cascadia +//! Code and Monaspace Neon are implemented mostly through the OpenType `calt` +//! (contextual alternates) feature rather than plain `liga`, so a static table +//! read is insufficient — full shaping is required to find them. + +use std::{cell::RefCell, num::NonZeroUsize}; + +use lru::LruCache; +use rustybuzz::{Face, Feature, UnicodeBuffer, ttf_parser::Tag}; + +/// Maximum number of cells a single ligature may span. +/// +/// Runs that the font would ligate into a wider cluster are left un-ligated and +/// rendered per cell. This bounds the texture-slot span allocated per glyph. +pub const MAX_LIGATURE_CELLS: u8 = 8; + +const LIGA: Tag = Tag::from_bytes(b"liga"); +const CALT: Tag = Tag::from_bytes(b"calt"); + +/// A contiguous shaped segment of a text run. +/// +/// `start`/`len` are byte offsets into the run that was passed to +/// [`Shaper::segment`]. A segment with `cells > 1` and `ligated == true` +/// represents a ligature that should be rasterized as one multi-cell glyph. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Segment { + /// Byte offset into the shaped run where this segment starts. + pub start: usize, + /// Byte length of this segment. + pub len: usize, + /// Number of source cells (= source characters) this segment covers. + pub cells: u8, + /// True when several source characters collapse into a single ligature glyph. + pub ligated: bool, +} + +/// Errors produced while constructing a [`Shaper`]. 
+#[derive(Debug, thiserror::Error)] +pub enum ShaperError { + /// The supplied bytes are a WOFF/WOFF2 container and must be decompressed + /// to raw TrueType/OpenType (sfnt) before being passed to the shaper. + #[error("compressed font (WOFF/WOFF2) is not supported; decompress to sfnt first")] + CompressedFont, + /// The supplied bytes could not be parsed as a font face. + #[error("failed to parse font face from the supplied bytes")] + ParseFailed, +} + +/// Upper bound on distinct text runs whose segmentation is memoized. +/// +/// Generously covers a screenful of distinct runs (rows × per-row style spans) +/// plus churn from a moving cursor line; entries are tiny (`Vec`). +const SEGMENT_CACHE_CAP: usize = 1024; + +/// Detects ligature clusters for a single font using rustybuzz. +/// +/// Owns the raw font bytes; the borrowing [`Face`] is constructed transiently +/// for each *uncached* shaping call. Results are memoized per run text (see +/// [`Shaper::segment`]) because the renderer re-shapes the whole screen every +/// frame and the vast majority of runs are unchanged frame-to-frame. +pub struct Shaper { + font_data: Box<[u8]>, + face_index: u32, + has_ligatures: bool, + /// run text → segmentation. Keyed on text alone: segmentation depends only + /// on the characters and the font, and a font change builds a new `Shaper` + /// (hence a fresh cache), so no explicit invalidation is needed. + cache: RefCell>>, +} + +impl Shaper { + /// Builds a shaper from raw sfnt (TrueType/OpenType) font bytes. + /// + /// # Errors + /// Returns [`ShaperError::CompressedFont`] for WOFF/WOFF2 input and + /// [`ShaperError::ParseFailed`] if the bytes are not a valid font face. 
+ pub fn from_bytes(data: &[u8]) -> Result { + if data.len() >= 4 && (data[0..4] == *b"wOFF" || data[0..4] == *b"wOF2") { + return Err(ShaperError::CompressedFont); + } + + let font_data: Box<[u8]> = Box::from(data); + let has_ligatures = { + let face = Face::from_slice(&font_data, 0).ok_or(ShaperError::ParseFailed)?; + face_has_ligature_features(&face) + }; + + let cache = RefCell::new(LruCache::new( + NonZeroUsize::new(SEGMENT_CACHE_CAP).expect("cache cap is non-zero"), + )); + + Ok(Self { font_data, face_index: 0, has_ligatures, cache }) + } + + /// Returns true if the font advertises `liga` or `calt` substitutions. + /// + /// When false, callers can skip shaping entirely since no ligatures form. + #[must_use] + pub fn has_ligatures(&self) -> bool { + self.has_ligatures + } + + /// Segments a text run into ligated and non-ligated spans. + /// + /// The returned segments cover `text` left-to-right with no gaps. Single-cell + /// segments (`cells == 1`) should be rendered glyph-by-glyph as before; multi- + /// cell ligated segments should be rasterized as one wide glyph. + /// + /// Detection works for both ligature implementations used by programming + /// fonts: classic GSUB ligature substitution (which reduces the glyph count, + /// merging clusters) and the `calt` "spacer" approach used by Fira Code / + /// JetBrains Mono / Cascadia (which keeps the glyph count equal to the + /// character count but swaps each glyph for a ligature piece). A ligature is + /// a maximal run of two or more consecutive characters whose glyphs were + /// altered from their nominal `cmap` mapping (or merged). + /// + /// Ligatures wider than [`MAX_LIGATURE_CELLS`] are decomposed into single-cell + /// segments. + /// + /// Results are memoized per run text. Building a [`Face`] and running the + /// shaper for every run on every frame dominates render time on a static + /// screen; the cache turns repeated runs into an `O(len)` map lookup. 
+    #[must_use]
+    pub fn segment(&self, text: &str) -> Vec<Segment> {
+        if text.is_empty() {
+            return Vec::new();
+        }
+
+        // Cache hit: hand back a copy of the memoized segmentation.
+        if let Some(cached) = self.cache.borrow_mut().get(text) {
+            return cached.clone();
+        }
+
+        // Cache miss: shape once and remember the result under the run text.
+        let segments = self.segment_uncached(text);
+        self.cache
+            .borrow_mut()
+            .put(text.to_string(), segments.clone());
+        segments
+    }
+
+    /// Performs the actual rustybuzz shaping for a run (the cache miss path).
+    ///
+    /// Falls back to one single-cell segment per character when the stored
+    /// font bytes do not parse as a face, or when the shaped glyph clusters
+    /// are not in non-decreasing order.
+    fn segment_uncached(&self, text: &str) -> Vec<Segment> {
+        let Some(face) = Face::from_slice(&self.font_data, self.face_index) else {
+            return per_char_segments(text);
+        };
+
+        // (byte offset, char) pairs, ascending by byte offset.
+        let chars: Vec<(usize, char)> = text.char_indices().collect();
+        let n = chars.len();
+
+        let mut buffer = UnicodeBuffer::new();
+        buffer.push_str(text);
+        buffer.guess_segment_properties();
+
+        let features = [Feature::new(LIGA, 1, ..), Feature::new(CALT, 1, ..)];
+        let glyphs = rustybuzz::shape(&face, &features, buffer);
+        let infos = glyphs.glyph_infos();
+
+        // Non-LTR / reordered runs break the monotonic-cluster assumption below;
+        // programming-ligature runs are always LTR, but guard anyway.
+        let monotonic = infos
+            .windows(2)
+            .all(|w| w[0].cluster <= w[1].cluster);
+        if !monotonic {
+            return per_char_segments(text);
+        }
+
+        // Mark which source characters were altered or merged by shaping.
+        let mut altered = vec![false; n];
+        for (i, info) in infos.iter().enumerate() {
+            // A glyph covers the bytes from its own cluster up to the next
+            // glyph's cluster (or the end of the text for the last glyph).
+            // Cluster values are treated as byte offsets into `text`.
+            let start_byte = info.cluster as usize;
+            let end_byte = infos
+                .get(i + 1)
+                .map_or(text.len(), |g| g.cluster as usize);
+
+            // `chars` is sorted by byte offset with unique keys, so a binary
+            // search finds the character starting at `start_byte` without a
+            // linear scan per glyph.
+            let Ok(ci) = chars.binary_search_by_key(&start_byte, |&(b, _)| b) else {
+                continue;
+            };
+            let covered = chars[ci..]
+                .iter()
+                .take_while(|&&(b, _)| b < end_byte)
+                .count()
+                .max(1);
+
+            if covered > 1 {
+                // classic ligature merge: every covered character participates
+                for slot in altered.iter_mut().skip(ci).take(covered) {
+                    *slot = true;
+                }
+            } else {
+                // 1:1 glyph — altered if it differs from the nominal cmap glyph
+                let nominal = face
+                    .glyph_index(chars[ci].1)
+                    .map(|g| u32::from(g.0));
+                if Some(info.glyph_id) != nominal {
+                    altered[ci] = true;
+                }
+            }
+        }
+
+        build_segments(&chars, &altered, text.len())
+    }
+}
+
+/// Groups maximal runs of altered characters into ligature segments.
+///
+/// Runs of length 2..=[`MAX_LIGATURE_CELLS`] become a single ligature segment;
+/// everything else is emitted as one single-cell segment per character. An
+/// over-long run is decomposed in full: its tail is never re-examined as a
+/// shorter run, so no partial ligature is carved out of it.
+///
+/// `chars` holds the `(byte offset, char)` pairs of the text, `altered[k]`
+/// tells whether character `k` was altered or merged by shaping, and
+/// `text_len` is the byte length of the text (the end offset of the last
+/// character).
+fn build_segments(chars: &[(usize, char)], altered: &[bool], text_len: usize) -> Vec<Segment> {
+    let n = chars.len();
+    // Byte offset at which character `k` starts; `text_len` one past the end.
+    let byte_at = |k: usize| chars.get(k).map_or(text_len, |&(b, _)| b);
+
+    let mut segments = Vec::with_capacity(n);
+    let mut i = 0;
+    while i < n {
+        // Length of the maximal altered run starting at `i` (0 if unaltered).
+        let run = altered
+            .iter()
+            .skip(i)
+            .take(n - i)
+            .take_while(|&&a| a)
+            .count();
+
+        if (2..=MAX_LIGATURE_CELLS as usize).contains(&run) {
+            let start = byte_at(i);
+            segments.push(Segment {
+                start,
+                len: byte_at(i + run) - start,
+                cells: run as u8,
+                ligated: true,
+            });
+            i += run;
+            continue;
+        }
+
+        // Unaltered character, isolated altered glyph, or over-long run:
+        // consume the whole stretch as single cells. Advancing past the entire
+        // run (not just one character) keeps the remainder of an over-long run
+        // from being picked up as a shorter ligature on the next iteration.
+        let stretch = run.max(1);
+        for k in i..i + stretch {
+            let start = byte_at(k);
+            segments.push(Segment {
+                start,
+                len: byte_at(k + 1) - start,
+                cells: 1,
+                ligated: false,
+            });
+        }
+        i += stretch;
+    }
+    segments
+}
+
+/// Returns true if the face's GSUB table exposes `liga` or `calt` features.
+fn face_has_ligature_features(face: &Face<'_>) -> bool {
+    let Some(gsub) = face.tables().gsub else {
+        return false;
+    };
+    gsub.features
+        .into_iter()
+        .any(|f| f.tag == LIGA || f.tag == CALT)
+}
+
+/// One single-cell segment per character of `text`.
+fn per_char_segments(text: &str) -> Vec<Segment> {
+    let mut out = Vec::new();
+    push_per_char(&mut out, text, 0, text.len());
+    out
+}
+
+/// Appends one single-cell segment per character of `text[start..end]`.
+///
+/// Segment offsets are byte offsets into `text` itself, not into the
+/// sub-slice. `start` and `end` must lie on character boundaries, since the
+/// text is sliced with `text[start..end]`.
+fn push_per_char(segments: &mut Vec<Segment>, text: &str, start: usize, end: usize) {
+    let singles = text[start..end]
+        .char_indices()
+        .map(|(rel, ch)| Segment {
+            start: start + rel,
+            len: ch.len_utf8(),
+            cells: 1,
+            ligated: false,
+        });
+    segments.extend(singles);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn rejects_woff2() {
+        // A WOFF2 magic followed by padding must be refused before parsing.
+        let bytes = b"wOF2\x00\x00\x00\x00";
+        assert!(matches!(
+            Shaper::from_bytes(bytes),
+            Err(ShaperError::CompressedFont)
+        ));
+    }
+
+    #[test]
+    fn rejects_garbage() {
+        // Bytes without a compressed-container magic fall through to parsing.
+        let bytes = b"not a font at all";
+        assert!(matches!(
+            Shaper::from_bytes(bytes),
+            Err(ShaperError::ParseFailed)
+        ));
+    }
+
+    #[test]
+    fn per_char_segments_cover_text() {
+        let segs = per_char_segments("a=>b");
+        assert_eq!(segs.len(), 4);
+        assert!(segs.iter().all(|s| s.cells == 1 && !s.ligated));
+        assert_eq!(segs[0].start, 0);
+        assert_eq!(segs[3].start, 3);
+    }
+
+    /// The memoized `segment()` returns results identical to the uncached path,
+    /// on both the first (miss) and second (hit) call. Requires a ligature font.
+    #[test]
+    fn cache_returns_identical_segments() {
+        // Opt-in test: returns early (and passes) when no font is configured.
+        let Ok(font_path) = std::env::var("BEAMTERM_LIGATURE_TEST_FONT") else {
+            return;
+        };
+        let font = std::fs::read(font_path).expect("read test font");
+        let shaper = Shaper::from_bytes(&font).expect("parse test font");
+
+        let samples = ["a => b", "x != y", "plain text", "let v = vec![];"];
+        for text in samples {
+            let expected = shaper.segment_uncached(text);
+            let on_miss = shaper.segment(text); // miss → populates cache
+            let on_hit = shaper.segment(text); // hit → from cache
+            assert_eq!(on_miss, expected, "miss diverges from uncached for {text:?}");
+            assert_eq!(on_hit, expected, "hit diverges from uncached for {text:?}");
+        }
+
+        // A run that ligates must still report the ligature on the cached call.
+        let _ = shaper.segment("a => b");
+        let from_cache = shaper.segment("a => b");
+        assert!(
+            from_cache.iter().any(|s| s.ligated),
+            "cached call lost the ligature"
+        );
+    }
+
+    /// Exercises real shaping when a ligature font is available on disk.
+    /// Set `BEAMTERM_LIGATURE_TEST_FONT` to an sfnt (.ttf/.otf) path to run.
+ #[test] + fn shapes_ligatures_when_font_provided() { + let Ok(path) = std::env::var("BEAMTERM_LIGATURE_TEST_FONT") else { + return; + }; + let bytes = std::fs::read(path).expect("read test font"); + let shaper = Shaper::from_bytes(&bytes).expect("parse test font"); + assert!( + shaper.has_ligatures(), + "test font should advertise liga/calt" + ); + + let ligature_of = |text: &str| -> Option<(String, u8)> { + let segs = shaper.segment(text); + // total cells must always equal the source character count + let total: usize = segs.iter().map(|s| s.cells as usize).sum(); + assert_eq!( + total, + text.chars().count(), + "cell total mismatch for {text:?}: {segs:?}" + ); + segs.iter() + .find(|s| s.ligated) + .map(|s| (text[s.start..s.start + s.len].to_string(), s.cells)) + }; + + for (input, want_text, want_cells) in + [("a => b", "=>", 2u8), ("a -> b", "->", 2), ("x != y", "!=", 2), ("x === y", "===", 3)] + { + let got = ligature_of(input); + eprintln!("{input:?} -> ligature {got:?}"); + assert_eq!( + got, + Some((want_text.to_string(), want_cells)), + "unexpected ligature for {input:?}" + ); + } + + // A lone '=' between spaces must NOT ligate. 
+ assert!( + shaper.segment("a = b").iter().all(|s| !s.ligated), + "lone = should not ligate" + ); + } +} diff --git a/beamterm-core/src/gl/terminal_grid.rs b/beamterm-core/src/gl/terminal_grid.rs index 86ad300..e29b589 100644 --- a/beamterm-core/src/gl/terminal_grid.rs +++ b/beamterm-core/src/gl/terminal_grid.rs @@ -1,4 +1,8 @@ -use std::{cmp::min, fmt::Debug}; +use std::{ + cmp::min, + collections::{HashSet, VecDeque}, + fmt::Debug, +}; use beamterm_data::{CellSize, FontAtlasData, FontStyle, Glyph, GlyphEffect, TerminalSize}; use compact_str::CompactString; @@ -9,7 +13,7 @@ use crate::{ error::Error, gl::{ CellIterator, CellQuery, Drawable, GlState, RenderContext, ShaderProgram, - atlas::{self, FontAtlas, GlyphSlot}, + atlas::{self, FontAtlas, GlyphSlot, ShapedSegment}, buffer_upload_array, dirty_regions::DirtyRegions, selection::SelectionTracker, @@ -312,10 +316,10 @@ impl TerminalGrid { .map_or(space_glyph, |slot| slot.slot_id()); // translate existing glyph ids to new atlas - let mut skip_next = false; + let mut skip = 0usize; for idx in 0..self.cells.len() { - if skip_next { - skip_next = false; + if skip > 0 { + skip -= 1; continue; } @@ -336,7 +340,20 @@ impl TerminalGrid { // update right-half in next cell if within bounds if let Some(next_cell) = self.cells.get_mut(idx + 1) { next_cell.set_glyph_id(id + 1); - skip_next = true; + skip = 1; + } + }, + Some(GlyphSlot::Ligature(id, cells)) => { + // place the ligature's consecutive halves across `cells` cells + self.cells[idx].set_glyph_id(id); + for i in 1..cells as usize { + match self.cells.get_mut(idx + i) { + Some(c) => { + c.set_glyph_id(id + i as u16); + skip += 1; + }, + None => break, + } } }, None => { @@ -542,8 +559,9 @@ impl TerminalGrid { let atlas = &mut self.atlas; let cell_buf = &mut self.cells; - // handle double-width emoji that span two cells - let mut pending_cell: Option = None; + // handle multi-cell glyphs (wide emoji/CJK span 2 cells, ligatures span N); + // their trailing halves are 
queued and consumed on subsequent cells. + let mut pending: VecDeque = VecDeque::new(); cell_buf .iter_mut() .zip(cells) @@ -552,15 +570,23 @@ impl TerminalGrid { .resolve_glyph_slot(data.symbol, data.style_bits) .unwrap_or(fallback_glyph); - *cell = if let Some(second_cell) = pending_cell.take() { - second_cell + *cell = if let Some(next_half) = pending.pop_front() { + next_half } else { match glyph { GlyphSlot::Normal(id) => CellDynamic::new(id, data.fg, data.bg), GlyphSlot::Wide(id) | GlyphSlot::Emoji(id) => { // storing a double-width glyph, reserve next cell with right-half id - pending_cell = Some(CellDynamic::new(id + 1, data.fg, data.bg)); + pending.push_back(CellDynamic::new(id + 1, data.fg, data.bg)); + CellDynamic::new(id, data.fg, data.bg) + }, + + GlyphSlot::Ligature(id, cells) => { + // reserve the trailing halves for the following cells + for i in 1..cells as u16 { + pending.push_back(CellDynamic::new(id + i, data.fg, data.bg)); + } CellDynamic::new(id, data.fg, data.bg) }, } @@ -605,15 +631,15 @@ impl TerminalGrid { // ratatui and beamterm can disagree on which emoji // are double-width (beamterm assumes double-width for all emoji), - // so for ratatui and similar clients we need to skip the next cell - // if we just wrote a double-width emoji in the current cell. - let mut skip_idx = None; + // so for ratatui and similar clients we need to skip the trailing cells + // that were already written as the halves of a previous multi-cell glyph. 
+ let mut skip: HashSet = HashSet::new(); cells .filter(|(idx, _)| *idx < cell_count) .for_each(|(idx, cell)| { - if skip_idx.take() == Some(idx) { - // skip this cell, already handled as part of previous double-width emoji + if skip.remove(&idx) { + // skip this cell, already handled as part of a previous multi-cell glyph return; } @@ -636,7 +662,25 @@ impl TerminalGrid { if let Some(c) = cell_buf.get_mut(idx + 1) { *c = CellDynamic::new(id + 1, cell.fg, cell.bg); dirty_regions.mark(idx + 1); - skip_idx = Some(idx + 1); + skip.insert(idx + 1); + } + }, + + GlyphSlot::Ligature(id, cells) => { + // render leftmost half in current cell, trailing halves after + cell_buf[idx] = CellDynamic::new(id, cell.fg, cell.bg); + dirty_regions.mark(idx); + + for i in 1..cells as usize { + let j = idx + i; + match cell_buf.get_mut(j) { + Some(c) => { + *c = CellDynamic::new(id + i as u16, cell.fg, cell.bg); + dirty_regions.mark(j); + skip.insert(j); + }, + None => break, + } } }, } @@ -665,6 +709,68 @@ impl TerminalGrid { self.update_cells_by_index(std::iter::once((idx, cell_data))) } + /// Configures ligature shaping for the active atlas from raw sfnt font bytes. + /// + /// # Errors + /// Returns an error if the bytes cannot be parsed as a font face. + pub fn set_font_shaper_bytes(&mut self, bytes: &[u8]) -> Result<(), Error> { + self.atlas.set_font_shaper_bytes(bytes) + } + + /// Segments a horizontal text run into ligature-aware spans. + /// + /// Returns `None` when the atlas has no ligature support, in which case the + /// caller should render the run grapheme-by-grapheme. When `Some`, segments + /// with `cells >= 2` are ligatures; place two-cell ligatures with + /// [`update_cell`](Self::update_cell) (the two-character symbol resolves + /// through the wide path) and wider ones with + /// [`place_ligature`](Self::place_ligature). 
+ #[must_use] + pub fn segment_run(&self, text: &str) -> Option> { + self.atlas.segment_run(text) + } + + /// Places a ligature glyph spanning three or more columns starting at (x, y). + /// + /// `cell` carries the symbol (the ligature substring), style bits, and + /// colors; `cells` is the number of columns the ligature spans. + /// + /// # Errors + /// Infallible today; returns `Result` for API consistency. + pub fn place_ligature( + &mut self, + x: u16, + y: u16, + cell: CellData, + cells: u8, + ) -> Result<(), Error> { + let cols = self.terminal_size.cols as usize; + let idx = y as usize * cols + x as usize; + if idx >= self.cells.len() { + return Ok(()); + } + + let slot = self + .atlas + .resolve_ligature_slot(cell.symbol, cell.style_bits, cells) + .unwrap_or(GlyphSlot::Normal(self.fallback_glyph)); + + let span = slot.cell_span() as usize; + let base = slot.slot_id(); + self.cells[idx] = CellDynamic::new(base, cell.fg, cell.bg); + self.dirty_regions.mark(idx); + for i in 1..span { + match self.cells.get_mut(idx + i) { + Some(c) => { + *c = CellDynamic::new(base + i as u16, cell.fg, cell.bg); + self.dirty_regions.mark(idx + i); + }, + None => break, + } + } + Ok(()) + } + /// Flushes pending cell updates to the GPU. /// /// This also flushes any pending glyph data in the atlas texture diff --git a/beamterm-renderer/Cargo.toml b/beamterm-renderer/Cargo.toml index d8d13d8..a00b1e2 100644 --- a/beamterm-renderer/Cargo.toml +++ b/beamterm-renderer/Cargo.toml @@ -88,7 +88,13 @@ features = [ ] [features] +# Ligature shaping (pulls rustybuzz via beamterm-core). Optional so that +# renderer consumers that don't need it (e.g. the demo examples) don't compile +# rustybuzz. Enabled by `js-api` since the browser package ships ligatures. 
+ligatures = ["beamterm-core/ligatures"] js-api = [ + "ligatures", + "compact_str/serde", "dep:serde", diff --git a/beamterm-renderer/src/gl/canvas_rasterizer.rs b/beamterm-renderer/src/gl/canvas_rasterizer.rs index 3a0cf64..a1f20e8 100644 --- a/beamterm-renderer/src/gl/canvas_rasterizer.rs +++ b/beamterm-renderer/src/gl/canvas_rasterizer.rs @@ -30,8 +30,10 @@ //! } //! ``` +use beamterm_core::gl::MAX_LIGATURE_CELLS; use beamterm_data::{FontAtlasData, FontStyle}; use compact_str::CompactString; +use unicode_width::UnicodeWidthStr; use wasm_bindgen::prelude::*; use web_sys::{OffscreenCanvas, OffscreenCanvasRenderingContext2d}; @@ -42,6 +44,13 @@ const PADDING: u32 = FontAtlasData::PADDING as u32; const OFFSCREEN_CANVAS_WIDTH: u32 = 256; +/// Number of cells a grapheme occupies on the canvas, clamped to the maximum +/// supported glyph span. Ligature substrings (e.g. `===`) report their full +/// character count; emoji/CJK report 2; ordinary text reports 1. +fn cell_span(grapheme: &str) -> u32 { + (UnicodeWidthStr::width(grapheme).max(1) as u32).min(u32::from(MAX_LIGATURE_CELLS)) +} + /// Number of glyphs per rasterization batch. /// Canvas height is scaled to fit this many glyphs. const GLYPH_BATCH_SIZE: usize = 32; @@ -96,8 +105,12 @@ impl CanvasRasterizer { let cell_metrics = Self::measure_cell_metrics(&ctx)?; - // Resize canvas to fit GLYPH_BATCH_SIZE glyphs + // Resize canvas to fit GLYPH_BATCH_SIZE glyphs vertically and the widest + // possible glyph (a MAX_LIGATURE_CELLS-wide ligature) horizontally. 
let required_height = GLYPH_BATCH_SIZE as u32 * cell_metrics.padded_height; + let required_width = + (cell_metrics.padded_width * u32::from(MAX_LIGATURE_CELLS)).max(OFFSCREEN_CANVAS_WIDTH); + canvas.set_width(required_width); canvas.set_height(required_height); // Re-initialize context after resize (canvas resize clears context state) @@ -149,8 +162,9 @@ impl CanvasRasterizer { let num_glyphs = symbols.len() as u32; - // canvas needs to be double-width (for emoji) and tall enough for all glyphs - let canvas_width = cell_w * 2; + // canvas must be wide enough for the widest glyph (emoji=2, ligatures up + // to MAX_LIGATURE_CELLS) and tall enough for all glyphs + let canvas_width = cell_w * u32::from(MAX_LIGATURE_CELLS); let canvas_height = cell_h * num_glyphs; self.render_ctx.clear_rect( @@ -207,8 +221,7 @@ impl CanvasRasterizer { let mut results = Vec::with_capacity(symbols.len()); for (i, &(grapheme, _)) in symbols.iter().enumerate() { - let padded_width = - if beamterm_core::is_double_width(grapheme) { cell_w * 2 } else { cell_w }; + let padded_width = cell_w * cell_span(grapheme); let glyph_start = i * glyph_stride; let mut pixels = Vec::with_capacity((padded_width * cell_h) as usize * bytes_per_pixel); diff --git a/beamterm-renderer/src/terminal.rs b/beamterm-renderer/src/terminal.rs index 2124263..591b1eb 100644 --- a/beamterm-renderer/src/terminal.rs +++ b/beamterm-renderer/src/terminal.rs @@ -294,6 +294,25 @@ impl Terminal { Ok(()) } + /// Enables ligature shaping for the current dynamic atlas from raw sfnt bytes. + /// + /// The bytes must be raw TrueType/OpenType (decompress WOFF/WOFF2 first) and + /// match the font being rendered. Ligatures activate automatically when the + /// font advertises them. This is a no-op for static atlases or fonts without + /// ligature tables. Re-apply after [`replace_with_dynamic_atlas`] when the + /// font changes. 
+ /// + /// [`replace_with_dynamic_atlas`]: Self::replace_with_dynamic_atlas + /// + /// # Errors + /// Returns an error if the bytes cannot be parsed as a font face. + pub fn set_font_bytes(&mut self, bytes: &[u8]) -> Result<(), Error> { + self.grid + .borrow_mut() + .set_font_shaper_bytes(bytes)?; + Ok(()) + } + /// Returns the textual content of the specified cell selection. pub fn get_text(&self, selection: CellQuery) -> CompactString { self.grid.borrow().get_text(selection) diff --git a/beamterm-renderer/src/wasm.rs b/beamterm-renderer/src/wasm.rs index ab56bfe..6076d4d 100644 --- a/beamterm-renderer/src/wasm.rs +++ b/beamterm-renderer/src/wasm.rs @@ -370,7 +370,13 @@ impl Batch { } } - /// Write text to the terminal + /// Write text to the terminal. + /// + /// When the atlas has a ligature shaper configured (see + /// [`setFontBytes`](BeamtermRenderer::set_font_bytes)) and the font ligates, + /// the run is segmented so sequences like `=>`, `->`, `===` and `<==>` render + /// as single multi-cell ligature glyphs. Otherwise the run is written + /// grapheme-by-grapheme as before. 
#[wasm_bindgen(js_name = "text")] pub fn text(&mut self, x: u16, y: u16, text: &str, style: &CellStyle) -> Result<(), JsValue> { let mut terminal_grid = self.terminal_grid.borrow_mut(); @@ -380,6 +386,30 @@ impl Batch { return Ok(()); // oob, ignore } + // ligature-aware path: segment the run and place ligatures as wide glyphs + if let Some(segments) = terminal_grid.segment_run(text) { + let mut col = x; + for seg in segments { + if col >= ts.cols { + break; + } + let sub = &text[seg.start..seg.start + seg.len]; + let cell = CellData::new_with_style_bits(sub, style.style_bits, style.fg, style.bg); + if seg.cells >= 3 { + terminal_grid + .place_ligature(col, y, cell, seg.cells) + .map_err(|e| JsValue::from_str(&e.to_string()))?; + } else { + // 1- or 2-cell: the wide path handles 2-char ligatures + terminal_grid + .update_cell(col, y, cell) + .map_err(|e| JsValue::from_str(&e.to_string()))?; + } + col += seg.cells as u16; + } + return Ok(()); + } + let mut col_offset: u16 = 0; for ch in text.graphemes(true) { let char_width = if ch.len() == 1 { 1 } else { ch.width() }; @@ -617,6 +647,25 @@ impl BeamtermRenderer { Ok(BeamtermRenderer { terminal }) } + /// Enable programming ligatures by supplying the active font's raw bytes. + /// + /// `font_bytes` must be raw TrueType/OpenType (sfnt) data. WOFF/WOFF2 must be + /// decompressed first (the `@beamterm/renderer` JS package provides a helper). + /// The bytes must match the font passed to [`withDynamicAtlas`](Self::with_dynamic_atlas). + /// + /// Ligatures (`=>`, `->`, `===`, `<==>`, …) activate automatically when the + /// font advertises them. Re-call after + /// [`replaceWithDynamicAtlas`](Self::replace_with_dynamic_atlas) on font change. + /// + /// # Errors + /// Returns an error if the bytes cannot be parsed as a font face. 
+ #[wasm_bindgen(js_name = "setFontBytes")] + pub fn set_font_bytes(&mut self, font_bytes: &[u8]) -> Result<(), JsValue> { + self.terminal + .set_font_bytes(font_bytes) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + /// Enable default mouse selection behavior with built-in copy to clipboard #[wasm_bindgen(js_name = "enableSelection")] pub fn enable_selection( diff --git a/beamterm-unicode/src/lib.rs b/beamterm-unicode/src/lib.rs index e7873e2..714fdd7 100644 --- a/beamterm-unicode/src/lib.rs +++ b/beamterm-unicode/src/lib.rs @@ -20,8 +20,12 @@ pub fn is_emoji(s: &str) -> bool { // ASCII (1 byte, U+0000–U+007F): single ASCII is never emoji, but // multi-codepoint sequences starting with ASCII can be (e.g. keycap "1️⃣"). + // Such sequences always carry a non-ASCII continuation (U+FE0F / U+20E3); + // a pure-ASCII run is never emoji. Without the non-ASCII guard, a 2-char + // ligature substring like "->" or "==" (len > 1, width 2) is misclassified + // as emoji and gets the texture-color flag, rendering white instead of fg. if first_byte < 0x80 { - return s.len() > 1 && s.width() >= 2; + return s.len() > 1 && s.width() >= 2 && bytes.iter().any(|&b| b >= 0x80); } // 2-byte UTF-8 (U+0080–U+07FF): no emoji exist in this range. @@ -147,6 +151,21 @@ mod tests { // Not emoji assert!(!is_emoji("A")); assert!(!is_emoji("\u{2588}")); + + // ASCII-led keycap sequences (digit + U+FE0F + U+20E3) ARE emoji. + assert!(is_emoji("1\u{FE0F}\u{20E3}")); + assert!(is_emoji("#\u{FE0F}\u{20E3}")); + + // Pure-ASCII ligature substrings are NOT emoji, even though they are + // multi-char and width 2 — regression guard for ligatures rendering + // white (Emoji slot sets the texture-color flag instead of tinting fg). 
+ assert!(!is_emoji("->")); + assert!(!is_emoji("=>")); + assert!(!is_emoji("<-")); + assert!(!is_emoji("==")); + assert!(!is_emoji("&&")); + assert!(!is_emoji("|>")); + assert!(!is_emoji("::")); } #[test] diff --git a/js/README.md b/js/README.md index f0abb32..88c0c6d 100644 --- a/js/README.md +++ b/js/README.md @@ -103,6 +103,33 @@ Swap the font atlas at runtime. Existing cell content is preserved and translate | `replaceWithDynamicAtlas(fontFamilies, fontSize)` | Switch to a dynamic atlas with the given fonts | | `replaceWithStaticAtlas(atlasData?)` | Switch to a static atlas (`Uint8Array` or `null` for default) | +#### Ligatures + +Programming ligatures (`=>`, `->`, `!=`, `===`, `<==>`, …) render when the active +font ships ligature tables (Fira Code, JetBrains Mono, Cascadia Code, Monaspace Neon) +and you supply the font's raw bytes so beamterm can shape text runs. + +| Method | Description | +| ---------------------------- | ------------------------------------------------------------------------------------------- | +| `setFontBytes(fontBytes)` | Enable ligatures from the active font's raw **sfnt** (`.ttf`/`.otf`) bytes (`Uint8Array`) | + +Notes: + +- The bytes must be raw TrueType/OpenType. **WOFF/WOFF2 must be decompressed to sfnt first** + (`setFontBytes` rejects compressed containers). Use a small woff2 decoder, or fetch a `.ttf`. +- The bytes must match the font passed to `withDynamicAtlas`. Ligatures activate automatically + when the font advertises them — there is no separate on/off flag. +- Re-call `setFontBytes` after `replaceWithDynamicAtlas` when the font changes. +- Only the dynamic atlas supports ligatures (the static atlas is pre-rasterized). 
+ +```javascript +const renderer = BeamtermRenderer.withDynamicAtlas('#terminal', ['Fira Code'], 16.0); + +// `fontBytes` is a Uint8Array of raw .ttf/.otf data (decompress woff2 beforehand) +const fontBytes = new Uint8Array(await (await fetch('/fonts/FiraCode-Regular.ttf')).arrayBuffer()); +renderer.setFontBytes(fontBytes); +``` + #### Selection & Mouse | Method | Description |