Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 100 additions & 4 deletions vortex-array/src/arrays/listview/rebuild.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,92 @@ impl ListViewArray {
})
}

// TODO(connor)[ListView]: We should benchmark if it is faster to use `take` on the elements
// instead of using a builder.
/// The inner function for `rebuild_zero_copy_to_list`, which rebuilds a `ListViewArray` piece
/// by piece.
/// Picks between [`rebuild_with_take`](Self::rebuild_with_take) and
/// [`rebuild_list_by_list`](Self::rebuild_list_by_list) based on element dtype and average
/// list size.
fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
    &self,
) -> VortexResult<ListViewArray> {
    // Count how many elements the rebuilt array will hold; the heuristic
    // compares this against the number of lists to pick a strategy.
    let sizes = self.sizes().to_primitive();
    let mut total_elements: u64 = 0;
    for size in sizes.as_slice::<S>() {
        total_elements += (*size).as_() as u64;
    }

    match Self::should_use_take(total_elements, self.len()) {
        true => self.rebuild_with_take::<O, NewOffset, S>(),
        false => self.rebuild_list_by_list::<O, NewOffset, S>(),
    }
}

/// Returns `true` when we are confident that `rebuild_with_take` will
/// outperform `rebuild_list_by_list`.
///
/// Rebuilding list by list can introduce unnecessary overhead if there are a
/// lot of small lists; in those cases it is better to model the rebuild as a
/// single `take` over flattened element indices. Beyond that, the size of the
/// lists also determines which is faster: when lists are large, the
/// sequential per-list `memcpy`s of list-by-list scale well, making it the
/// safer default.
///
/// Take is dramatically faster for small lists (often 10-100×) because it
/// avoids per-list builder overhead. We only choose take when the average
/// list size is small enough that take clearly dominates.
fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
    // Average list length (in elements) below which the bulk-`take` rebuild
    // clearly beats the list-by-list rebuild.
    const TAKE_AVG_SIZE_THRESHOLD: u64 = 128;

    // With no lists there is nothing to copy, so either strategy is
    // trivially fine; default to `take`.
    if num_lists == 0 {
        return true;
    }
    // Integer division truncates, so an average of e.g. 127.9 still counts
    // as "small" — i.e. the cutoff is on the truncated average.
    let avg = total_output_elements / num_lists as u64;
    avg < TAKE_AVG_SIZE_THRESHOLD
}

/// Rebuilds elements using a single bulk `take`: gather every referenced
/// element index into one flat `BufferMut<u64>`, then issue a single `take`.
fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
    &self,
) -> VortexResult<ListViewArray> {
    let offsets_prim = self.offsets().to_primitive();
    let sizes_prim = self.sizes().to_primitive();
    let old_offsets = offsets_prim.as_slice::<O>();
    let old_sizes = sizes_prim.as_slice::<S>();

    let num_lists = old_offsets.len();

    let mut rebuilt_offsets = BufferMut::<NewOffset>::with_capacity(num_lists);
    let mut rebuilt_sizes = BufferMut::<S>::with_capacity(num_lists);
    // Capacity is an upper-bound estimate: the rebuilt array can reference at
    // most every source element once per view in the common case.
    let mut gather_indices = BufferMut::<u64>::with_capacity(self.elements().len());

    // Running count of elements emitted so far; doubles as the next offset.
    let mut emitted = NewOffset::zero();
    for list_idx in 0..num_lists {
        // Every list — valid or not — records its offset, keeping offsets
        // sequential; invalid lists get size zero and contribute no indices.
        rebuilt_offsets.push(emitted);
        if !self.is_valid(list_idx)? {
            rebuilt_sizes.push(S::zero());
            continue;
        }

        let size = old_sizes[list_idx];
        let start = old_offsets[list_idx].as_();
        let stop = start + size.as_();

        rebuilt_sizes.push(size);
        gather_indices.extend(start as u64..stop as u64);
        emitted += num_traits::cast(size).vortex_expect("Cast failed");
    }

    let elements = self.elements().take(gather_indices.into_array())?;

    // SAFETY: same invariants as `rebuild_list_by_list` — offsets are sequential and
    // non-overlapping, all (offset, size) pairs reference valid elements, and the validity
    // array is preserved from the original.
    Ok(unsafe {
        ListViewArray::new_unchecked(
            elements,
            rebuilt_offsets.into_array(),
            rebuilt_sizes.into_array(),
            self.validity.clone(),
        )
        .with_zero_copy_to_list(true)
    })
}

/// Rebuilds elements list-by-list: canonicalize elements upfront, then for each list `slice`
/// the relevant range and `extend_from_array` into a typed builder.
fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
&self,
) -> VortexResult<ListViewArray> {
let element_dtype = self
.dtype()
Expand Down Expand Up @@ -273,6 +353,7 @@ impl ListViewArray {
}

#[cfg(test)]
#[allow(clippy::cast_possible_truncation)]
mod tests {
use vortex_buffer::BitBuffer;
use vortex_error::VortexResult;
Expand Down Expand Up @@ -459,4 +540,19 @@ mod tests {
);
Ok(())
}

// ── should_use_take heuristic tests ────────────────────────────────────

#[test]
fn heuristic_zero_lists_uses_take() {
    // An empty array has no lists to copy; the heuristic defaults to `take`.
    let zero_lists = ListViewArray::should_use_take(0, 0);
    assert!(zero_lists);
}

#[test]
fn heuristic_small_lists_use_take() {
    // (total output elements, list count, expect take?)
    let cases = [
        (127_000_u64, 1_000_usize, true), // avg = 127 → take
        (128_000, 1_000, false),          // avg = 128 → list-by-list
    ];
    for (total, lists, expect_take) in cases {
        assert_eq!(ListViewArray::should_use_take(total, lists), expect_take);
    }
}
}
Loading