Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 18 additions & 15 deletions datafusion/common/src/utils/proxy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ pub trait HashTableAllocExt {
///
/// Returns the bucket where the element was inserted.
/// Note that allocation counts capacity, not size.
/// This method assumes that the element is not already present
///
/// # Example:
/// ```
Expand All @@ -134,7 +135,7 @@ pub trait HashTableAllocExt {
/// assert_eq!(allocated, 64);
///
/// // insert more values
/// for i in 0..100 {
/// for i in 2..100 {
/// table.insert_accounted(i, hash_fn, &mut allocated);
/// }
/// assert_eq!(allocated, 400);
Expand All @@ -161,22 +162,24 @@ where
) {
let hash = hasher(&x);

// NOTE: `find_entry` does NOT grow!
match self.find_entry(hash, |y| y == &x) {
Ok(_occupied) => {}
Err(_absent) => {
if self.len() == self.capacity() {
// need to request more memory
let bump_elements = self.capacity().max(16);
let bump_size = bump_elements * size_of::<T>();
*accounting = (*accounting).checked_add(bump_size).expect("overflow");
if cfg!(debug_assertions) {
// In debug mode, check that the element is not already present
debug_assert!(
self.find_entry(hash, |y| y == &x).is_err(),
"attempted to insert duplicate element into HashTableAllocExt::insert_accounted"
);
}

self.reserve(bump_elements, &hasher);
}
if self.len() == self.capacity() {
// need to request more memory
let bump_elements = self.capacity().max(16);
let bump_size = bump_elements * size_of::<T>();
*accounting = (*accounting).checked_add(bump_size).expect("overflow");

// still need to insert the element since first try failed
self.entry(hash, |y| y == &x, hasher).insert(x);
}
self.reserve(bump_elements, &hasher);
}

// still need to insert the element since first try failed
self.insert_unique(hash, x, hasher);
}
}
4 changes: 2 additions & 2 deletions datafusion/physical-expr-common/src/binary_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ where
// is value is already present in the set?
let entry = self.map.find_mut(hash, |header| {
// compare value if hashes match
if header.len != value_len {
if header.hash != hash {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment says "compare value if hashes match" but actual implementation is comparing lengths 🤔

return false;
}
// value is stored inline so no need to consult buffer
Expand Down Expand Up @@ -427,7 +427,7 @@ where
// Check if the value is already present in the set
let entry = self.map.find_mut(hash, |header| {
// compare value if hashes match
if header.len != value_len {
if header.hash != hash {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should save some random access if a lot of values share the same length

return false;
}
// Need to compare the bytes in the buffer
Expand Down
5 changes: 2 additions & 3 deletions datafusion/physical-expr-common/src/binary_view_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,11 +273,10 @@ where
let value: &[u8] = value.as_ref();

let entry = self.map.find_mut(hash, |header| {
let v = self.builder.get_value(header.view_idx);

if v.len() != value.len() {
if header.hash != hash {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should save some random access if a lot of values share the same length

return false;
}
let v = self.builder.get_value(header.view_idx);

v == value
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,9 @@ where
let hash = key.hash(state);
let insert = self.map.entry(
hash,
|&(g, _)| unsafe { self.values.get_unchecked(g).is_eq(key) },
|&(g, h)| unsafe {
hash == h && self.values.get_unchecked(g).is_eq(key)
},
|&(_, h)| h,
);

Expand Down