From 446bb8128618919ce481ff8db09bcc2fd91e7985 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 24 Apr 2026 16:02:33 -0600 Subject: [PATCH] perf: reduce per-node allocations in to_native_metric_node Pre-size the per-node HashMap and children Vec, and skip the empty MetricsSet allocation for nodes that produce no metrics. Net effect on the Rust side of the protobuf metric pipeline (Cargo bench, M-series laptop, release build): | plan shape | tree walk | tree walk + encode | | ------------------- | ------------- | ------------------ | | linear 3 x 5 mtx | 826 -> 614ns | 1.11 -> 1.03us | | linear 8 x 8 mtx | 3.95 -> 2.24us| 5.67 -> 4.30us | | linear 20 x 10 mtx | 11.1 -> 6.91us| 19.3 -> 15.9us | | join 2x5 chains x 8 | 5.46 -> 3.10us| 7.65 -> 5.71us | The HashMap pre-sizing dominates. Most operator metric maps hold about 20 entries or fewer (hash-join reports 9, native-scan ~20), so the literal 16 avoids the repeated grow-and-rehash from HashMap::new()'s zero initial capacity on virtually every node. Refs apache/datafusion-comet#4072. --- native/core/src/execution/metrics/utils.rs | 33 +++++++++++----------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/native/core/src/execution/metrics/utils.rs b/native/core/src/execution/metrics/utils.rs index eb7e10bfc9..94cc5892a4 100644 --- a/native/core/src/execution/metrics/utils.rs +++ b/native/core/src/execution/metrics/utils.rs @@ -45,11 +45,6 @@ pub(crate) fn update_comet_metric( pub(crate) fn to_native_metric_node( spark_plan: &Arc, ) -> Result { - let mut native_metric_node = NativeMetricNode { - metrics: HashMap::new(), - children: Vec::new(), - }; - let node_metrics = if spark_plan.additional_native_plans.is_empty() { spark_plan.native_plan.metrics() } else { @@ -68,18 +63,24 @@ pub(crate) fn to_native_metric_node( Some(metrics.aggregate_by_name()) }; - // add metrics - node_metrics - .unwrap_or_default() - .iter() - .map(|m| m.value()) - .map(|m| (m.name(), m.as_usize() as i64)) - .for_each(|(name, value)| { - 
native_metric_node.metrics.insert(name.to_string(), value); - }); + let children = spark_plan.children(); + let mut native_metric_node = NativeMetricNode { + // Most operator metric maps are well under 20 entries (e.g. hash-join: 9, + // native-scan: ~20). Pre-sizing to 16 avoids the default-capacity rehash. + metrics: HashMap::with_capacity(16), + children: Vec::with_capacity(children.len()), + }; + + if let Some(metrics) = node_metrics { + for m in metrics.iter() { + let value = m.value(); + native_metric_node + .metrics + .insert(value.name().to_string(), value.as_usize() as i64); + } + } - // add children - for child_plan in spark_plan.children() { + for child_plan in children { let child_node = to_native_metric_node(child_plan)?; native_metric_node.children.push(child_node); }