From 3adc2318d7601a7fca2e4c0deb6246204637fa04 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 27 Jun 2026 10:56:06 -0700 Subject: [PATCH 1/2] fix(transaction): don't let user properties override computed summary metrics Building a snapshot summary merged the user's `snapshot_properties` on top of the computed metrics, so a caller could overwrite a metric key such as `added-data-files` with an arbitrary value, corrupting the summary. A non-integer value then panicked `update_totals`, which parsed the added/removed deltas with `.expect()`. This matches iceberg-java's behavior on both points (verified against `SnapshotProducer.java` and `SnapshotSummary.java` on `main`): - `SnapshotSummary.Builder.build()` adds custom properties first, then `metrics.addTo(builder)`, so computed metrics win on a key collision. We now apply `snapshot_properties` first and let the computed metrics overwrite them. - `SnapshotProducer.updateTotal` wraps parsing in try/catch and skips the total on a `NumberFormatException`. `update_totals` now tolerates an unparseable added/removed value by skipping that total instead of panicking. Closes #2725 --- crates/iceberg/src/spec/snapshot_summary.rs | 80 ++++++++++++++++----- crates/iceberg/src/transaction/append.rs | 54 ++++++++++++++ crates/iceberg/src/transaction/snapshot.rs | 14 +++- 3 files changed, 130 insertions(+), 18 deletions(-) diff --git a/crates/iceberg/src/spec/snapshot_summary.rs b/crates/iceberg/src/spec/snapshot_summary.rs index 3fb4c47ea4..c0afb1a6d9 100644 --- a/crates/iceberg/src/spec/snapshot_summary.rs +++ b/crates/iceberg/src/spec/snapshot_summary.rs @@ -506,22 +506,31 @@ fn update_totals( }, }; - let added = summary - .additional_properties - .get(added_property) - .map_or(0, |value| { - value - .parse::() - .expect("must be parsable as it was just serialized") - }); - let removed = summary - .additional_properties - .get(removed_property) - .map_or(0, |value| { - value - .parse::() - .expect("must be parsable as it was just serialized") - }); + // Parse the added/removed deltas, tolerating an unparseable value by skipping + // the total entirely rather than panicking. Computed metrics always overwrite + // user-supplied summary properties (see `SnapshotProducer::summary`), so a bad + // value should only ever come from a previous snapshot's summary; matching + // iceberg-java's `updateTotal`, we ignore it instead of failing the commit. + let parse_delta = |property: &str| -> Option { + match summary.additional_properties.get(property) { + None => Some(0), + Some(value) => match value.parse::() { + Ok(v) => Some(v), + Err(parse_err) => { + tracing::warn!( + "Property '{property}' could not be parsed when computing '{total_property}': {parse_err}. \ + Skipping total computation.", + ); + None + } + }, + } + }; + + let (Some(added), Some(removed)) = (parse_delta(added_property), parse_delta(removed_property)) + else { + return; + }; let new_total = previous_total + added - removed; summary @@ -1156,6 +1165,45 @@ mod tests { } } + #[test] + fn test_update_totals_tolerates_unparseable_added_value() { + // A non-integer added value (which can survive in a previous snapshot's + // summary) must not panic the commit. Matching iceberg-java's `updateTotal` + // try/catch, the affected total is skipped while other totals still compute. + let prev_props: HashMap = [(TOTAL_DATA_FILES, "8"), (TOTAL_RECORDS, "80")] + .into_iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + + let previous_summary = Summary { + operation: Operation::Append, + additional_properties: prev_props, + }; + + let new_props: HashMap = + [(ADDED_DATA_FILES, "not-a-number"), (ADDED_RECORDS, "40")] + .into_iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + + let summary = Summary { + operation: Operation::Append, + additional_properties: new_props, + }; + + // Must not panic. + let updated = update_snapshot_summaries(summary, Some(&previous_summary), false).unwrap(); + let props = &updated.additional_properties; + + // The total whose added delta was unparseable is skipped... + assert!( + !props.contains_key(TOTAL_DATA_FILES), + "TOTAL_DATA_FILES should be skipped when its added value is unparseable", + ); + // ...while a sibling total with valid deltas still computes. + assert_eq!(props.get(TOTAL_RECORDS).unwrap(), "120"); + } + #[test] fn test_update_totals_computed_when_no_previous_summary() { let new_props: HashMap = [ diff --git a/crates/iceberg/src/transaction/append.rs b/crates/iceberg/src/transaction/append.rs index 36fde117ab..546e1a2d64 100644 --- a/crates/iceberg/src/transaction/append.rs +++ b/crates/iceberg/src/transaction/append.rs @@ -413,6 +413,60 @@ mod tests { ); } + #[tokio::test] + async fn test_snapshot_properties_cannot_override_computed_metrics() { + // A user-supplied snapshot property must not shadow a computed metric key + // such as `added-data-files`. Matching iceberg-java, the computed value + // wins, so the summary reflects the real count and a bad value can neither + // corrupt the summary nor panic total computation (see #2184-adjacent fix). + let table = make_v2_minimal_table(); + let tx = Transaction::new(&table); + + let mut snapshot_properties = HashMap::new(); + // Both a benign-but-wrong value and a non-integer value collide with + // computed metric keys; neither should reach the final summary. + snapshot_properties.insert("added-data-files".to_string(), "9999".to_string()); + snapshot_properties.insert("added-records".to_string(), "not-a-number".to_string()); + + let data_file = DataFileBuilder::default() + .content(DataContentType::Data) + .file_path("test/1.parquet".to_string()) + .file_format(DataFileFormat::Parquet) + .file_size_in_bytes(100) + .record_count(1) + .partition_spec_id(table.metadata().default_partition_spec_id()) + .partition(Struct::from_iter([Some(Literal::long(300))])) + .build() + .unwrap(); + + let action = tx + .fast_append() + .set_snapshot_properties(snapshot_properties) + .add_data_files(vec![data_file]); + // Must not panic during total computation. + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + + let new_snapshot = if let TableUpdate::AddSnapshot { snapshot } = &updates[0] { + snapshot + } else { + unreachable!() + }; + let props = &new_snapshot.summary().additional_properties; + + // Computed metric wins over the user's colliding values. + assert_eq!( + props.get("added-data-files").unwrap(), + "1", + "computed added-data-files must override the user-supplied value" + ); + assert_eq!( + props.get("added-records").unwrap(), + "1", + "computed added-records must override the user-supplied non-integer value" + ); + } + #[tokio::test] async fn test_append_snapshot_properties() { let table = make_v2_minimal_table(); diff --git a/crates/iceberg/src/transaction/snapshot.rs b/crates/iceberg/src/transaction/snapshot.rs index 8e47226072..0a2621cc45 100644 --- a/crates/iceberg/src/transaction/snapshot.rs +++ b/crates/iceberg/src/transaction/snapshot.rs @@ -404,8 +404,18 @@ impl<'a> SnapshotProducer<'a> { let previous_snapshot = table_metadata.current_snapshot(); - let mut additional_properties = summary_collector.build(); - additional_properties.extend(self.snapshot_properties.clone()); + // User-supplied snapshot properties are applied first, then the computed + // metrics overwrite any colliding keys. This matches iceberg-java + // (`SnapshotProducer.summary`), where computed `added-*`/`total-*` values + // are written after user properties so a user cannot shadow them with a + // bad (or merely wrong) value that would corrupt the snapshot summary. + // User-supplied snapshot properties are applied first, then the computed + // metrics overwrite any colliding keys. This matches iceberg-java + // (`SnapshotProducer.summary`), where computed `added-*`/`total-*` values + // are written after user properties so a user cannot shadow them with a + // bad (or merely wrong) value that would corrupt the snapshot summary. + let mut additional_properties = self.snapshot_properties.clone(); + additional_properties.extend(summary_collector.build()); let summary = Summary { operation: snapshot_produce_operation.operation(), From 526004f977fec2ee0b7a4bb0d54532638f4132d5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 27 Jun 2026 11:03:12 -0700 Subject: [PATCH 2/2] fix: spelling unparseable -> unparsable to satisfy typos check --- crates/iceberg/src/spec/snapshot_summary.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/iceberg/src/spec/snapshot_summary.rs b/crates/iceberg/src/spec/snapshot_summary.rs index c0afb1a6d9..c9fd022724 100644 --- a/crates/iceberg/src/spec/snapshot_summary.rs +++ b/crates/iceberg/src/spec/snapshot_summary.rs @@ -506,7 +506,7 @@ fn update_totals( }, }; - // Parse the added/removed deltas, tolerating an unparseable value by skipping + // Parse the added/removed deltas, tolerating an unparsable value by skipping // the total entirely rather than panicking. Computed metrics always overwrite // user-supplied summary properties (see `SnapshotProducer::summary`), so a bad // value should only ever come from a previous snapshot's summary; matching @@ -1166,7 +1166,7 @@ mod tests { } #[test] - fn test_update_totals_tolerates_unparseable_added_value() { + fn test_update_totals_tolerates_unparsable_added_value() { // A non-integer added value (which can survive in a previous snapshot's // summary) must not panic the commit. Matching iceberg-java's `updateTotal` // try/catch, the affected total is skipped while other totals still compute. @@ -1195,10 +1195,10 @@ mod tests { let updated = update_snapshot_summaries(summary, Some(&previous_summary), false).unwrap(); let props = &updated.additional_properties; - // The total whose added delta was unparseable is skipped... + // The total whose added delta was unparsable is skipped... assert!( !props.contains_key(TOTAL_DATA_FILES), - "TOTAL_DATA_FILES should be skipped when its added value is unparseable", + "TOTAL_DATA_FILES should be skipped when its added value is unparsable", ); // ...while a sibling total with valid deltas still computes. assert_eq!(props.get(TOTAL_RECORDS).unwrap(), "120");