diff --git a/crates/iceberg/src/spec/snapshot_summary.rs b/crates/iceberg/src/spec/snapshot_summary.rs index 3fb4c47ea4..c9fd022724 100644 --- a/crates/iceberg/src/spec/snapshot_summary.rs +++ b/crates/iceberg/src/spec/snapshot_summary.rs @@ -506,22 +506,31 @@ fn update_totals( }, }; - let added = summary - .additional_properties - .get(added_property) - .map_or(0, |value| { - value - .parse::() - .expect("must be parsable as it was just serialized") - }); - let removed = summary - .additional_properties - .get(removed_property) - .map_or(0, |value| { - value - .parse::() - .expect("must be parsable as it was just serialized") - }); + // Parse the added/removed deltas, tolerating an unparsable value by skipping + // the total entirely rather than panicking. Computed metrics always overwrite + // user-supplied summary properties (see `SnapshotProducer::summary`), so a bad + // value should only ever come from a previous snapshot's summary; matching + // iceberg-java's `updateTotal`, we ignore it instead of failing the commit. + let parse_delta = |property: &str| -> Option { + match summary.additional_properties.get(property) { + None => Some(0), + Some(value) => match value.parse::() { + Ok(v) => Some(v), + Err(parse_err) => { + tracing::warn!( + "Property '{property}' could not be parsed when computing '{total_property}': {parse_err}. \ + Skipping total computation.", + ); + None + } + }, + } + }; + + let (Some(added), Some(removed)) = (parse_delta(added_property), parse_delta(removed_property)) + else { + return; + }; let new_total = previous_total + added - removed; summary @@ -1156,6 +1165,45 @@ mod tests { } } + #[test] + fn test_update_totals_tolerates_unparsable_added_value() { + // A non-integer added value (which can survive in a previous snapshot's + // summary) must not panic the commit. Matching iceberg-java's `updateTotal` + // try/catch, the affected total is skipped while other totals still compute. + let prev_props: HashMap = [(TOTAL_DATA_FILES, "8"), (TOTAL_RECORDS, "80")] + .into_iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + + let previous_summary = Summary { + operation: Operation::Append, + additional_properties: prev_props, + }; + + let new_props: HashMap = + [(ADDED_DATA_FILES, "not-a-number"), (ADDED_RECORDS, "40")] + .into_iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + + let summary = Summary { + operation: Operation::Append, + additional_properties: new_props, + }; + + // Must not panic. + let updated = update_snapshot_summaries(summary, Some(&previous_summary), false).unwrap(); + let props = &updated.additional_properties; + + // The total whose added delta was unparsable is skipped... + assert!( + !props.contains_key(TOTAL_DATA_FILES), + "TOTAL_DATA_FILES should be skipped when its added value is unparsable", + ); + // ...while a sibling total with valid deltas still computes. + assert_eq!(props.get(TOTAL_RECORDS).unwrap(), "120"); + } + #[test] fn test_update_totals_computed_when_no_previous_summary() { let new_props: HashMap = [ diff --git a/crates/iceberg/src/transaction/append.rs b/crates/iceberg/src/transaction/append.rs index 36fde117ab..546e1a2d64 100644 --- a/crates/iceberg/src/transaction/append.rs +++ b/crates/iceberg/src/transaction/append.rs @@ -413,6 +413,60 @@ mod tests { ); } + #[tokio::test] + async fn test_snapshot_properties_cannot_override_computed_metrics() { + // A user-supplied snapshot property must not shadow a computed metric key + // such as `added-data-files`. Matching iceberg-java, the computed value + // wins, so the summary reflects the real count and a bad value can neither + // corrupt the summary nor panic total computation (see #2184-adjacent fix). + let table = make_v2_minimal_table(); + let tx = Transaction::new(&table); + + let mut snapshot_properties = HashMap::new(); + // Both a benign-but-wrong value and a non-integer value collide with + // computed metric keys; neither should reach the final summary. + snapshot_properties.insert("added-data-files".to_string(), "9999".to_string()); + snapshot_properties.insert("added-records".to_string(), "not-a-number".to_string()); + + let data_file = DataFileBuilder::default() + .content(DataContentType::Data) + .file_path("test/1.parquet".to_string()) + .file_format(DataFileFormat::Parquet) + .file_size_in_bytes(100) + .record_count(1) + .partition_spec_id(table.metadata().default_partition_spec_id()) + .partition(Struct::from_iter([Some(Literal::long(300))])) + .build() + .unwrap(); + + let action = tx + .fast_append() + .set_snapshot_properties(snapshot_properties) + .add_data_files(vec![data_file]); + // Must not panic during total computation. + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + + let new_snapshot = if let TableUpdate::AddSnapshot { snapshot } = &updates[0] { + snapshot + } else { + unreachable!() + }; + let props = &new_snapshot.summary().additional_properties; + + // Computed metric wins over the user's colliding values. + assert_eq!( + props.get("added-data-files").unwrap(), + "1", + "computed added-data-files must override the user-supplied value" + ); + assert_eq!( + props.get("added-records").unwrap(), + "1", + "computed added-records must override the user-supplied non-integer value" + ); + } + #[tokio::test] async fn test_append_snapshot_properties() { let table = make_v2_minimal_table(); diff --git a/crates/iceberg/src/transaction/snapshot.rs b/crates/iceberg/src/transaction/snapshot.rs index 8e47226072..0a2621cc45 100644 --- a/crates/iceberg/src/transaction/snapshot.rs +++ b/crates/iceberg/src/transaction/snapshot.rs @@ -404,8 +404,18 @@ impl<'a> SnapshotProducer<'a> { let previous_snapshot = table_metadata.current_snapshot(); - let mut additional_properties = summary_collector.build(); - additional_properties.extend(self.snapshot_properties.clone()); + // User-supplied snapshot properties are applied first, then the computed + // metrics overwrite any colliding keys. This matches iceberg-java + // (`SnapshotProducer.summary`), where computed `added-*`/`total-*` values + // are written after user properties so a user cannot shadow them with a + // bad (or merely wrong) value that would corrupt the snapshot summary. + // User-supplied snapshot properties are applied first, then the computed + // metrics overwrite any colliding keys. This matches iceberg-java + // (`SnapshotProducer.summary`), where computed `added-*`/`total-*` values + // are written after user properties so a user cannot shadow them with a + // bad (or merely wrong) value that would corrupt the snapshot summary. + let mut additional_properties = self.snapshot_properties.clone(); + additional_properties.extend(summary_collector.build()); let summary = Summary { operation: snapshot_produce_operation.operation(),