diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java b/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java index 3c350b89373d..5b6007d49375 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java @@ -25,6 +25,7 @@ import java.util.Set; import org.apache.iceberg.avro.SupportsIndexProjection; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.ArrayUtil; @@ -162,6 +163,14 @@ private TrackedFileStruct(TrackedFileStruct toCopy, boolean withStats, Set= 0, "Invalid first row ID: %s (must be >= 0)", newFirstRowId); + Preconditions.checkState(status != EntryStatus.ADDED, "Cannot set first row id on ADDED entry"); + Preconditions.checkState(firstRowId == null, "First row ID is already set"); + this.firstRowId = newFirstRowId; + } + void setManifestLocation(String location) { this.manifestLocation = location; } diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java index 0b725a39fb6b..b0ae0b8636e6 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java @@ -19,6 +19,7 @@ package org.apache.iceberg; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.IOException; import java.nio.ByteBuffer; @@ -28,6 +29,8 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.types.Types; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; class TestTrackedFileStruct { private static final int WRITER_FORMAT_VERSION_V4 = 4; @@ -325,6 +328,48 @@ void testAllFileContentTypesSupported() { } } + @ParameterizedTest + @EnumSource( + value = FileContent.class, + names = {"DATA", "DATA_MANIFEST"}) + public void testSettingFirstRowId(FileContent content) { + TrackedFileStruct file = + new TrackedFileStruct( + existingTracking(), + content, + WRITER_FORMAT_VERSION_V4, + "s3://bucket/data/file", + FileFormat.PARQUET, + new PartitionData(Types.StructType.of()), + 0L, + 0L); + + file.setFirstRowId(42); + + assertThat(file.tracking().firstRowId()).isEqualTo(42L); + } + + @ParameterizedTest + @EnumSource( + value = FileContent.class, + names = {"EQUALITY_DELETES", "DELETE_MANIFEST"}) + public void testSettingFirstRowIdNotAllowedForSomeContentTypes(FileContent content) { + TrackedFileStruct file = + new TrackedFileStruct( + existingTracking(), + content, + WRITER_FORMAT_VERSION_V4, + "s3://bucket/data/file", + FileFormat.PARQUET, + new PartitionData(Types.StructType.of()), + 0L, + 0L); + + assertThatThrownBy(() -> file.setFirstRowId(42)) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Cannot set first row id on an equality delete or delete manifest entry"); + } + @Test void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException { TrackedFileStruct file = createFullTrackedFile(); @@ -412,6 +457,13 @@ private static PartitionData newPartition(int idBucket, String category) { return partition; } + private static TrackingStruct existingTracking() { + TrackingStruct source = (TrackingStruct) TrackingBuilder.added(5L).build(); + source.set(2, 10L); + source.set(3, 11L); + return (TrackingStruct) TrackingBuilder.from(source, 20L).build(); + } + static TrackedFileStruct createTrackedFileWithStats() { Types.StructType statsStruct = Types.StructType.of( diff --git a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java index 0d1803022a23..af6e5cf843e5 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java @@ -211,6 +211,63 @@ void testInheritFromNullIsNoOp() { assertThat(tracking.fileSequenceNumber()).isNull(); } + @Test + public void testFirstRowIdIsNullInitially() { + TrackingStruct source = (TrackingStruct) TrackingBuilder.added(5L).build(); + source.set(DATA_SEQUENCE_NUMBER_ORDINAL, 10L); + source.set(FILE_SEQUENCE_NUMBER_ORDINAL, 11L); + + assertThat(source.firstRowId()).isNull(); + + TrackingStruct existing = (TrackingStruct) TrackingBuilder.from(source, 20L).build(); + + assertThat(existing.firstRowId()).isNull(); + } + + @Test + public void testSettingFirstRowId() { + TrackingStruct source = (TrackingStruct) TrackingBuilder.added(5L).build(); + source.set(DATA_SEQUENCE_NUMBER_ORDINAL, 10L); + source.set(FILE_SEQUENCE_NUMBER_ORDINAL, 11L); + + TrackingStruct existing = (TrackingStruct) TrackingBuilder.from(source, 20L).build(); + existing.setFirstRowId(150L); + + assertThat(existing.firstRowId()).isEqualTo(150L); + } + + @Test + public void testSettingFirstRowIdForAddedEntry() { + TrackingStruct added = (TrackingStruct) TrackingBuilder.added(5L).build(); + + assertThatThrownBy(() -> added.setFirstRowId(12L)) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Cannot set first row id on ADDED entry"); + } + + @Test + public void testRejectSettingFirstRowIdIfAlreadySet() { + TrackingStruct source = (TrackingStruct) TrackingBuilder.added(5L).build(); + source.set(DATA_SEQUENCE_NUMBER_ORDINAL, 10L); + source.set(FILE_SEQUENCE_NUMBER_ORDINAL, 11L); + + TrackingStruct existing = (TrackingStruct) TrackingBuilder.from(source, 20L).build(); + existing.setFirstRowId(0L); + + assertThatThrownBy(() -> existing.setFirstRowId(200L)) + .isInstanceOf(IllegalStateException.class) + .hasMessage("First row ID is already set"); + } + + @Test + public void testInvalidFirstRowId() { + TrackingStruct tracking = (TrackingStruct) TrackingBuilder.added(5L).build(); + + assertThatThrownBy(() -> tracking.setFirstRowId(-1L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid first row ID: -1 (must be >= 0)"); + } + private static Tracking createManifestTracking(long snapshotId, long sequenceNumber) { TrackingStruct tracking = new TrackingStruct(Tracking.schema()); tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id());