Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions core/src/main/java/org/apache/iceberg/TrackedFileStruct.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.Set;
import org.apache.iceberg.avro.SupportsIndexProjection;
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.ArrayUtil;
Expand Down Expand Up @@ -162,6 +163,14 @@ private TrackedFileStruct(TrackedFileStruct toCopy, boolean withStats, Set<Integ
: null;
}

/** Sets the first row id on this {@link TrackedFileStruct} */
public void setFirstRowId(int firstRowId) {
Preconditions.checkState(
contentType == FileContent.DATA || contentType == FileContent.DATA_MANIFEST,
"Cannot set first row id on an equality delete or delete manifest entry");
((TrackingStruct) tracking).setFirstRowId(firstRowId);
}

@Override
public Tracking tracking() {
return tracking;
Expand Down
8 changes: 8 additions & 0 deletions core/src/main/java/org/apache/iceberg/TrackingStruct.java
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,14 @@ void inheritFrom(Tracking manifestTracking) {
}
}

void setFirstRowId(long newFirstRowId) {
Preconditions.checkArgument(
newFirstRowId >= 0, "Invalid first row ID: %s (must be >= 0)", newFirstRowId);
Preconditions.checkState(status != EntryStatus.ADDED, "Cannot set first row id on ADDED entry");
Preconditions.checkState(firstRowId == null, "First row ID is already set");
this.firstRowId = newFirstRowId;
}

void setManifestLocation(String location) {
this.manifestLocation = location;
}
Expand Down
52 changes: 52 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.iceberg;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import java.io.IOException;
import java.nio.ByteBuffer;
Expand All @@ -28,6 +29,8 @@
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.types.Types;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;

class TestTrackedFileStruct {
private static final int WRITER_FORMAT_VERSION_V4 = 4;
Expand Down Expand Up @@ -325,6 +328,48 @@ void testAllFileContentTypesSupported() {
}
}

@ParameterizedTest
@EnumSource(
value = FileContent.class,
names = {"DATA", "DATA_MANIFEST"})
public void testSettingFirstRowId(FileContent content) {
TrackedFileStruct file =
new TrackedFileStruct(
existingTracking(),
content,
WRITER_FORMAT_VERSION_V4,
"s3://bucket/data/file",
FileFormat.PARQUET,
new PartitionData(Types.StructType.of()),
0L,
0L);

file.setFirstRowId(42);

assertThat(file.tracking().firstRowId()).isEqualTo(42L);
}

@ParameterizedTest
@EnumSource(
value = FileContent.class,
names = {"EQUALITY_DELETES", "DELETE_MANIFEST"})
public void testSettingFirstRowIdNotAllowedForSomeContentTypes(FileContent content) {
TrackedFileStruct file =
new TrackedFileStruct(
existingTracking(),
content,
WRITER_FORMAT_VERSION_V4,
"s3://bucket/data/file",
FileFormat.PARQUET,
new PartitionData(Types.StructType.of()),
0L,
0L);

assertThatThrownBy(() -> file.setFirstRowId(42))
.isInstanceOf(IllegalStateException.class)
.hasMessage("Cannot set first row id on an equality delete or delete manifest entry");
}

@Test
void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException {
TrackedFileStruct file = createFullTrackedFile();
Expand Down Expand Up @@ -412,6 +457,13 @@ private static PartitionData newPartition(int idBucket, String category) {
return partition;
}

private static TrackingStruct existingTracking() {
TrackingStruct source = (TrackingStruct) TrackingBuilder.added(5L).build();
source.set(2, 10L);
source.set(3, 11L);
return (TrackingStruct) TrackingBuilder.from(source, 20L).build();
}

static TrackedFileStruct createTrackedFileWithStats() {
Types.StructType statsStruct =
Types.StructType.of(
Expand Down
57 changes: 57 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestTrackingStruct.java
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,63 @@ void testInheritFromNullIsNoOp() {
assertThat(tracking.fileSequenceNumber()).isNull();
}

@Test
public void testFirstRowIdIsNullInitially() {
TrackingStruct source = (TrackingStruct) TrackingBuilder.added(5L).build();
source.set(DATA_SEQUENCE_NUMBER_ORDINAL, 10L);
source.set(FILE_SEQUENCE_NUMBER_ORDINAL, 11L);

assertThat(source.firstRowId()).isNull();

TrackingStruct existing = (TrackingStruct) TrackingBuilder.from(source, 20L).build();

assertThat(existing.firstRowId()).isNull();
}

@Test
public void testSettingFirstRowId() {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about adding a test case for setFirstRowId(0L) to demonstrate the boundary of the >= 0 check?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed one of the tests to use zero for input. Thx

TrackingStruct source = (TrackingStruct) TrackingBuilder.added(5L).build();
source.set(DATA_SEQUENCE_NUMBER_ORDINAL, 10L);
source.set(FILE_SEQUENCE_NUMBER_ORDINAL, 11L);

TrackingStruct existing = (TrackingStruct) TrackingBuilder.from(source, 20L).build();
existing.setFirstRowId(150L);

assertThat(existing.firstRowId()).isEqualTo(150L);
}

@Test
public void testSettingFirstRowIdForAddedEntry() {
TrackingStruct added = (TrackingStruct) TrackingBuilder.added(5L).build();

assertThatThrownBy(() -> added.setFirstRowId(12L))
.isInstanceOf(IllegalStateException.class)
.hasMessage("Cannot set first row id on ADDED entry");
}

@Test
public void testRejectSettingFirstRowIdIfAlreadySet() {
TrackingStruct source = (TrackingStruct) TrackingBuilder.added(5L).build();
source.set(DATA_SEQUENCE_NUMBER_ORDINAL, 10L);
source.set(FILE_SEQUENCE_NUMBER_ORDINAL, 11L);

TrackingStruct existing = (TrackingStruct) TrackingBuilder.from(source, 20L).build();
existing.setFirstRowId(0L);

assertThatThrownBy(() -> existing.setFirstRowId(200L))
.isInstanceOf(IllegalStateException.class)
.hasMessage("First row ID is already set");
}

@Test
public void testInvalidFirstRowId() {
TrackingStruct tracking = (TrackingStruct) TrackingBuilder.added(5L).build();

assertThatThrownBy(() -> tracking.setFirstRowId(-1L))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("Invalid first row ID: -1 (must be >= 0)");
}

private static Tracking createManifestTracking(long snapshotId, long sequenceNumber) {
TrackingStruct tracking = new TrackingStruct(Tracking.schema());
tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id());
Expand Down