Remove deprecated iceberg.use-file-size-from-metadata catalog config property #24063

Draft · wants to merge 1 commit into base: master

docs/src/main/sphinx/connector/iceberg.md (0 additions, 6 deletions)
@@ -107,12 +107,6 @@ implementation is used:
* `ZSTD`
* `GZIP`
- `ZSTD`
* - `iceberg.use-file-size-from-metadata`
- Read file sizes from metadata instead of file system. This property must
only be used as a workaround for [this
issue](https://github.com/apache/iceberg/issues/1980). The problem was fixed
in Iceberg version 0.11.0.
- `true`
* - `iceberg.max-partitions-per-writer`
- Maximum number of partitions handled per writer.
- `100`
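
The removed rows describe a workaround toggle: with the property left at its default of `true`, Trino trusted the data-file length recorded in Iceberg metadata; set to `false`, it asked the file system for the length instead, at the cost of one extra call per split. A minimal sketch of that old behavior, using the Trino filesystem API that also appears in the hunks below; the helper class and method names here are illustrative, not the connector's actual code:

```java
import io.trino.filesystem.Location;
import io.trino.filesystem.TrinoFileSystem;
import io.trino.filesystem.TrinoInputFile;

final class DataFileOpener
{
    private DataFileOpener() {}

    // Old behavior controlled by iceberg.use-file-size-from-metadata:
    // true (default) -> trust the length recorded in Iceberg metadata
    // false          -> let the file system report the length (extra call per split)
    static TrinoInputFile open(TrinoFileSystem fileSystem, String path, long metadataFileSize, boolean useFileSizeFromMetadata)
    {
        return useFileSizeFromMetadata
                ? fileSystem.newInputFile(Location.of(path), metadataFileSize)
                : fileSystem.newInputFile(Location.of(path));
    }
}
```
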
IcebergConfig.java
@@ -47,6 +47,7 @@
@DefunctConfig({
"iceberg.allow-legacy-snapshot-syntax",
"iceberg.experimental.extended-statistics.enabled",
"iceberg.use-file-size-from-metadata",
})
public class IcebergConfig
{
@@ -60,7 +61,6 @@ public class IcebergConfig

private IcebergFileFormat fileFormat = PARQUET;
private HiveCompressionCodec compressionCodec = ZSTD;
private boolean useFileSizeFromMetadata = true;
private int maxPartitionsPerWriter = 100;
private boolean uniqueTableLocation = true;
private CatalogType catalogType = HIVE_METASTORE;
@@ -130,27 +130,6 @@ public IcebergConfig setCompressionCodec(HiveCompressionCodec compressionCodec)
return this;
}

@Deprecated
public boolean isUseFileSizeFromMetadata()
{
return useFileSizeFromMetadata;
}

/**
* Some Iceberg writers populate incorrect file sizes in the metadata. When
* this property is set to false, Trino ignores the stored values and fetches
* them with a getFileStatus call. This means an additional call per split,
* so it is recommended for a Trino admin to fix the metadata, rather than
* relying on this property for too long.
*/
@Deprecated
@Config("iceberg.use-file-size-from-metadata")
public IcebergConfig setUseFileSizeFromMetadata(boolean useFileSizeFromMetadata)
{
this.useFileSizeFromMetadata = useFileSizeFromMetadata;
return this;
}

@Min(1)
public int getMaxPartitionsPerWriter()
{
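
The first hunk in this file adds the property to `@DefunctConfig`, so it is not silently ignored: Airlift's configuration framework rejects a catalog whose properties file still sets a defunct property and reports it at startup. A small sketch of that pattern; `ExampleConfig` and the `example.*` property names are made up for illustration, not Trino code:

```java
import io.airlift.configuration.Config;
import io.airlift.configuration.DefunctConfig;

// Setting "example.use-file-size-from-metadata" in a catalog properties file now
// produces a configuration error naming the removed property instead of being ignored.
@DefunctConfig("example.use-file-size-from-metadata")
public class ExampleConfig
{
    private int maxPartitionsPerWriter = 100;

    public int getMaxPartitionsPerWriter()
    {
        return maxPartitionsPerWriter;
    }

    @Config("example.max-partitions-per-writer")
    public ExampleConfig setMaxPartitionsPerWriter(int maxPartitionsPerWriter)
    {
        this.maxPartitionsPerWriter = maxPartitionsPerWriter;
        return this;
    }
}
```
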
IcebergPageSourceProvider.java
@@ -155,7 +155,6 @@
import static io.trino.plugin.iceberg.IcebergSessionProperties.isOrcNestedLazy;
import static io.trino.plugin.iceberg.IcebergSessionProperties.isParquetIgnoreStatistics;
import static io.trino.plugin.iceberg.IcebergSessionProperties.isParquetVectorizedDecodingEnabled;
import static io.trino.plugin.iceberg.IcebergSessionProperties.isUseFileSizeFromMetadata;
import static io.trino.plugin.iceberg.IcebergSessionProperties.useParquetBloomFilter;
import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD;
import static io.trino.plugin.iceberg.IcebergSplitSource.partitionMatchesPredicate;
@@ -338,9 +337,7 @@ else if (identity.getId() == TRINO_MERGE_PARTITION_DATA) {
}

TrinoFileSystem fileSystem = fileSystemFactory.create(session.getIdentity(), fileIoProperties);
TrinoInputFile inputfile = isUseFileSizeFromMetadata(session)
? fileSystem.newInputFile(Location.of(path), fileSize)
: fileSystem.newInputFile(Location.of(path));
TrinoInputFile inputfile = fileSystem.newInputFile(Location.of(path));
Review comment (Member):

fileSystem.newInputFile(Location.of(path), fileSize)

try {
if (effectivePredicate.isAll() &&
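
The inline review comment above appears to suggest keeping the length from the Iceberg metadata when opening the data file, so removing the session property does not cost a separate length lookup per split (the added `InputFile.length` expectations for `DATA` files in the test hunks further down show that cost). A sketch of the suggested variant, assuming the split-provided `fileSize` is still in scope at this point; the wrapper class and method names are hypothetical:

```java
import io.trino.filesystem.Location;
import io.trino.filesystem.TrinoFileSystem;
import io.trino.filesystem.TrinoInputFile;

final class SuggestedChange
{
    private SuggestedChange() {}

    // Reviewer's suggestion: pass the metadata-provided size so the reader does not
    // need to ask storage for InputFile.length() on every split.
    static TrinoInputFile openDataFile(TrinoFileSystem fileSystem, String path, long fileSize)
    {
        return fileSystem.newInputFile(Location.of(path), fileSize);
    }
}
```
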
IcebergSessionProperties.java
@@ -66,7 +66,6 @@ public final class IcebergSessionProperties
{
public static final String SPLIT_SIZE = "experimental_split_size";
private static final String COMPRESSION_CODEC = "compression_codec";
private static final String USE_FILE_SIZE_FROM_METADATA = "use_file_size_from_metadata";
private static final String ORC_BLOOM_FILTERS_ENABLED = "orc_bloom_filters_enabled";
private static final String ORC_MAX_MERGE_DISTANCE = "orc_max_merge_distance";
private static final String ORC_MAX_BUFFER_SIZE = "orc_max_buffer_size";
@@ -134,11 +133,6 @@ public IcebergSessionProperties(
HiveCompressionCodec.class,
icebergConfig.getCompressionCodec(),
false))
.add(booleanProperty(
USE_FILE_SIZE_FROM_METADATA,
"Use file size stored in Iceberg metadata",
icebergConfig.isUseFileSizeFromMetadata(),
false))
.add(booleanProperty(
ORC_BLOOM_FILTERS_ENABLED,
"ORC: Enable bloom filters for predicate pushdown",
@@ -498,11 +492,6 @@ public static HiveCompressionCodec getCompressionCodec(ConnectorSession session)
return session.getProperty(COMPRESSION_CODEC, HiveCompressionCodec.class);
}

public static boolean isUseFileSizeFromMetadata(ConnectorSession session)
{
return session.getProperty(USE_FILE_SIZE_FROM_METADATA, Boolean.class);
}

public static DataSize getParquetMaxReadBlockSize(ConnectorSession session)
{
return session.getProperty(PARQUET_MAX_READ_BLOCK_SIZE, DataSize.class);
@@ -4533,16 +4533,7 @@ public void testIncorrectIcebergFileSizes()
}
fileSystem.newOutputFile(Location.of(manifestFile)).createOrOverwrite(out.toByteArray());

// Ignoring Iceberg provided file size makes the query succeed
Session session = Session.builder(getSession())
.setCatalogSessionProperty("iceberg", "use_file_size_from_metadata", "false")
.build();
assertQuery(session, "SELECT * FROM test_iceberg_file_size", "VALUES (123), (456), (758)");

// Using Iceberg provided file size fails the query
assertQueryFails(
"SELECT * FROM test_iceberg_file_size",
"(Malformed ORC file\\. Invalid file metadata.*)|(.*Malformed Parquet file.*)");
assertQuery("SELECT * FROM test_iceberg_file_size", "VALUES (123), (456), (758)");

assertUpdate("DROP TABLE test_iceberg_file_size");
}
@@ -88,6 +88,7 @@ public void testCacheFileOperations()
assertFileSystemAccesses(
"SELECT * FROM test_cache_file_operations",
ImmutableMultiset.<CacheOperation>builder()
.addCopies(new CacheOperation("InputFile.length", DATA), 2)
.addCopies(new CacheOperation("Input.readFully", DATA), 2)
.addCopies(new CacheOperation("Alluxio.readCached", DATA), 2)
.addCopies(new CacheOperation("Alluxio.writeCache", DATA), 2)
@@ -105,6 +106,7 @@ public void testCacheFileOperations()
assertFileSystemAccesses(
"SELECT * FROM test_cache_file_operations",
ImmutableMultiset.<CacheOperation>builder()
.addCopies(new CacheOperation("InputFile.length", DATA), 2)
.addCopies(new CacheOperation("Alluxio.readCached", DATA), 2)
.add(new CacheOperation("Alluxio.readCached", METADATA_JSON))
.add(new CacheOperation("InputFile.length", METADATA_JSON))
@@ -120,6 +122,7 @@ public void testCacheFileOperations()
assertFileSystemAccesses(
"SELECT * FROM test_cache_file_operations",
ImmutableMultiset.<CacheOperation>builder()
.addCopies(new CacheOperation("InputFile.length", DATA), 5)
.addCopies(new CacheOperation("Input.readFully", DATA), 3)
.addCopies(new CacheOperation("Alluxio.readCached", DATA), 5)
.addCopies(new CacheOperation("Alluxio.writeCache", DATA), 3)
@@ -136,6 +139,7 @@ public void testCacheFileOperations()
assertFileSystemAccesses(
"SELECT * FROM test_cache_file_operations",
ImmutableMultiset.<CacheOperation>builder()
.addCopies(new CacheOperation("InputFile.length", DATA), 5)
.addCopies(new CacheOperation("Alluxio.readCached", DATA), 5)
.addCopies(new CacheOperation("Alluxio.readCached", METADATA_JSON), 2)
.addCopies(new CacheOperation("Alluxio.readCached", SNAPSHOT), 2)
TestIcebergConfig.java
@@ -47,7 +47,6 @@ public void testDefaults()
assertRecordedDefaults(recordDefaults(IcebergConfig.class)
.setFileFormat(PARQUET)
.setCompressionCodec(ZSTD)
.setUseFileSizeFromMetadata(true)
.setMaxPartitionsPerWriter(100)
.setUniqueTableLocation(true)
.setCatalogType(HIVE_METASTORE)
@@ -83,7 +82,6 @@ public void testExplicitPropertyMappings()
Map<String, String> properties = ImmutableMap.<String, String>builder()
.put("iceberg.file-format", "ORC")
.put("iceberg.compression-codec", "NONE")
.put("iceberg.use-file-size-from-metadata", "false")
.put("iceberg.max-partitions-per-writer", "222")
.put("iceberg.unique-table-location", "false")
.put("iceberg.catalog.type", "GLUE")
@@ -116,7 +114,6 @@ public void testExplicitPropertyMappings()
IcebergConfig expected = new IcebergConfig()
.setFileFormat(ORC)
.setCompressionCodec(HiveCompressionCodec.NONE)
.setUseFileSizeFromMetadata(false)
.setMaxPartitionsPerWriter(222)
.setUniqueTableLocation(false)
.setCatalogType(GLUE)
@@ -325,6 +325,7 @@ public void testReadWholePartition()
.add(new FileOperation(SNAPSHOT, "InputFile.length"))
.add(new FileOperation(SNAPSHOT, "InputFile.newStream"))
.addCopies(new FileOperation(DATA, "InputFile.newInput"), 4)
.addCopies(new FileOperation(DATA, "InputFile.length"), 4)
.build());

// Read partition column only
@@ -336,6 +337,7 @@ public void testReadWholePartition()
.add(new FileOperation(METADATA_JSON, "InputFile.newStream"))
.add(new FileOperation(SNAPSHOT, "InputFile.length"))
.add(new FileOperation(SNAPSHOT, "InputFile.newStream"))
.addCopies(new FileOperation(DATA, "InputFile.length"), 4)
.build());

// Read partition column only, one partition only
@@ -347,6 +349,7 @@ public void testReadWholePartition()
.add(new FileOperation(METADATA_JSON, "InputFile.newStream"))
.add(new FileOperation(SNAPSHOT, "InputFile.length"))
.add(new FileOperation(SNAPSHOT, "InputFile.newStream"))
.addCopies(new FileOperation(DATA, "InputFile.length"), 2)
.build());

// Read partition and synthetic columns
@@ -361,6 +364,7 @@ public void testReadWholePartition()
// TODO return synthetic columns without opening the data files
.addCopies(new FileOperation(DATA, "InputFile.newInput"), 4)
.addCopies(new FileOperation(DATA, "InputFile.lastModified"), 4)
.addCopies(new FileOperation(DATA, "InputFile.length"), 4)
.build());

// Read only row count
@@ -372,6 +376,7 @@ public void testReadWholePartition()
.add(new FileOperation(METADATA_JSON, "InputFile.newStream"))
.add(new FileOperation(SNAPSHOT, "InputFile.length"))
.add(new FileOperation(SNAPSHOT, "InputFile.newStream"))
.addCopies(new FileOperation(DATA, "InputFile.length"), 4)
.build());

assertUpdate("DROP TABLE test_read_part_key");
@@ -408,6 +413,7 @@ public void testReadWholePartitionSplittableFile()
.add(new FileOperation(METADATA_JSON, "InputFile.newStream"))
.add(new FileOperation(SNAPSHOT, "InputFile.length"))
.add(new FileOperation(SNAPSHOT, "InputFile.newStream"))
.add(new FileOperation(DATA, "InputFile.length"))
.build());

// Read only row count
@@ -420,6 +426,7 @@ public void testReadWholePartitionSplittableFile()
.add(new FileOperation(METADATA_JSON, "InputFile.newStream"))
.add(new FileOperation(SNAPSHOT, "InputFile.length"))
.add(new FileOperation(SNAPSHOT, "InputFile.newStream"))
.add(new FileOperation(DATA, "InputFile.length"))
.build());

assertUpdate("DROP TABLE test_read_whole_splittable_file");
@@ -884,6 +891,7 @@ public void testV2TableEnsureEqualityDeleteFilesAreReadOnce()

ImmutableMultiset<FileOperation> expectedAccesses = ImmutableMultiset.<FileOperationUtils.FileOperation>builder()
.addCopies(new FileOperationUtils.FileOperation(DATA, "InputFile.newInput"), 2)
.addCopies(new FileOperationUtils.FileOperation(DATA, "InputFile.length"), 2)
.addCopies(new FileOperationUtils.FileOperation(DELETE, "InputFile.newInput"), 1)
.build();

@@ -82,6 +82,8 @@ public void testCacheFileOperations()
"SELECT * FROM test_cache_file_operations",
ImmutableMultiset.<CacheOperation>builder()
.addCopies(new CacheOperation("Input.readTail", DATA), 2)
.addCopies(new CacheOperation("InputFile.length", DATA), 2)
.addCopies(new CacheOperation("FileSystemCache.cacheLength", DATA), 2)
.addCopies(new CacheOperation("FileSystemCache.cacheInput", DATA), 2)
.add(new CacheOperation("Input.readTail", METADATA_JSON))
.add(new CacheOperation("InputFile.length", METADATA_JSON))
@@ -96,6 +98,7 @@ public void testCacheFileOperations()
"SELECT * FROM test_cache_file_operations",
ImmutableMultiset.<CacheOperation>builder()
.addCopies(new CacheOperation("FileSystemCache.cacheInput", DATA), 2)
.addCopies(new CacheOperation("FileSystemCache.cacheLength", DATA), 2)
.add(new CacheOperation("FileSystemCache.cacheStream", METADATA_JSON))
.add(new CacheOperation("FileSystemCache.cacheLength", SNAPSHOT))
.add(new CacheOperation("FileSystemCache.cacheStream", SNAPSHOT))
@@ -110,7 +113,9 @@ public void testCacheFileOperations()
"SELECT * FROM test_cache_file_operations",
ImmutableMultiset.<CacheOperation>builder()
.addCopies(new CacheOperation("Input.readTail", DATA), 3)
.addCopies(new CacheOperation("InputFile.length", DATA), 3)
.addCopies(new CacheOperation("FileSystemCache.cacheInput", DATA), 5)
.addCopies(new CacheOperation("FileSystemCache.cacheLength", DATA), 5)
.add(new CacheOperation("Input.readTail", METADATA_JSON))
.add(new CacheOperation("InputFile.length", METADATA_JSON))
.add(new CacheOperation("FileSystemCache.cacheStream", METADATA_JSON))
@@ -124,6 +129,7 @@ public void testCacheFileOperations()
"SELECT * FROM test_cache_file_operations",
ImmutableMultiset.<CacheOperation>builder()
.addCopies(new CacheOperation("FileSystemCache.cacheInput", DATA), 5)
.addCopies(new CacheOperation("FileSystemCache.cacheLength", DATA), 5)
.add(new CacheOperation("FileSystemCache.cacheStream", METADATA_JSON))
.add(new CacheOperation("FileSystemCache.cacheLength", SNAPSHOT))
.add(new CacheOperation("FileSystemCache.cacheStream", SNAPSHOT))
@@ -146,7 +152,9 @@ public void testSelectWithFilter()
.add(new CacheOperation("FileSystemCache.cacheStream", MANIFEST))
.add(new CacheOperation("Input.readTail", MANIFEST))
.add(new CacheOperation("FileSystemCache.cacheInput", DATA))
.add(new CacheOperation("FileSystemCache.cacheLength", DATA))
.add(new CacheOperation("Input.readTail", DATA))
.add(new CacheOperation("InputFile.length", DATA))
.build());

assertFileSystemAccesses(
@@ -157,6 +165,7 @@ public void testSelectWithFilter()
.add(new CacheOperation("FileSystemCache.cacheLength", SNAPSHOT))
.add(new CacheOperation("FileSystemCache.cacheStream", MANIFEST))
.add(new CacheOperation("FileSystemCache.cacheInput", DATA))
.add(new CacheOperation("FileSystemCache.cacheLength", DATA))
.build());
}

@@ -176,7 +185,9 @@ public void testJoin()
.addCopies(new CacheOperation("Input.readTail", MANIFEST), 2)
.addCopies(new CacheOperation("FileSystemCache.cacheStream", MANIFEST), 4)
.addCopies(new CacheOperation("Input.readTail", DATA), 2)
.addCopies(new CacheOperation("InputFile.length", DATA), 2)
.addCopies(new CacheOperation("FileSystemCache.cacheInput", DATA), 2)
.addCopies(new CacheOperation("FileSystemCache.cacheLength", DATA), 2)
.build());

assertFileSystemAccesses("SELECT name, age FROM test_join_t1 JOIN test_join_t2 ON test_join_t2.id = test_join_t1.id",
@@ -186,6 +197,7 @@ public void testJoin()
.addCopies(new CacheOperation("FileSystemCache.cacheLength", SNAPSHOT), 2)
.addCopies(new CacheOperation("FileSystemCache.cacheStream", MANIFEST), 4)
.addCopies(new CacheOperation("FileSystemCache.cacheInput", DATA), 2)
.addCopies(new CacheOperation("FileSystemCache.cacheLength", DATA), 2)
.build());
}

@@ -15,7 +15,6 @@

import com.google.common.collect.ImmutableMap;
import com.google.common.io.Resources;
import io.trino.Session;
import io.trino.filesystem.Location;
import io.trino.testing.QueryRunner;
import io.trino.testing.containers.Minio;
@@ -137,11 +136,7 @@ private void testReadSingleIntegerColumnOrcFile(String orcFileResourceName, int
fileSystem.newOutputFile(Location.of(orcFilePath)).createOrOverwrite(orcFileData);
fileSystem.deleteFiles(List.of(Location.of(orcFilePath.replaceAll("/([^/]*)$", ".$1.crc"))));

Session ignoreFileSizeFromMetadata = Session.builder(getSession())
// The replaced and replacing file sizes may be different
.setCatalogSessionProperty(getSession().getCatalog().orElseThrow(), "use_file_size_from_metadata", "false")
.build();
assertThat(query(ignoreFileSizeFromMetadata, "TABLE " + table.getName()))
assertThat(query("TABLE " + table.getName()))
.matches("VALUES NULL, " + expectedValue);
}
}