Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[core] Make metadata.stats-dense-store default value is true #4617

Merged
merged 3 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/content/flink/sql-ddl.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ Paimon will automatically collect the statistics of the data file for speeding u
The statistics collector mode can be configured by `'metadata.stats-mode'`, by default is `'truncate(16)'`.
You can configure the field level by setting `'fields.{field_name}.stats-mode'`.

For the stats mode of `none`, we suggest that you configure `metadata.stats-dense-store` = `true`, which will
significantly reduce the storage size of the manifest.
For the stats mode of `none`, `metadata.stats-dense-store` is `true` by default, which significantly reduces the
storage size of the manifest. Note that the Paimon SDK used by the reading engine must then be at least version 0.9.1, or 1.0.0 or higher.

### Field Default Value

Expand Down
4 changes: 2 additions & 2 deletions docs/layouts/shortcodes/generated/core_configuration.html
Original file line number Diff line number Diff line change
Expand Up @@ -485,9 +485,9 @@
</tr>
<tr>
<td><h5>metadata.stats-dense-store</h5></td>
<td style="word-wrap: break-word;">false</td>
<td style="word-wrap: break-word;">true</td>
<td>Boolean</td>
<td>Whether to store statistic densely in metadata (manifest files), which will significantly reduce the storage size of metadata when the none statistic mode is set.<br />Note, when this mode is enabled, the Paimon sdk in reading engine requires at least version 0.9.1 or 1.0.0 or higher.</td>
<td>Whether to store statistic densely in metadata (manifest files), which will significantly reduce the storage size of metadata when the none statistic mode is set.<br />Note, when this mode is enabled with 'metadata.stats-mode:none', the Paimon sdk in reading engine requires at least version 0.9.1 or 1.0.0 or higher.</td>
</tr>
<tr>
<td><h5>metadata.stats-mode</h5></td>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1101,7 +1101,7 @@ public class CoreOptions implements Serializable {
public static final ConfigOption<Boolean> METADATA_STATS_DENSE_STORE =
key("metadata.stats-dense-store")
.booleanType()
.defaultValue(false)
.defaultValue(true)
.withDescription(
Description.builder()
.text(
Expand All @@ -1110,8 +1110,8 @@ public class CoreOptions implements Serializable {
+ " none statistic mode is set.")
.linebreak()
.text(
"Note, when this mode is enabled, the Paimon sdk in reading engine requires"
+ " at least version 0.9.1 or 1.0.0 or higher.")
"Note, when this mode is enabled with 'metadata.stats-mode:none', the Paimon sdk in"
+ " reading engine requires at least version 0.9.1 or 1.0.0 or higher.")
.build());

public static final ConfigOption<String> COMMIT_CALLBACKS =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,19 @@

import org.junit.jupiter.api.Test;

import static org.apache.paimon.CoreOptions.METADATA_STATS_DENSE_STORE;
import static org.apache.paimon.CoreOptions.METADATA_STATS_MODE;
import static org.assertj.core.api.Assertions.assertThat;

/** Test for table stats mode. */
public class StatsTableTest extends TableTestBase {

@Test
public void testPartitionStats() throws Exception {
public void testPartitionStatsNotDense() throws Exception {
Identifier identifier = identifier("T");
Options options = new Options();
options.set(METADATA_STATS_MODE, "NONE");
options.set(METADATA_STATS_DENSE_STORE, false);
options.set(CoreOptions.BUCKET, 1);
Schema schema =
Schema.newBuilder()
Expand Down Expand Up @@ -90,4 +92,52 @@ public void testPartitionStats() throws Exception {
assertThat(recordStats.maxValues().isNullAt(1)).isTrue();
assertThat(recordStats.maxValues().isNullAt(2)).isTrue();
}

/**
 * Writes a small partitioned primary-key table with stats mode {@code NONE} and without
 * overriding {@code metadata.stats-dense-store}, then checks that the manifest still carries
 * partition statistics while the data file carries no per-column value statistics at all.
 */
@Test
public void testPartitionStatsDenseMode() throws Exception {
    // Table options: disable column statistics and use a single bucket.
    Options tableOptions = new Options();
    tableOptions.set(METADATA_STATS_MODE, "NONE");
    tableOptions.set(CoreOptions.BUCKET, 1);

    Schema.Builder schemaBuilder = Schema.newBuilder();
    Schema tableSchema =
            schemaBuilder
                    .column("pt", DataTypes.INT())
                    .column("pk", DataTypes.INT())
                    .column("col1", DataTypes.INT())
                    .partitionKeys("pt")
                    .primaryKey("pk", "pt")
                    .options(tableOptions.toMap())
                    .build();

    Identifier tableId = identifier("T");
    catalog.createTable(tableId, tableSchema, true);
    Table table = catalog.getTable(tableId);

    // Three rows land in partition pt=1 and one row in partition pt=2.
    write(
            table,
            GenericRow.of(1, 1, 1),
            GenericRow.of(1, 2, 1),
            GenericRow.of(1, 3, 1),
            GenericRow.of(2, 1, 1));

    FileStoreTable fileStoreTable = (FileStoreTable) table;
    FileStore<?> fileStore = fileStoreTable.store();
    String deltaManifestListName =
            fileStoreTable.snapshotManager().latestSnapshot().deltaManifestList();

    // Read the first manifest referenced by the latest snapshot's delta manifest list.
    ManifestList manifestListReader = fileStore.manifestListFactory().create();
    ManifestFileMeta manifestMeta = manifestListReader.read(deltaManifestListName).get(0);

    // Partition stats must be present: pt ranges from 1 to 2.
    SimpleStats ptStats = manifestMeta.partitionStats();
    assertThat(ptStats.minValues().getInt(0)).isEqualTo(1);
    assertThat(ptStats.maxValues().getInt(0)).isEqualTo(2);

    // With NONE mode the first data file must carry empty value stats.
    ManifestFile manifestFileReader = fileStore.manifestFileFactory().create();
    DataFileMeta dataFile =
            manifestFileReader
                    .read(manifestMeta.fileName(), manifestMeta.fileSize())
                    .get(0)
                    .file();
    SimpleStats valueStats = dataFile.valueStats();
    assertThat(dataFile.valueStatsCols()).isEmpty();
    assertThat(valueStats.minValues().getFieldCount()).isEqualTo(0);
    assertThat(valueStats.maxValues().getFieldCount()).isEqualTo(0);
    assertThat(valueStats.nullCounts().size()).isEqualTo(0);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,6 @@ public void testBatchProjection() throws Exception {
public void testBatchFilter(boolean statsDenseStore) throws Exception {
Consumer<Options> optionsSetter =
options -> {
options.set(CoreOptions.METADATA_STATS_DENSE_STORE, statsDenseStore);
if (statsDenseStore) {
options.set(CoreOptions.METADATA_STATS_MODE, "none");
options.set("fields.b.stats-mode", "full");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,6 @@ public void testBatchProjection() throws Exception {
public void testBatchFilter(boolean statsDenseStore) throws Exception {
Consumer<Options> optionsSetter =
options -> {
options.set(CoreOptions.METADATA_STATS_DENSE_STORE, statsDenseStore);
if (statsDenseStore) {
// pk table doesn't need value stats
options.set(CoreOptions.METADATA_STATS_MODE, "none");
Expand Down Expand Up @@ -1664,7 +1663,6 @@ public void testReadDeletionVectorTable(boolean statsDenseStore) throws Exceptio
options.set(TARGET_FILE_SIZE, new MemorySize(1));
options.set(DELETION_VECTORS_ENABLED, true);

options.set(CoreOptions.METADATA_STATS_DENSE_STORE, statsDenseStore);
if (statsDenseStore) {
options.set(CoreOptions.METADATA_STATS_MODE, "none");
options.set("fields.b.stats-mode", "full");
Expand Down
Loading