From 04a8ea85afad831360b80dd6b4129be6b6ee8604 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 26 Dec 2018 15:19:24 +0300 Subject: [PATCH 1/3] added index interface and factory --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 1 + dbms/src/Storages/MergeTree/MergeTreeData.h | 1 + .../Storages/MergeTree/MergeTreeIndexes.cpp | 31 ++++++++++ .../src/Storages/MergeTree/MergeTreeIndexes.h | 60 +++++++++++++++++++ .../MergeTree/registerStorageMergeTree.cpp | 12 +++- dbms/src/Storages/StorageMergeTree.cpp | 3 +- dbms/src/Storages/StorageMergeTree.h | 1 + 7 files changed, 105 insertions(+), 4 deletions(-) create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexes.h diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index b65d23f47e17..b8d80efde664 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -94,6 +94,7 @@ MergeTreeData::MergeTreeData( const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, + const ASTs & indexes_ast_, const MergingParams & merging_params_, const MergeTreeSettings & settings_, bool require_part_metadata_, diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index b8f01c400778..2587a7ad0852 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -306,6 +306,7 @@ class MergeTreeData : public ITableDeclaration const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported. + const ASTs & indexes_ast_, const MergingParams & merging_params_, const MergeTreeSettings & settings_, bool require_part_metadata_, diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp new file mode 100644 index 000000000000..c685ebc1b715 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp @@ -0,0 +1,31 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_QUERY; +} + +void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator) { + if (!indexes.emplace(name, std::move(creator)).second) + throw Exception("MergeTreeIndexFactory: the Index creator name '" + name + "' is not unique", + ErrorCodes::LOGICAL_ERROR); +} + +IMergeTreeIndex MergeTreeIndexFactory::get(const ASTIndexDeclaration & node) const { + if (!node.type) + throw Exception( + "for INDEX TYPE is required", + ErrorCodes::INCORRECT_QUERY); + auto it = indexes.find(node.type->name); + if (it == indexes.end()) + throw Exception( + "Unknown Index type '" + node.type->name + "'", + ErrorCodes::INCORRECT_QUERY); + return it->second(node); +} + +} \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h new file mode 100644 index 000000000000..b6bb008d808b --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -0,0 +1,60 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class IMergeTreeIndex; +using MergeTreeIndexes = std::vector; + + +/// Interface for secondary MergeTree indexes +class IMergeTreeIndex +{ +public: + virtual void load(const MergeTreeData & storage, const String & part_path) = 0; + virtual void store(const MergeTreeData & storage, const String & part_path, + MergeTreeDataPartChecksums & checksums) const = 0; + + virtual void update(const Block & block, const Names & column_names) = 0; + virtual void merge(const IMergeTreeIndex & other) = 0; + + virtual bool alwaysUnknownOrTrue() const = 0; + virtual bool maybeTrue() const = 0; + + String name; + ExpressionActionsPtr expr; + Block header; +}; + + +class MergeTreeIndexFactory : public ext::singleton +{ + friend class ext::singleton; + +public: + using Creator = std::function; + +protected: + MergeTreeIndexFactory() {}; + + IMergeTreeIndex get(const ASTIndexDeclaration & node) const; + + void registerIndex(const std::string & name, Creator creator); + + const auto & getAllIndexes() const { + return indexes; + } + +private: + using Indexes = std::unordered_map; + Indexes indexes; +}; + +} \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index 54b092fdb620..a3da18857714 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -336,7 +336,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) */ bool is_extended_storage_def = - args.storage_def->partition_by || args.storage_def->primary_key || args.storage_def->order_by || args.storage_def->sample_by || args.storage_def->settings; + args.storage_def->partition_by || args.storage_def->primary_key || args.storage_def->order_by + || args.storage_def->sample_by || !args.storage_def->indexes.empty() || args.storage_def->settings; String name_part = args.engine_name.substr(0, args.engine_name.size() - strlen("MergeTree")); @@ -559,6 +560,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) ASTPtr order_by_ast; ASTPtr primary_key_ast; ASTPtr sample_by_ast; + ASTs indexes_ast; MergeTreeSettings storage_settings = args.context.getMergeTreeSettings(); if (is_extended_storage_def) @@ -579,6 +581,10 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (args.storage_def->sample_by) sample_by_ast = args.storage_def->sample_by->ptr(); + for (auto& index : args.storage_def->indexes) { + indexes_ast.push_back(index->ptr()); + } + storage_settings.loadFromQuery(*args.storage_def); } else @@ -615,13 +621,13 @@ static StoragePtr create(const StorageFactory::Arguments & args) zookeeper_path, replica_name, args.attach, args.data_path, args.database_name, args.table_name, args.columns, args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast, - sample_by_ast, merging_params, storage_settings, + sample_by_ast, indexes_ast, merging_params, storage_settings, args.has_force_restore_data_flag); else return StorageMergeTree::create( args.data_path, args.database_name, args.table_name, args.columns, args.attach, args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast, - sample_by_ast, merging_params, storage_settings, + sample_by_ast, indexes_ast, merging_params, storage_settings, args.has_force_restore_data_flag); } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 6ee1e7ca9c9b..28d157b6e496 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -51,6 +51,7 @@ StorageMergeTree::StorageMergeTree( const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported. + const ASTs & indexes_ast_, const MergeTreeData::MergingParams & merging_params_, const MergeTreeSettings & settings_, bool has_force_restore_data_flag) @@ -59,7 +60,7 @@ StorageMergeTree::StorageMergeTree( data(database_name, table_name, full_path, columns_, context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_, - sample_by_ast_, merging_params_, + sample_by_ast_, indexes_ast_, merging_params_, settings_, false, attach), reader(data), writer(data), merger_mutator(data, context.getBackgroundPool()), log(&Logger::get(database_name_ + "." + table_name + " (StorageMergeTree)")) diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index c80c06a9758e..7354829bfc78 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -174,6 +174,7 @@ class StorageMergeTree : public ext::shared_ptr_helper, public const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported. + const ASTs & indexes_ast_, const MergeTreeData::MergingParams & merging_params_, const MergeTreeSettings & settings_, bool has_force_restore_data_flag); From b62197b1ad4830cd771aa8abc92843a23b13aa2a Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 26 Dec 2018 15:49:05 +0300 Subject: [PATCH 2/3] fixed compilation --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 3 ++- dbms/src/Storages/StorageReplicatedMergeTree.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index b8d80efde664..42df9ec56c11 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -94,7 +94,7 @@ MergeTreeData::MergeTreeData( const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, - const ASTs & indexes_ast_, + const ASTs &, const MergingParams & merging_params_, const MergeTreeSettings & settings_, bool require_part_metadata_, diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index afe8cbc02abf..3e93b3f75288 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -203,6 +203,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, + const ASTs & indexes_ast_, const MergeTreeData::MergingParams & merging_params_, const MergeTreeSettings & settings_, bool has_force_restore_data_flag) @@ -214,7 +215,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( data(database_name, table_name, full_path, columns_, context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_, - sample_by_ast_, merging_params_, + sample_by_ast_, indexes_ast_, merging_params_, settings_, true, attach, [this] (const std::string & name) { enqueuePartForCheck(name); }), reader(data), writer(data), merger_mutator(data, context.getBackgroundPool()), queue(*this), diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 753be7f088b7..b91646443359 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -548,6 +548,7 @@ class StorageReplicatedMergeTree : public ext::shared_ptr_helper Date: Wed, 26 Dec 2018 20:34:44 +0300 Subject: [PATCH 3/3] ptrs --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 8 ++++- dbms/src/Storages/MergeTree/MergeTreeData.h | 4 +++ .../Storages/MergeTree/MergeTreeIndexes.cpp | 8 ++--- .../src/Storages/MergeTree/MergeTreeIndexes.h | 32 +++++++++++-------- 4 files changed, 33 insertions(+), 19 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 42df9ec56c11..9ddb17f70390 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -94,7 +94,7 @@ MergeTreeData::MergeTreeData( const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, - const ASTs &, + const ASTs & indexes_ast_, const MergingParams & merging_params_, const MergeTreeSettings & settings_, bool require_part_metadata_, @@ -186,6 +186,12 @@ MergeTreeData::MergeTreeData( throw Exception( "MergeTree data format version on disk doesn't support custom partitioning", ErrorCodes::METADATA_MISMATCH); + + for (const auto & index_ast : indexes_ast_) { + indexes.push_back( + std::move(MergeTreeIndexFactory::instance().get( + std::dynamic_pointer_cast(index_ast)))); + } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 2587a7ad0852..a49648ad6829 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -579,6 +580,9 @@ class MergeTreeData : public ITableDeclaration Int64 minmax_idx_date_column_pos = -1; /// In a common case minmax index includes a date column. Int64 minmax_idx_time_column_pos = -1; /// In other cases, minmax index often includes a dateTime column. + /// Secondary indexes for MergeTree + MergeTreeIndexes indexes; + /// Names of columns for primary key + secondary sorting columns. Names sorting_key_columns; ASTPtr sorting_key_expr_ast; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp index c685ebc1b715..de665bedc5fc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp @@ -15,15 +15,15 @@ void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creat ErrorCodes::LOGICAL_ERROR); } -IMergeTreeIndex MergeTreeIndexFactory::get(const ASTIndexDeclaration & node) const { - if (!node.type) +std::unique_ptr MergeTreeIndexFactory::get(std::shared_ptr node) const { + if (!node->type) throw Exception( "for INDEX TYPE is required", ErrorCodes::INCORRECT_QUERY); - auto it = indexes.find(node.type->name); + auto it = indexes.find(node->type->name); if (it == indexes.end()) throw Exception( - "Unknown Index type '" + node.type->name + "'", + "Unknown Index type '" + node->type->name + "'", ErrorCodes::INCORRECT_QUERY); return it->second(node); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index b6bb008d808b..860e5747f3d4 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -3,27 +3,28 @@ #include #include #include -#include +#include +#include +#include +#include #include #include namespace DB { -class IMergeTreeIndex; -using MergeTreeIndexes = std::vector; - /// Interface for secondary MergeTree indexes -class IMergeTreeIndex +class MergeTreeIndex { public: - virtual void load(const MergeTreeData & storage, const String & part_path) = 0; - virtual void store(const MergeTreeData & storage, const String & part_path, - MergeTreeDataPartChecksums & checksums) const = 0; + virtual ~MergeTreeIndex() {}; + + virtual void load(const String & part_path) = 0; + virtual void store(const String & part_path, MergeTreeDataPartChecksums & checksums) const = 0; virtual void update(const Block & block, const Names & column_names) = 0; - virtual void merge(const IMergeTreeIndex & other) = 0; + virtual void merge(const MergeTreeIndex & other) = 0; virtual bool alwaysUnknownOrTrue() const = 0; virtual bool maybeTrue() const = 0; @@ -33,18 +34,18 @@ class IMergeTreeIndex Block header; }; +using MergeTreeIndexPtr = std::unique_ptr; +using MergeTreeIndexes = std::vector; + class MergeTreeIndexFactory : public ext::singleton { friend class ext::singleton; public: - using Creator = std::function; + using Creator = std::function(std::shared_ptr node)>; -protected: - MergeTreeIndexFactory() {}; - - IMergeTreeIndex get(const ASTIndexDeclaration & node) const; + std::unique_ptr get(std::shared_ptr node) const; void registerIndex(const std::string & name, Creator creator); @@ -52,6 +53,9 @@ class MergeTreeIndexFactory : public ext::singleton return indexes; } +protected: + MergeTreeIndexFactory() {}; + private: using Indexes = std::unordered_map; Indexes indexes;