diff --git a/CMakeLists.txt b/CMakeLists.txt index dd8af71b2..671bbb98f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -347,48 +347,48 @@ set(DUCKDB_SRC_FILES src/duckdb/third_party/zstd/dict/divsufsort.cpp src/duckdb/third_party/zstd/dict/fastcover.cpp src/duckdb/third_party/zstd/dict/zdict.cpp - src/duckdb/extension/core_functions/core_functions_extension.cpp - src/duckdb/extension/core_functions/function_list.cpp src/duckdb/extension/core_functions/lambda_functions.cpp - src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp + src/duckdb/extension/core_functions/function_list.cpp + src/duckdb/extension/core_functions/core_functions_extension.cpp + src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp src/duckdb/ub_extension_core_functions_aggregate_nested.cpp - src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp src/duckdb/ub_extension_core_functions_aggregate_regression.cpp - src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp - src/duckdb/ub_extension_core_functions_scalar_generic.cpp - src/duckdb/ub_extension_core_functions_scalar_array.cpp - src/duckdb/ub_extension_core_functions_scalar_random.cpp - src/duckdb/ub_extension_core_functions_scalar_list.cpp - src/duckdb/ub_extension_core_functions_scalar_struct.cpp - src/duckdb/ub_extension_core_functions_scalar_date.cpp - src/duckdb/ub_extension_core_functions_scalar_enum.cpp + src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp + src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp + src/duckdb/ub_extension_core_functions_scalar_string.cpp + src/duckdb/ub_extension_core_functions_scalar_bit.cpp src/duckdb/ub_extension_core_functions_scalar_operators.cpp + src/duckdb/ub_extension_core_functions_scalar_enum.cpp + src/duckdb/ub_extension_core_functions_scalar_map.cpp + src/duckdb/ub_extension_core_functions_scalar_random.cpp src/duckdb/ub_extension_core_functions_scalar_math.cpp - src/duckdb/ub_extension_core_functions_scalar_string.cpp + src/duckdb/ub_extension_core_functions_scalar_union.cpp + src/duckdb/ub_extension_core_functions_scalar_generic.cpp + src/duckdb/ub_extension_core_functions_scalar_struct.cpp + src/duckdb/ub_extension_core_functions_scalar_list.cpp + src/duckdb/ub_extension_core_functions_scalar_array.cpp src/duckdb/ub_extension_core_functions_scalar_debug.cpp - src/duckdb/ub_extension_core_functions_scalar_bit.cpp src/duckdb/ub_extension_core_functions_scalar_blob.cpp - src/duckdb/ub_extension_core_functions_scalar_union.cpp - src/duckdb/ub_extension_core_functions_scalar_map.cpp - src/duckdb/extension/parquet/parquet_crypto.cpp - src/duckdb/extension/parquet/parquet_reader.cpp - src/duckdb/extension/parquet/parquet_metadata.cpp + src/duckdb/ub_extension_core_functions_scalar_date.cpp src/duckdb/extension/parquet/parquet_writer.cpp src/duckdb/extension/parquet/zstd_file_system.cpp + src/duckdb/extension/parquet/parquet_crypto.cpp + src/duckdb/extension/parquet/parquet_reader.cpp src/duckdb/extension/parquet/parquet_timestamp.cpp + src/duckdb/extension/parquet/parquet_float16.cpp + src/duckdb/extension/parquet/parquet_statistics.cpp src/duckdb/extension/parquet/parquet_multi_file_info.cpp - src/duckdb/extension/parquet/column_writer.cpp + src/duckdb/extension/parquet/column_reader.cpp src/duckdb/extension/parquet/geo_parquet.cpp - src/duckdb/extension/parquet/parquet_file_metadata_cache.cpp - src/duckdb/extension/parquet/parquet_statistics.cpp src/duckdb/extension/parquet/parquet_extension.cpp - src/duckdb/extension/parquet/parquet_float16.cpp + src/duckdb/extension/parquet/column_writer.cpp + src/duckdb/extension/parquet/parquet_file_metadata_cache.cpp src/duckdb/extension/parquet/serialize_parquet.cpp - src/duckdb/extension/parquet/column_reader.cpp + src/duckdb/extension/parquet/parquet_metadata.cpp src/duckdb/ub_extension_parquet_decoder.cpp - src/duckdb/ub_extension_parquet_writer.cpp src/duckdb/ub_extension_parquet_reader.cpp src/duckdb/ub_extension_parquet_reader_variant.cpp + src/duckdb/ub_extension_parquet_writer.cpp src/duckdb/third_party/parquet/parquet_types.cpp src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp src/duckdb/third_party/thrift/thrift/transport/TTransportException.cpp @@ -427,32 +427,32 @@ set(DUCKDB_SRC_FILES src/duckdb/third_party/brotli/enc/metablock.cpp src/duckdb/third_party/brotli/enc/static_dict.cpp src/duckdb/third_party/brotli/enc/utf8_util.cpp - src/duckdb/extension/icu/./icu-datetrunc.cpp - src/duckdb/extension/icu/./icu-timezone.cpp - src/duckdb/extension/icu/./icu-current.cpp - src/duckdb/extension/icu/./icu-list-range.cpp - src/duckdb/extension/icu/./icu-datefunc.cpp - src/duckdb/extension/icu/./icu-strptime.cpp - src/duckdb/extension/icu/./icu-dateadd.cpp src/duckdb/extension/icu/./icu_extension.cpp - src/duckdb/extension/icu/./icu-timebucket.cpp - src/duckdb/extension/icu/./icu-table-range.cpp + src/duckdb/extension/icu/./icu-strptime.cpp src/duckdb/extension/icu/./icu-datepart.cpp src/duckdb/extension/icu/./icu-makedate.cpp + src/duckdb/extension/icu/./icu-datefunc.cpp + src/duckdb/extension/icu/./icu-current.cpp + src/duckdb/extension/icu/./icu-timezone.cpp + src/duckdb/extension/icu/./icu-table-range.cpp + src/duckdb/extension/icu/./icu-dateadd.cpp + src/duckdb/extension/icu/./icu-timebucket.cpp src/duckdb/extension/icu/./icu-datesub.cpp + src/duckdb/extension/icu/./icu-datetrunc.cpp + src/duckdb/extension/icu/./icu-list-range.cpp src/duckdb/ub_extension_icu_third_party_icu_common.cpp src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp + src/duckdb/extension/json/json_reader.cpp src/duckdb/extension/json/json_functions.cpp + src/duckdb/extension/json/json_extension.cpp + src/duckdb/extension/json/json_scan.cpp src/duckdb/extension/json/json_multi_file_info.cpp + src/duckdb/extension/json/json_enums.cpp src/duckdb/extension/json/serialize_json.cpp src/duckdb/extension/json/json_common.cpp - src/duckdb/extension/json/json_scan.cpp - src/duckdb/extension/json/json_enums.cpp - src/duckdb/extension/json/json_reader.cpp - src/duckdb/extension/json/json_serializer.cpp - src/duckdb/extension/json/json_extension.cpp src/duckdb/extension/json/json_deserializer.cpp + src/duckdb/extension/json/json_serializer.cpp src/duckdb/ub_extension_json_json_functions.cpp) set(JEMALLOC_SRC_FILES diff --git a/src/duckdb/extension/parquet/column_writer.cpp b/src/duckdb/extension/parquet/column_writer.cpp index 90d800162..7cdd51bc5 100644 --- a/src/duckdb/extension/parquet/column_writer.cpp +++ b/src/duckdb/extension/parquet/column_writer.cpp @@ -187,9 +187,12 @@ void ColumnWriter::HandleRepeatLevels(ColumnWriterState &state, ColumnWriterStat // no repeat levels without a parent node return; } - while (state.repetition_levels.size() < parent->repetition_levels.size()) { - state.repetition_levels.push_back(parent->repetition_levels[state.repetition_levels.size()]); + if (state.repetition_levels.size() >= parent->repetition_levels.size()) { + return; } + state.repetition_levels.insert(state.repetition_levels.end(), + parent->repetition_levels.begin() + state.repetition_levels.size(), + parent->repetition_levels.end()); } void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, const ValidityMask &validity, @@ -200,36 +203,41 @@ void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterStat while (state.definition_levels.size() < parent->definition_levels.size()) { idx_t current_index = state.definition_levels.size(); if (parent->definition_levels[current_index] != PARQUET_DEFINE_VALID) { + //! Inherit nulls from parent state.definition_levels.push_back(parent->definition_levels[current_index]); state.parent_null_count++; } else if (validity.RowIsValid(vector_index)) { + //! Produce a non-null define state.definition_levels.push_back(define_value); } else { + //! Produce a null define if (!can_have_nulls) { throw IOException("Parquet writer: map key column is not allowed to contain NULL values"); } state.null_count++; state.definition_levels.push_back(null_value); } + D_ASSERT(parent->is_empty.empty() || current_index < parent->is_empty.size()); if (parent->is_empty.empty() || !parent->is_empty[current_index]) { vector_index++; } } + return; + } + + // no parent: set definition levels only from this validity mask + if (validity.AllValid()) { + state.definition_levels.insert(state.definition_levels.end(), count, define_value); } else { - // no parent: set definition levels only from this validity mask - if (validity.AllValid()) { - state.definition_levels.insert(state.definition_levels.end(), count, define_value); - } else { - for (idx_t i = 0; i < count; i++) { - const auto is_null = !validity.RowIsValid(i); - state.definition_levels.emplace_back(is_null ? null_value : define_value); - state.null_count += is_null; - } - } - if (!can_have_nulls && state.null_count != 0) { - throw IOException("Parquet writer: map key column is not allowed to contain NULL values"); + for (idx_t i = 0; i < count; i++) { + const auto is_null = !validity.RowIsValid(i); + state.definition_levels.emplace_back(is_null ? null_value : define_value); + state.null_count += is_null; } } + if (!can_have_nulls && state.null_count != 0) { + throw IOException("Parquet writer: map key column is not allowed to contain NULL values"); + } } //===--------------------------------------------------------------------===// @@ -368,6 +376,7 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector definition_levels; unsafe_vector repetition_levels; - vector is_empty; + unsafe_vector is_empty; idx_t parent_null_count = 0; idx_t null_count = 0; diff --git a/src/duckdb/extension/parquet/include/writer/array_column_writer.hpp b/src/duckdb/extension/parquet/include/writer/array_column_writer.hpp index 630bfd17f..1ebb16c04 100644 --- a/src/duckdb/extension/parquet/include/writer/array_column_writer.hpp +++ b/src/duckdb/extension/parquet/include/writer/array_column_writer.hpp @@ -25,6 +25,10 @@ class ArrayColumnWriter : public ListColumnWriter { void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count, bool vector_can_span_multiple_pages) override; void Write(ColumnWriterState &state, Vector &vector, idx_t count) override; + +protected: + void WriteArrayState(ListColumnWriterState &state, idx_t array_size, uint16_t first_repeat_level, + idx_t define_value, const bool is_empty = false); }; } // namespace duckdb diff --git a/src/duckdb/extension/parquet/writer/array_column_writer.cpp b/src/duckdb/extension/parquet/writer/array_column_writer.cpp index 024dbe819..60284ff28 100644 --- a/src/duckdb/extension/parquet/writer/array_column_writer.cpp +++ b/src/duckdb/extension/parquet/writer/array_column_writer.cpp @@ -9,6 +9,22 @@ void ArrayColumnWriter::Analyze(ColumnWriterState &state_p, ColumnWriterState *p child_writer->Analyze(*state.child_state, &state_p, array_child, array_size * count); } +void ArrayColumnWriter::WriteArrayState(ListColumnWriterState &state, idx_t array_size, uint16_t first_repeat_level, + idx_t define_value, const bool is_empty) { + state.definition_levels.push_back(define_value); + state.repetition_levels.push_back(first_repeat_level); + state.is_empty.push_back(is_empty); + + if (is_empty) { + return; + } + for (idx_t k = 1; k < array_size; k++) { + state.repetition_levels.push_back(MaxRepeat() + 1); + state.definition_levels.push_back(define_value); + state.is_empty.push_back(false); + } +} + void ArrayColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count, bool vector_can_span_multiple_pages) { auto &state = state_p.Cast(); @@ -25,42 +41,20 @@ void ArrayColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *p for (idx_t i = start; i < vcount; i++) { idx_t parent_index = state.parent_index + i; if (parent && !parent->is_empty.empty() && parent->is_empty[parent_index]) { - state.definition_levels.push_back(parent->definition_levels[parent_index]); - state.repetition_levels.push_back(parent->repetition_levels[parent_index]); - state.is_empty.push_back(true); + WriteArrayState(state, array_size, parent->repetition_levels[parent_index], + parent->definition_levels[parent_index], true); continue; } auto first_repeat_level = parent && !parent->repetition_levels.empty() ? parent->repetition_levels[parent_index] : MaxRepeat(); if (parent && parent->definition_levels[parent_index] != PARQUET_DEFINE_VALID) { - state.definition_levels.push_back(parent->definition_levels[parent_index]); - state.repetition_levels.push_back(first_repeat_level); - state.is_empty.push_back(false); - for (idx_t k = 1; k < array_size; k++) { - state.repetition_levels.push_back(MaxRepeat() + 1); - state.definition_levels.push_back(parent->definition_levels[parent_index]); - state.is_empty.push_back(false); - } + WriteArrayState(state, array_size, first_repeat_level, parent->definition_levels[parent_index]); } else if (validity.RowIsValid(vector_index)) { // push the repetition levels - state.definition_levels.push_back(PARQUET_DEFINE_VALID); - state.is_empty.push_back(false); - - state.repetition_levels.push_back(first_repeat_level); - for (idx_t k = 1; k < array_size; k++) { - state.repetition_levels.push_back(MaxRepeat() + 1); - state.definition_levels.push_back(PARQUET_DEFINE_VALID); - state.is_empty.push_back(false); - } + WriteArrayState(state, array_size, first_repeat_level, PARQUET_DEFINE_VALID); } else { - state.definition_levels.push_back(MaxDefine() - 1); - state.repetition_levels.push_back(first_repeat_level); - state.is_empty.push_back(false); - for (idx_t k = 1; k < array_size; k++) { - state.repetition_levels.push_back(MaxRepeat() + 1); - state.definition_levels.push_back(MaxDefine() - 1); - state.is_empty.push_back(false); - } + //! Produce a null + WriteArrayState(state, array_size, first_repeat_level, MaxDefine() - 1); } vector_index++; } diff --git a/src/duckdb/extension/parquet/writer/struct_column_writer.cpp b/src/duckdb/extension/parquet/writer/struct_column_writer.cpp index c70c35ba2..e65515ad5 100644 --- a/src/duckdb/extension/parquet/writer/struct_column_writer.cpp +++ b/src/duckdb/extension/parquet/writer/struct_column_writer.cpp @@ -62,8 +62,9 @@ void StructColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState * auto &validity = FlatVector::Validity(vector); if (parent) { // propagate empty entries from the parent - while (state.is_empty.size() < parent->is_empty.size()) { - state.is_empty.push_back(parent->is_empty[state.is_empty.size()]); + if (state.is_empty.size() < parent->is_empty.size()) { + state.is_empty.insert(state.is_empty.end(), parent->is_empty.begin() + state.is_empty.size(), + parent->is_empty.end()); } } HandleRepeatLevels(state_p, parent, count, MaxRepeat()); diff --git a/src/duckdb/src/catalog/catalog.cpp b/src/duckdb/src/catalog/catalog.cpp index 60469e9cc..08e27f28f 100644 --- a/src/duckdb/src/catalog/catalog.cpp +++ b/src/duckdb/src/catalog/catalog.cpp @@ -1135,6 +1135,9 @@ vector> Catalog::GetAllSchemas(ClientContext &cont auto &db_manager = DatabaseManager::Get(context); auto databases = db_manager.GetDatabases(context); for (auto &database : databases) { + if (database->GetVisibility() == AttachVisibility::HIDDEN) { + continue; + } auto &catalog = database->GetCatalog(); auto new_schemas = catalog.GetSchemas(context); result.insert(result.end(), new_schemas.begin(), new_schemas.end()); diff --git a/src/duckdb/src/common/enum_util.cpp b/src/duckdb/src/common/enum_util.cpp index f61146995..324ba7004 100644 --- a/src/duckdb/src/common/enum_util.cpp +++ b/src/duckdb/src/common/enum_util.cpp @@ -60,6 +60,7 @@ #include "duckdb/common/enums/set_scope.hpp" #include "duckdb/common/enums/set_type.hpp" #include "duckdb/common/enums/statement_type.hpp" +#include "duckdb/common/enums/storage_block_prefetch.hpp" #include "duckdb/common/enums/stream_execution_result.hpp" #include "duckdb/common/enums/subquery_type.hpp" #include "duckdb/common/enums/tableref_type.hpp" @@ -117,6 +118,7 @@ #include "duckdb/function/table/arrow/enum/arrow_variable_size_type.hpp" #include "duckdb/function/table_function.hpp" #include "duckdb/function/window/window_merge_sort_tree.hpp" +#include "duckdb/logging/log_storage.hpp" #include "duckdb/logging/logging.hpp" #include "duckdb/main/appender.hpp" #include "duckdb/main/capi/capi_internal.hpp" @@ -2458,6 +2460,25 @@ LogMode EnumUtil::FromString(const char *value) { return static_cast(StringUtil::StringToEnum(GetLogModeValues(), 3, "LogMode", value)); } +const StringUtil::EnumStringLiteral *GetLoggingTargetTableValues() { + static constexpr StringUtil::EnumStringLiteral values[] { + { static_cast(LoggingTargetTable::ALL_LOGS), "ALL_LOGS" }, + { static_cast(LoggingTargetTable::LOG_ENTRIES), "LOG_ENTRIES" }, + { static_cast(LoggingTargetTable::LOG_CONTEXTS), "LOG_CONTEXTS" } + }; + return values; +} + +template<> +const char* EnumUtil::ToChars(LoggingTargetTable value) { + return StringUtil::EnumToString(GetLoggingTargetTableValues(), 3, "LoggingTargetTable", static_cast(value)); +} + +template<> +LoggingTargetTable EnumUtil::FromString(const char *value) { + return static_cast(StringUtil::StringToEnum(GetLoggingTargetTableValues(), 3, "LoggingTargetTable", value)); +} + const StringUtil::EnumStringLiteral *GetLogicalOperatorTypeValues() { static constexpr StringUtil::EnumStringLiteral values[] { { static_cast(LogicalOperatorType::LOGICAL_INVALID), "LOGICAL_INVALID" }, @@ -4235,6 +4256,26 @@ StatsInfo EnumUtil::FromString(const char *value) { return static_cast(StringUtil::StringToEnum(GetStatsInfoValues(), 5, "StatsInfo", value)); } +const StringUtil::EnumStringLiteral *GetStorageBlockPrefetchValues() { + static constexpr StringUtil::EnumStringLiteral values[] { + { static_cast(StorageBlockPrefetch::REMOTE_ONLY), "REMOTE_ONLY" }, + { static_cast(StorageBlockPrefetch::NEVER), "NEVER" }, + { static_cast(StorageBlockPrefetch::ALWAYS_PREFETCH), "ALWAYS_PREFETCH" }, + { static_cast(StorageBlockPrefetch::DEBUG_FORCE_ALWAYS), "DEBUG_FORCE_ALWAYS" } + }; + return values; +} + +template<> +const char* EnumUtil::ToChars(StorageBlockPrefetch value) { + return StringUtil::EnumToString(GetStorageBlockPrefetchValues(), 4, "StorageBlockPrefetch", static_cast(value)); +} + +template<> +StorageBlockPrefetch EnumUtil::FromString(const char *value) { + return static_cast(StringUtil::StringToEnum(GetStorageBlockPrefetchValues(), 4, "StorageBlockPrefetch", value)); +} + const StringUtil::EnumStringLiteral *GetStrTimeSpecifierValues() { static constexpr StringUtil::EnumStringLiteral values[] { { static_cast(StrTimeSpecifier::ABBREVIATED_WEEKDAY_NAME), "ABBREVIATED_WEEKDAY_NAME" }, diff --git a/src/duckdb/src/execution/join_hashtable.cpp b/src/duckdb/src/execution/join_hashtable.cpp index cfa845a88..f991ead7e 100644 --- a/src/duckdb/src/execution/join_hashtable.cpp +++ b/src/duckdb/src/execution/join_hashtable.cpp @@ -114,6 +114,11 @@ JoinHashTable::JoinHashTable(ClientContext &context_p, const PhysicalOperator &o single_join_error_on_multiple_rows = DBConfig::GetSetting(context); } + if (conditions.size() == 1 && + (join_type == JoinType::SEMI || join_type == JoinType::ANTI || join_type == JoinType::MARK)) { + insert_duplicate_keys = false; + } + InitializePartitionMasks(); } @@ -562,16 +567,18 @@ static inline void InsertMatchesAndIncrementMisses(atomic entries[], } // Insert the rows that match - for (idx_t i = 0; i < key_match_count; i++) { - const auto need_compare_idx = state.key_match_sel.get_index(i); - const auto entry_index = state.keys_to_compare_sel.get_index(need_compare_idx); + if (ht.insert_duplicate_keys) { + for (idx_t i = 0; i < key_match_count; i++) { + const auto need_compare_idx = state.key_match_sel.get_index(i); + const auto entry_index = state.keys_to_compare_sel.get_index(need_compare_idx); - const auto &ht_offset = ht_offsets[entry_index]; - auto &entry = entries[ht_offset]; - const auto row_ptr_to_insert = lhs_row_locations[entry_index]; + const auto &ht_offset = ht_offsets[entry_index]; + auto &entry = entries[ht_offset]; + const auto row_ptr_to_insert = lhs_row_locations[entry_index]; - const auto salt = hash_salts[entry_index]; - InsertRowToEntry(entry, row_ptr_to_insert, salt, ht.pointer_offset); + const auto salt = hash_salts[entry_index]; + InsertRowToEntry(entry, row_ptr_to_insert, salt, ht.pointer_offset); + } } // Linear probing: each of the entries that do not match move to the next entry in the HT diff --git a/src/duckdb/src/execution/operator/helper/physical_reset.cpp b/src/duckdb/src/execution/operator/helper/physical_reset.cpp index 711cc1b3b..1f5baf75d 100644 --- a/src/duckdb/src/execution/operator/helper/physical_reset.cpp +++ b/src/duckdb/src/execution/operator/helper/physical_reset.cpp @@ -65,9 +65,9 @@ SourceResultType PhysicalReset::GetData(ExecutionContext &context, DataChunk &ch } if (variable_scope == SetScope::SESSION) { auto &client_config = ClientConfig::GetConfig(context.client); - client_config.set_variables.erase(name.ToStdString()); + client_config.set_variables.erase(option->name); } else { - config.ResetGenericOption(name); + config.ResetGenericOption(option->name); } return SourceResultType::FINISHED; } diff --git a/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp b/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp index d3fa408bb..95b519d4d 100644 --- a/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +++ b/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp @@ -70,7 +70,8 @@ class CollectionMerger { } auto result_collection_index = collection_indexes[0]; - auto &result_collection = data_table.GetOptimisticCollection(context, result_collection_index); + auto &optimistic_collection = data_table.GetOptimisticCollection(context, result_collection_index); + auto &result_collection = *optimistic_collection.collection; if (collection_indexes.size() > 1) { // Merge all collections into one result collection. @@ -89,7 +90,7 @@ class CollectionMerger { auto &collection = data_table.GetOptimisticCollection(context, collection_indexes[i]); TableScanState scan_state; scan_state.Initialize(column_ids); - collection.InitializeScan(scan_state.local_state, column_ids, nullptr); + collection.collection->InitializeScan(scan_state.local_state, column_ids, nullptr); while (true) { scan_chunk.Reset(); @@ -99,15 +100,15 @@ class CollectionMerger { } auto new_row_group = result_collection.Append(scan_chunk, append_state); if (new_row_group) { - writer.WriteNewRowGroup(result_collection); + writer.WriteNewRowGroup(optimistic_collection); } } data_table.ResetOptimisticCollection(context, collection_indexes[i]); } result_collection.FinalizeAppend(TransactionData(0, 0), append_state); - writer.WriteLastRowGroup(result_collection); + writer.WriteLastRowGroup(optimistic_collection); } else if (batch_type == RowGroupBatchType::NOT_FLUSHED) { - writer.WriteLastRowGroup(result_collection); + writer.WriteLastRowGroup(optimistic_collection); } collection_indexes.clear(); @@ -116,12 +117,12 @@ class CollectionMerger { }; struct RowGroupBatchEntry { - RowGroupBatchEntry(RowGroupCollection &collection, const idx_t batch_idx, const PhysicalIndex collection_index, - const RowGroupBatchType type) - : batch_idx(batch_idx), total_rows(collection.GetTotalRows()), unflushed_memory(0), + RowGroupBatchEntry(OptimisticWriteCollection &collection, const idx_t batch_idx, + const PhysicalIndex collection_index, const RowGroupBatchType type) + : batch_idx(batch_idx), total_rows(collection.collection->GetTotalRows()), unflushed_memory(0), collection_index(collection_index), type(type) { if (type == RowGroupBatchType::NOT_FLUSHED) { - unflushed_memory = collection.GetAllocationSize(); + unflushed_memory = collection.collection->GetAllocationSize(); } } @@ -193,14 +194,10 @@ class BatchInsertLocalState : public LocalSinkState { void CreateNewCollection(ClientContext &context, DuckTableEntry &table_entry, const vector &insert_types) { - auto table_info = table_entry.GetStorage().GetDataTableInfo(); - auto &io_manager = TableIOManager::Get(table_entry.GetStorage()); - - // Create the local row group collection. - auto max_row_id = NumericCast(MAX_ROW_ID); - auto collection = make_uniq(std::move(table_info), io_manager, insert_types, max_row_id); - collection->InitializeEmpty(); - collection->InitializeAppend(current_append_state); + auto collection = OptimisticDataWriter::CreateCollection(table_entry.GetStorage(), insert_types); + auto &row_collection = *collection->collection; + row_collection.InitializeEmpty(); + row_collection.InitializeAppend(current_append_state); auto &data_table = table_entry.GetStorage(); collection_index = data_table.CreateOptimisticCollection(context, std::move(collection)); @@ -375,16 +372,17 @@ void BatchInsertGlobalState::AddCollection(ClientContext &context, const idx_t b throw InternalException("Batch index of the added collection (%llu) is smaller than the min batch index (%llu)", batch_index, min_batch_index); } - auto &collection = table.GetStorage().GetOptimisticCollection(context, collection_index); + auto &optimistic_collection = table.GetStorage().GetOptimisticCollection(context, collection_index); + auto &collection = *optimistic_collection.collection; auto new_count = collection.GetTotalRows(); auto batch_type = new_count < row_group_size ? RowGroupBatchType::NOT_FLUSHED : RowGroupBatchType::FLUSHED; if (batch_type == RowGroupBatchType::FLUSHED && writer) { - writer->WriteLastRowGroup(collection); + writer->WriteLastRowGroup(optimistic_collection); } lock_guard l(lock); insert_count += new_count; // add the collection to the batch index - RowGroupBatchEntry new_entry(collection, batch_index, collection_index, batch_type); + RowGroupBatchEntry new_entry(optimistic_collection, batch_index, collection_index, batch_type); if (batch_type == RowGroupBatchType::NOT_FLUSHED) { memory_manager.IncreaseUnflushedMemory(new_entry.unflushed_memory); } @@ -466,7 +464,9 @@ SinkNextBatchType PhysicalBatchInsert::NextBatch(ExecutionContext &context, Oper } // batch index has changed: move the old collection to the global state and create a new collection TransactionData tdata(0, 0); - auto &collection = gstate.table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); + auto &optimistic_collection = + gstate.table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); + auto &collection = *optimistic_collection.collection; collection.FinalizeAppend(tdata, lstate.current_append_state); gstate.AddCollection(context.client, lstate.current_index, lstate.partition_info.min_batch_index.GetIndex(), lstate.collection_index, lstate.optimistic_writer); @@ -545,11 +545,12 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, DataChunk &i storage.VerifyAppendConstraints(*lstate.constraint_state, context.client, insert_chunk, local_table_storage, nullptr); - auto &collection = table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); + auto &optimistic_collection = table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); + auto &collection = *optimistic_collection.collection; auto new_row_group = collection.Append(insert_chunk, lstate.current_append_state); if (new_row_group) { // we have already written to disk - flush the next row group as well - lstate.optimistic_writer->WriteNewRowGroup(collection); + lstate.optimistic_writer->WriteNewRowGroup(optimistic_collection); } return SinkResultType::NEED_MORE_INPUT; } @@ -569,7 +570,9 @@ SinkCombineResultType PhysicalBatchInsert::Combine(ExecutionContext &context, Op if (lstate.collection_index.IsValid()) { TransactionData tdata(0, 0); - auto &collection = gstate.table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); + auto &optimistic_collection = + gstate.table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); + auto &collection = *optimistic_collection.collection; collection.FinalizeAppend(tdata, lstate.current_append_state); if (collection.GetTotalRows() > 0) { auto batch_index = lstate.partition_info.min_batch_index.GetIndex(); @@ -667,7 +670,8 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, } memory_manager.ReduceUnflushedMemory(entry.unflushed_memory); - auto &collection = data_table.GetOptimisticCollection(context, entry.collection_index); + auto &optimistic_collection = data_table.GetOptimisticCollection(context, entry.collection_index); + auto &collection = *optimistic_collection.collection; collection.Scan(transaction, [&](DataChunk &insert_chunk) { data_table.LocalAppend(append_state, context, insert_chunk, false); return true; diff --git a/src/duckdb/src/execution/operator/persistent/physical_insert.cpp b/src/duckdb/src/execution/operator/persistent/physical_insert.cpp index 1875d86c1..97c31c4ba 100644 --- a/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +++ b/src/duckdb/src/execution/operator/persistent/physical_insert.cpp @@ -651,27 +651,26 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &insert D_ASSERT(!return_chunk); auto &data_table = gstate.table.GetStorage(); if (!lstate.collection_index.IsValid()) { - auto table_info = storage.GetDataTableInfo(); - auto &io_manager = TableIOManager::Get(table.GetStorage()); - // Create the local row group collection. - auto max_row_id = NumericCast(MAX_ROW_ID); - auto collection = make_uniq(std::move(table_info), io_manager, insert_types, max_row_id); - collection->InitializeEmpty(); - collection->InitializeAppend(lstate.local_append_state); + auto optimistic_collection = OptimisticDataWriter::CreateCollection(storage, insert_types); + auto &collection = *optimistic_collection->collection; + collection.InitializeEmpty(); + collection.InitializeAppend(lstate.local_append_state); lock_guard l(gstate.lock); lstate.optimistic_writer = make_uniq(context.client, data_table); - lstate.collection_index = data_table.CreateOptimisticCollection(context.client, std::move(collection)); + lstate.collection_index = + data_table.CreateOptimisticCollection(context.client, std::move(optimistic_collection)); } OnConflictHandling(table, context, gstate, lstate, insert_chunk); D_ASSERT(action_type != OnConflictAction::UPDATE); - auto &collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); + auto &optimistic_collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); + auto &collection = *optimistic_collection.collection; auto new_row_group = collection.Append(insert_chunk, lstate.local_append_state); if (new_row_group) { - lstate.optimistic_writer->WriteNewRowGroup(collection); + lstate.optimistic_writer->WriteNewRowGroup(optimistic_collection); } return SinkResultType::NEED_MORE_INPUT; } @@ -694,7 +693,8 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato // parallel append: finalize the append TransactionData tdata(0, 0); auto &data_table = gstate.table.GetStorage(); - auto &collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); + auto &optimistic_collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); + auto &collection = *optimistic_collection.collection; collection.FinalizeAppend(tdata, lstate.local_append_state); auto append_count = collection.GetTotalRows(); @@ -713,9 +713,9 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato storage.FinalizeLocalAppend(append_state); } else { // we have written rows to disk optimistically - merge directly into the transaction-local storage - lstate.optimistic_writer->WriteLastRowGroup(collection); + lstate.optimistic_writer->WriteLastRowGroup(optimistic_collection); lstate.optimistic_writer->FinalFlush(); - gstate.table.GetStorage().LocalMerge(context.client, collection); + gstate.table.GetStorage().LocalMerge(context.client, optimistic_collection); auto &optimistic_writer = gstate.table.GetStorage().GetOptimisticWriter(context.client); optimistic_writer.Merge(*lstate.optimistic_writer); } diff --git a/src/duckdb/src/execution/operator/schema/physical_attach.cpp b/src/duckdb/src/execution/operator/schema/physical_attach.cpp index cfb737a30..48e687703 100644 --- a/src/duckdb/src/execution/operator/schema/physical_attach.cpp +++ b/src/duckdb/src/execution/operator/schema/physical_attach.cpp @@ -61,20 +61,7 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c } } - // attach the database. - auto attached_db = db_manager.AttachDatabase(context.client, *info, options); - if (!attached_db) { - return SourceResultType::FINISHED; - } - - //! Initialize the database. - attached_db->Initialize(context.client); - if (!options.default_table.name.empty()) { - attached_db->GetCatalog().SetDefaultTable(options.default_table.schema, options.default_table.name); - } - attached_db->FinalizeLoad(context.client); - - db_manager.FinalizeAttach(context.client, *info, std::move(attached_db)); + db_manager.AttachDatabase(context.client, *info, options); return SourceResultType::FINISHED; } diff --git a/src/duckdb/src/execution/operator/set/physical_union.cpp b/src/duckdb/src/execution/operator/set/physical_union.cpp index a0846ef33..e43117c92 100644 --- a/src/duckdb/src/execution/operator/set/physical_union.cpp +++ b/src/duckdb/src/execution/operator/set/physical_union.cpp @@ -6,12 +6,14 @@ namespace duckdb { -PhysicalUnion::PhysicalUnion(PhysicalPlan &physical_plan, vector types, PhysicalOperator &top, - PhysicalOperator &bottom, idx_t estimated_cardinality, bool allow_out_of_order) - : PhysicalOperator(physical_plan, PhysicalOperatorType::UNION, std::move(types), estimated_cardinality), +PhysicalUnion::PhysicalUnion(PhysicalPlan &physical_plan, vector types_p, + const ArenaLinkedList> &children_p, + idx_t estimated_cardinality, bool allow_out_of_order) + : PhysicalOperator(physical_plan, PhysicalOperatorType::UNION, std::move(types_p), estimated_cardinality), allow_out_of_order(allow_out_of_order) { - children.push_back(top); - children.push_back(bottom); + for (auto &child : children_p) { + children.push_back(child); + } } //===--------------------------------------------------------------------===// @@ -56,41 +58,46 @@ void PhysicalUnion::BuildPipelines(Pipeline ¤t, MetaPipeline &meta_pipelin } } - // create a union pipeline that has identical dependencies to 'current' - auto &union_pipeline = meta_pipeline.CreateUnionPipeline(current, order_matters); - + // create union pipelines that has identical dependencies to 'current' + vector> union_pipelines; + for (idx_t i = 0; i + 1 < children.size(); i++) { + auto &union_pipeline = meta_pipeline.CreateUnionPipeline(current, order_matters); + union_pipelines.push_back(union_pipeline); + } // continue with the current pipeline children[0].get().BuildPipelines(current, meta_pipeline); - - vector> dependencies; - optional_ptr last_child_ptr; - // users commonly UNION ALL together a bunch of cheap scan pipelines (e.g., instead of a multi file list) - // in these cases, we don't want to avoid breadth-first plan evaluation, - // as it doesn't pose a threat to memory usage (it's just a bunch of straight scans) - const auto can_saturate_threads = - ContainsSink(children[0]) && children[0].get().CanSaturateThreads(current.GetClientContext()); - if (order_matters || can_saturate_threads) { - // we add dependencies if order matters: union_pipeline comes after all pipelines created by building current - dependencies = meta_pipeline.AddDependenciesFrom(union_pipeline, union_pipeline, false); - // we also add dependencies if the LHS child can saturate all available threads - // in that case, we recursively make all RHS children depend on the LHS. - // This prevents breadth-first plan evaluation - if (can_saturate_threads) { - last_child_ptr = meta_pipeline.GetLastChild(); + bool can_saturate_threads = + ContainsSink(children[0].get()) && children[0].get().CanSaturateThreads(current.GetClientContext()); + for (idx_t i = 1; i < children.size(); i++) { + auto &union_pipeline = union_pipelines[children.size() - i - 1].get(); + vector> dependencies; + optional_ptr last_child_ptr; + if (ContainsSink(children[i - 1].get()) && + children[i - 1].get().CanSaturateThreads(current.GetClientContext())) { + can_saturate_threads = true; } - } - - // build the union pipeline - children[1].get().BuildPipelines(union_pipeline, meta_pipeline); + if (order_matters || can_saturate_threads) { + // we add dependencies if order matters: union_pipeline comes after all pipelines created by building + // current + dependencies = meta_pipeline.AddDependenciesFrom(union_pipeline, union_pipeline, false); + // we also add dependencies if the LHS child can saturate all available threads + // in that case, we recursively make all RHS children depend on the LHS. + // This prevents breadth-first plan evaluation + if (can_saturate_threads) { + last_child_ptr = meta_pipeline.GetLastChild(); + } + } + // build the union pipeline + children[i].get().BuildPipelines(union_pipeline, meta_pipeline); - if (last_child_ptr) { - // the pointer was set, set up the dependencies - meta_pipeline.AddRecursiveDependencies(dependencies, *last_child_ptr); + if (last_child_ptr) { + // the pointer was set, set up the dependencies + meta_pipeline.AddRecursiveDependencies(dependencies, *last_child_ptr); + } + // Assign proper batch index to the union pipeline + // This needs to happen after the pipelines have been built because unions can be nested + meta_pipeline.AssignNextBatchIndex(union_pipeline); } - - // Assign proper batch index to the union pipeline - // This needs to happen after the pipelines have been built because unions can be nested - meta_pipeline.AssignNextBatchIndex(union_pipeline); } vector> PhysicalUnion::GetSources() const { diff --git a/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp b/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp index aa274f16d..2d7427861 100644 --- a/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +++ b/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp @@ -32,23 +32,29 @@ static JoinCondition CreateNotDistinctComparison(const LogicalType &type, idx_t } PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalSetOperation &op) { - D_ASSERT(op.children.size() == 2); - - reference left = CreatePlan(*op.children[0]); - reference right = CreatePlan(*op.children[1]); - - if (left.get().GetTypes() != right.get().GetTypes()) { - throw InvalidInputException("Type mismatch for SET OPERATION"); + ArenaLinkedList> children(physical_plan->ArenaRef()); + for (auto &child : op.children) { + children.push_back(CreatePlan(*child)); + } + for (idx_t i = 1; i < children.size(); i++) { + if (children[i].get().GetTypes() != children[0].get().GetTypes()) { + throw InvalidInputException("Type mismatch for SET OPERATION"); + } } optional_ptr result; switch (op.type) { case LogicalOperatorType::LOGICAL_UNION: // UNION - result = Make(op.types, left, right, op.estimated_cardinality, op.allow_out_of_order); + result = Make(op.types, std::move(children), op.estimated_cardinality, op.allow_out_of_order); break; case LogicalOperatorType::LOGICAL_EXCEPT: case LogicalOperatorType::LOGICAL_INTERSECT: { + if (children.size() != 2) { + throw InternalException("EXCEPT / INTERSECT must have exactly two children"); + } + auto &left = children[0]; + auto &right = children[1]; auto &types = left.get().GetTypes(); vector conditions; // create equality condition for all columns diff --git a/src/duckdb/src/function/table/read_csv.cpp b/src/duckdb/src/function/table/read_csv.cpp index 415bb3237..28a58afa0 100644 --- a/src/duckdb/src/function/table/read_csv.cpp +++ b/src/duckdb/src/function/table/read_csv.cpp @@ -1,4 +1,5 @@ #include "duckdb/function/table/read_csv.hpp" +#include "duckdb/function/table/read_duckdb.hpp" #include "duckdb/common/enum_util.hpp" #include "duckdb/common/multi_file/multi_file_reader.hpp" @@ -192,8 +193,10 @@ unique_ptr ReadCSVReplacement(ClientContext &context, ReplacementScanI void BuiltinFunctions::RegisterReadFunctions() { CSVCopyFunction::RegisterFunction(*this); ReadCSVTableFunction::RegisterFunction(*this); + AddFunction(MultiFileReader::CreateFunctionSet(ReadDuckDBTableFunction::GetFunction())); auto &config = DBConfig::GetConfig(*transaction.db); config.replacement_scans.emplace_back(ReadCSVReplacement); + config.replacement_scans.emplace_back(ReadDuckDBTableFunction::ReplacementScan); } } // namespace duckdb diff --git a/src/duckdb/src/function/table/read_duckdb.cpp b/src/duckdb/src/function/table/read_duckdb.cpp new file mode 100644 index 000000000..c68f1c32e --- /dev/null +++ b/src/duckdb/src/function/table/read_duckdb.cpp @@ -0,0 +1,530 @@ +#include "duckdb/function/table/read_duckdb.hpp" +#include "duckdb/common/multi_file/multi_file_reader.hpp" +#include "duckdb/common/multi_file/multi_file_function.hpp" +#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" +#include "duckdb/main/attached_database.hpp" +#include "duckdb/function/replacement_scan.hpp" +#include "duckdb/parser/expression/constant_expression.hpp" +#include "duckdb/parser/expression/function_expression.hpp" +#include "duckdb/parser/tableref/table_function_ref.hpp" + +namespace duckdb { + +struct DuckDBMultiFileInfo : MultiFileReaderInterface { + static unique_ptr CreateInterface(ClientContext &context); + + unique_ptr InitializeOptions(ClientContext &context, + optional_ptr info) override; + bool ParseCopyOption(ClientContext &context, const string &key, const vector &values, + BaseFileReaderOptions &options, vector &expected_names, + vector &expected_types) override; + bool ParseOption(ClientContext &context, const string &key, const Value &val, MultiFileOptions &file_options, + BaseFileReaderOptions &options) override; + void FinalizeCopyBind(ClientContext &context, BaseFileReaderOptions &options, const vector &expected_names, + const vector &expected_types) override; + void FinalizeBindData(MultiFileBindData &multi_file_data) override; + + unique_ptr InitializeBindData(MultiFileBindData &multi_file_data, + unique_ptr options) override; + void BindReader(ClientContext &context, vector &return_types, vector &names, + MultiFileBindData &bind_data) override; + unique_ptr InitializeGlobalState(ClientContext &context, MultiFileBindData &bind_data, + MultiFileGlobalState &global_state) override; + unique_ptr InitializeLocalState(ExecutionContext &, GlobalTableFunctionState &) override; + shared_ptr CreateReader(ClientContext &context, GlobalTableFunctionState &gstate, + BaseUnionData &union_data, const MultiFileBindData &bind_data_p) override; + shared_ptr CreateReader(ClientContext &context, GlobalTableFunctionState &gstate, + const OpenFileInfo &file, idx_t file_idx, + const MultiFileBindData &bind_data) override; + shared_ptr CreateReader(ClientContext &context, const OpenFileInfo &file, + BaseFileReaderOptions &options, + const MultiFileOptions &file_options) override; + void FinishReading(ClientContext &context, GlobalTableFunctionState &global_state, + LocalTableFunctionState &local_state) override; + unique_ptr GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) override; + void GetVirtualColumns(ClientContext &, MultiFileBindData &, virtual_column_map_t &result) override; + unique_ptr Copy() override; + FileGlobInput GetGlobInput() override; +}; + +class DuckDBFileReaderOptions : public BaseFileReaderOptions { +public: + string schema_name; + string table_name; + + bool Matches(TableCatalogEntry &table) const; + bool HasSelection() const; + string PrintOptions() const; + string GetCandidates(const vector> &tables) const; +}; + +struct DuckDBReadBindData : TableFunctionData { + unique_ptr options; + optional_idx initial_file_cardinality; + + unique_ptr Copy() const override { + auto result = make_uniq(); + result->options = make_uniq(*options); + result->initial_file_cardinality = initial_file_cardinality; + return std::move(result); + } +}; + +struct AttachedDatabaseWrapper { + AttachedDatabaseWrapper(ClientContext &context, shared_ptr attached_database_p); + ~AttachedDatabaseWrapper(); + + ClientContext &context; + shared_ptr attached_database; + optional_ptr table_entry; +}; + +class DuckDBReader : public BaseFileReader { +public: + DuckDBReader(ClientContext &context, OpenFileInfo file, const DuckDBFileReaderOptions &options); + ~DuckDBReader() override; + +public: + bool TryInitializeScan(ClientContext &context, GlobalTableFunctionState &gstate, + LocalTableFunctionState &lstate) override; + void Scan(ClientContext &context, GlobalTableFunctionState &global_state, LocalTableFunctionState &local_state, + DataChunk &chunk) override; + shared_ptr GetUnionData(idx_t file_idx) override; + void FinishFile(ClientContext &context, GlobalTableFunctionState &gstate) override; + double GetProgressInFile(ClientContext &context) override; + unique_ptr GetStatistics(ClientContext &context, const string &name) override; + void AddVirtualColumn(column_t virtual_column_id) override; + string GetReaderType() const override { + return "duckdb"; + } + optional_idx NumRows(); + AttachedDatabase &GetAttachedDatabase(); + TableCatalogEntry &GetTableEntry(); + +private: + ClientContext &context; + shared_ptr db_wrapper; + TableFunction scan_function; + unique_ptr bind_data; + unique_ptr global_state; + atomic finished; + idx_t column_count; + string schema_name; + string table_name; +}; + +struct DuckDBReadGlobalState : GlobalTableFunctionState {}; + +struct DuckDBReadLocalState : LocalTableFunctionState { + unique_ptr local_state; + shared_ptr attached_database; +}; + +string DuckDBFileReaderOptions::GetCandidates(const vector> &tables) const { + if (tables.empty()) { + return string(); + } + case_insensitive_map_t table_names; + for (auto &table : tables) { + table_names[table.get().name]++; + } + vector candidate_list; + for (auto &table_ref : tables) { + auto &table = table_ref.get(); + if (table_names[table.name] > 1) { + // name conflicts across schemas - add the schema name + auto &schema = table.ParentSchema(); + candidate_list.push_back(schema.name + "." + table.name); + } else { + candidate_list.push_back(table.name); + } + } + string search_term = schema_name; + if (!search_term.empty()) { + search_term += "."; + } + search_term += table_name; + return StringUtil::CandidatesErrorMessage(candidate_list, search_term, "Candidates"); +} + +bool DuckDBFileReaderOptions::HasSelection() const { + if (!table_name.empty()) { + return true; + } + if (!schema_name.empty()) { + return true; + } + return false; +} + +string DuckDBFileReaderOptions::PrintOptions() const { + string options; + if (!schema_name.empty()) { + options += "schema_name=\"" + schema_name + "\""; + } + if (!options.empty()) { + options += ", "; + } + if (!table_name.empty()) { + options += "table_name=\"" + table_name + "\""; + } + return options; +} + +bool DuckDBFileReaderOptions::Matches(TableCatalogEntry &table) const { + if (!schema_name.empty() && !StringUtil::CIEquals(table.ParentSchema().name, schema_name)) { + return false; + } + if (!table_name.empty() && !StringUtil::CIEquals(table.name, table_name)) { + return false; + } + return true; +} + +AttachedDatabaseWrapper::AttachedDatabaseWrapper(ClientContext &context, + shared_ptr attached_database_p) + : context(context), attached_database(std::move(attached_database_p)) { +} + +AttachedDatabaseWrapper::~AttachedDatabaseWrapper() { + if (attached_database) { + auto &db_manager = DatabaseManager::Get(context); + db_manager.DetachDatabase(context, attached_database->GetName(), OnEntryNotFound::RETURN_NULL); + attached_database.reset(); + } +} +DuckDBReader::DuckDBReader(ClientContext &context_p, OpenFileInfo file_p, const DuckDBFileReaderOptions &options) + : BaseFileReader(std::move(file_p)), context(context_p), finished(false) { + auto &attached = GetAttachedDatabase(); + auto &catalog = attached.GetCatalog(); + vector> tables; + vector> candidate_tables; + catalog.ScanSchemas(context, [&](SchemaCatalogEntry &schema) { + schema.Scan(CatalogType::TABLE_ENTRY, [&](CatalogEntry &entry) { + if (entry.type != CatalogType::TABLE_ENTRY) { + return; + } + auto &table = entry.Cast(); + if (options.Matches(table)) { + tables.push_back(table); + } + candidate_tables.push_back(table); + }); + }); + if (tables.size() != 1) { + string error_msg = tables.empty() ? "does not have any tables" : "has multiple tables"; + string extra_info; + if (!options.HasSelection()) { + extra_info = "\nSelect a table using `table_name=''"; + } else { + extra_info = " matching " + options.PrintOptions(); + } + string candidate_str = options.GetCandidates(candidate_tables); + throw BinderException("Database \"%s\" %s%s%s", file.path, error_msg, extra_info, candidate_str); + } + auto &table = tables[0].get(); + for (auto &col : table.GetColumns().Logical()) { + columns.emplace_back(col.Name(), col.Type()); + } + column_count = columns.size(); + schema_name = table.ParentSchema().name; + table_name = table.name; + db_wrapper->table_entry = table; +} + +DuckDBReader::~DuckDBReader() { +} + +AttachedDatabase &DuckDBReader::GetAttachedDatabase() { + if (!db_wrapper) { + auto &db_manager = DatabaseManager::Get(context); + AttachInfo info; + info.path = file.path; + // use invalid UTF-8 so that a conflicting database name cannot be attached by a user + info.name = "\x80__duckdb_reader_" + info.path; + + info.on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT; + unordered_map attach_kv; + AttachOptions attach_options(attach_kv, AccessMode::READ_ONLY); + attach_options.visibility = AttachVisibility::HIDDEN; + + auto attached = db_manager.AttachDatabase(context, info, attach_options); + db_wrapper = make_shared_ptr(context, std::move(attached)); + } + return *db_wrapper->attached_database; +} + +TableCatalogEntry &DuckDBReader::GetTableEntry() { + auto &attached = GetAttachedDatabase(); + if (!db_wrapper->table_entry) { + auto &catalog = attached.GetCatalog(); + db_wrapper->table_entry = + catalog.GetEntry(context, schema_name, table_name, OnEntryNotFound::THROW_EXCEPTION); + } + return *db_wrapper->table_entry; +} + +bool DuckDBReader::TryInitializeScan(ClientContext &context, GlobalTableFunctionState &gstate, + LocalTableFunctionState &lstate_p) { + auto &lstate = lstate_p.Cast(); + if (finished) { + lstate.attached_database.reset(); + return false; + } + if (!global_state) { + lstate.attached_database.reset(); + auto &table_entry = GetTableEntry(); + scan_function = table_entry.GetScanFunction(context, bind_data); + for (auto &col : column_indexes) { + if (col.GetPrimaryIndex() >= column_count) { + col = ColumnIndex(COLUMN_IDENTIFIER_ROW_ID); + } else { + auto &column = table_entry.GetColumn(LogicalIndex(col.GetPrimaryIndex())); + if (column.Generated()) { + throw NotImplementedException("Unsupported: read_duckdb cannot read generated column %s", + column.Name()); + } + } + } + + // initialize the scan over this table + TableFunctionInitInput input(bind_data.get(), column_indexes, vector(), filters.get()); + global_state = scan_function.init_global(context, input); + } + AssignSharedPointer(lstate.attached_database, db_wrapper); + // initialize the local scan + ThreadContext thread(context); + ExecutionContext exec_context(context, thread, nullptr); + TableFunctionInitInput input(bind_data.get(), column_indexes, vector(), filters.get()); + lstate.local_state = scan_function.init_local(exec_context, input, global_state.get()); + return true; +} + +void DuckDBReader::Scan(ClientContext &context, GlobalTableFunctionState &gstate_p, LocalTableFunctionState &lstate_p, + DataChunk &chunk) { + chunk.Reset(); + auto &lstate = lstate_p.Cast(); + TableFunctionInput input(bind_data.get(), lstate.local_state, global_state); + scan_function.function(context, input, chunk); + if (chunk.size() == 0) { + finished = true; + } +} + +void DuckDBReader::FinishFile(ClientContext &context, GlobalTableFunctionState &gstate) { + db_wrapper.reset(); +} + +optional_idx DuckDBReader::NumRows() { + auto &table_entry = GetTableEntry(); + return table_entry.GetStorage().GetTotalRows(); +} + +unique_ptr DuckDBReader::GetStatistics(ClientContext &context, const string &name) { + if (!scan_function.statistics) { + return BaseFileReader::GetStatistics(context, name); + } + auto &table_entry = GetTableEntry(); + if (!table_entry.ColumnExists(name)) { + return nullptr; + } + return scan_function.statistics(context, bind_data.get(), table_entry.GetColumn(name).Logical().index); +} + +double DuckDBReader::GetProgressInFile(ClientContext &context) { + if (!scan_function.table_scan_progress || !global_state) { + return BaseFileReader::GetProgressInFile(context); + } + return scan_function.table_scan_progress(context, bind_data.get(), global_state.get()); +} + +void DuckDBReader::AddVirtualColumn(column_t virtual_column_id) { + if (virtual_column_id != COLUMN_IDENTIFIER_ROW_ID) { + throw InternalException("Unsupported virtual column id %d for duckdb reader", virtual_column_id); + } +} + +unique_ptr DuckDBMultiFileInfo::CreateInterface(ClientContext &context) { + return make_uniq(); +} + +unique_ptr DuckDBMultiFileInfo::InitializeOptions(ClientContext &context, + optional_ptr info) { + return make_uniq(); +} + +bool DuckDBMultiFileInfo::ParseCopyOption(ClientContext &context, const string &key, const vector &values, + BaseFileReaderOptions &options, vector &expected_names, + vector &expected_types) { + return false; +} + +bool DuckDBMultiFileInfo::ParseOption(ClientContext &context, const string &key, const Value &val, + MultiFileOptions &file_options, BaseFileReaderOptions &options_p) { + auto &options = options_p.Cast(); + if (key == "schema_name") { + options.schema_name = StringValue::Get(val); + return true; + } + if (key == "table_name") { + options.table_name = StringValue::Get(val); + return true; + } + return false; +} + +void DuckDBMultiFileInfo::FinalizeCopyBind(ClientContext &context, BaseFileReaderOptions &options, + const vector &expected_names, + const vector &expected_types) { + throw InternalException("Unimplemented method in DuckDBMultiFileInfo"); +} + +void DuckDBMultiFileInfo::FinalizeBindData(MultiFileBindData &multi_file_data) { + auto &bind_data = multi_file_data.bind_data->Cast(); + if (multi_file_data.initial_reader) { + auto &initial_reader = multi_file_data.initial_reader->Cast(); + bind_data.initial_file_cardinality = initial_reader.NumRows(); + } +} + +unique_ptr DuckDBMultiFileInfo::InitializeBindData(MultiFileBindData &multi_file_data, + unique_ptr options_p) { + auto result = make_uniq(); + result->options = unique_ptr_cast(std::move(options_p)); + return std::move(result); +} + +void DuckDBMultiFileInfo::BindReader(ClientContext &context, vector &return_types, vector &names, + MultiFileBindData &bind_data) { + auto &duckdb_bind_data = bind_data.bind_data->Cast(); + bind_data.reader_bind = + bind_data.multi_file_reader->BindReader(context, return_types, names, *bind_data.file_list, bind_data, + *duckdb_bind_data.options, bind_data.file_options); +} + +unique_ptr DuckDBMultiFileInfo::InitializeGlobalState(ClientContext &context, + MultiFileBindData &bind_data, + MultiFileGlobalState &global_state) { + return make_uniq(); +} + +unique_ptr DuckDBMultiFileInfo::InitializeLocalState(ExecutionContext &, + GlobalTableFunctionState &) { + return make_uniq(); +} + +struct DuckDBReaderUnionData : BaseUnionData { + explicit DuckDBReaderUnionData(OpenFileInfo file_p) : BaseUnionData(std::move(file_p)) { + } +}; + +shared_ptr DuckDBMultiFileInfo::CreateReader(ClientContext &context, GlobalTableFunctionState &gstate, + BaseUnionData &union_data_p, + const MultiFileBindData &multi_bind_data) { + auto &union_data = union_data_p.Cast(); + auto &bind_data = multi_bind_data.bind_data->Cast(); + return make_shared_ptr(context, union_data.file, *bind_data.options); +} + +shared_ptr DuckDBMultiFileInfo::CreateReader(ClientContext &context, GlobalTableFunctionState &gstate, + const OpenFileInfo &file, idx_t file_idx, + const MultiFileBindData &multi_bind_data) { + auto &bind_data = multi_bind_data.bind_data->Cast(); + return make_shared_ptr(context, file, *bind_data.options); +} + +shared_ptr DuckDBMultiFileInfo::CreateReader(ClientContext &context, const OpenFileInfo &file, + BaseFileReaderOptions &options, + const MultiFileOptions &file_options) { + return make_shared_ptr(context, file, options.Cast()); +} + +shared_ptr DuckDBReader::GetUnionData(idx_t file_idx) { + auto result = make_uniq(file); + for (auto &column : columns) { + result->names.push_back(column.name); + result->types.push_back(column.type); + } + result->reader = shared_from_this(); + return std::move(result); +} + +void DuckDBMultiFileInfo::FinishReading(ClientContext &context, GlobalTableFunctionState &global_state, + LocalTableFunctionState &lstate_p) { + auto &lstate = lstate_p.Cast(); + lstate.attached_database.reset(); +} + +unique_ptr DuckDBMultiFileInfo::GetCardinality(const MultiFileBindData &bind_data_p, idx_t file_count) { + auto &bind_data = bind_data_p.bind_data->Cast(); + idx_t estimated_cardinality = file_count; + if (bind_data.initial_file_cardinality.IsValid()) { + estimated_cardinality = file_count * bind_data.initial_file_cardinality.GetIndex(); + } + return make_uniq(estimated_cardinality); +} + +unique_ptr DuckDBMultiFileInfo::Copy() { + return make_uniq(); +} + +FileGlobInput DuckDBMultiFileInfo::GetGlobInput() { + return FileGlobInput(FileGlobOptions::FALLBACK_GLOB, "db"); +} + +void DuckDBMultiFileInfo::GetVirtualColumns(ClientContext &, MultiFileBindData &, virtual_column_map_t &result) { + result.insert(make_pair(COLUMN_IDENTIFIER_ROW_ID, TableColumn("rowid", LogicalType::BIGINT))); +} + +void ReadDuckDBAddNamedParameters(TableFunction &table_function) { + table_function.named_parameters["schema_name"] = LogicalType::VARCHAR; + table_function.named_parameters["table_name"] = LogicalType::VARCHAR; + + MultiFileReader::AddParameters(table_function); +} + +static vector DuckDBGetRowIdColumns(ClientContext &, optional_ptr) { + vector result; + result.emplace_back(MultiFileReader::COLUMN_IDENTIFIER_FILE_INDEX); + result.emplace_back(COLUMN_IDENTIFIER_ROW_ID); + return result; +} + +static bool DuckDBScanPushdownExpression(ClientContext &context, const LogicalGet &get, Expression &expr) { + return true; +} + +TableFunction ReadDuckDBTableFunction::GetFunction() { + MultiFileFunction read_duckdb("read_duckdb"); + read_duckdb.statistics = MultiFileFunction::MultiFileScanStats; + read_duckdb.get_row_id_columns = DuckDBGetRowIdColumns; + read_duckdb.pushdown_expression = DuckDBScanPushdownExpression; + read_duckdb.filter_pushdown = true; + read_duckdb.filter_prune = true; + read_duckdb.late_materialization = true; + ReadDuckDBAddNamedParameters(read_duckdb); + return static_cast(read_duckdb); +} + +unique_ptr ReadDuckDBTableFunction::ReplacementScan(ClientContext &context, ReplacementScanInput &input, + optional_ptr) { + auto table_name = ReplacementScan::GetFullPath(input); + auto lower_name = StringUtil::Lower(table_name); + if (!StringUtil::EndsWith(lower_name, ".db") && !StringUtil::Contains(lower_name, ".db?") && + !StringUtil::EndsWith(lower_name, ".ddb") && !StringUtil::Contains(lower_name, ".ddb?") && + !StringUtil::EndsWith(lower_name, ".duckdb") && !StringUtil::Contains(lower_name, ".duckdb?")) { + return nullptr; + } + auto table_function = make_uniq(); + vector> children; + children.push_back(make_uniq(Value(table_name))); + table_function->function = make_uniq("read_duckdb", std::move(children)); + + if (!FileSystem::HasGlob(table_name)) { + auto &fs = FileSystem::GetFileSystem(context); + table_function->alias = fs.ExtractBaseName(table_name); + } + return std::move(table_function); +} + +} // namespace duckdb diff --git a/src/duckdb/src/function/table/system/duckdb_databases.cpp b/src/duckdb/src/function/table/system/duckdb_databases.cpp index ee74fdec8..0e705eb69 100644 --- a/src/duckdb/src/function/table/system/duckdb_databases.cpp +++ b/src/duckdb/src/function/table/system/duckdb_databases.cpp @@ -67,9 +67,11 @@ void DuckDBDatabasesFunction(ClientContext &context, TableFunctionInput &data_p, idx_t count = 0; while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) { auto &entry = data.entries[data.offset++]; - auto &attached = *entry; auto &catalog = attached.GetCatalog(); + if (attached.GetVisibility() == AttachVisibility::HIDDEN) { + continue; + } // return values: idx_t col = 0; diff --git a/src/duckdb/src/function/table/system/duckdb_settings.cpp b/src/duckdb/src/function/table/system/duckdb_settings.cpp index 3ec34d908..9908854bd 100644 --- a/src/duckdb/src/function/table/system/duckdb_settings.cpp +++ b/src/duckdb/src/function/table/system/duckdb_settings.cpp @@ -12,6 +12,10 @@ struct DuckDBSettingValue { string input_type; string scope; vector aliases; + + inline bool operator<(const DuckDBSettingValue &rhs) const { + return name < rhs.name; + }; }; struct DuckDBSettingsData : public GlobalTableFunctionState { @@ -79,7 +83,12 @@ unique_ptr DuckDBSettingsInit(ClientContext &context, if (entry != aliases.end()) { value.aliases = std::move(entry->second); } - + for (auto &alias : value.aliases) { + DuckDBSettingValue alias_value = value; + alias_value.name = StringValue::Get(alias); + alias_value.aliases.clear(); + result->settings.push_back(std::move(alias_value)); + } result->settings.push_back(std::move(value)); } for (auto &ext_param : config.extension_parameters) { @@ -98,6 +107,7 @@ unique_ptr DuckDBSettingsInit(ClientContext &context, result->settings.push_back(std::move(value)); } + std::sort(result->settings.begin(), result->settings.end()); return std::move(result); } diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index b3508f3ea..bf61a3aa3 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,17 +1,17 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "1-dev205" +#define DUCKDB_PATCH_VERSION "0-dev372" #endif #ifndef DUCKDB_MINOR_VERSION -#define DUCKDB_MINOR_VERSION 4 +#define DUCKDB_MINOR_VERSION 5 #endif #ifndef DUCKDB_MAJOR_VERSION #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.4.1-dev205" +#define DUCKDB_VERSION "v1.5.0-dev372" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "d52dd4e3df" +#define DUCKDB_SOURCE_ID "2259ad7316" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb.h b/src/duckdb/src/include/duckdb.h index a6cd9e265..ccf5ad5ac 100644 --- a/src/duckdb/src/include/duckdb.h +++ b/src/duckdb/src/include/duckdb.h @@ -241,6 +241,20 @@ typedef enum duckdb_error_type { //! An enum over DuckDB's different cast modes. typedef enum duckdb_cast_mode { DUCKDB_CAST_NORMAL = 0, DUCKDB_CAST_TRY = 1 } duckdb_cast_mode; +typedef enum duckdb_file_flag { + DUCKDB_FILE_FLAG_INVALID = 0, + // Open the file with "read" capabilities. + DUCKDB_FILE_FLAG_READ = 1, + // Open the file with "write" capabilities. + DUCKDB_FILE_FLAG_WRITE = 2, + // Create a new file, or open if it already exists. + DUCKDB_FILE_FLAG_CREATE = 3, + // Create a new file, or fail if it already exists. + DUCKDB_FILE_FLAG_CREATE_NEW = 4, + // Open the file in "append" mode. + DUCKDB_FILE_FLAG_APPEND = 5, +} duckdb_file_flag; + //===--------------------------------------------------------------------===// // General type definitions //===--------------------------------------------------------------------===// @@ -756,6 +770,22 @@ typedef struct _duckdb_arrow_options { void *internal_ptr; } * duckdb_arrow_options; +//===--------------------------------------------------------------------===// +// Virtual File System Access +//===--------------------------------------------------------------------===// + +typedef struct _duckdb_file_open_options { + void *internal_ptr; +} * duckdb_file_open_options; + +typedef struct _duckdb_file_system { + void *internal_ptr; +} * duckdb_file_system; + +typedef struct _duckdb_file_handle { + void *internal_ptr; +} * duckdb_file_handle; + //===--------------------------------------------------------------------===// // DuckDB extension access //===--------------------------------------------------------------------===// @@ -5161,6 +5191,150 @@ Folds an expression creating a folded value. DUCKDB_C_API duckdb_error_data duckdb_expression_fold(duckdb_client_context context, duckdb_expression expr, duckdb_value *out_value); +//===--------------------------------------------------------------------===// +// File System Interface +//===--------------------------------------------------------------------===// + +/*! +Get a file system instance associated with the given client context. + +* @param context The client context. +* @return The resulting file system instance. Must be destroyed with `duckdb_destroy_file_system`. +*/ +DUCKDB_C_API duckdb_file_system duckdb_client_context_get_file_system(duckdb_client_context context); + +/*! +Destroys the given file system instance. +* @param file_system The file system instance to destroy. +*/ +DUCKDB_C_API void duckdb_destroy_file_system(duckdb_file_system *file_system); + +/*! +Retrieves the last error that occurred on the given file system instance. + +* @param file_system The file system instance. +* @return The error data. +*/ +DUCKDB_C_API duckdb_error_data duckdb_file_system_error_data(duckdb_file_system file_system); + +/*! +Opens a file at the given path with the specified options. + +* @param file_system The file system instance. +* @param path The path to the file. +* @param options The file open options specifying how to open the file. +* @param out_file The resulting file handle instance, or `nullptr` if the open failed. Must be destroyed with +`duckdb_destroy_file_handle`. +* @return Whether the operation was successful. If not, the error data can be retrieved using +`duckdb_file_system_error_data`. +*/ +DUCKDB_C_API duckdb_state duckdb_file_system_open(duckdb_file_system file_system, const char *path, + duckdb_file_open_options options, duckdb_file_handle *out_file); + +/*! +Creates a new file open options instance with blank settings. + +* @return The new file open options instance. Must be destroyed with `duckdb_destroy_file_open_options`. +*/ +DUCKDB_C_API duckdb_file_open_options duckdb_create_file_open_options(); + +/*! +Sets a specific flag in the file open options. + +* @param options The file open options instance. +* @param flag The flag to set (e.g., read, write). +* @param value If the flag is enabled or disabled. +* @return `DuckDBSuccess` on success or `DuckDBError` if the flag is unrecognized or unsupported by this version of +DuckDB. +*/ +DUCKDB_C_API duckdb_state duckdb_file_open_options_set_flag(duckdb_file_open_options options, duckdb_file_flag flag, + bool value); + +/*! +Destroys the given file open options instance. +* @param options The file open options instance to destroy. +*/ +DUCKDB_C_API void duckdb_destroy_file_open_options(duckdb_file_open_options *options); + +/*! +Destroys the given file handle and deallocates all associated resources. +This will also close the file if it is still open. + +* @param file_handle The file handle to destroy. +*/ +DUCKDB_C_API void duckdb_destroy_file_handle(duckdb_file_handle *file_handle); + +/*! +Retrieves the last error that occurred on the given file handle. + +* @param file_handle The file handle. +* @return The error data. Must be destroyed with `duckdb_destroy_error_data` +*/ +DUCKDB_C_API duckdb_error_data duckdb_file_handle_error_data(duckdb_file_handle file_handle); + +/*! +Reads data from the file into the buffer. + +* @param file_handle The file handle to read from. +* @param buffer The buffer to read data into. +* @param size The number of bytes to read. +* @return The number of bytes actually read, or negative on error. +*/ +DUCKDB_C_API int64_t duckdb_file_handle_read(duckdb_file_handle file_handle, void *buffer, int64_t size); + +/*! +Writes data from the buffer to the file. + +* @param file_handle The file handle to write to. +* @param buffer The buffer containing data to write. +* @param size The number of bytes to write. +* @return The number of bytes actually written, or negative on error. +*/ +DUCKDB_C_API int64_t duckdb_file_handle_write(duckdb_file_handle file_handle, const void *buffer, int64_t size); + +/*! +Tells the current position in the file. + +* @param file_handle The file handle to tell the position of. +* @return The current position in the file, or negative on error. +*/ +DUCKDB_C_API int64_t duckdb_file_handle_tell(duckdb_file_handle file_handle); + +/*! +Gets the size of the file. + +* @param file_handle The file handle to get the size of. +* @return The size of the file in bytes, or negative on error. +*/ +DUCKDB_C_API int64_t duckdb_file_handle_size(duckdb_file_handle file_handle); + +/*! +Seeks to a specific position in the file. + +* @param file_handle The file handle to seek in. +* @return `DuckDBSuccess` on success or `DuckDBError` on failure. If unsuccessful, the error data can be retrieved using +`duckdb_file_handle_error_data`. +*/ +DUCKDB_C_API duckdb_state duckdb_file_handle_seek(duckdb_file_handle file_handle, int64_t position); + +/*! +Synchronizes the file's state with the underlying storage. + +* @param file_handle The file handle to synchronize. +* @return `DuckDBSuccess` on success or `DuckDBError` on failure. If unsuccessful, the error data can be retrieved using +`duckdb_file_handle_error_data`. +*/ +DUCKDB_C_API duckdb_state duckdb_file_handle_sync(duckdb_file_handle file_handle); + +/*! +Closes the given file handle. + +* @param file_handle The file handle to close. +* @return `DuckDBSuccess` on success or `DuckDBError` on failure. If unsuccessful, the error data can be retrieved using +`duckdb_file_handle_error_data`. +*/ +DUCKDB_C_API duckdb_state duckdb_file_handle_close(duckdb_file_handle file_handle); + #endif #ifdef __cplusplus diff --git a/src/duckdb/src/include/duckdb/common/enum_util.hpp b/src/duckdb/src/include/duckdb/common/enum_util.hpp index 85082ae85..d07e93d02 100644 --- a/src/duckdb/src/include/duckdb/common/enum_util.hpp +++ b/src/duckdb/src/include/duckdb/common/enum_util.hpp @@ -236,6 +236,8 @@ enum class LogLevel : uint8_t; enum class LogMode : uint8_t; +enum class LoggingTargetTable : uint8_t; + enum class LogicalOperatorType : uint8_t; enum class LogicalTypeId : uint8_t; @@ -380,6 +382,8 @@ enum class StatisticsType : uint8_t; enum class StatsInfo : uint8_t; +enum class StorageBlockPrefetch : uint8_t; + enum class StrTimeSpecifier : uint8_t; enum class StreamExecutionResult : uint8_t; @@ -753,6 +757,9 @@ const char* EnumUtil::ToChars(LogLevel value); template<> const char* EnumUtil::ToChars(LogMode value); +template<> +const char* EnumUtil::ToChars(LoggingTargetTable value); + template<> const char* EnumUtil::ToChars(LogicalOperatorType value); @@ -969,6 +976,9 @@ const char* EnumUtil::ToChars(StatisticsType value); template<> const char* EnumUtil::ToChars(StatsInfo value); +template<> +const char* EnumUtil::ToChars(StorageBlockPrefetch value); + template<> const char* EnumUtil::ToChars(StrTimeSpecifier value); @@ -1375,6 +1385,9 @@ LogLevel EnumUtil::FromString(const char *value); template<> LogMode EnumUtil::FromString(const char *value); +template<> +LoggingTargetTable EnumUtil::FromString(const char *value); + template<> LogicalOperatorType EnumUtil::FromString(const char *value); @@ -1591,6 +1604,9 @@ StatisticsType EnumUtil::FromString(const char *value); template<> StatsInfo EnumUtil::FromString(const char *value); +template<> +StorageBlockPrefetch EnumUtil::FromString(const char *value); + template<> StrTimeSpecifier EnumUtil::FromString(const char *value); diff --git a/src/duckdb/src/include/duckdb/common/enums/storage_block_prefetch.hpp b/src/duckdb/src/include/duckdb/common/enums/storage_block_prefetch.hpp new file mode 100644 index 000000000..273d3fffa --- /dev/null +++ b/src/duckdb/src/include/duckdb/common/enums/storage_block_prefetch.hpp @@ -0,0 +1,17 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/enums/storage_block_prefetch.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/common/constants.hpp" + +namespace duckdb { + +enum class StorageBlockPrefetch : uint8_t { REMOTE_ONLY = 0, NEVER = 1, ALWAYS_PREFETCH = 2, DEBUG_FORCE_ALWAYS = 3 }; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp b/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp index d2a55529a..4d0e6ae47 100644 --- a/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +++ b/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp @@ -277,6 +277,8 @@ class JoinHashTable { uint64_t bitmask = DConstants::INVALID_INDEX; //! Whether or not we error on multiple rows found per match in a SINGLE join bool single_join_error_on_multiple_rows = true; + //! Whether or not to perform deduplication based on join_keys when building ht + bool insert_duplicate_keys = true; struct { mutex mj_lock; diff --git a/src/duckdb/src/include/duckdb/execution/operator/set/physical_union.hpp b/src/duckdb/src/include/duckdb/execution/operator/set/physical_union.hpp index 36bc3d58c..8873858bd 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/set/physical_union.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/set/physical_union.hpp @@ -17,8 +17,9 @@ class PhysicalUnion : public PhysicalOperator { static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::UNION; public: - PhysicalUnion(PhysicalPlan &physical_plan, vector types, PhysicalOperator &top, - PhysicalOperator &bottom, idx_t estimated_cardinality, bool allow_out_of_order); + PhysicalUnion(PhysicalPlan &physical_plan, vector types, + const ArenaLinkedList> &children_p, idx_t estimated_cardinality, + bool allow_out_of_order); bool allow_out_of_order; diff --git a/src/duckdb/src/include/duckdb/function/table/read_duckdb.hpp b/src/duckdb/src/include/duckdb/function/table/read_duckdb.hpp new file mode 100644 index 000000000..3bf2fd4e8 --- /dev/null +++ b/src/duckdb/src/include/duckdb/function/table/read_duckdb.hpp @@ -0,0 +1,23 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/function/table/read_duckdb.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/function/table_function.hpp" + +namespace duckdb { +struct ReplacementScanInput; +struct ReplacementScanData; + +struct ReadDuckDBTableFunction { + static TableFunction GetFunction(); + static unique_ptr ReplacementScan(ClientContext &context, ReplacementScanInput &input, + optional_ptr data); +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/logging/log_storage.hpp b/src/duckdb/src/include/duckdb/logging/log_storage.hpp index 10c06dddc..9a8a67405 100644 --- a/src/duckdb/src/include/duckdb/logging/log_storage.hpp +++ b/src/duckdb/src/include/duckdb/logging/log_storage.hpp @@ -33,7 +33,7 @@ struct CSVReaderOptions; //! Logging storage can store entries normalized or denormalized. This enum describes what a single table/file/etc //! contains -enum class LoggingTargetTable { +enum class LoggingTargetTable : uint8_t { ALL_LOGS, // Denormalized: log entries consisting of both the full log entry and the context LOG_ENTRIES, // Normalized: contains only the log entries and a context_id LOG_CONTEXTS, // Normalized: contains only the log contexts @@ -165,7 +165,7 @@ class BufferingLogStorage : public LogStorage { //! Debug option for testing buffering behaviour bool only_flush_on_full_buffer = false; //! The buffers used for each table - unordered_map> buffers; + map> buffers; //! This flag is set whenever a new context_is written to the entry buffer. It means that the next flush of //! LoggingTargetTable::LOG_ENTRIES also requires a flush of LoggingTargetTable::LOG_CONTEXTS bool flush_contexts_on_next_entry_flush = false; @@ -218,9 +218,9 @@ class CSVLogStorage : public BufferingLogStorage { void InitializeCastChunk(LoggingTargetTable table); //! The cast buffers used to cast from the original types to the VARCHAR types ready to write to CSV format - unordered_map> cast_buffers; + map> cast_buffers; //! The writers to be registered by child classes - unordered_map> writers; + map> writers; //! CSV Options to initialize the CSVWriters with. TODO: cleanup, this is now a little bit of a mixed bag of //! settings @@ -302,7 +302,7 @@ class FileLogStorage : public CSVLogStorage { }; //! The table info per table - unordered_map tables; + map tables; //! Base path to generate the file paths from string base_path; @@ -349,7 +349,7 @@ class InMemoryLogStorage : public BufferingLogStorage { //! Helper function to get the buffer ColumnDataCollection &GetBuffer(LoggingTargetTable table) const; - unordered_map> log_storage_buffers; + map> log_storage_buffers; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/logging/log_type.hpp b/src/duckdb/src/include/duckdb/logging/log_type.hpp index c843fe1b0..23d901c4e 100644 --- a/src/duckdb/src/include/duckdb/logging/log_type.hpp +++ b/src/duckdb/src/include/duckdb/logging/log_type.hpp @@ -17,6 +17,9 @@ struct FileHandle; struct BaseRequest; struct HTTPResponse; class PhysicalOperator; +class AttachedDatabase; +class RowGroup; +struct DataTableInfo; //! Log types provide some structure to the formats that the different log messages can have //! For now, this holds a type that the VARCHAR value will be auto-cast into. @@ -54,9 +57,7 @@ class QueryLogType : public LogType { QueryLogType() : LogType(NAME, LEVEL) {}; - static string ConstructLogMessage(const string &str) { - return str; - } + static string ConstructLogMessage(const string &str); }; class FileSystemLogType : public LogType { @@ -105,4 +106,26 @@ class PhysicalOperatorLogType : public LogType { const vector> &info); }; +class CheckpointLogType : public LogType { +public: + static constexpr const char *NAME = "Checkpoint"; + static constexpr LogLevel LEVEL = LogLevel::LOG_DEBUG; + + //! Construct the log type + CheckpointLogType(); + + static LogicalType GetLogType(); + + //! Vacuum + static string ConstructLogMessage(const AttachedDatabase &db, DataTableInfo &table, idx_t segment_idx, + idx_t merge_count, idx_t target_count, idx_t merge_rows, idx_t row_start); + //! Checkpoint + static string ConstructLogMessage(const AttachedDatabase &db, DataTableInfo &table, idx_t segment_idx, + RowGroup &row_group); + +private: + static string CreateLog(const AttachedDatabase &db, DataTableInfo &table, const char *op, vector map_keys, + vector map_values); +}; + } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/main/attached_database.hpp b/src/duckdb/src/include/duckdb/main/attached_database.hpp index d15fede62..7333d9adb 100644 --- a/src/duckdb/src/include/duckdb/main/attached_database.hpp +++ b/src/duckdb/src/include/duckdb/main/attached_database.hpp @@ -32,6 +32,8 @@ enum class AttachedDatabaseType { TEMP_DATABASE, }; +enum class AttachVisibility { SHOWN, HIDDEN }; + class DatabaseFilePathManager; struct StoredDatabasePath { @@ -58,6 +60,10 @@ struct AttachOptions { unordered_map options; //! (optionally) a catalog can be provided with a default table QualifiedName default_table; + //! Whether or not this is the main database + bool is_main_database = false; + //! The visibility of the attached database + AttachVisibility visibility = AttachVisibility::SHOWN; //! The stored database path (in the path manager) unique_ptr stored_database_path; }; @@ -106,6 +112,9 @@ class AttachedDatabase : public CatalogEntry, public enable_shared_from_this parent_catalog; optional_ptr storage_extension; + AttachVisibility visibility = AttachVisibility::SHOWN; bool is_initial_database = false; bool is_closed = false; }; diff --git a/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp b/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp index 1eef6b589..2ce10061a 100644 --- a/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +++ b/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp @@ -501,6 +501,26 @@ typedef struct { bool (*duckdb_expression_is_foldable)(duckdb_expression expr); duckdb_error_data (*duckdb_expression_fold)(duckdb_client_context context, duckdb_expression expr, duckdb_value *out_value); + // API to manage file system operations + + duckdb_file_system (*duckdb_client_context_get_file_system)(duckdb_client_context context); + void (*duckdb_destroy_file_system)(duckdb_file_system *file_system); + duckdb_state (*duckdb_file_system_open)(duckdb_file_system file_system, const char *path, + duckdb_file_open_options options, duckdb_file_handle *out_file); + duckdb_error_data (*duckdb_file_system_error_data)(duckdb_file_system file_system); + duckdb_file_open_options (*duckdb_create_file_open_options)(); + duckdb_state (*duckdb_file_open_options_set_flag)(duckdb_file_open_options options, duckdb_file_flag flag, + bool value); + void (*duckdb_destroy_file_open_options)(duckdb_file_open_options *options); + void (*duckdb_destroy_file_handle)(duckdb_file_handle *file_handle); + duckdb_error_data (*duckdb_file_handle_error_data)(duckdb_file_handle file_handle); + duckdb_state (*duckdb_file_handle_close)(duckdb_file_handle file_handle); + int64_t (*duckdb_file_handle_read)(duckdb_file_handle file_handle, void *buffer, int64_t size); + int64_t (*duckdb_file_handle_write)(duckdb_file_handle file_handle, const void *buffer, int64_t size); + duckdb_state (*duckdb_file_handle_seek)(duckdb_file_handle file_handle, int64_t position); + int64_t (*duckdb_file_handle_tell)(duckdb_file_handle file_handle); + duckdb_state (*duckdb_file_handle_sync)(duckdb_file_handle file_handle); + int64_t (*duckdb_file_handle_size)(duckdb_file_handle file_handle); // New functions around the client context idx_t (*duckdb_client_context_get_connection_id)(duckdb_client_context context); @@ -987,6 +1007,22 @@ inline duckdb_ext_api_v1 CreateAPIv1() { result.duckdb_expression_return_type = duckdb_expression_return_type; result.duckdb_expression_is_foldable = duckdb_expression_is_foldable; result.duckdb_expression_fold = duckdb_expression_fold; + result.duckdb_client_context_get_file_system = duckdb_client_context_get_file_system; + result.duckdb_destroy_file_system = duckdb_destroy_file_system; + result.duckdb_file_system_open = duckdb_file_system_open; + result.duckdb_file_system_error_data = duckdb_file_system_error_data; + result.duckdb_create_file_open_options = duckdb_create_file_open_options; + result.duckdb_file_open_options_set_flag = duckdb_file_open_options_set_flag; + result.duckdb_destroy_file_open_options = duckdb_destroy_file_open_options; + result.duckdb_destroy_file_handle = duckdb_destroy_file_handle; + result.duckdb_file_handle_error_data = duckdb_file_handle_error_data; + result.duckdb_file_handle_close = duckdb_file_handle_close; + result.duckdb_file_handle_read = duckdb_file_handle_read; + result.duckdb_file_handle_write = duckdb_file_handle_write; + result.duckdb_file_handle_seek = duckdb_file_handle_seek; + result.duckdb_file_handle_tell = duckdb_file_handle_tell; + result.duckdb_file_handle_sync = duckdb_file_handle_sync; + result.duckdb_file_handle_size = duckdb_file_handle_size; result.duckdb_client_context_get_connection_id = duckdb_client_context_get_connection_id; result.duckdb_destroy_client_context = duckdb_destroy_client_context; result.duckdb_connection_get_client_context = duckdb_connection_get_client_context; diff --git a/src/duckdb/src/include/duckdb/main/client_context.hpp b/src/duckdb/src/include/duckdb/main/client_context.hpp index 5291481be..ddb14518c 100644 --- a/src/duckdb/src/include/duckdb/main/client_context.hpp +++ b/src/duckdb/src/include/duckdb/main/client_context.hpp @@ -304,6 +304,8 @@ class ClientContext : public enable_shared_from_this { CreatePreparedStatementInternal(ClientContextLock &lock, const string &query, unique_ptr statement, optional_ptr> values); + SettingLookupResult TryGetCurrentSettingInternal(const string &key, Value &result) const; + private: //! Lock on using the ClientContext in parallel mutex context_lock; diff --git a/src/duckdb/src/include/duckdb/main/database.hpp b/src/duckdb/src/include/duckdb/main/database.hpp index 11936d5f7..2486d1e0e 100644 --- a/src/duckdb/src/include/duckdb/main/database.hpp +++ b/src/duckdb/src/include/duckdb/main/database.hpp @@ -115,14 +115,14 @@ class DuckDB { void LoadStaticExtension() { T extension; auto &manager = ExtensionManager::Get(*instance); - auto info = manager.BeginLoad(extension.Name()); - if (!info) { + auto load_info = manager.BeginLoad(extension.Name()); + if (!load_info) { // already loaded - return return; } // Instantiate a new loader - ExtensionLoader loader(*instance, extension.Name()); + ExtensionLoader loader(*load_info); // Call the Load method of the extension extension.Load(loader); @@ -133,7 +133,7 @@ class DuckDB { ExtensionInstallInfo install_info; install_info.mode = ExtensionInstallMode::STATICALLY_LINKED; install_info.version = extension.Version(); - info->FinishLoad(install_info); + load_info->FinishLoad(install_info); } DUCKDB_API FileSystem &GetFileSystem(); diff --git a/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp b/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp index a4368d4f1..1912a90bf 100644 --- a/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp +++ b/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp @@ -19,21 +19,27 @@ struct AttachOptions; enum class InsertDatabasePathResult { SUCCESS, ALREADY_EXISTS }; +struct DatabasePathInfo { + explicit DatabasePathInfo(string name_p) : name(std::move(name_p)) { + } + + string name; +}; + //! The DatabaseFilePathManager is used to ensure we only ever open a single database file once class DatabaseFilePathManager { public: idx_t ApproxDatabaseCount() const; InsertDatabasePathResult InsertDatabasePath(const string &path, const string &name, OnCreateConflict on_conflict, AttachOptions &options); + //! Erase a database path - indicating we are done with using it void EraseDatabasePath(const string &path); private: - //! The lock to add entries to the database path map + //! The lock to add entries to the db_paths map mutable mutex db_paths_lock; - //! A set containing all attached database path - //! This allows to attach many databases efficiently, and to avoid attaching the - //! same file path twice - case_insensitive_map_t db_paths_to_name; + //! A set containing all attached database paths mapped to their attached database name + case_insensitive_map_t db_paths; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/main/database_manager.hpp b/src/duckdb/src/include/duckdb/main/database_manager.hpp index 346c8fb4d..b516e858f 100644 --- a/src/duckdb/src/include/duckdb/main/database_manager.hpp +++ b/src/duckdb/src/include/duckdb/main/database_manager.hpp @@ -53,8 +53,6 @@ class DatabaseManager { //! Attach a new database shared_ptr AttachDatabase(ClientContext &context, AttachInfo &info, AttachOptions &options); - optional_ptr FinalizeAttach(ClientContext &context, AttachInfo &info, - shared_ptr database); //! Detach an existing database void DetachDatabase(ClientContext &context, const string &name, OnEntryNotFound if_not_found); //! Alter operation dispatcher @@ -109,6 +107,10 @@ class DatabaseManager { shared_ptr GetDatabaseInternal(const lock_guard &, const string &name); +private: + optional_ptr FinalizeAttach(ClientContext &context, AttachInfo &info, + shared_ptr database); + private: //! The system database is a special database that holds system entries (e.g. functions) shared_ptr system; diff --git a/src/duckdb/src/include/duckdb/main/settings.hpp b/src/duckdb/src/include/duckdb/main/settings.hpp index 5f90ad3d0..383d5533b 100644 --- a/src/duckdb/src/include/duckdb/main/settings.hpp +++ b/src/duckdb/src/include/duckdb/main/settings.hpp @@ -18,6 +18,7 @@ #include "duckdb/common/enums/output_type.hpp" #include "duckdb/common/enums/thread_pin_mode.hpp" #include "duckdb/common/enums/arrow_format_version.hpp" +#include "duckdb/common/enums/storage_block_prefetch.hpp" namespace duckdb { @@ -1181,6 +1182,16 @@ struct SecretDirectorySetting { static Value GetSetting(const ClientContext &context); }; +struct StorageBlockPrefetchSetting { + using RETURN_TYPE = StorageBlockPrefetch; + static constexpr const char *Name = "storage_block_prefetch"; + static constexpr const char *Description = "In which scenarios to use storage block prefetching"; + static constexpr const char *InputType = "VARCHAR"; + static constexpr const char *DefaultValue = "REMOTE_ONLY"; + static constexpr SetScope DefaultScope = SetScope::GLOBAL; + static void OnSet(SettingCallbackInfo &info, Value &input); +}; + struct StorageCompatibilityVersionSetting { using RETURN_TYPE = string; static constexpr const char *Name = "storage_compatibility_version"; @@ -1242,6 +1253,16 @@ struct UsernameSetting { static Value GetSetting(const ClientContext &context); }; +struct WriteBufferRowGroupCountSetting { + using RETURN_TYPE = idx_t; + static constexpr const char *Name = "write_buffer_row_group_count"; + static constexpr const char *Description = "The amount of row groups to buffer in bulk ingestion prior to flushing " + "them together. Reducing this setting can reduce memory consumption."; + static constexpr const char *InputType = "UBIGINT"; + static constexpr const char *DefaultValue = "5"; + static constexpr SetScope DefaultScope = SetScope::GLOBAL; +}; + struct ZstdMinStringLengthSetting { using RETURN_TYPE = idx_t; static constexpr const char *Name = "zstd_min_string_length"; diff --git a/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp b/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp index 652ae47d6..c1f2c4586 100644 --- a/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +++ b/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp @@ -65,7 +65,8 @@ class RelationStatisticsHelper { static RelationStats CombineStatsOfReorderableOperator(vector &bindings, vector relation_stats); //! Called after reordering a query plan with potentially 2+ relations. - static RelationStats CombineStatsOfNonReorderableOperator(LogicalOperator &op, vector child_stats); + static RelationStats CombineStatsOfNonReorderableOperator(LogicalOperator &op, + const vector &child_stats); static void CopyRelationStats(RelationStats &to, const RelationStats &from); private: diff --git a/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp b/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp index 6f7744a9f..19feda673 100644 --- a/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +++ b/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp @@ -113,6 +113,8 @@ class StatisticsPropagator { bool ExpressionIsConstant(Expression &expr, const Value &val); bool ExpressionIsConstantOrNull(Expression &expr, const Value &val); + unique_ptr PropagateUnion(LogicalSetOperation &setop, unique_ptr &node_ptr); + private: Optimizer &optimizer; ClientContext &context; diff --git a/src/duckdb/src/include/duckdb/parser/parser_extension.hpp b/src/duckdb/src/include/duckdb/parser/parser_extension.hpp index 164a45c34..61c071307 100644 --- a/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +++ b/src/duckdb/src/include/duckdb/parser/parser_extension.hpp @@ -11,6 +11,7 @@ #include "duckdb/common/common.hpp" #include "duckdb/common/enums/statement_type.hpp" #include "duckdb/function/table_function.hpp" +#include "duckdb/parser/sql_statement.hpp" namespace duckdb { @@ -76,6 +77,25 @@ struct ParserExtensionPlanResult { // NOLINT: work-around bug in clang-tidy typedef ParserExtensionPlanResult (*plan_function_t)(ParserExtensionInfo *info, ClientContext &context, unique_ptr parse_data); +//===--------------------------------------------------------------------===// +// Parser override +//===--------------------------------------------------------------------===// +struct ParserOverrideResult { + explicit ParserOverrideResult() : type(ParserExtensionResultType::DISPLAY_ORIGINAL_ERROR) {}; + + explicit ParserOverrideResult(vector> statements_p) + : type(ParserExtensionResultType::PARSE_SUCCESSFUL), statements(std::move(statements_p)) {}; + + explicit ParserOverrideResult(const string &error_p) + : type(ParserExtensionResultType::DISPLAY_EXTENSION_ERROR), error(error_p) {}; + + ParserExtensionResultType type; + vector> statements; + string error; +}; + +typedef ParserOverrideResult (*parser_override_function_t)(ParserExtensionInfo *info, const string &query); + //===--------------------------------------------------------------------===// // ParserExtension //===--------------------------------------------------------------------===// @@ -89,6 +109,9 @@ class ParserExtension { //! Takes as input the result of the parse_function, and outputs various properties of the resulting plan plan_function_t plan_function; + //! Override the current parser with a new parser and return a vector of SQL statements + parser_override_function_t parser_override; + //! Additional parser info passed to the parse function shared_ptr parser_info; }; diff --git a/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp b/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp index de36f3cad..960f6c2d6 100644 --- a/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +++ b/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp @@ -26,14 +26,10 @@ class SetOperationNode : public QueryNode { SetOperationType setop_type = SetOperationType::NONE; //! whether the ALL modifier was used or not bool setop_all = false; - //! The left side of the set operation - unique_ptr left; - //! The right side of the set operation - unique_ptr right; + //! The children of the set operation + vector> children; - const vector> &GetSelectList() const override { - return left->GetSelectList(); - } + const vector> &GetSelectList() const override; public: //! Convert the query node to a string @@ -54,7 +50,8 @@ class SetOperationNode : public QueryNode { SetOperationNode(SetOperationType setop_type, unique_ptr left, unique_ptr right, vector> children, bool setop_all); - vector> SerializeChildNodes() const; + unique_ptr SerializeChildNode(Serializer &serializer, idx_t index) const; + bool SerializeChildList(Serializer &serializer) const; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/parser/transformer.hpp b/src/duckdb/src/include/duckdb/parser/transformer.hpp index 7811bbe86..59e4f0419 100644 --- a/src/duckdb/src/include/duckdb/parser/transformer.hpp +++ b/src/duckdb/src/include/duckdb/parser/transformer.hpp @@ -217,6 +217,8 @@ class Transformer { unique_ptr TransformSelectNodeInternal(duckdb_libpgquery::PGSelectStmt &select, bool is_select = true); unique_ptr TransformSelectInternal(duckdb_libpgquery::PGSelectStmt &select); void TransformModifiers(duckdb_libpgquery::PGSelectStmt &stmt, QueryNode &node); + bool SetOperationsMatch(duckdb_libpgquery::PGSelectStmt &root, duckdb_libpgquery::PGNode &node); + void TransformSetOperationChildren(duckdb_libpgquery::PGSelectStmt &stmt, SetOperationNode &result); //===--------------------------------------------------------------------===// // Expression Transform diff --git a/src/duckdb/src/include/duckdb/planner/binder.hpp b/src/duckdb/src/include/duckdb/planner/binder.hpp index bc81db9bd..5a664f2dc 100644 --- a/src/duckdb/src/include/duckdb/planner/binder.hpp +++ b/src/duckdb/src/include/duckdb/planner/binder.hpp @@ -117,9 +117,7 @@ class Binder : public enable_shared_from_this { //! The client context ClientContext &context; //! A mapping of names to common table expressions - case_insensitive_map_t> CTE_bindings; // NOLINT - //! The CTEs that have already been bound - reference_set_t bound_ctes; + case_insensitive_set_t CTE_bindings; // NOLINT //! The bind context BindContext bind_context; //! The set of correlated columns bound by this binder (FIXME: this should probably be an unordered_set and not a @@ -183,11 +181,11 @@ class Binder : public enable_shared_from_this { const EntryLookupInfo &lookup_info, OnEntryNotFound on_entry_not_found); //! Add a common table expression to the binder - void AddCTE(const string &name, CommonTableExpressionInfo &cte); + void AddCTE(const string &name); //! Find all candidate common table expression by name; returns empty vector if none exists - vector> FindCTE(const string &name, bool skip = false); + vector> FindCTE(const string &name, bool skip = false); - bool CTEIsAlreadyBound(CommonTableExpressionInfo &cte); + bool CTEExists(const string &name); //! Add the view to the set of currently bound views - used for detecting recursive view definitions void AddBoundView(ViewCatalogEntry &view); @@ -367,7 +365,7 @@ class Binder : public enable_shared_from_this { unique_ptr Bind(BaseTableRef &ref); unique_ptr Bind(BoundRefWrapper &ref); unique_ptr Bind(JoinRef &ref); - unique_ptr Bind(SubqueryRef &ref, optional_ptr cte = nullptr); + unique_ptr Bind(SubqueryRef &ref); unique_ptr Bind(TableFunctionRef &ref); unique_ptr Bind(EmptyTableRef &ref); unique_ptr Bind(DelimGetRef &ref); diff --git a/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp b/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp index a2c0b90bb..0d2e79be4 100644 --- a/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +++ b/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp @@ -20,9 +20,9 @@ class LogicalCTERef : public LogicalOperator { public: LogicalCTERef(idx_t table_index, idx_t cte_index, vector types, vector colnames, - CTEMaterialize materialized_cte, bool is_recurring = false) + bool is_recurring = false) : LogicalOperator(LogicalOperatorType::LOGICAL_CTE_REF), table_index(table_index), cte_index(cte_index), - correlated_columns(0), materialized_cte(materialized_cte), is_recurring(is_recurring) { + correlated_columns(0), is_recurring(is_recurring) { D_ASSERT(!types.empty()); chunk_types = std::move(types); bound_columns = std::move(colnames); @@ -37,8 +37,6 @@ class LogicalCTERef : public LogicalOperator { vector chunk_types; //! Number of correlated columns idx_t correlated_columns; - //! Does this operator read a materialized CTE? - CTEMaterialize materialized_cte; //! Does this operator read the recurring CTE table bool is_recurring = false; diff --git a/src/duckdb/src/include/duckdb/planner/operator/logical_prepare.hpp b/src/duckdb/src/include/duckdb/planner/operator/logical_prepare.hpp index 58279c08a..4a1abb40e 100644 --- a/src/duckdb/src/include/duckdb/planner/operator/logical_prepare.hpp +++ b/src/duckdb/src/include/duckdb/planner/operator/logical_prepare.hpp @@ -46,7 +46,7 @@ class LogicalPrepare : public LogicalOperator { } bool RequireOptimizer() const override { - if (!prepared->properties.bound_all_parameters) { + if (!prepared->properties.bound_all_parameters || prepared->properties.always_require_rebind) { return false; } return children[0]->RequireOptimizer(); diff --git a/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp b/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp index 0dbfc43cd..8900916db 100644 --- a/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +++ b/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp @@ -14,10 +14,7 @@ namespace duckdb { class LogicalSetOperation : public LogicalOperator { LogicalSetOperation(idx_t table_index, idx_t column_count, LogicalOperatorType type, bool setop_all, - bool allow_out_of_order) - : LogicalOperator(type), table_index(table_index), column_count(column_count), setop_all(setop_all), - allow_out_of_order(allow_out_of_order) { - } + bool allow_out_of_order); public: static constexpr const LogicalOperatorType TYPE = LogicalOperatorType::LOGICAL_INVALID; @@ -25,14 +22,9 @@ class LogicalSetOperation : public LogicalOperator { public: LogicalSetOperation(idx_t table_index, idx_t column_count, unique_ptr top, unique_ptr bottom, LogicalOperatorType type, bool setop_all, - bool allow_out_of_order = true) - : LogicalOperator(type), table_index(table_index), column_count(column_count), setop_all(setop_all), - allow_out_of_order(allow_out_of_order) { - D_ASSERT(type == LogicalOperatorType::LOGICAL_UNION || type == LogicalOperatorType::LOGICAL_EXCEPT || - type == LogicalOperatorType::LOGICAL_INTERSECT); - children.push_back(std::move(top)); - children.push_back(std::move(bottom)); - } + bool allow_out_of_order = true); + LogicalSetOperation(idx_t table_index, idx_t column_count, vector> children, + LogicalOperatorType type, bool setop_all, bool allow_out_of_order = true); idx_t table_index; idx_t column_count; diff --git a/src/duckdb/src/include/duckdb/planner/query_node/bound_set_operation_node.hpp b/src/duckdb/src/include/duckdb/planner/query_node/bound_set_operation_node.hpp index e3ce0457e..01fa37caf 100644 --- a/src/duckdb/src/include/duckdb/planner/query_node/bound_set_operation_node.hpp +++ b/src/duckdb/src/include/duckdb/planner/query_node/bound_set_operation_node.hpp @@ -14,6 +14,13 @@ namespace duckdb { +struct BoundSetOpChild { + unique_ptr node; + shared_ptr binder; + //! Exprs used by the UNION BY NAME operations to add a new projection + vector> reorder_expressions; +}; + //! Bound equivalent of SetOperationNode class BoundSetOperationNode : public BoundQueryNode { public: @@ -27,27 +34,11 @@ class BoundSetOperationNode : public BoundQueryNode { SetOperationType setop_type = SetOperationType::NONE; //! whether the ALL modifier was used or not bool setop_all = false; - //! The left side of the set operation - unique_ptr left; - //! The right side of the set operation - unique_ptr right; + //! The bound children + vector bound_children; //! Index used by the set operation idx_t setop_index; - //! The binder used by the left side of the set operation - shared_ptr left_binder; - //! The binder used by the right side of the set operation - shared_ptr right_binder; - - //! Exprs used by the UNION BY NAME opeartons to add a new projection - vector> left_reorder_exprs; - vector> right_reorder_exprs; - - //! The exprs of the child node may be rearranged(UNION BY NAME), - //! this vector records the new index of the expression after rearrangement - //! used by GatherAlias(...) function to create new reorder index - vector left_reorder_idx; - vector right_reorder_idx; public: idx_t GetRootIndex() override { diff --git a/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp b/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp index e79a35b64..781402fbe 100644 --- a/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +++ b/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp @@ -18,14 +18,13 @@ class BoundCTERef : public BoundTableRef { static constexpr const TableReferenceType TYPE = TableReferenceType::CTE; public: - BoundCTERef(idx_t bind_index, idx_t cte_index, CTEMaterialize materialized_cte) - : BoundTableRef(TableReferenceType::CTE), bind_index(bind_index), cte_index(cte_index), - materialized_cte(materialized_cte) { + BoundCTERef(idx_t bind_index, idx_t cte_index) + : BoundTableRef(TableReferenceType::CTE), bind_index(bind_index), cte_index(cte_index) { } - BoundCTERef(idx_t bind_index, idx_t cte_index, CTEMaterialize materialized_cte, bool is_recurring) + BoundCTERef(idx_t bind_index, idx_t cte_index, bool is_recurring) : BoundTableRef(TableReferenceType::CTE), bind_index(bind_index), cte_index(cte_index), - materialized_cte(materialized_cte), is_recurring(is_recurring) { + is_recurring(is_recurring) { } //! The set of columns bound to this base table reference vector bound_columns; @@ -35,8 +34,6 @@ class BoundCTERef : public BoundTableRef { idx_t bind_index; //! The index of the cte idx_t cte_index; - //! Is this a reference to a materialized CTE? - CTEMaterialize materialized_cte; //! Is this a reference to the recurring table of a CTE bool is_recurring = false; }; diff --git a/src/duckdb/src/include/duckdb/storage/block_manager.hpp b/src/duckdb/src/include/duckdb/storage/block_manager.hpp index bfea18bea..0fd9df675 100644 --- a/src/duckdb/src/include/duckdb/storage/block_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/block_manager.hpp @@ -82,6 +82,10 @@ class BlockManager { } //! Whether or not the attached database is in-memory virtual bool InMemory() = 0; + //! Whether or not to prefetch + virtual bool Prefetch() { + return false; + } //! Sync changes made to the block manager virtual void FileSync() = 0; diff --git a/src/duckdb/src/include/duckdb/storage/data_table.hpp b/src/duckdb/src/include/duckdb/storage/data_table.hpp index 1c947e279..bc8727a18 100644 --- a/src/duckdb/src/include/duckdb/storage/data_table.hpp +++ b/src/duckdb/src/include/duckdb/storage/data_table.hpp @@ -42,6 +42,7 @@ struct TableDeleteState; struct ConstraintState; struct TableUpdateState; enum class VerifyExistenceType : uint8_t; +struct OptimisticWriteCollection; enum class DataTableVersion { MAIN_TABLE, // this is the newest version of the table - it has not been altered or dropped @@ -124,12 +125,12 @@ class DataTable : public enable_shared_from_this { const vector> &bound_constraints, optional_ptr> column_ids); //! Merge a row group collection into the transaction-local storage - void LocalMerge(ClientContext &context, RowGroupCollection &collection); + void LocalMerge(ClientContext &context, OptimisticWriteCollection &collection); //! Create an optimistic row group collection for this table. Used for optimistically writing parallel appends. //! Returns the index into the optimistic_collections vector for newly created collection. - PhysicalIndex CreateOptimisticCollection(ClientContext &context, unique_ptr collection); + PhysicalIndex CreateOptimisticCollection(ClientContext &context, unique_ptr collection); //! Returns the optimistic row group collection corresponding to the index. - RowGroupCollection &GetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index); + OptimisticWriteCollection &GetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index); //! Resets the optimistic row group collection corresponding to the index. void ResetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index); //! Returns the optimistic writer of the corresponding local table. @@ -258,6 +259,7 @@ class DataTable : public enable_shared_from_this { void VacuumIndexes(); void VerifyIndexBuffers(); void CleanupAppend(transaction_t lowest_transaction, idx_t start, idx_t count); + void Destroy(); string GetTableName() const; void SetTableName(string new_name); diff --git a/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp b/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp index 6abfb5d5b..cd63a96b8 100644 --- a/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp @@ -92,17 +92,19 @@ class MetadataManager { protected: BlockManager &block_manager; BufferManager &buffer_manager; + mutable mutex block_lock; unordered_map blocks; unordered_map modified_blocks; protected: - block_id_t AllocateNewBlock(); - block_id_t PeekNextBlockId(); - block_id_t GetNextBlockId(); - - void AddBlock(MetadataBlock new_block, bool if_exists = false); - void AddAndRegisterBlock(MetadataBlock block); - void ConvertToTransient(MetadataBlock &block); + block_id_t AllocateNewBlock(unique_lock &block_lock); + block_id_t PeekNextBlockId() const; + block_id_t GetNextBlockId() const; + + void AddBlock(unique_lock &block_lock, MetadataBlock new_block, bool if_exists = false); + void AddAndRegisterBlock(unique_lock &block_lock, MetadataBlock block); + void ConvertToTransient(unique_lock &block_lock, MetadataBlock &block); + MetadataPointer FromDiskPointerInternal(unique_lock &block_lock, MetaBlockPointer pointer); }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/storage/optimistic_data_writer.hpp b/src/duckdb/src/include/duckdb/storage/optimistic_data_writer.hpp index aa855e0f1..1ded8bba6 100644 --- a/src/duckdb/src/include/duckdb/storage/optimistic_data_writer.hpp +++ b/src/duckdb/src/include/duckdb/storage/optimistic_data_writer.hpp @@ -13,20 +13,29 @@ namespace duckdb { class PartialBlockManager; +struct OptimisticWriteCollection { + shared_ptr collection; + idx_t last_flushed = 0; + idx_t complete_row_groups = 0; +}; + class OptimisticDataWriter { public: OptimisticDataWriter(ClientContext &context, DataTable &table); OptimisticDataWriter(DataTable &table, OptimisticDataWriter &parent); ~OptimisticDataWriter(); + //! Creates a collection to write to + static unique_ptr CreateCollection(DataTable &storage, + const vector &insert_types); //! Write a new row group to disk (if possible) - void WriteNewRowGroup(RowGroupCollection &row_groups); + void WriteNewRowGroup(OptimisticWriteCollection &row_groups); //! Write the last row group of a collection to disk - void WriteLastRowGroup(RowGroupCollection &row_groups); + void WriteLastRowGroup(OptimisticWriteCollection &row_groups); //! Final flush of the optimistic writer - fully flushes the partial block manager void FinalFlush(); //! Flushes a specific row group to disk - void FlushToDisk(RowGroup &row_group); + void FlushToDisk(const vector> &row_groups); //! Merge the partially written blocks from one optimistic writer into another void Merge(OptimisticDataWriter &other); //! Rollback diff --git a/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp b/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp index d58349123..ae7d74fd3 100644 --- a/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp @@ -115,6 +115,8 @@ class SingleFileBlockManager : public BlockManager { idx_t FreeBlocks() override; //! Whether or not the attached database is a remote file bool IsRemote() override; + //! Whether or not to prefetch + bool Prefetch() override; //! Return the checkpoint iteration of the file. uint64_t GetCheckpointIteration() const { diff --git a/src/duckdb/src/include/duckdb/storage/storage_manager.hpp b/src/duckdb/src/include/duckdb/storage/storage_manager.hpp index 0a080a1d6..c96a76ff7 100644 --- a/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/storage_manager.hpp @@ -104,6 +104,7 @@ class StorageManager { virtual vector GetMetadataInfo() = 0; virtual shared_ptr GetTableIOManager(BoundCreateTableInfo *info) = 0; virtual BlockManager &GetBlockManager() = 0; + virtual void Destroy(); void SetStorageVersion(idx_t version) { storage_version = version; @@ -193,6 +194,7 @@ class SingleFileStorageManager : public StorageManager { vector GetMetadataInfo() override; shared_ptr GetTableIOManager(BoundCreateTableInfo *info) override; BlockManager &GetBlockManager() override; + void Destroy() override; protected: void LoadDatabase(QueryContext context) override; diff --git a/src/duckdb/src/include/duckdb/storage/table/row_group.hpp b/src/duckdb/src/include/duckdb/storage/table/row_group.hpp index 403646793..242e19121 100644 --- a/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/row_group.hpp @@ -149,6 +149,8 @@ class RowGroup : public SegmentBase { //! Delete the given set of rows in the version manager idx_t Delete(TransactionData transaction, DataTable &table, row_t *row_ids, idx_t count); + static vector WriteToDisk(RowGroupWriteInfo &info, + const vector> &row_groups); RowGroupWriteData WriteToDisk(RowGroupWriteInfo &info); //! Returns the number of committed rows (count - committed deletes) idx_t GetCommittedRowCount(); diff --git a/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp b/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp index 7e5ae568a..32808ff4c 100644 --- a/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp @@ -57,8 +57,9 @@ class RowGroupCollection { void AppendRowGroup(SegmentLock &l, idx_t start_row); //! Get the nth row-group, negative numbers start from the back (so -1 is the last row group, etc) - RowGroup *GetRowGroup(int64_t index); + optional_ptr GetRowGroup(int64_t index); void Verify(); + void Destroy(); void InitializeScan(CollectionScanState &state, const vector &column_ids, optional_ptr table_filters); diff --git a/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp b/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp index a1c203db2..f427a5275 100644 --- a/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp @@ -76,6 +76,9 @@ class SegmentTree { auto l = Lock(); return ReferenceSegments(l); } + vector> &ReferenceLoadedSegmentsMutable(SegmentLock &l) { + return nodes; + } const vector> &ReferenceLoadedSegments(SegmentLock &l) const { return nodes; } diff --git a/src/duckdb/src/include/duckdb/transaction/local_storage.hpp b/src/duckdb/src/include/duckdb/transaction/local_storage.hpp index 1e043a3b0..5d29da46c 100644 --- a/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +++ b/src/duckdb/src/include/duckdb/transaction/local_storage.hpp @@ -44,7 +44,7 @@ class LocalTableStorage : public enable_shared_from_this { Allocator &allocator; //! The main row group collection. - shared_ptr row_groups; + unique_ptr row_groups; //! The set of unique append indexes. TableIndexList append_indexes; //! The set of delete indexes. @@ -55,7 +55,7 @@ class LocalTableStorage : public enable_shared_from_this { idx_t deleted_rows; //! The optimistic row group collections associated with this table. - vector> optimistic_collections; + vector> optimistic_collections; //! The main optimistic data writer associated with this table. OptimisticDataWriter optimistic_writer; @@ -79,14 +79,16 @@ class LocalTableStorage : public enable_shared_from_this { //! Create an optimistic row group collection for this table. //! Returns the index into the optimistic_collections vector for newly created collection. - PhysicalIndex CreateOptimisticCollection(unique_ptr collection); + PhysicalIndex CreateOptimisticCollection(unique_ptr collection); //! Returns the optimistic row group collection corresponding to the index. - RowGroupCollection &GetOptimisticCollection(const PhysicalIndex collection_index); + OptimisticWriteCollection &GetOptimisticCollection(const PhysicalIndex collection_index); //! Resets the optimistic row group collection corresponding to the index. void ResetOptimisticCollection(const PhysicalIndex collection_index); //! Returns the optimistic writer. OptimisticDataWriter &GetOptimisticWriter(); + RowGroupCollection &GetCollection(); + private: mutex collections_lock; }; @@ -141,12 +143,12 @@ class LocalStorage { //! Finish appending to the local storage static void FinalizeAppend(LocalAppendState &state); //! Merge a row group collection into the transaction-local storage - void LocalMerge(DataTable &table, RowGroupCollection &collection); + void LocalMerge(DataTable &table, OptimisticWriteCollection &collection); //! Create an optimistic row group collection for this table. //! Returns the index into the optimistic_collections vector for newly created collection. - PhysicalIndex CreateOptimisticCollection(DataTable &table, unique_ptr collection); + PhysicalIndex CreateOptimisticCollection(DataTable &table, unique_ptr collection); //! Returns the optimistic row group collection corresponding to the index. - RowGroupCollection &GetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index); + OptimisticWriteCollection &GetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index); //! Resets the optimistic row group collection corresponding to the index. void ResetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index); //! Returns the optimistic writer. diff --git a/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp b/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp index 794015089..71693ee14 100644 --- a/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +++ b/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp @@ -76,6 +76,7 @@ class MetaTransaction { return all_transactions; } optional_ptr GetReferencedDatabase(const string &name); + shared_ptr GetReferencedDatabaseOwning(const string &name); AttachedDatabase &UseDatabase(shared_ptr &database); void DetachDatabase(AttachedDatabase &database); diff --git a/src/duckdb/src/include/duckdb_extension.h b/src/duckdb/src/include/duckdb_extension.h index 2d89b4b96..7c5136059 100644 --- a/src/duckdb/src/include/duckdb_extension.h +++ b/src/duckdb/src/include/duckdb_extension.h @@ -578,6 +578,28 @@ typedef struct { duckdb_value *out_value); #endif +// API to manage file system operations +#ifdef DUCKDB_EXTENSION_API_VERSION_UNSTABLE + duckdb_file_system (*duckdb_client_context_get_file_system)(duckdb_client_context context); + void (*duckdb_destroy_file_system)(duckdb_file_system *file_system); + duckdb_state (*duckdb_file_system_open)(duckdb_file_system file_system, const char *path, + duckdb_file_open_options options, duckdb_file_handle *out_file); + duckdb_error_data (*duckdb_file_system_error_data)(duckdb_file_system file_system); + duckdb_file_open_options (*duckdb_create_file_open_options)(); + duckdb_state (*duckdb_file_open_options_set_flag)(duckdb_file_open_options options, duckdb_file_flag flag, + bool value); + void (*duckdb_destroy_file_open_options)(duckdb_file_open_options *options); + void (*duckdb_destroy_file_handle)(duckdb_file_handle *file_handle); + duckdb_error_data (*duckdb_file_handle_error_data)(duckdb_file_handle file_handle); + duckdb_state (*duckdb_file_handle_close)(duckdb_file_handle file_handle); + int64_t (*duckdb_file_handle_read)(duckdb_file_handle file_handle, void *buffer, int64_t size); + int64_t (*duckdb_file_handle_write)(duckdb_file_handle file_handle, const void *buffer, int64_t size); + duckdb_state (*duckdb_file_handle_seek)(duckdb_file_handle file_handle, int64_t position); + int64_t (*duckdb_file_handle_tell)(duckdb_file_handle file_handle); + duckdb_state (*duckdb_file_handle_sync)(duckdb_file_handle file_handle); + int64_t (*duckdb_file_handle_size)(duckdb_file_handle file_handle); +#endif + // New functions around the client context #ifdef DUCKDB_EXTENSION_API_VERSION_UNSTABLE idx_t (*duckdb_client_context_get_connection_id)(duckdb_client_context context); @@ -1093,6 +1115,24 @@ typedef struct { #define duckdb_expression_is_foldable duckdb_ext_api.duckdb_expression_is_foldable #define duckdb_expression_fold duckdb_ext_api.duckdb_expression_fold +// Version unstable_new_file_system_api +#define duckdb_client_context_get_file_system duckdb_ext_api.duckdb_client_context_get_file_system +#define duckdb_destroy_file_system duckdb_ext_api.duckdb_destroy_file_system +#define duckdb_file_system_error_data duckdb_ext_api.duckdb_file_system_error_data +#define duckdb_file_system_open duckdb_ext_api.duckdb_file_system_open +#define duckdb_create_file_open_options duckdb_ext_api.duckdb_create_file_open_options +#define duckdb_file_open_options_set_flag duckdb_ext_api.duckdb_file_open_options_set_flag +#define duckdb_destroy_file_open_options duckdb_ext_api.duckdb_destroy_file_open_options +#define duckdb_destroy_file_handle duckdb_ext_api.duckdb_destroy_file_handle +#define duckdb_file_handle_error_data duckdb_ext_api.duckdb_file_handle_error_data +#define duckdb_file_handle_read duckdb_ext_api.duckdb_file_handle_read +#define duckdb_file_handle_write duckdb_ext_api.duckdb_file_handle_write +#define duckdb_file_handle_tell duckdb_ext_api.duckdb_file_handle_tell +#define duckdb_file_handle_size duckdb_ext_api.duckdb_file_handle_size +#define duckdb_file_handle_seek duckdb_ext_api.duckdb_file_handle_seek +#define duckdb_file_handle_sync duckdb_ext_api.duckdb_file_handle_sync +#define duckdb_file_handle_close duckdb_ext_api.duckdb_file_handle_close + // Version unstable_new_open_connect_functions #define duckdb_connection_get_client_context duckdb_ext_api.duckdb_connection_get_client_context #define duckdb_connection_get_arrow_options duckdb_ext_api.duckdb_connection_get_arrow_options diff --git a/src/duckdb/src/logging/log_types.cpp b/src/duckdb/src/logging/log_types.cpp index 6f2d4bacd..f78abae59 100644 --- a/src/duckdb/src/logging/log_types.cpp +++ b/src/duckdb/src/logging/log_types.cpp @@ -1,3 +1,4 @@ +#include "duckdb/main/attached_database.hpp" #include "duckdb/logging/file_system_logger.hpp" #include "duckdb/logging/log_type.hpp" #include "duckdb/common/file_opener.hpp" @@ -13,7 +14,18 @@ constexpr LogLevel FileSystemLogType::LEVEL; constexpr LogLevel QueryLogType::LEVEL; constexpr LogLevel HTTPLogType::LEVEL; constexpr LogLevel PhysicalOperatorLogType::LEVEL; +constexpr LogLevel CheckpointLogType::LEVEL; +//===--------------------------------------------------------------------===// +// QueryLogType +//===--------------------------------------------------------------------===// +string QueryLogType::ConstructLogMessage(const string &str) { + return str; +} + +//===--------------------------------------------------------------------===// +// FileSystemLogType +//===--------------------------------------------------------------------===// FileSystemLogType::FileSystemLogType() : LogType(NAME, LEVEL, GetLogType()) { } @@ -36,6 +48,9 @@ LogicalType FileSystemLogType::GetLogType() { return LogicalType::STRUCT(child_list); } +//===--------------------------------------------------------------------===// +// HTTPLogType +//===--------------------------------------------------------------------===// HTTPLogType::HTTPLogType() : LogType(NAME, LEVEL, GetLogType()) { } @@ -92,6 +107,9 @@ string HTTPLogType::ConstructLogMessage(BaseRequest &request, optional_ptr child_list = { + {"database", LogicalType::VARCHAR}, + {"schema", LogicalType::VARCHAR}, + {"table", LogicalType::VARCHAR}, + {"type", LogicalType::VARCHAR}, + {"info", LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR)}, + }; + return LogicalType::STRUCT(child_list); +} + +string CheckpointLogType::CreateLog(const AttachedDatabase &db, DataTableInfo &table, const char *op_name, + vector map_keys, vector map_values) { + child_list_t child_list = { + {"database", db.name}, + {"schema", table.GetSchemaName()}, + {"table", table.GetTableName()}, + {"type", op_name}, + {"info", Value::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR, std::move(map_keys), std::move(map_values))}, + }; + + return Value::STRUCT(std::move(child_list)).ToString(); +} + +string CheckpointLogType::ConstructLogMessage(const AttachedDatabase &db, DataTableInfo &table, idx_t segment_idx, + idx_t merge_count, idx_t target_count, idx_t merge_rows, + idx_t row_start) { + vector map_keys = {"segment_idx", "merge_count", "target_count", "merge_rows", "row_start"}; + vector map_values = {to_string(segment_idx), to_string(merge_count), to_string(target_count), + to_string(merge_rows), to_string(row_start)}; + return CreateLog(db, table, "vacuum", std::move(map_keys), std::move(map_values)); +} + +string CheckpointLogType::ConstructLogMessage(const AttachedDatabase &db, DataTableInfo &table, idx_t segment_idx, + RowGroup &row_group) { + vector map_keys = {"segment_idx", "start", "count"}; + vector map_values = {to_string(segment_idx), to_string(row_group.start), to_string(row_group.count.load())}; + return CreateLog(db, table, "checkpoint", std::move(map_keys), std::move(map_values)); +} } // namespace duckdb diff --git a/src/duckdb/src/main/attached_database.cpp b/src/duckdb/src/main/attached_database.cpp index 6a4180f09..e98070c18 100644 --- a/src/duckdb/src/main/attached_database.cpp +++ b/src/duckdb/src/main/attached_database.cpp @@ -99,6 +99,7 @@ AttachedDatabase::AttachedDatabase(DatabaseInstance &db, Catalog &catalog_p, str } else { type = AttachedDatabaseType::READ_WRITE_DATABASE; } + visibility = options.visibility; // We create the storage after the catalog to guarantee we allow extensions to instantiate the DuckCatalog. catalog = make_uniq(*this); stored_database_path = std::move(options.stored_database_path); @@ -116,6 +117,7 @@ AttachedDatabase::AttachedDatabase(DatabaseInstance &db, Catalog &catalog_p, Sto } else { type = AttachedDatabaseType::READ_WRITE_DATABASE; } + visibility = options.visibility; optional_ptr storage_info = storage_extension->storage_info.get(); catalog = storage_extension->attach(storage_info, context, *this, name, info, options); @@ -227,10 +229,9 @@ void AttachedDatabase::SetReadOnlyDatabase() { } void AttachedDatabase::OnDetach(ClientContext &context) { - if (!catalog) { - return; + if (catalog) { + catalog->OnDetach(context); } - catalog->OnDetach(context); } void AttachedDatabase::Close() { @@ -242,14 +243,21 @@ void AttachedDatabase::Close() { // shutting down: attempt to checkpoint the database // but only if we are not cleaning up as part of an exception unwind - if (!Exception::UncaughtException() && storage && !storage->InMemory() && !ValidChecker::IsInvalidated(db)) { - try { - auto &config = DBConfig::GetConfig(db); - if (config.options.checkpoint_on_shutdown) { - CheckpointOptions options; - options.wal_action = CheckpointWALAction::DELETE_WAL; - storage->CreateCheckpoint(QueryContext(), options); + if (!Exception::UncaughtException() && storage && !ValidChecker::IsInvalidated(db)) { + if (!storage->InMemory()) { + try { + auto &config = DBConfig::GetConfig(db); + if (config.options.checkpoint_on_shutdown) { + CheckpointOptions options; + options.wal_action = CheckpointWALAction::DELETE_WAL; + storage->CreateCheckpoint(QueryContext(), options); + } + } catch (...) { // NOLINT } + } + try { + // destroy the storage + storage->Destroy(); } catch (...) { // NOLINT } } diff --git a/src/duckdb/src/main/capi/file_system-c.cpp b/src/duckdb/src/main/capi/file_system-c.cpp new file mode 100644 index 000000000..af82daa6c --- /dev/null +++ b/src/duckdb/src/main/capi/file_system-c.cpp @@ -0,0 +1,274 @@ +#include "duckdb/main/capi/capi_internal.hpp" + +namespace duckdb { +namespace { +struct CFileSystem { + + FileSystem &fs; + ErrorData error_data; + + explicit CFileSystem(FileSystem &fs_p) : fs(fs_p) { + } + + void SetError(const char *message) { + error_data = ErrorData(ExceptionType::IO, message); + } + void SetError(const std::exception &ex) { + error_data = ErrorData(ex); + } +}; + +struct CFileOpenOptions { + duckdb::FileOpenFlags flags; +}; + +struct CFileHandle { + ErrorData error_data; + unique_ptr handle; + + void SetError(const char *message) { + error_data = ErrorData(ExceptionType::IO, message); + } + void SetError(const std::exception &ex) { + error_data = ErrorData(ex); + } +}; + +} // namespace +} // namespace duckdb + +duckdb_file_system duckdb_client_context_get_file_system(duckdb_client_context context) { + if (!context) { + return nullptr; + } + auto ctx = reinterpret_cast(context); + auto wrapper = new duckdb::CFileSystem(duckdb::FileSystem::GetFileSystem(ctx->context)); + return reinterpret_cast(wrapper); +} + +void duckdb_destroy_file_system(duckdb_file_system *file_system) { + if (!file_system || !*file_system) { + return; + } + const auto fs = reinterpret_cast(*file_system); + delete fs; + *file_system = nullptr; +} + +duckdb_file_open_options duckdb_create_file_open_options() { + auto options = new duckdb::CFileOpenOptions(); + return reinterpret_cast(options); +} + +duckdb_state duckdb_file_open_options_set_flag(duckdb_file_open_options options, duckdb_file_flag flag, bool value) { + if (!options) { + return DuckDBError; + } + auto coptions = reinterpret_cast(options); + + switch (flag) { + case DUCKDB_FILE_FLAG_READ: + coptions->flags |= duckdb::FileOpenFlags::FILE_FLAGS_READ; + break; + case DUCKDB_FILE_FLAG_WRITE: + coptions->flags |= duckdb::FileOpenFlags::FILE_FLAGS_WRITE; + break; + case DUCKDB_FILE_FLAG_APPEND: + coptions->flags |= duckdb::FileOpenFlags::FILE_FLAGS_APPEND; + break; + case DUCKDB_FILE_FLAG_CREATE: + coptions->flags |= duckdb::FileOpenFlags::FILE_FLAGS_FILE_CREATE; + break; + case DUCKDB_FILE_FLAG_CREATE_NEW: + coptions->flags |= duckdb::FileOpenFlags::FILE_FLAGS_EXCLUSIVE_CREATE; + break; + default: + return DuckDBError; + } + return DuckDBSuccess; +} + +void duckdb_destroy_file_open_options(duckdb_file_open_options *options) { + if (!options || !*options) { + return; + } + auto coptions = reinterpret_cast(*options); + delete coptions; + *options = nullptr; +} + +duckdb_state duckdb_file_system_open(duckdb_file_system fs, const char *path, duckdb_file_open_options options, + duckdb_file_handle *out_file) { + if (!fs) { + *out_file = nullptr; + return DuckDBError; + } + auto cfs = reinterpret_cast(fs); + if (!path || !options || !out_file) { + cfs->SetError("Invalid input to duckdb_file_system_open"); + *out_file = nullptr; + return DuckDBError; + } + + try { + auto coptions = reinterpret_cast(options); + auto handle = cfs->fs.OpenFile(duckdb::string(path), coptions->flags); + auto wrapper = new duckdb::CFileHandle(); + wrapper->handle = std::move(handle); + *out_file = reinterpret_cast(wrapper); + return DuckDBSuccess; + } catch (const std::exception &ex) { + cfs->SetError(ex); + *out_file = nullptr; + return DuckDBError; + } catch (...) { + cfs->SetError("Unknown error occurred during file open"); + *out_file = nullptr; + return DuckDBError; + } +} + +duckdb_error_data duckdb_file_system_error_data(duckdb_file_system fs) { + auto wrapper = new duckdb::ErrorDataWrapper(); + if (!fs) { + return reinterpret_cast(wrapper); + } + auto cfs = reinterpret_cast(fs); + wrapper->error_data = cfs->error_data; + return reinterpret_cast(wrapper); +} + +void duckdb_destroy_file_handle(duckdb_file_handle *file) { + if (!file || !*file) { + return; + } + auto cfile = reinterpret_cast(*file); + cfile->handle->Close(); // Ensure the file is closed before destroying + delete cfile; + *file = nullptr; +} + +duckdb_error_data duckdb_file_handle_error_data(duckdb_file_handle file) { + auto wrapper = new duckdb::ErrorDataWrapper(); + if (!file) { + return reinterpret_cast(wrapper); + } + auto cfile = reinterpret_cast(file); + wrapper->error_data = cfile->error_data; + return reinterpret_cast(wrapper); +} + +int64_t duckdb_file_handle_read(duckdb_file_handle file, void *buffer, int64_t size) { + if (!file || !buffer || size < 0) { + return -1; + } + auto cfile = reinterpret_cast(file); + try { + return cfile->handle->Read(buffer, static_cast(size)); + } catch (std::exception &ex) { + cfile->SetError(ex); + return -1; + } catch (...) { + cfile->SetError("Unknown error occurred during file read"); + return -1; + } +} + +int64_t duckdb_file_handle_write(duckdb_file_handle file, const void *buffer, int64_t size) { + if (!file || !buffer || size < 0) { + return -1; + } + auto cfile = reinterpret_cast(file); + try { + return cfile->handle->Write(const_cast(buffer), static_cast(size)); + } catch (std::exception &ex) { + cfile->SetError(ex); + return -1; + } catch (...) { + cfile->SetError("Unknown error occurred during file write"); + return -1; + } +} + +int64_t duckdb_file_handle_tell(duckdb_file_handle file) { + if (!file) { + return -1; + } + auto cfile = reinterpret_cast(file); + try { + return static_cast(cfile->handle->SeekPosition()); + } catch (std::exception &ex) { + cfile->SetError(ex); + return -1; + } catch (...) { + cfile->SetError("Unknown error occurred when getting file position"); + return -1; + } +} + +int64_t duckdb_file_handle_size(duckdb_file_handle file) { + if (!file) { + return -1; + } + auto cfile = reinterpret_cast(file); + try { + return static_cast(cfile->handle->GetFileSize()); + } catch (std::exception &ex) { + cfile->SetError(ex); + return -1; + } catch (...) { + cfile->SetError("Unknown error occurred when getting file size"); + return -1; + } +} + +duckdb_state duckdb_file_handle_seek(duckdb_file_handle file, int64_t position) { + if (!file || position < 0) { + return DuckDBError; + } + auto cfile = reinterpret_cast(file); + try { + cfile->handle->Seek(static_cast(position)); + return DuckDBSuccess; + } catch (std::exception &ex) { + cfile->SetError(ex); + return DuckDBError; + } catch (...) { + cfile->SetError("Unknown error occurred when seeking in file"); + return DuckDBError; + } +} + +duckdb_state duckdb_file_handle_sync(duckdb_file_handle file) { + if (!file) { + return DuckDBError; + } + auto cfile = reinterpret_cast(file); + try { + cfile->handle->Sync(); + return DuckDBSuccess; + } catch (std::exception &ex) { + cfile->SetError(ex); + return DuckDBError; + } catch (...) { + cfile->SetError("Unknown error occurred when syncing file"); + return DuckDBError; + } +} + +duckdb_state duckdb_file_handle_close(duckdb_file_handle file) { + if (!file) { + return DuckDBError; + } + auto cfile = reinterpret_cast(file); + try { + cfile->handle->Close(); + return DuckDBSuccess; + } catch (std::exception &ex) { + cfile->SetError(ex); + return DuckDBError; + } catch (...) { + cfile->SetError("Unknown error occurred when closing file"); + return DuckDBError; + } +} diff --git a/src/duckdb/src/main/client_context.cpp b/src/duckdb/src/main/client_context.cpp index 46bf97b82..f52fbabdd 100644 --- a/src/duckdb/src/main/client_context.cpp +++ b/src/duckdb/src/main/client_context.cpp @@ -217,15 +217,15 @@ void ClientContext::BeginQueryInternal(ClientContextLock &lock, const string &qu state->QueryBegin(*this); } - // Flush the old Logger + // Flush the old logger. logger->Flush(); - // Refresh the logger to ensure we are in sync with global log settings - LoggingContext context(LogContextScope::CONNECTION); - context.connection_id = connection_id; - context.transaction_id = transaction.ActiveTransaction().global_transaction_id; - context.query_id = transaction.GetActiveQuery(); - logger = db->GetLogManager().CreateLogger(context, true); + // Refresh the logger to ensure we are in sync with the global log settings. + LoggingContext logging_context(LogContextScope::CONNECTION); + logging_context.connection_id = connection_id; + logging_context.transaction_id = transaction.ActiveTransaction().global_transaction_id; + logging_context.query_id = transaction.GetActiveQuery(); + logger = db->GetLogManager().CreateLogger(logging_context, true); DUCKDB_LOG(*this, QueryLogType, query); } @@ -889,6 +889,10 @@ unique_ptr ClientContext::PendingStatementOrPreparedStatemen shared_ptr &prepared, const PendingQueryParameters ¶meters) { unique_ptr pending; + // Start the profiler. + auto &profiler = QueryProfiler::Get(*this); + profiler.StartQuery(query, IsExplainAnalyze(statement ? statement.get() : prepared->unbound_statement.get())); + try { BeginQueryInternal(lock, query); } catch (std::exception &ex) { @@ -900,9 +904,6 @@ unique_ptr ClientContext::PendingStatementOrPreparedStatemen } return ErrorResult(std::move(error), query); } - // start the profiler - auto &profiler = QueryProfiler::Get(*this); - profiler.StartQuery(query, IsExplainAnalyze(statement ? statement.get() : prepared->unbound_statement.get())); bool invalidate_query = true; try { @@ -1406,15 +1407,7 @@ unique_ptr ClientContext::Execute(const shared_ptr &relat return ErrorResult(ErrorData(err_str)); } -SettingLookupResult ClientContext::TryGetCurrentSetting(const std::string &key, Value &result) const { - // first check the built-in settings - auto &db_config = DBConfig::GetConfig(*this); - auto option = db_config.GetOptionByName(key); - if (option && option->get_setting) { - result = option->get_setting(*this); - return SettingLookupResult(SettingScope::LOCAL); - } - +SettingLookupResult ClientContext::TryGetCurrentSettingInternal(const string &key, Value &result) const { // check the client session values const auto &session_config_map = config.set_variables; @@ -1428,6 +1421,21 @@ SettingLookupResult ClientContext::TryGetCurrentSetting(const std::string &key, return db->TryGetCurrentSetting(key, result); } +SettingLookupResult ClientContext::TryGetCurrentSetting(const string &key, Value &result) const { + // first check the built-in settings + auto &db_config = DBConfig::GetConfig(*this); + auto option = db_config.GetOptionByName(key); + if (option) { + if (option->get_setting) { + result = option->get_setting(*this); + return SettingLookupResult(SettingScope::LOCAL); + } + // alias - search for the default key + return TryGetCurrentSettingInternal(option->name, result); + } + return TryGetCurrentSettingInternal(key, result); +} + ParserOptions ClientContext::GetParserOptions() const { auto &client_config = ClientConfig::GetConfig(*this); ParserOptions options; diff --git a/src/duckdb/src/main/config.cpp b/src/duckdb/src/main/config.cpp index 57539ab2e..78b174902 100644 --- a/src/duckdb/src/main/config.cpp +++ b/src/duckdb/src/main/config.cpp @@ -168,21 +168,23 @@ static const ConfigurationOption internal_options[] = { DUCKDB_LOCAL(SchemaSetting), DUCKDB_LOCAL(SearchPathSetting), DUCKDB_GLOBAL(SecretDirectorySetting), + DUCKDB_SETTING_CALLBACK(StorageBlockPrefetchSetting), DUCKDB_GLOBAL(StorageCompatibilityVersionSetting), DUCKDB_LOCAL(StreamingBufferSizeSetting), DUCKDB_GLOBAL(TempDirectorySetting), DUCKDB_GLOBAL(TempFileEncryptionSetting), DUCKDB_GLOBAL(ThreadsSetting), DUCKDB_GLOBAL(UsernameSetting), + DUCKDB_SETTING(WriteBufferRowGroupCountSetting), DUCKDB_GLOBAL(ZstdMinStringLengthSetting), FINAL_SETTING}; static const ConfigurationAlias setting_aliases[] = {DUCKDB_SETTING_ALIAS("memory_limit", 83), DUCKDB_SETTING_ALIAS("null_order", 33), DUCKDB_SETTING_ALIAS("profiling_output", 102), - DUCKDB_SETTING_ALIAS("user", 116), + DUCKDB_SETTING_ALIAS("user", 117), DUCKDB_SETTING_ALIAS("wal_autocheckpoint", 20), - DUCKDB_SETTING_ALIAS("worker_threads", 115), + DUCKDB_SETTING_ALIAS("worker_threads", 116), FINAL_ALIAS}; vector DBConfig::GetOptions() { diff --git a/src/duckdb/src/main/database.cpp b/src/duckdb/src/main/database.cpp index 419d3304b..3d644d408 100644 --- a/src/duckdb/src/main/database.cpp +++ b/src/duckdb/src/main/database.cpp @@ -199,10 +199,8 @@ void DatabaseInstance::CreateMainDatabase() { Connection con(*this); con.BeginTransaction(); AttachOptions options(config.options); - auto initial_database = db_manager->AttachDatabase(*con.context, info, options); - initial_database->SetInitialDatabase(); - initial_database->Initialize(*con.context); - db_manager->FinalizeAttach(*con.context, info, std::move(initial_database)); + options.is_main_database = true; + db_manager->AttachDatabase(*con.context, info, options); con.Commit(); } diff --git a/src/duckdb/src/main/database_file_path_manager.cpp b/src/duckdb/src/main/database_file_path_manager.cpp index 1c553c614..05adeadfe 100644 --- a/src/duckdb/src/main/database_file_path_manager.cpp +++ b/src/duckdb/src/main/database_file_path_manager.cpp @@ -7,7 +7,7 @@ namespace duckdb { idx_t DatabaseFilePathManager::ApproxDatabaseCount() const { lock_guard path_lock(db_paths_lock); - return db_paths_to_name.size(); + return db_paths.size(); } InsertDatabasePathResult DatabaseFilePathManager::InsertDatabasePath(const string &path, const string &name, @@ -18,14 +18,15 @@ InsertDatabasePathResult DatabaseFilePathManager::InsertDatabasePath(const strin } lock_guard path_lock(db_paths_lock); - auto entry = db_paths_to_name.emplace(path, name); + auto entry = db_paths.emplace(path, DatabasePathInfo(name)); if (!entry.second) { - if (on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT && entry.first->second == name) { + auto &existing = entry.first->second; + if (on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT && existing.name == name) { return InsertDatabasePathResult::ALREADY_EXISTS; } throw BinderException("Unique file handle conflict: Cannot attach \"%s\" - the database file \"%s\" is already " "attached by database \"%s\"", - name, path, entry.first->second); + name, path, existing.name); } options.stored_database_path = make_uniq(*this, path, name); return InsertDatabasePathResult::SUCCESS; @@ -36,7 +37,7 @@ void DatabaseFilePathManager::EraseDatabasePath(const string &path) { return; } lock_guard path_lock(db_paths_lock); - db_paths_to_name.erase(path); + db_paths.erase(path); } } // namespace duckdb diff --git a/src/duckdb/src/main/database_manager.cpp b/src/duckdb/src/main/database_manager.cpp index df70d365a..ae0a6447d 100644 --- a/src/duckdb/src/main/database_manager.cpp +++ b/src/duckdb/src/main/database_manager.cpp @@ -84,22 +84,30 @@ shared_ptr DatabaseManager::GetDatabaseInternal(const lock_gua shared_ptr DatabaseManager::AttachDatabase(ClientContext &context, AttachInfo &info, AttachOptions &options) { - auto &config = DBConfig::GetConfig(context); if (options.db_type.empty() || StringUtil::CIEquals(options.db_type, "duckdb")) { while (InsertDatabasePath(info, options) == InsertDatabasePathResult::ALREADY_EXISTS) { // database with this name and path already exists + // first check if it exists within this transaction + auto &meta_transaction = MetaTransaction::Get(context); + auto existing_db = meta_transaction.GetReferencedDatabaseOwning(info.name); + if (existing_db) { + // it does! return it + return existing_db; + } // ... but it might not be done attaching yet! // verify the database has actually finished attaching prior to returning lock_guard guard(databases_lock); - if (databases.find(info.name) != databases.end()) { - // database ACTUALLY exists - return - return nullptr; + auto entry = databases.find(info.name); + if (entry != databases.end()) { + // database ACTUALLY exists - return it + return entry->second; } if (context.interrupted) { throw InterruptException(); } } } + auto &config = DBConfig::GetConfig(context); GetDatabaseType(context, info, config, options); if (!options.db_type.empty()) { // we only need to prevent duplicate opening of DuckDB files @@ -126,6 +134,20 @@ shared_ptr DatabaseManager::AttachDatabase(ClientContext &cont // now create the attached database auto &db = DatabaseInstance::GetDatabase(context); auto attached_db = db.CreateAttachedDatabase(context, info, options); + + //! Initialize the database. + if (options.is_main_database) { + attached_db->SetInitialDatabase(); + attached_db->Initialize(context); + } else { + attached_db->Initialize(context); + if (!options.default_table.name.empty()) { + attached_db->GetCatalog().SetDefaultTable(options.default_table.schema, options.default_table.name); + } + attached_db->FinalizeLoad(context); + } + + FinalizeAttach(context, info, attached_db); return attached_db; } diff --git a/src/duckdb/src/main/relation/setop_relation.cpp b/src/duckdb/src/main/relation/setop_relation.cpp index a0253df37..9a24fdd16 100644 --- a/src/duckdb/src/main/relation/setop_relation.cpp +++ b/src/duckdb/src/main/relation/setop_relation.cpp @@ -20,8 +20,8 @@ unique_ptr SetOpRelation::GetQueryNode() { if (!setop_all) { result->modifiers.push_back(make_uniq()); } - result->left = left->GetQueryNode(); - result->right = right->GetQueryNode(); + result->children.push_back(left->GetQueryNode()); + result->children.push_back(right->GetQueryNode()); result->setop_type = setop_type; result->setop_all = setop_all; return std::move(result); diff --git a/src/duckdb/src/main/settings/autogenerated_settings.cpp b/src/duckdb/src/main/settings/autogenerated_settings.cpp index acfbf1083..96c3065f2 100644 --- a/src/duckdb/src/main/settings/autogenerated_settings.cpp +++ b/src/duckdb/src/main/settings/autogenerated_settings.cpp @@ -528,6 +528,13 @@ Value SchedulerProcessPartialSetting::GetSetting(const ClientContext &context) { return Value::BOOLEAN(config.options.scheduler_process_partial); } +//===----------------------------------------------------------------------===// +// Storage Block Prefetch +//===----------------------------------------------------------------------===// +void StorageBlockPrefetchSetting::OnSet(SettingCallbackInfo &info, Value ¶meter) { + EnumUtil::FromString(StringValue::Get(parameter)); +} + //===----------------------------------------------------------------------===// // Zstd Min String Length //===----------------------------------------------------------------------===// diff --git a/src/duckdb/src/optimizer/filter_pushdown.cpp b/src/duckdb/src/optimizer/filter_pushdown.cpp index dcb79fe60..c4f7bb04b 100644 --- a/src/duckdb/src/optimizer/filter_pushdown.cpp +++ b/src/duckdb/src/optimizer/filter_pushdown.cpp @@ -7,6 +7,7 @@ #include "duckdb/planner/operator/logical_filter.hpp" #include "duckdb/planner/operator/logical_join.hpp" #include "duckdb/planner/operator/logical_projection.hpp" +#include "duckdb/planner/operator/logical_empty_result.hpp" #include "duckdb/planner/operator/logical_window.hpp" namespace duckdb { @@ -276,6 +277,50 @@ unique_ptr FilterPushdown::PushFinalFilters(unique_ptr FilterPushdown::FinishPushdown(unique_ptr op) { + if (op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN) { + for (idx_t i = 0; i < filters.size(); i++) { + auto &f = *filters[i]; + for (auto &child : op->children) { + FilterPushdown pushdown(optimizer, convert_mark_joins); + + // check if filter bindings can be applied to the child bindings. + auto child_bindings = child->GetColumnBindings(); + unordered_set child_bindings_table; + for (auto &binding : child_bindings) { + child_bindings_table.insert(binding.table_index); + } + + // Check if ALL bindings of the filter are present in the child + bool should_push = true; + for (auto &binding : f.bindings) { + if (child_bindings_table.find(binding) == child_bindings_table.end()) { + should_push = false; + break; + } + } + + if (!should_push) { + continue; + } + + // copy the filter + auto filter_copy = f.filter->Copy(); + if (pushdown.AddFilter(std::move(filter_copy)) == FilterResult::UNSATISFIABLE) { + return make_uniq(std::move(op)); + } + + // push the filter into the child. + pushdown.GenerateFilters(); + child = pushdown.Rewrite(std::move(child)); + + // Don't push same filter again + filters.erase_at(i); + i--; + break; + } + } + } + // unhandled type, first perform filter pushdown in its children for (auto &child : op->children) { FilterPushdown pushdown(optimizer, convert_mark_joins); diff --git a/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp b/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp index 0b9b6888b..6cf8dfea5 100644 --- a/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +++ b/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp @@ -225,25 +225,31 @@ RelationStats RelationStatisticsHelper::CombineStatsOfReorderableOperator(vector } RelationStats RelationStatisticsHelper::CombineStatsOfNonReorderableOperator(LogicalOperator &op, - vector child_stats) { - D_ASSERT(child_stats.size() == 2); + const vector &child_stats) { RelationStats ret; - idx_t child_1_card = child_stats[0].stats_initialized ? child_stats[0].cardinality : 0; - idx_t child_2_card = child_stats[1].stats_initialized ? child_stats[1].cardinality : 0; - ret.cardinality = MaxValue(child_1_card, child_2_card); + ret.cardinality = 0; + + // default predicted cardinality is the max of all child cardinalities + vector child_cardinalities; + for (auto &stats : child_stats) { + idx_t child_cardinality = stats.stats_initialized ? stats.cardinality : 0; + ret.cardinality = MaxValue(ret.cardinality, child_cardinality); + child_cardinalities.push_back(child_cardinality); + } switch (op.type) { case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: { + D_ASSERT(child_stats.size() == 2); auto &join = op.Cast(); switch (join.join_type) { case JoinType::RIGHT_ANTI: case JoinType::RIGHT_SEMI: - ret.cardinality = child_2_card; + ret.cardinality = child_cardinalities[1]; break; case JoinType::ANTI: case JoinType::SEMI: case JoinType::SINGLE: case JoinType::MARK: - ret.cardinality = child_1_card; + ret.cardinality = child_cardinalities[0]; break; default: break; @@ -254,18 +260,21 @@ RelationStats RelationStatisticsHelper::CombineStatsOfNonReorderableOperator(Log auto &setop = op.Cast(); if (setop.setop_all) { // setop returns all records - ret.cardinality = child_1_card + child_2_card; - } else { - ret.cardinality = MaxValue(child_1_card, child_2_card); + ret.cardinality = 0; + for (auto &child_cardinality : child_cardinalities) { + ret.cardinality += child_cardinality; + } } break; } case LogicalOperatorType::LOGICAL_INTERSECT: { - ret.cardinality = MinValue(child_1_card, child_2_card); + D_ASSERT(child_stats.size() == 2); + ret.cardinality = MinValue(child_cardinalities[0], child_cardinalities[1]); break; } case LogicalOperatorType::LOGICAL_EXCEPT: { - ret.cardinality = child_1_card; + D_ASSERT(child_stats.size() == 2); + ret.cardinality = child_cardinalities[0]; break; } default: @@ -274,8 +283,12 @@ RelationStats RelationStatisticsHelper::CombineStatsOfNonReorderableOperator(Log ret.stats_initialized = true; ret.filter_strength = 1; - ret.table_name = child_stats[0].table_name + " joined with " + child_stats[1].table_name; + ret.table_name = string(); for (auto &stats : child_stats) { + if (!ret.table_name.empty()) { + ret.table_name += " joined with "; + } + ret.table_name += stats.table_name; // MARK joins are nonreorderable. They won't return initialized stats // continue in this case. if (!stats.stats_initialized) { diff --git a/src/duckdb/src/optimizer/late_materialization.cpp b/src/duckdb/src/optimizer/late_materialization.cpp index a144df188..4e5b0f13e 100644 --- a/src/duckdb/src/optimizer/late_materialization.cpp +++ b/src/duckdb/src/optimizer/late_materialization.cpp @@ -62,6 +62,10 @@ unique_ptr LateMaterialization::ConstructLHS(LogicalGet &get) { get.names, get.virtual_columns); new_get->GetMutableColumnIds() = get.GetColumnIds(); new_get->projection_ids = get.projection_ids; + new_get->parameters = get.parameters; + new_get->named_parameters = get.named_parameters; + new_get->input_table_types = get.input_table_types; + new_get->input_table_names = get.input_table_names; return new_get; } diff --git a/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp b/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp index c0f8aa143..e56abdcd2 100644 --- a/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp +++ b/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp @@ -29,54 +29,53 @@ unique_ptr FilterPushdown::PushdownSetOperation(unique_ptrtype == LogicalOperatorType::LOGICAL_INTERSECT); auto &setop = op->Cast(); - D_ASSERT(op->children.size() == 2); - auto left_bindings = op->children[0]->GetColumnBindings(); - auto right_bindings = op->children[1]->GetColumnBindings(); - if (left_bindings.size() != right_bindings.size()) { - throw InternalException("Filter pushdown - set operation LHS and RHS have incompatible counts"); - } - - // pushdown into set operation, we can duplicate the condition and pushdown the expressions into both sides - FilterPushdown left_pushdown(optimizer, convert_mark_joins), right_pushdown(optimizer, convert_mark_joins); - for (idx_t i = 0; i < filters.size(); i++) { - // first create a copy of the filter - auto right_filter = make_uniq(); - right_filter->filter = filters[i]->filter->Copy(); + for (auto &child : op->children) { + auto child_bindings = child->GetColumnBindings(); - // in the original filter, rewrite references to the result of the union into references to the left_index - ReplaceSetOpBindings(left_bindings, *filters[i], filters[i]->filter, setop); - // in the copied filter, rewrite references to the result of the union into references to the right_index - ReplaceSetOpBindings(right_bindings, *right_filter, right_filter->filter, setop); + FilterPushdown child_pushdown(optimizer, convert_mark_joins); + for (auto &original_filter : filters) { + // first create a copy of the filter + auto filter = make_uniq(); + filter->filter = original_filter->filter->Copy(); - // extract bindings again - filters[i]->ExtractBindings(); - right_filter->ExtractBindings(); + // rewrite references to the result of the union into references to the child index + ReplaceSetOpBindings(child_bindings, *filter, filter->filter, setop); - // move the filters into the child pushdown nodes - left_pushdown.filters.push_back(std::move(filters[i])); - right_pushdown.filters.push_back(std::move(right_filter)); - } + // extract bindings again + filter->ExtractBindings(); - op->children[0] = left_pushdown.Rewrite(std::move(op->children[0])); - op->children[1] = right_pushdown.Rewrite(std::move(op->children[1])); + // move the filters into the child pushdown nodes + child_pushdown.filters.push_back(std::move(filter)); + } - bool left_empty = op->children[0]->type == LogicalOperatorType::LOGICAL_EMPTY_RESULT; - bool right_empty = op->children[1]->type == LogicalOperatorType::LOGICAL_EMPTY_RESULT; - if (left_empty && right_empty) { - // both empty: return empty result + // pushdown into the child + child = child_pushdown.Rewrite(std::move(child)); + } + bool all_empty = true; + for (auto &child : op->children) { + if (child->type != LogicalOperatorType::LOGICAL_EMPTY_RESULT) { + all_empty = false; + } + } + if (all_empty) { + // all sides are empty: the result must be empty return make_uniq(std::move(op)); } + if (op->type == LogicalOperatorType::LOGICAL_UNION) { + // for UNION (ALL) - delete all empty children and return + for (idx_t i = 0; i < op->children.size(); i++) { + if (op->children[i]->type == LogicalOperatorType::LOGICAL_EMPTY_RESULT) { + op->children.erase(op->children.begin() + static_cast(i)); + i--; + } + } + return op; + } + bool left_empty = op->children[0]->type == LogicalOperatorType::LOGICAL_EMPTY_RESULT; + bool right_empty = op->children[1]->type == LogicalOperatorType::LOGICAL_EMPTY_RESULT; if (left_empty && setop.setop_all) { // left child is empty result switch (op->type) { - case LogicalOperatorType::LOGICAL_UNION: - if (op->children[1]->type == LogicalOperatorType::LOGICAL_PROJECTION) { - // union with empty left side: return right child - auto &projection = op->children[1]->Cast(); - projection.table_index = setop.table_index; - return std::move(op->children[1]); - } - break; case LogicalOperatorType::LOGICAL_EXCEPT: // except: if left child is empty, return empty result case LogicalOperatorType::LOGICAL_INTERSECT: @@ -88,7 +87,6 @@ unique_ptr FilterPushdown::PushdownSetOperation(unique_ptrtype) { - case LogicalOperatorType::LOGICAL_UNION: case LogicalOperatorType::LOGICAL_EXCEPT: if (op->children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION) { // union or except with empty right child: return left child diff --git a/src/duckdb/src/optimizer/rule/comparison_simplification.cpp b/src/duckdb/src/optimizer/rule/comparison_simplification.cpp index 1e377528d..dc778cfff 100644 --- a/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +++ b/src/duckdb/src/optimizer/rule/comparison_simplification.cpp @@ -56,13 +56,8 @@ unique_ptr ComparisonSimplificationRule::Apply(LogicalOperator &op, // Is the constant cast invertible? if (!cast_constant.IsNull() && !BoundCastExpression::CastIsInvertible(cast_expression.return_type, target_type)) { - // Is it actually invertible? - Value uncast_constant; - if (!cast_constant.TryCastAs(rewriter.context, constant_value.type(), uncast_constant, &error_message, - true) || - uncast_constant != constant_value) { - return nullptr; - } + // Cast is not invertible, so we do not rewrite this expression to ensure that the cast is executed + return nullptr; } //! We can cast, now we change our column_ref_expression from an operator cast to a column reference @@ -75,6 +70,7 @@ unique_ptr ComparisonSimplificationRule::Apply(LogicalOperator &op, expr.left = std::move(new_constant_expr); expr.right = std::move(child_expression); } + changes_made = true; } return nullptr; } diff --git a/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp b/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp index 4c8c33ccd..5a0fb090b 100644 --- a/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +++ b/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp @@ -4,6 +4,9 @@ namespace duckdb { void StatisticsPropagator::AddCardinalities(unique_ptr &stats, NodeStatistics &new_stats) { + if (!stats) { + return; + } if (!stats->has_estimated_cardinality || !new_stats.has_estimated_cardinality || !stats->has_max_cardinality || !new_stats.has_max_cardinality) { stats = nullptr; @@ -24,8 +27,59 @@ void StatisticsPropagator::AddCardinalities(unique_ptr &stats, N } } +unique_ptr StatisticsPropagator::PropagateUnion(LogicalSetOperation &setop, + unique_ptr &node_ptr) { + // first propagate statistics in the child nodes + vector> stats; + for (auto &child : setop.children) { + stats.push_back(PropagateStatistics(child)); + } + + // now fetch the column bindings of the children both sides + vector> child_bindings; + for (auto &child : setop.children) { + child_bindings.push_back(child->GetColumnBindings()); + } + for (idx_t i = 0; i < setop.column_count; i++) { + // for each column binding, we fetch the statistics from both the lhs and the rhs + unique_ptr new_stats; + for (idx_t child_idx = 0; child_idx < setop.children.size(); child_idx++) { + auto stats_entry = statistics_map.find(child_bindings[child_idx][i]); + if (stats_entry == statistics_map.end()) { + new_stats.reset(); + break; + } + auto &child_stats = stats_entry->second; + if (!new_stats) { + new_stats = child_stats->ToUnique(); + } else { + new_stats->Merge(*child_stats); + } + } + if (!new_stats) { + // no statistics on at least one of the sides: can't propagate stats + continue; + } + // propagate the stats for this column + ColumnBinding binding(setop.table_index, i); + statistics_map[binding] = std::move(new_stats); + } + // merge all cardinalities of the child stats together + for (idx_t i = 1; i < stats.size(); i++) { + if (!stats[i]) { + return nullptr; + } + AddCardinalities(stats[0], *stats[i]); + } + return std::move(stats[0]); +} + unique_ptr StatisticsPropagator::PropagateStatistics(LogicalSetOperation &setop, unique_ptr &node_ptr) { + if (setop.type == LogicalOperatorType::LOGICAL_UNION) { + return PropagateUnion(setop, node_ptr); + } + D_ASSERT(setop.children.size() == 2); // first propagate statistics in the child nodes auto left_stats = PropagateStatistics(setop.children[0]); auto right_stats = PropagateStatistics(setop.children[1]); @@ -46,11 +100,6 @@ unique_ptr StatisticsPropagator::PropagateStatistics(LogicalSetO } unique_ptr new_stats; switch (setop.type) { - case LogicalOperatorType::LOGICAL_UNION: - // union: merge the stats of the LHS and RHS together - new_stats = left_entry->second->ToUnique(); - new_stats->Merge(*right_entry->second); - break; case LogicalOperatorType::LOGICAL_EXCEPT: // except: use the stats of the LHS new_stats = left_entry->second->ToUnique(); @@ -70,9 +119,6 @@ unique_ptr StatisticsPropagator::PropagateStatistics(LogicalSetO if (!left_stats || !right_stats) { return nullptr; } - if (setop.type == LogicalOperatorType::LOGICAL_UNION) { - AddCardinalities(left_stats, *right_stats); - } return left_stats; } diff --git a/src/duckdb/src/parser/parsed_expression_iterator.cpp b/src/duckdb/src/parser/parsed_expression_iterator.cpp index 24bfd8e8a..7ca38a10e 100644 --- a/src/duckdb/src/parser/parsed_expression_iterator.cpp +++ b/src/duckdb/src/parser/parsed_expression_iterator.cpp @@ -300,8 +300,9 @@ void ParsedExpressionIterator::EnumerateQueryNodeChildren( } case QueryNodeType::SET_OPERATION_NODE: { auto &setop_node = node.Cast(); - EnumerateQueryNodeChildren(*setop_node.left, expr_callback, ref_callback); - EnumerateQueryNodeChildren(*setop_node.right, expr_callback, ref_callback); + for (auto &child : setop_node.children) { + EnumerateQueryNodeChildren(*child, expr_callback, ref_callback); + } break; } default: diff --git a/src/duckdb/src/parser/parser.cpp b/src/duckdb/src/parser/parser.cpp index f931f0671..552b6e180 100644 --- a/src/duckdb/src/parser/parser.cpp +++ b/src/duckdb/src/parser/parser.cpp @@ -202,6 +202,20 @@ void Parser::ParseQuery(const string &query) { } } { + if (options.extensions) { + for (auto &ext : *options.extensions) { + if (!ext.parser_override) { + continue; + } + auto result = ext.parser_override(ext.parser_info.get(), query); + if (result.type == ParserExtensionResultType::PARSE_SUCCESSFUL) { + statements = std::move(result.statements); + return; + } else if (result.type == ParserExtensionResultType::DISPLAY_EXTENSION_ERROR) { + throw ParserException(result.error); + } + } + } PostgresParser::SetPreserveIdentifierCase(options.preserve_identifier_case); bool parsing_succeed = false; // Creating a new scope to prevent multiple PostgresParser destructors being called diff --git a/src/duckdb/src/parser/query_node/set_operation_node.cpp b/src/duckdb/src/parser/query_node/set_operation_node.cpp index 9a8168a45..a8b624f21 100644 --- a/src/duckdb/src/parser/query_node/set_operation_node.cpp +++ b/src/duckdb/src/parser/query_node/set_operation_node.cpp @@ -8,28 +8,34 @@ namespace duckdb { SetOperationNode::SetOperationNode() : QueryNode(QueryNodeType::SET_OPERATION_NODE) { } +const vector> &SetOperationNode::GetSelectList() const { + return children[0]->GetSelectList(); +} + string SetOperationNode::ToString() const { string result; result = cte_map.ToString(); - result += "(" + left->ToString() + ") "; + result += "(" + children[0]->ToString() + ") "; - switch (setop_type) { - case SetOperationType::UNION: - result += setop_all ? "UNION ALL" : "UNION"; - break; - case SetOperationType::UNION_BY_NAME: - result += setop_all ? "UNION ALL BY NAME" : "UNION BY NAME"; - break; - case SetOperationType::EXCEPT: - result += setop_all ? "EXCEPT ALL" : "EXCEPT"; - break; - case SetOperationType::INTERSECT: - result += setop_all ? "INTERSECT ALL" : "INTERSECT"; - break; - default: - throw InternalException("Unsupported set operation type"); + for (idx_t i = 1; i < children.size(); i++) { + switch (setop_type) { + case SetOperationType::UNION: + result += setop_all ? "UNION ALL" : "UNION"; + break; + case SetOperationType::UNION_BY_NAME: + result += setop_all ? "UNION ALL BY NAME" : "UNION BY NAME"; + break; + case SetOperationType::EXCEPT: + result += setop_all ? "EXCEPT ALL" : "EXCEPT"; + break; + case SetOperationType::INTERSECT: + result += setop_all ? "INTERSECT ALL" : "INTERSECT"; + break; + default: + throw InternalException("Unsupported set operation type"); + } + result += " (" + children[i]->ToString() + ")"; } - result += " (" + right->ToString() + ")"; return result + ResultModifiersToString(); } @@ -47,11 +53,13 @@ bool SetOperationNode::Equals(const QueryNode *other_p) const { if (setop_all != other.setop_all) { return false; } - if (!left->Equals(other.left.get())) { + if (children.size() != other.children.size()) { return false; } - if (!right->Equals(other.right.get())) { - return false; + for (idx_t i = 0; i < children.size(); i++) { + if (!children[i]->Equals(other.children[i].get())) { + return false; + } } return true; } @@ -60,56 +68,72 @@ unique_ptr SetOperationNode::Copy() const { auto result = make_uniq(); result->setop_type = setop_type; result->setop_all = setop_all; - result->left = left->Copy(); - result->right = right->Copy(); + for (auto &child : children) { + result->children.push_back(child->Copy()); + } this->CopyProperties(*result); return std::move(result); } SetOperationNode::SetOperationNode(SetOperationType setop_type, unique_ptr left, unique_ptr right, - vector> children, bool setop_all) + vector> children_p, bool setop_all) : QueryNode(QueryNodeType::SET_OPERATION_NODE), setop_type(setop_type), setop_all(setop_all) { - if (left && right) { - // simple case - left/right are supplied - this->left = std::move(left); - this->right = std::move(right); - return; + if (children_p.empty()) { + if (!left || !right) { + throw SerializationException("Error deserializing SetOperationNode - left/right or children must be set"); + } + children.push_back(std::move(left)); + children.push_back(std::move(right)); + } else { + if (left || right) { + throw SerializationException("Error deserializing SetOperationNode - left/right or children must be set"); + } + children = std::move(children_p); + } + if (children.size() < 2) { + throw SerializationException("SetOperationNode must have at least two children"); + } +} + +unique_ptr SetOperationNode::SerializeChildNode(Serializer &serializer, idx_t index) const { + if (SerializeChildList(serializer)) { + // serialize new version - we are serializing all children in the new "children" field + return nullptr; } - if (children.size() == 2) { - this->left = std::move(children[0]); - this->right = std::move(children[1]); + // backwards compatibility - we are targeting an older version + // we need to serialize two children - "left" and "right" + if (index == 0) { + // for the left child, just directly emit the first child + return children[0]->Copy(); } - // we have multiple children - we need to construct a tree of set operation nodes - if (children.size() <= 1) { - throw SerializationException("Set Operation requires at least 2 children"); + if (index != 1) { + throw InternalException("SerializeChildNode should have index 0 or 1"); } - if (setop_type != SetOperationType::UNION) { - throw SerializationException("Multiple children in set-operations are only supported for UNION"); + vector> nodes; + for (idx_t i = 1; i < children.size(); i++) { + nodes.push_back(children[i]->Copy()); } - // construct a balanced tree from the union - while (children.size() > 2) { + // for the right child we construct a new tree by generating the set operation over all of the nodes + // we construct a balanced tree to avoid + while (nodes.size() > 1) { vector> new_children; - for (idx_t i = 0; i < children.size(); i += 2) { - if (i + 1 == children.size()) { - new_children.push_back(std::move(children[i])); + for (idx_t i = 0; i < nodes.size(); i += 2) { + if (i + 1 == nodes.size()) { + new_children.push_back(std::move(nodes[i])); } else { vector> empty_children; - auto setop_node = - make_uniq(setop_type, std::move(children[i]), std::move(children[i + 1]), - std::move(empty_children), setop_all); + auto setop_node = make_uniq(setop_type, std::move(nodes[i]), std::move(nodes[i + 1]), + std::move(empty_children), setop_all); new_children.push_back(std::move(setop_node)); } } - children = std::move(new_children); + nodes = std::move(new_children); } - // two children left - fill in the left/right of this node - this->left = std::move(children[0]); - this->right = std::move(children[1]); + return std::move(nodes[0]); } -vector> SetOperationNode::SerializeChildNodes() const { - // we always serialize children as left/right currently - return vector>(); +bool SetOperationNode::SerializeChildList(Serializer &serializer) const { + return serializer.ShouldSerialize(6); } } // namespace duckdb diff --git a/src/duckdb/src/parser/transform/statement/transform_create_function.cpp b/src/duckdb/src/parser/transform/statement/transform_create_function.cpp index 6490e29e5..1525f634d 100644 --- a/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +++ b/src/duckdb/src/parser/transform/statement/transform_create_function.cpp @@ -47,6 +47,8 @@ unique_ptr Transformer::TransformMacroFunction(duckdb_libpgquery: default_expr = make_uniq(std::move(default_value)); default_expr->SetAlias(param.name); macro_func->default_parameters[param.name] = std::move(default_expr); + } else if (!macro_func->default_parameters.empty()) { + throw ParserException("Parameter without a default follows parameter with a default"); } } diff --git a/src/duckdb/src/parser/transform/statement/transform_select_node.cpp b/src/duckdb/src/parser/transform/statement/transform_select_node.cpp index 56ef6c71c..cb61ccd54 100644 --- a/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +++ b/src/duckdb/src/parser/transform/statement/transform_select_node.cpp @@ -51,6 +51,51 @@ void Transformer::TransformModifiers(duckdb_libpgquery::PGSelectStmt &stmt, Quer } } +bool Transformer::SetOperationsMatch(duckdb_libpgquery::PGSelectStmt &root, duckdb_libpgquery::PGNode &node) { + if (node.type != duckdb_libpgquery::T_PGSelectStmt) { + // not a select or set-op - set operations cannot match + return false; + } + auto &stmt = PGCast(node); + if (root.op != stmt.op || root.all != stmt.all) { + // set operation type does not match + return false; + } + if (root.op != duckdb_libpgquery::PG_SETOP_UNION && root.op != duckdb_libpgquery::PG_SETOP_UNION_BY_NAME) { + // only generate multi-child nodes for UNION/UNION ALL + return false; + } + // check if this is a "simple" set operation + if (stmt.withClause || stmt.sortClause || stmt.limitCount || stmt.limitOffset || stmt.sampleOptions) { + // it is not - we need to unfold it + return false; + } + return true; +} + +void Transformer::TransformSetOperationChildren(duckdb_libpgquery::PGSelectStmt &stmt, SetOperationNode &result) { + D_ASSERT(stmt.larg && stmt.rarg); + vector> set_operations; + set_operations.push_back(*stmt.larg); + set_operations.push_back(*stmt.rarg); + + for (idx_t i = 0; i < set_operations.size(); i++) { + auto &node = set_operations[i].get(); + // check if this set operation can be merged into the parents' set operation + if (!SetOperationsMatch(stmt, node)) { + // it cannot - transform the child + result.children.push_back(TransformSelectNode(node)); + } else { + // it can - recurse into children + // note that we must process the children in a specific order - so we need to expand the children in-place + auto &select = PGCast(node); + set_operations[i] = *select.larg; + set_operations.insert(set_operations.begin() + static_cast(i + 1), *select.rarg); + i--; + } + } +} + unique_ptr Transformer::TransformSelectInternal(duckdb_libpgquery::PGSelectStmt &stmt) { D_ASSERT(stmt.type == duckdb_libpgquery::T_PGSelectStmt); auto stack_checker = StackCheck(); @@ -129,13 +174,9 @@ unique_ptr Transformer::TransformSelectInternal(duckdb_libpgquery::PG node = make_uniq(); auto &result = node->Cast(); if (stmt.withClause) { - TransformCTE(*PGPointerCast(stmt.withClause), node->cte_map); - } - result.left = TransformSelectNode(*stmt.larg); - result.right = TransformSelectNode(*stmt.rarg); - if (!result.left || !result.right) { - throw InternalException("Failed to transform setop children."); + TransformCTE(*PGPointerCast(stmt.withClause), result.cte_map); } + TransformSetOperationChildren(stmt, result); result.setop_all = stmt.all; switch (stmt.op) { diff --git a/src/duckdb/src/parser/transformer.cpp b/src/duckdb/src/parser/transformer.cpp index d7025eecb..4ab39fca7 100644 --- a/src/duckdb/src/parser/transformer.cpp +++ b/src/duckdb/src/parser/transformer.cpp @@ -249,7 +249,6 @@ unique_ptr Transformer::TransformMaterializedCTE(unique_ptr node_result; node_result = std::move(materialized_ctes.back()); - node_result->cte_map = root->cte_map.Copy(); node_result->child = std::move(root); root = std::move(node_result); materialized_ctes.pop_back(); diff --git a/src/duckdb/src/planner/binder.cpp b/src/duckdb/src/planner/binder.cpp index 1246f5a25..2ba52b64f 100644 --- a/src/duckdb/src/planner/binder.cpp +++ b/src/duckdb/src/planner/binder.cpp @@ -200,7 +200,7 @@ BoundStatement Binder::Bind(SQLStatement &statement) { void Binder::AddCTEMap(CommonTableExpressionMap &cte_map) { for (auto &cte_it : cte_map.map) { - AddCTE(cte_it.first, *cte_it.second); + AddCTE(cte_it.first); } } @@ -343,22 +343,16 @@ unique_ptr Binder::CreatePlan(BoundTableRef &ref) { return root; } -void Binder::AddCTE(const string &name, CommonTableExpressionInfo &info) { +void Binder::AddCTE(const string &name) { D_ASSERT(!name.empty()); - auto entry = CTE_bindings.find(name); - if (entry != CTE_bindings.end()) { - throw InternalException("Duplicate CTE \"%s\" in query!", name); - } - CTE_bindings.insert(make_pair(name, reference(info))); + CTE_bindings.insert(name); } -vector> Binder::FindCTE(const string &name, bool skip) { - auto entry = CTE_bindings.find(name); - vector> ctes; - if (entry != CTE_bindings.end()) { - if (!skip || entry->second.get().query->node->type == QueryNodeType::RECURSIVE_CTE_NODE) { - ctes.push_back(entry->second); - } +vector> Binder::FindCTE(const string &name, bool skip) { + auto entry = bind_context.GetCTEBinding(name); + vector> ctes; + if (entry) { + ctes.push_back(*entry.get()); } if (parent && binder_type == BinderType::REGULAR_BINDER) { auto parent_ctes = parent->FindCTE(name, name == alias); @@ -367,12 +361,12 @@ vector> Binder::FindCTE(const string &name, return ctes; } -bool Binder::CTEIsAlreadyBound(CommonTableExpressionInfo &cte) { - if (bound_ctes.find(cte) != bound_ctes.end()) { +bool Binder::CTEExists(const string &name) { + if (CTE_bindings.find(name) != CTE_bindings.end()) { return true; } if (parent && binder_type == BinderType::REGULAR_BINDER) { - return parent->CTEIsAlreadyBound(cte); + return parent->CTEExists(name); } return false; } diff --git a/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp b/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp index 092b46ca6..2a7cf8346 100644 --- a/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +++ b/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp @@ -27,6 +27,8 @@ unique_ptr Binder::BindCTE(CTENode &statement) { result->materialized = statement.materialized; result->setop_index = GenerateTableIndex(); + AddCTE(result->ctename); + result->query_binder = Binder::CreateBinder(context, this); result->query = result->query_binder->BindNode(*statement.query); diff --git a/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp b/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp index 82d6c754b..50c6b3c06 100644 --- a/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +++ b/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp @@ -21,29 +21,37 @@ static void GatherAliases(BoundQueryNode &node, SelectBindState &bind_state, con // create new reorder index if (setop.setop_type == SetOperationType::UNION_BY_NAME) { - vector new_left_reorder_idx(setop.left_reorder_idx.size()); - vector new_right_reorder_idx(setop.right_reorder_idx.size()); - for (idx_t i = 0; i < setop.left_reorder_idx.size(); ++i) { - new_left_reorder_idx[i] = reorder_idx[setop.left_reorder_idx[i]]; - } - - for (idx_t i = 0; i < setop.right_reorder_idx.size(); ++i) { - new_right_reorder_idx[i] = reorder_idx[setop.right_reorder_idx[i]]; + // for UNION BY NAME - create a new re-order index + case_insensitive_map_t reorder_map; + for (idx_t col_idx = 0; col_idx < setop.names.size(); ++col_idx) { + reorder_map[setop.names[col_idx]] = reorder_idx[col_idx]; } // use new reorder index - GatherAliases(*setop.left, bind_state, new_left_reorder_idx); - GatherAliases(*setop.right, bind_state, new_right_reorder_idx); + for (auto &child : setop.bound_children) { + vector new_reorder_idx; + for (idx_t col_idx = 0; col_idx < child.node->names.size(); col_idx++) { + auto &col_name = child.node->names[col_idx]; + auto entry = reorder_map.find(col_name); + if (entry == reorder_map.end()) { + throw InternalException("SetOp - Column name not found in reorder_map in UNION BY NAME"); + } + new_reorder_idx.push_back(entry->second); + } + GatherAliases(*child.node, bind_state, new_reorder_idx); + } return; } - GatherAliases(*setop.left, bind_state, reorder_idx); - GatherAliases(*setop.right, bind_state, reorder_idx); + for (auto &child : setop.bound_children) { + GatherAliases(*child.node, bind_state, reorder_idx); + } } else { // query node D_ASSERT(node.type == QueryNodeType::SELECT_NODE); auto &select = node.Cast(); // fill the alias lists with the names + D_ASSERT(reorder_idx.size() == select.names.size()); for (idx_t i = 0; i < select.names.size(); i++) { auto &name = select.names[i]; // first check if the alias is already in there @@ -56,7 +64,7 @@ static void GatherAliases(BoundQueryNode &node, SelectBindState &bind_state, con bind_state.alias_map[name] = index; } } - // check if the expression matches one of the expressions in the original expression liset + // check if the expression matches one of the expressions in the original expression list for (idx_t i = 0; i < select.bind_state.original_expressions.size(); i++) { auto &expr = select.bind_state.original_expressions[i]; idx_t index = reorder_idx[i]; @@ -76,112 +84,106 @@ static void GatherAliases(BoundQueryNode &node, SelectBindState &bind_state, con } } +static void GatherAliases(BoundQueryNode &node, SelectBindState &bind_state) { + vector reorder_idx; + for (idx_t i = 0; i < node.names.size(); i++) { + reorder_idx.push_back(i); + } + GatherAliases(node, bind_state, reorder_idx); +} + static void BuildUnionByNameInfo(ClientContext &context, BoundSetOperationNode &result, bool can_contain_nulls) { D_ASSERT(result.setop_type == SetOperationType::UNION_BY_NAME); - case_insensitive_map_t left_names_map; - case_insensitive_map_t right_names_map; - - auto &left_node = *result.left; - auto &right_node = *result.right; + vector> node_name_maps; + case_insensitive_set_t global_name_set; // Build a name_map to use to check if a name exists // We throw a binder exception if two same name in the SELECT list - for (idx_t i = 0; i < left_node.names.size(); ++i) { - if (left_names_map.find(left_node.names[i]) != left_names_map.end()) { - throw BinderException("UNION (ALL) BY NAME operation doesn't support duplicate names in the SELECT list - " - "the name \"%s\" occurs multiple times in the left-hand side", - left_node.names[i]); - } - left_names_map[left_node.names[i]] = i; - } - - for (idx_t i = 0; i < right_node.names.size(); ++i) { - if (right_names_map.find(right_node.names[i]) != right_names_map.end()) { - throw BinderException("UNION (ALL) BY NAME operation doesn't support duplicate names in the SELECT list - " - "the name \"%s\" occurs multiple times in the right-hand side", - right_node.names[i]); - } - if (left_names_map.find(right_node.names[i]) == left_names_map.end()) { - result.names.push_back(right_node.names[i]); + D_ASSERT(result.names.empty()); + for (auto &child : result.bound_children) { + auto &child_node = *child.node; + case_insensitive_map_t node_name_map; + for (idx_t i = 0; i < child_node.names.size(); ++i) { + auto &col_name = child_node.names[i]; + if (node_name_map.find(col_name) != node_name_map.end()) { + throw BinderException( + "UNION (ALL) BY NAME operation doesn't support duplicate names in the SELECT list - " + "the name \"%s\" occurs multiple times", + col_name); + } + if (global_name_set.find(col_name) == global_name_set.end()) { + // column is not yet present in the result + result.names.push_back(col_name); + global_name_set.insert(col_name); + } + node_name_map[col_name] = i; } - right_names_map[right_node.names[i]] = i; + node_name_maps.push_back(std::move(node_name_map)); } idx_t new_size = result.names.size(); bool need_reorder = false; - vector left_reorder_idx(left_node.names.size()); - vector right_reorder_idx(right_node.names.size()); - // Construct return type and reorder_idxs - // reorder_idxs is used to gather correct alias_map - // and expression_map in GatherAlias(...) + // construct the return type of each of the columns for (idx_t i = 0; i < new_size; ++i) { - auto left_index = left_names_map.find(result.names[i]); - auto right_index = right_names_map.find(result.names[i]); - bool left_exist = left_index != left_names_map.end(); - bool right_exist = right_index != right_names_map.end(); - LogicalType result_type; - if (left_exist && right_exist) { - result_type = LogicalType::ForceMaxLogicalType(left_node.types[left_index->second], - right_node.types[right_index->second]); - if (left_index->second != i || right_index->second != i) { + auto &col_name = result.names[i]; + LogicalType result_type(LogicalTypeId::INVALID); + for (idx_t child_idx = 0; child_idx < result.bound_children.size(); ++child_idx) { + auto &child = result.bound_children[child_idx]; + auto &child_name_map = node_name_maps[child_idx]; + // check if the column exists in this child node + auto entry = child_name_map.find(col_name); + if (entry == child_name_map.end()) { need_reorder = true; + } else { + auto col_idx_in_child = entry->second; + auto &child_col_type = child.node->types[col_idx_in_child]; + // the child exists in this node - compute the type + if (result_type.id() == LogicalTypeId::INVALID) { + result_type = child_col_type; + } else { + result_type = LogicalType::ForceMaxLogicalType(result_type, child_col_type); + } + if (i != col_idx_in_child) { + // the column exists - but the children are out-of-order, so we need to re-order anyway + need_reorder = true; + } } - left_reorder_idx[left_index->second] = i; - right_reorder_idx[right_index->second] = i; - } else if (left_exist) { - result_type = left_node.types[left_index->second]; - need_reorder = true; - left_reorder_idx[left_index->second] = i; - } else { - D_ASSERT(right_exist); - result_type = right_node.types[right_index->second]; - need_reorder = true; - right_reorder_idx[right_index->second] = i; } - + // compute the final type for each column if (!can_contain_nulls) { if (ExpressionBinder::ContainsNullType(result_type)) { result_type = ExpressionBinder::ExchangeNullType(result_type); } } - result.types.push_back(result_type); } - result.left_reorder_idx = std::move(left_reorder_idx); - result.right_reorder_idx = std::move(right_reorder_idx); - - // If reorder is required, collect reorder expressions for push projection - // into the two child nodes of union node - if (need_reorder) { - for (idx_t i = 0; i < new_size; ++i) { - auto left_index = left_names_map.find(result.names[i]); - auto right_index = right_names_map.find(result.names[i]); - bool left_exist = left_index != left_names_map.end(); - bool right_exist = right_index != right_names_map.end(); - unique_ptr left_reorder_expr; - unique_ptr right_reorder_expr; - if (left_exist && right_exist) { - left_reorder_expr = make_uniq( - left_node.types[left_index->second], ColumnBinding(left_node.GetRootIndex(), left_index->second)); - right_reorder_expr = - make_uniq(right_node.types[right_index->second], - ColumnBinding(right_node.GetRootIndex(), right_index->second)); - } else if (left_exist) { - left_reorder_expr = make_uniq( - left_node.types[left_index->second], ColumnBinding(left_node.GetRootIndex(), left_index->second)); - // create null value here - right_reorder_expr = make_uniq(Value(result.types[i])); + if (!need_reorder) { + // if all columns in the children of the set-operations are identical we don't need to re-order at all + // skip adding expressions entirely + return; + } + // If reorder is required, generate the expressions for each node + for (idx_t i = 0; i < new_size; ++i) { + auto &col_name = result.names[i]; + for (idx_t child_idx = 0; child_idx < result.bound_children.size(); ++child_idx) { + auto &child = result.bound_children[child_idx]; + auto &child_name_map = node_name_maps[child_idx]; + // check if the column exists in this child node + auto entry = child_name_map.find(col_name); + unique_ptr expr; + if (entry == child_name_map.end()) { + // the column does not exist - push a `NULL` + expr = make_uniq(Value(result.types[i])); } else { - D_ASSERT(right_exist); - left_reorder_expr = make_uniq(Value(result.types[i])); - right_reorder_expr = - make_uniq(right_node.types[right_index->second], - ColumnBinding(right_node.GetRootIndex(), right_index->second)); + // the column exists - reference it + auto col_idx_in_child = entry->second; + auto &child_col_type = child.node->types[col_idx_in_child]; + expr = make_uniq(child_col_type, + ColumnBinding(child.node->GetRootIndex(), col_idx_in_child)); } - result.left_reorder_exprs.push_back(std::move(left_reorder_expr)); - result.right_reorder_exprs.push_back(std::move(right_reorder_expr)); + child.reorder_expressions.push_back(std::move(expr)); } } } @@ -192,8 +194,9 @@ static void GatherSetOpBinders(BoundQueryNode &node, Binder &binder, vector(); - GatherSetOpBinders(*setop_node.left, *setop_node.left_binder, binders); - GatherSetOpBinders(*setop_node.right, *setop_node.right_binder, binders); + for (auto &child : setop_node.bound_children) { + GatherSetOpBinders(*child.node, *child.binder, binders); + } } unique_ptr Binder::BindNode(SetOperationNode &statement) { @@ -202,38 +205,49 @@ unique_ptr Binder::BindNode(SetOperationNode &statement) { result->setop_all = statement.setop_all; // first recursively visit the set operations - // both the left and right sides have an independent BindContext and Binder - D_ASSERT(statement.left); - D_ASSERT(statement.right); - + // all children have an independent BindContext and Binder result->setop_index = GenerateTableIndex(); - - result->left_binder = Binder::CreateBinder(context, this); - result->left_binder->can_contain_nulls = true; - result->left = result->left_binder->BindNode(*statement.left); - result->right_binder = Binder::CreateBinder(context, this); - result->right_binder->can_contain_nulls = true; - result->right = result->right_binder->BindNode(*statement.right); - - result->names = result->left->names; + if (statement.children.size() < 2) { + throw InternalException("Set Operations must have at least 2 children"); + } + if (statement.children.size() != 2 && statement.setop_type != SetOperationType::UNION && + statement.setop_type != SetOperationType::UNION_BY_NAME) { + throw InternalException("Set Operation type must have exactly 2 children - except for UNION/UNION_BY_NAME"); + } + for (auto &child : statement.children) { + BoundSetOpChild bound_child; + bound_child.binder = Binder::CreateBinder(context, this); + bound_child.binder->can_contain_nulls = true; + bound_child.node = bound_child.binder->BindNode(*child); + result->bound_children.push_back(std::move(bound_child)); + } // move the correlated expressions from the child binders to this binder - MoveCorrelatedExpressions(*result->left_binder); - MoveCorrelatedExpressions(*result->right_binder); - - // now both sides have been bound we can resolve types - if (result->setop_type != SetOperationType::UNION_BY_NAME && - result->left->types.size() != result->right->types.size()) { - throw BinderException("Set operations can only apply to expressions with the " - "same number of result columns"); + for (auto &bound_child : result->bound_children) { + MoveCorrelatedExpressions(*bound_child.binder); } if (result->setop_type == SetOperationType::UNION_BY_NAME) { + // UNION BY NAME - merge the columns from all sides BuildUnionByNameInfo(context, *result, can_contain_nulls); } else { + // UNION ALL BY POSITION - the columns of both sides must match exactly + result->names = result->bound_children[0].node->names; + auto result_columns = result->bound_children[0].node->types.size(); + for (idx_t i = 1; i < result->bound_children.size(); ++i) { + if (result->bound_children[i].node->types.size() != result_columns) { + throw BinderException("Set operations can only apply to expressions with the " + "same number of result columns"); + } + } + // figure out the types of the setop result by picking the max of both - for (idx_t i = 0; i < result->left->types.size(); i++) { - auto result_type = LogicalType::ForceMaxLogicalType(result->left->types[i], result->right->types[i]); + for (idx_t i = 0; i < result_columns; i++) { + auto result_type = result->bound_children[0].node->types[i]; + for (idx_t child_idx = 1; child_idx < result->bound_children.size(); ++child_idx) { + auto &child_node = *result->bound_children[child_idx].node; + result_type = LogicalType::ForceMaxLogicalType(result_type, child_node.types[i]); + } if (!can_contain_nulls) { if (ExpressionBinder::ContainsNullType(result_type)) { result_type = ExpressionBinder::ExchangeNullType(result_type); @@ -248,22 +262,14 @@ unique_ptr Binder::BindNode(SetOperationNode &statement) { // handle the ORDER BY/DISTINCT clauses // we recursively visit the children of this node to extract aliases and expressions that can be referenced - // in the ORDER BY + // in the ORDER BYs + GatherAliases(*result, bind_state); - if (result->setop_type == SetOperationType::UNION_BY_NAME) { - GatherAliases(*result->left, bind_state, result->left_reorder_idx); - GatherAliases(*result->right, bind_state, result->right_reorder_idx); - } else { - vector reorder_idx; - for (idx_t i = 0; i < result->names.size(); i++) { - reorder_idx.push_back(i); - } - GatherAliases(*result, bind_state, reorder_idx); - } // now we perform the actual resolution of the ORDER BY/DISTINCT expressions vector> binders; - GatherSetOpBinders(*result->left, *result->left_binder, binders); - GatherSetOpBinders(*result->right, *result->right_binder, binders); + for (auto &child : result->bound_children) { + GatherSetOpBinders(*child.node, *child.binder, binders); + } OrderBinder order_binder(binders, bind_state); PrepareModifiers(order_binder, statement, *result); } diff --git a/src/duckdb/src/planner/binder/query_node/plan_setop.cpp b/src/duckdb/src/planner/binder/query_node/plan_setop.cpp index ba929bcd8..9b0fa7c94 100644 --- a/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +++ b/src/duckdb/src/planner/binder/query_node/plan_setop.cpp @@ -93,45 +93,6 @@ unique_ptr Binder::CastLogicalOperatorToTypes(vector Binder::CreatePlan(BoundSetOperationNode &node) { - // Generate the logical plan for the left and right sides of the set operation - node.left_binder->is_outside_flattened = is_outside_flattened; - node.right_binder->is_outside_flattened = is_outside_flattened; - - auto left_node = node.left_binder->CreatePlan(*node.left); - auto right_node = node.right_binder->CreatePlan(*node.right); - - // Add a new projection to child node - D_ASSERT(node.left_reorder_exprs.size() == node.right_reorder_exprs.size()); - if (!node.left_reorder_exprs.empty()) { - D_ASSERT(node.setop_type == SetOperationType::UNION_BY_NAME); - vector left_types; - vector right_types; - // We are going to add a new projection operator, so collect the type - // of reorder exprs in order to call CastLogicalOperatorToTypes() - for (idx_t i = 0; i < node.left_reorder_exprs.size(); ++i) { - left_types.push_back(node.left_reorder_exprs[i]->return_type); - right_types.push_back(node.right_reorder_exprs[i]->return_type); - } - - auto left_projection = make_uniq(GenerateTableIndex(), std::move(node.left_reorder_exprs)); - left_projection->children.push_back(std::move(left_node)); - left_node = std::move(left_projection); - - auto right_projection = make_uniq(GenerateTableIndex(), std::move(node.right_reorder_exprs)); - right_projection->children.push_back(std::move(right_node)); - right_node = std::move(right_projection); - - left_node = CastLogicalOperatorToTypes(left_types, node.types, std::move(left_node)); - right_node = CastLogicalOperatorToTypes(right_types, node.types, std::move(right_node)); - } else { - left_node = CastLogicalOperatorToTypes(node.left->types, node.types, std::move(left_node)); - right_node = CastLogicalOperatorToTypes(node.right->types, node.types, std::move(right_node)); - } - - // check if there are any unplanned subqueries left in either child - has_unplanned_dependent_joins = has_unplanned_dependent_joins || node.left_binder->has_unplanned_dependent_joins || - node.right_binder->has_unplanned_dependent_joins; - // create actual logical ops for setops LogicalOperatorType logical_type = LogicalOperatorType::LOGICAL_INVALID; switch (node.setop_type) { @@ -146,13 +107,41 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { logical_type = LogicalOperatorType::LOGICAL_INTERSECT; break; default: - D_ASSERT(false); - break; + throw InternalException("Unsupported logical operator type for set-operation"); } + // Generate the logical plan for the children of the set operation + + D_ASSERT(node.bound_children.size() >= 2); + vector> children; + for (auto &child : node.bound_children) { + child.binder->is_outside_flattened = is_outside_flattened; - auto root = make_uniq(node.setop_index, node.types.size(), std::move(left_node), - std::move(right_node), logical_type, node.setop_all); + // construct the logical plan for the child node + auto child_node = child.binder->CreatePlan(*child.node); + if (!child.reorder_expressions.empty()) { + // if we have re-order expressions push a projection + vector child_types; + for (auto &expr : child.reorder_expressions) { + child_types.push_back(expr->return_type); + } + auto child_projection = + make_uniq(GenerateTableIndex(), std::move(child.reorder_expressions)); + child_projection->children.push_back(std::move(child_node)); + child_node = std::move(child_projection); + child_node = CastLogicalOperatorToTypes(child_types, node.types, std::move(child_node)); + } else { + // otherwise push only casts + child_node = CastLogicalOperatorToTypes(child.node->types, node.types, std::move(child_node)); + } + // check if there are any unplanned subqueries left in any child + if (child.binder->has_unplanned_dependent_joins) { + has_unplanned_dependent_joins = true; + } + children.push_back(std::move(child_node)); + } + auto root = make_uniq(node.setop_index, node.types.size(), std::move(children), logical_type, + node.setop_all); return VisitQueryNode(node, std::move(root)); } diff --git a/src/duckdb/src/planner/binder/statement/bind_export.cpp b/src/duckdb/src/planner/binder/statement/bind_export.cpp index ee162d2d8..20d2606fe 100644 --- a/src/duckdb/src/planner/binder/statement/bind_export.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_export.cpp @@ -140,21 +140,11 @@ unique_ptr Binder::UnionOperators(vector 1) { - vector> new_nodes; - for (idx_t i = 0; i < nodes.size(); i += 2) { - if (i + 1 == nodes.size()) { - new_nodes.push_back(std::move(nodes[i])); - } else { - auto copy_union = make_uniq(GenerateTableIndex(), 1U, std::move(nodes[i]), - std::move(nodes[i + 1]), - LogicalOperatorType::LOGICAL_UNION, true, false); - new_nodes.push_back(std::move(copy_union)); - } - } - nodes = std::move(new_nodes); + if (nodes.size() == 1) { + return std::move(nodes[0]); } - return std::move(nodes[0]); + return make_uniq(GenerateTableIndex(), 1U, std::move(nodes), + LogicalOperatorType::LOGICAL_UNION, true, false); } BoundStatement Binder::Bind(ExportStatement &stmt) { diff --git a/src/duckdb/src/planner/binder/statement/bind_prepare.cpp b/src/duckdb/src/planner/binder/statement/bind_prepare.cpp index 74062e413..cbb338dfc 100644 --- a/src/duckdb/src/planner/binder/statement/bind_prepare.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_prepare.cpp @@ -10,6 +10,11 @@ BoundStatement Binder::Bind(PrepareStatement &stmt) { auto prepared_data = prepared_planner.PrepareSQLStatement(std::move(stmt.statement)); this->bound_tables = prepared_planner.binder->bound_tables; + if (prepared_planner.properties.always_require_rebind) { + // we always need to rebind - don't keep the plan around + prepared_planner.plan.reset(); + } + auto prepare = make_uniq(stmt.name, std::move(prepared_data), std::move(prepared_planner.plan)); // we can always prepare, even if the transaction has been invalidated // this is required because most clients ALWAYS invoke prepared statements diff --git a/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp b/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp index e22a7b784..c9c50000b 100644 --- a/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +++ b/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp @@ -123,81 +123,60 @@ unique_ptr Binder::Bind(BaseTableRef &ref) { // CTE name should never be qualified (i.e. schema_name should be empty) // unless we want to refer to the recurring table of "using key". - vector> found_ctes; + vector> found_ctes; if (ref.schema_name.empty() || ref.schema_name == "recurring") { - found_ctes = FindCTE(ref.table_name, ref.table_name == alias); + found_ctes = FindCTE(ref.table_name, false); } if (!found_ctes.empty()) { // Check if there is a CTE binding in the BindContext - bool circular_cte = false; - for (auto found_cte : found_ctes) { - auto &cte = found_cte.get(); - auto ctebinding = bind_context.GetCTEBinding(ref.table_name); - if (ctebinding) { - // There is a CTE binding in the BindContext. - // This can only be the case if there is a recursive CTE, - // or a materialized CTE present. - auto index = GenerateTableIndex(); - auto materialized = cte.materialized; - - if (ref.schema_name == "recurring" && cte.key_targets.empty()) { - throw InvalidInputException("RECURRING can only be used with USING KEY in recursive CTE."); + auto ctebinding = bind_context.GetCTEBinding(ref.table_name); + if (ctebinding) { + // There is a CTE binding in the BindContext. + // This can only be the case if there is a recursive CTE, + // or a materialized CTE present. + auto index = GenerateTableIndex(); + + if (ref.schema_name == "recurring") { + auto recurring_bindings = FindCTE("recurring." + ref.table_name, false); + if (recurring_bindings.empty()) { + throw BinderException(error_context, + "There is a WITH item named \"%s\", but the recurring table cannot be " + "referenced from this part of the query." + " Hint: RECURRING can only be used with USING KEY in recursive CTE.", + ref.table_name); } + } - auto result = - make_uniq(index, ctebinding->index, materialized, ref.schema_name == "recurring"); - auto alias = ref.alias.empty() ? ref.table_name : ref.alias; - auto names = BindContext::AliasColumnNames(alias, ctebinding->names, ref.column_name_alias); - - bind_context.AddGenericBinding(index, alias, names, ctebinding->types); + auto result = make_uniq(index, ctebinding->index, ref.schema_name == "recurring"); + auto alias = ref.alias.empty() ? ref.table_name : ref.alias; + auto names = BindContext::AliasColumnNames(alias, ctebinding->names, ref.column_name_alias); - auto cte_reference = ref.schema_name.empty() ? ref.table_name : ref.schema_name + "." + ref.table_name; + bind_context.AddGenericBinding(index, alias, names, ctebinding->types); - // Update references to CTE - auto cteref = bind_context.cte_references[cte_reference]; + auto cte_reference = ref.schema_name.empty() ? ref.table_name : ref.schema_name + "." + ref.table_name; - if (cteref == nullptr && ref.schema_name == "recurring") { - throw BinderException("There is a WITH item named \"%s\", but the recurring table cannot be " - "referenced from this part of the query.", - ref.table_name); - } + // Update references to CTE + auto cteref = bind_context.cte_references[cte_reference]; - (*cteref)++; - - result->types = ctebinding->types; - result->bound_columns = std::move(names); - return std::move(result); - } else { - if (CTEIsAlreadyBound(cte)) { - // remember error state - circular_cte = true; - // retry with next candidate CTE - continue; - } + if (cteref == nullptr && ref.schema_name == "recurring") { + throw BinderException(error_context, + "There is a WITH item named \"%s\", but the recurring table cannot be " + "referenced from this part of the query.", + ref.table_name); + } - // If we have found a materialized CTE, but no corresponding CTE binding, - // something is wrong. - if (cte.materialized == CTEMaterialize::CTE_MATERIALIZE_ALWAYS) { - throw BinderException( - "There is a WITH item named \"%s\", but it cannot be referenced from this part of the query.", - ref.table_name); - } + (*cteref)++; - if (ref.schema_name == "recurring") { - throw BinderException("There is a WITH item named \"%s\", but the recurring table cannot be " - "referenced from this part of the query.", - ref.table_name); - } - } + result->types = ctebinding->types; + result->bound_columns = std::move(names); + return std::move(result); } - if (circular_cte) { - auto replacement_scan_bind_result = BindWithReplacementScan(context, ref); - if (replacement_scan_bind_result) { - return replacement_scan_bind_result; - } - + } else { + // remember that we did not find a CTE + if (ref.schema_name.empty() && CTEExists(ref.table_name)) { throw BinderException( + error_context, "Circular reference to CTE \"%s\", There are two possible solutions. \n1. use WITH RECURSIVE to " "use recursive CTEs. \n2. If " "you want to use the TABLE name \"%s\" the same as the CTE name, please explicitly add " @@ -205,6 +184,7 @@ unique_ptr Binder::Bind(BaseTableRef &ref) { ref.table_name, ref.table_name, ref.table_name); } } + // not a CTE // extract a table or view from the catalog auto at_clause = BindAtClause(ref.at_clause); @@ -343,7 +323,6 @@ unique_ptr Binder::Bind(BaseTableRef &ref) { while (!materialized_ctes.empty()) { unique_ptr node_result; node_result = std::move(materialized_ctes.back()); - node_result->cte_map = root->cte_map.Copy(); node_result->child = std::move(root); root = std::move(node_result); materialized_ctes.pop_back(); diff --git a/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp b/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp index 5acbd06b5..9eed0ea61 100644 --- a/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +++ b/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp @@ -4,12 +4,9 @@ namespace duckdb { -unique_ptr Binder::Bind(SubqueryRef &ref, optional_ptr cte) { +unique_ptr Binder::Bind(SubqueryRef &ref) { auto binder = Binder::CreateBinder(context, this); binder->can_contain_nulls = true; - if (cte) { - binder->bound_ctes.insert(*cte); - } auto subquery = binder->BindNode(*ref.subquery->node); binder->alias = ref.alias.empty() ? "unnamed_subquery" : ref.alias; idx_t bind_index = subquery->GetRootIndex(); diff --git a/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp b/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp index 8bf1934fc..4ee2b9a76 100644 --- a/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +++ b/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp @@ -5,8 +5,7 @@ namespace duckdb { unique_ptr Binder::CreatePlan(BoundCTERef &ref) { - return make_uniq(ref.bind_index, ref.cte_index, ref.types, ref.bound_columns, ref.materialized_cte, - ref.is_recurring); + return make_uniq(ref.bind_index, ref.cte_index, ref.types, ref.bound_columns, ref.is_recurring); } } // namespace duckdb diff --git a/src/duckdb/src/planner/expression_iterator.cpp b/src/duckdb/src/planner/expression_iterator.cpp index 8f3140e17..042712732 100644 --- a/src/duckdb/src/planner/expression_iterator.cpp +++ b/src/duckdb/src/planner/expression_iterator.cpp @@ -195,8 +195,9 @@ void BoundNodeVisitor::VisitBoundQueryNode(BoundQueryNode &node) { switch (node.type) { case QueryNodeType::SET_OPERATION_NODE: { auto &bound_setop = node.Cast(); - VisitBoundQueryNode(*bound_setop.left); - VisitBoundQueryNode(*bound_setop.right); + for (auto &child : bound_setop.bound_children) { + VisitBoundQueryNode(*child.node); + } break; } case QueryNodeType::RECURSIVE_CTE_NODE: { diff --git a/src/duckdb/src/planner/operator/logical_set_operation.cpp b/src/duckdb/src/planner/operator/logical_set_operation.cpp index 72da2e053..a6eeaed7f 100644 --- a/src/duckdb/src/planner/operator/logical_set_operation.cpp +++ b/src/duckdb/src/planner/operator/logical_set_operation.cpp @@ -4,6 +4,33 @@ namespace duckdb { +LogicalSetOperation::LogicalSetOperation(idx_t table_index, idx_t column_count, LogicalOperatorType type, + bool setop_all, bool allow_out_of_order) + : LogicalOperator(type), table_index(table_index), column_count(column_count), setop_all(setop_all), + allow_out_of_order(allow_out_of_order) { +} + +LogicalSetOperation::LogicalSetOperation(idx_t table_index, idx_t column_count, + vector> children_p, LogicalOperatorType type, + bool setop_all, bool allow_out_of_order) + : LogicalOperator(type), table_index(table_index), column_count(column_count), setop_all(setop_all), + allow_out_of_order(allow_out_of_order) { + D_ASSERT(type == LogicalOperatorType::LOGICAL_UNION || type == LogicalOperatorType::LOGICAL_EXCEPT || + type == LogicalOperatorType::LOGICAL_INTERSECT); + children = std::move(children_p); +} + +LogicalSetOperation::LogicalSetOperation(idx_t table_index, idx_t column_count, unique_ptr top, + unique_ptr bottom, LogicalOperatorType type, bool setop_all, + bool allow_out_of_order) + : LogicalOperator(type), table_index(table_index), column_count(column_count), setop_all(setop_all), + allow_out_of_order(allow_out_of_order) { + D_ASSERT(type == LogicalOperatorType::LOGICAL_UNION || type == LogicalOperatorType::LOGICAL_EXCEPT || + type == LogicalOperatorType::LOGICAL_INTERSECT); + children.push_back(std::move(top)); + children.push_back(std::move(bottom)); +} + vector LogicalSetOperation::GetTableIndex() const { return vector {table_index}; } diff --git a/src/duckdb/src/planner/planner.cpp b/src/duckdb/src/planner/planner.cpp index 9f1cafa4d..78bca8a02 100644 --- a/src/duckdb/src/planner/planner.cpp +++ b/src/duckdb/src/planner/planner.cpp @@ -48,10 +48,6 @@ void Planner::CreatePlan(SQLStatement &statement) { this->names = bound_statement.names; this->types = bound_statement.types; this->plan = std::move(bound_statement.plan); - auto max_tree_depth = ClientConfig::GetConfig(context).max_expression_depth; - CheckTreeDepth(*plan, max_tree_depth); - - this->plan = FlattenDependentJoins::DecorrelateIndependent(*binder, std::move(this->plan)); } catch (const std::exception &ex) { ErrorData error(ex); this->plan = nullptr; @@ -80,6 +76,12 @@ void Planner::CreatePlan(SQLStatement &statement) { throw; } } + if (this->plan) { + auto max_tree_depth = ClientConfig::GetConfig(context).max_expression_depth; + CheckTreeDepth(*plan, max_tree_depth); + + this->plan = FlattenDependentJoins::DecorrelateIndependent(*this->binder, std::move(this->plan)); + } this->properties = binder->GetStatementProperties(); this->properties.parameter_count = parameter_count; properties.bound_all_parameters = !bound_parameters.rebind && parameters_resolved; diff --git a/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp b/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp index 0b0ddc672..7b2909c6d 100644 --- a/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +++ b/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp @@ -890,12 +890,16 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoinInternal auto &setop = plan->Cast(); // set operator, push into both children #ifdef DEBUG - plan->children[0]->ResolveOperatorTypes(); - plan->children[1]->ResolveOperatorTypes(); - D_ASSERT(plan->children[0]->types == plan->children[1]->types); + for (auto &child : plan->children) { + child->ResolveOperatorTypes(); + } + for (idx_t i = 1; i < plan->children.size(); i++) { + D_ASSERT(plan->children[0]->types.size() == plan->children[i]->types.size()); + } #endif - plan->children[0] = PushDownDependentJoin(std::move(plan->children[0])); - plan->children[1] = PushDownDependentJoin(std::move(plan->children[1])); + for (auto &child : plan->children) { + child = PushDownDependentJoin(std::move(child)); + } for (idx_t i = 0; i < plan->children.size(); i++) { if (plan->children[i]->type == LogicalOperatorType::LOGICAL_CROSS_PRODUCT) { auto proj_index = binder.GenerateTableIndex(); @@ -920,10 +924,15 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoinInternal // here we need to check the children. If they have reorderable bindings, you need to plan a projection // on top that will guarantee the order of the bindings. #ifdef DEBUG - D_ASSERT(plan->children[0]->GetColumnBindings().size() == plan->children[1]->GetColumnBindings().size()); - plan->children[0]->ResolveOperatorTypes(); - plan->children[1]->ResolveOperatorTypes(); - D_ASSERT(plan->children[0]->types == plan->children[1]->types); + for (idx_t i = 1; i < plan->children.size(); i++) { + D_ASSERT(plan->children[0]->GetColumnBindings().size() == plan->children[i]->GetColumnBindings().size()); + } + for (auto &child : plan->children) { + child->ResolveOperatorTypes(); + } + for (idx_t i = 1; i < plan->children.size(); i++) { + D_ASSERT(plan->children[0]->types.size() == plan->children[i]->types.size()); + } #endif // we have to refer to the setop index now base_binding.table_index = setop.table_index; diff --git a/src/duckdb/src/storage/checkpoint_manager.cpp b/src/duckdb/src/storage/checkpoint_manager.cpp index 8618b38eb..af361d4bf 100644 --- a/src/duckdb/src/storage/checkpoint_manager.cpp +++ b/src/duckdb/src/storage/checkpoint_manager.cpp @@ -279,7 +279,7 @@ void SingleFileCheckpointReader::LoadFromStorage() { return; } - if (block_manager.IsRemote()) { + if (block_manager.Prefetch()) { auto metadata_blocks = metadata_manager.GetBlocks(); auto &buffer_manager = BufferManager::GetBufferManager(storage.GetDatabase()); buffer_manager.Prefetch(metadata_blocks); diff --git a/src/duckdb/src/storage/data_table.cpp b/src/duckdb/src/storage/data_table.cpp index 2b29b2820..7d19449bb 100644 --- a/src/duckdb/src/storage/data_table.cpp +++ b/src/duckdb/src/storage/data_table.cpp @@ -905,12 +905,14 @@ void DataTable::FinalizeLocalAppend(LocalAppendState &state) { LocalStorage::FinalizeAppend(state); } -PhysicalIndex DataTable::CreateOptimisticCollection(ClientContext &context, unique_ptr collection) { +PhysicalIndex DataTable::CreateOptimisticCollection(ClientContext &context, + unique_ptr collection) { auto &local_storage = LocalStorage::Get(context, db); return local_storage.CreateOptimisticCollection(*this, std::move(collection)); } -RowGroupCollection &DataTable::GetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index) { +OptimisticWriteCollection &DataTable::GetOptimisticCollection(ClientContext &context, + const PhysicalIndex collection_index) { auto &local_storage = LocalStorage::Get(context, db); return local_storage.GetOptimisticCollection(*this, collection_index); } @@ -925,7 +927,7 @@ OptimisticDataWriter &DataTable::GetOptimisticWriter(ClientContext &context) { return local_storage.GetOptimisticWriter(*this); } -void DataTable::LocalMerge(ClientContext &context, RowGroupCollection &collection) { +void DataTable::LocalMerge(ClientContext &context, OptimisticWriteCollection &collection) { auto &local_storage = LocalStorage::Get(context, db); local_storage.LocalMerge(*this, collection); } @@ -1620,6 +1622,10 @@ void DataTable::CommitDropColumn(const idx_t column_index) { row_groups->CommitDropColumn(column_index); } +void DataTable::Destroy() { + row_groups->Destroy(); +} + idx_t DataTable::ColumnCount() const { return column_definitions.size(); } diff --git a/src/duckdb/src/storage/local_storage.cpp b/src/duckdb/src/storage/local_storage.cpp index b89183e26..e3cbb8f3b 100644 --- a/src/duckdb/src/storage/local_storage.cpp +++ b/src/duckdb/src/storage/local_storage.cpp @@ -21,9 +21,9 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &table) auto types = table.GetTypes(); auto data_table_info = table.GetDataTableInfo(); - auto &io_manager = TableIOManager::Get(table); - row_groups = make_shared_ptr(data_table_info, io_manager, types, MAX_ROW_ID, 0); - row_groups->InitializeEmpty(); + row_groups = OptimisticDataWriter::CreateCollection(table, types); + auto &collection = *row_groups->collection; + collection.InitializeEmpty(); data_table_info->GetIndexes().Scan([&](Index &index) { auto constraint = index.GetConstraintType(); @@ -68,9 +68,12 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_data optimistic_writer(new_data_table, parent.optimistic_writer), merged_storage(parent.merged_storage) { // Alter the column type. - row_groups = parent.row_groups->AlterType(context, alter_column_index, target_type, bound_columns, cast_expr); - parent.row_groups->CommitDropColumn(alter_column_index); - parent.row_groups.reset(); + auto &parent_collection = *parent.row_groups->collection; + auto new_collection = + parent_collection.AlterType(context, alter_column_index, target_type, bound_columns, cast_expr); + parent_collection.CommitDropColumn(alter_column_index); + row_groups = std::move(parent.row_groups); + row_groups->collection = std::move(new_collection); append_indexes.Move(parent.append_indexes); } @@ -82,9 +85,11 @@ LocalTableStorage::LocalTableStorage(DataTable &new_data_table, LocalTableStorag optimistic_writer(new_data_table, parent.optimistic_writer), merged_storage(parent.merged_storage) { // Remove the column from the previous table storage. - row_groups = parent.row_groups->RemoveColumn(drop_column_index); - parent.row_groups->CommitDropColumn(drop_column_index); - parent.row_groups.reset(); + auto &parent_collection = *parent.row_groups->collection; + auto new_collection = parent_collection.RemoveColumn(drop_column_index); + parent_collection.CommitDropColumn(drop_column_index); + row_groups = std::move(parent.row_groups); + row_groups->collection = std::move(new_collection); append_indexes.Move(parent.append_indexes); } @@ -95,8 +100,10 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_dt, optimistic_collections(std::move(parent.optimistic_collections)), optimistic_writer(new_dt, parent.optimistic_writer), merged_storage(parent.merged_storage) { - row_groups = parent.row_groups->AddColumn(context, new_column, default_executor); - parent.row_groups.reset(); + auto &parent_collection = *parent.row_groups->collection; + auto new_collection = parent_collection.AddColumn(context, new_column, default_executor); + row_groups = std::move(parent.row_groups); + row_groups->collection = std::move(new_collection); append_indexes.Move(parent.append_indexes); } @@ -104,19 +111,21 @@ LocalTableStorage::~LocalTableStorage() { } void LocalTableStorage::InitializeScan(CollectionScanState &state, optional_ptr table_filters) { - if (row_groups->GetTotalRows() == 0) { + auto &collection = *row_groups->collection; + if (collection.GetTotalRows() == 0) { throw InternalException("No rows in LocalTableStorage row group for scan"); } - row_groups->InitializeScan(state, state.GetColumnIds(), table_filters.get()); + collection.InitializeScan(state, state.GetColumnIds(), table_filters.get()); } idx_t LocalTableStorage::EstimatedSize() { // count the appended rows - idx_t appended_rows = row_groups->GetTotalRows() - deleted_rows; + auto &collection = *row_groups->collection; + idx_t appended_rows = collection.GetTotalRows() - deleted_rows; // get the (estimated) size of a row (no compressions, etc.) idx_t row_size = 0; - auto &types = row_groups->GetTypes(); + auto &types = collection.GetTypes(); for (auto &type : types) { row_size += GetTypeIdSize(type.InternalType()); } @@ -144,8 +153,9 @@ void LocalTableStorage::WriteNewRowGroup() { } void LocalTableStorage::FlushBlocks() { - const idx_t row_group_size = row_groups->GetRowGroupSize(); - if (!merged_storage && row_groups->GetTotalRows() > row_group_size) { + auto &collection = *row_groups->collection; + const idx_t row_group_size = collection.GetRowGroupSize(); + if (!merged_storage && collection.GetTotalRows() > row_group_size) { optimistic_writer.WriteLastRowGroup(*row_groups); } optimistic_writer.FinalFlush(); @@ -205,16 +215,17 @@ void LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, TableAppen auto &index_list = data_table_info->GetIndexes(); ErrorData error; + auto &collection = *row_groups->collection; if (append_to_table) { // Appending to the table: we need to scan the entire chunk. DataChunk index_chunk; vector mapped_column_ids; if (table.HasIndexes() && index_list.HasUnbound()) { - TableIndexList::InitializeIndexChunk(index_chunk, row_groups->GetTypes(), mapped_column_ids, + TableIndexList::InitializeIndexChunk(index_chunk, collection.GetTypes(), mapped_column_ids, *data_table_info); } - row_groups->Scan(transaction, [&](DataChunk &table_chunk) -> bool { + collection.Scan(transaction, [&](DataChunk &table_chunk) -> bool { if (table.HasIndexes()) { if (index_list.HasUnbound()) { // The index chunk references all indexed columns. @@ -238,14 +249,14 @@ void LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, TableAppen } else { // We only append to the indexes. - error = AppendToIndexes(transaction, *row_groups, index_list, table.GetTypes(), append_state.current_row); + error = AppendToIndexes(transaction, collection, index_list, table.GetTypes(), append_state.current_row); } if (error.HasError()) { // Revert all appended row IDs. row_t current_row = append_state.row_start; // Remove the data from the indexes, if any. - row_groups->Scan(transaction, [&](DataChunk &chunk) -> bool { + collection.Scan(transaction, [&](DataChunk &chunk) -> bool { // Remove the chunk. try { table.RemoveFromIndexes(append_state, chunk, current_row); @@ -278,13 +289,13 @@ void LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, TableAppen } } -PhysicalIndex LocalTableStorage::CreateOptimisticCollection(unique_ptr collection) { +PhysicalIndex LocalTableStorage::CreateOptimisticCollection(unique_ptr collection) { lock_guard l(collections_lock); optimistic_collections.push_back(std::move(collection)); return PhysicalIndex(optimistic_collections.size() - 1); } -RowGroupCollection &LocalTableStorage::GetOptimisticCollection(const PhysicalIndex collection_index) { +OptimisticWriteCollection &LocalTableStorage::GetOptimisticCollection(const PhysicalIndex collection_index) { lock_guard l(collections_lock); auto &collection = optimistic_collections[collection_index.index]; return *collection; @@ -306,10 +317,10 @@ void LocalTableStorage::Rollback() { if (!collection) { continue; } - collection->CommitDropTable(); + collection->collection->CommitDropTable(); } optimistic_collections.clear(); - row_groups->CommitDropTable(); + row_groups->collection->CommitDropTable(); } //===--------------------------------------------------------------------===// @@ -398,7 +409,7 @@ LocalStorage &LocalStorage::Get(ClientContext &context, Catalog &catalog) { void LocalStorage::InitializeScan(DataTable &table, CollectionScanState &state, optional_ptr table_filters) { auto storage = table_manager.GetStorage(table); - if (storage == nullptr || storage->row_groups->GetTotalRows() == 0) { + if (storage == nullptr || storage->GetCollection().GetTotalRows() == 0) { return; } storage->InitializeScan(state, table_filters); @@ -415,22 +426,26 @@ void LocalStorage::InitializeParallelScan(DataTable &table, ParallelCollectionSc state.vector_index = 0; state.current_row_group = nullptr; } else { - storage->row_groups->InitializeParallelScan(state); + storage->GetCollection().InitializeParallelScan(state); } } +RowGroupCollection &LocalTableStorage::GetCollection() { + return *row_groups->collection; +} + bool LocalStorage::NextParallelScan(ClientContext &context, DataTable &table, ParallelCollectionScanState &state, CollectionScanState &scan_state) { auto storage = table_manager.GetStorage(table); if (!storage) { return false; } - return storage->row_groups->NextParallelScan(context, state, scan_state); + return storage->GetCollection().NextParallelScan(context, state, scan_state); } void LocalStorage::InitializeAppend(LocalAppendState &state, DataTable &table) { state.storage = &table_manager.GetOrCreateStorage(context, table); - state.storage->row_groups->InitializeAppend(TransactionData(transaction), state.append_state); + state.storage->GetCollection().InitializeAppend(TransactionData(transaction), state.append_state); } void LocalStorage::InitializeStorage(LocalAppendState &state, DataTable &table) { @@ -459,7 +474,7 @@ void LocalTableStorage::AppendToDeleteIndexes(Vector &row_ids, DataChunk &delete void LocalStorage::Append(LocalAppendState &state, DataChunk &table_chunk, DataTableInfo &data_table_info) { // Append to any unique indexes. auto storage = state.storage; - auto offset = NumericCast(MAX_ROW_ID) + storage->row_groups->GetTotalRows(); + auto offset = NumericCast(MAX_ROW_ID) + storage->GetCollection().GetTotalRows(); idx_t base_id = offset + state.append_state.total_append_count; if (!storage->append_indexes.Empty()) { @@ -482,7 +497,7 @@ void LocalStorage::Append(LocalAppendState &state, DataChunk &table_chunk, DataT } // Append the chunk to the local storage. - auto new_row_group = storage->row_groups->Append(table_chunk, state.append_state); + auto new_row_group = storage->GetCollection().Append(table_chunk, state.append_state); // Check if we should pre-emptively flush blocks to disk. if (new_row_group) { @@ -491,30 +506,32 @@ void LocalStorage::Append(LocalAppendState &state, DataChunk &table_chunk, DataT } void LocalStorage::FinalizeAppend(LocalAppendState &state) { - state.storage->row_groups->FinalizeAppend(state.append_state.transaction, state.append_state); + state.storage->GetCollection().FinalizeAppend(state.append_state.transaction, state.append_state); } -void LocalStorage::LocalMerge(DataTable &table, RowGroupCollection &collection) { +void LocalStorage::LocalMerge(DataTable &table, OptimisticWriteCollection &collection) { auto &storage = table_manager.GetOrCreateStorage(context, table); if (!storage.append_indexes.Empty()) { // append data to indexes if required - row_t base_id = MAX_ROW_ID + NumericCast(storage.row_groups->GetTotalRows()); - auto error = - storage.AppendToIndexes(transaction, collection, storage.append_indexes, table.GetTypes(), base_id); + row_t base_id = MAX_ROW_ID + NumericCast(storage.GetCollection().GetTotalRows()); + auto error = storage.AppendToIndexes(transaction, *collection.collection, storage.append_indexes, + table.GetTypes(), base_id); if (error.HasError()) { error.Throw(); } } - storage.row_groups->MergeStorage(collection, nullptr, nullptr); + storage.GetCollection().MergeStorage(*collection.collection, nullptr, nullptr); storage.merged_storage = true; } -PhysicalIndex LocalStorage::CreateOptimisticCollection(DataTable &table, unique_ptr collection) { +PhysicalIndex LocalStorage::CreateOptimisticCollection(DataTable &table, + unique_ptr collection) { auto &storage = table_manager.GetOrCreateStorage(context, table); return storage.CreateOptimisticCollection(std::move(collection)); } -RowGroupCollection &LocalStorage::GetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index) { +OptimisticWriteCollection &LocalStorage::GetOptimisticCollection(DataTable &table, + const PhysicalIndex collection_index) { auto &storage = table_manager.GetOrCreateStorage(context, table); return storage.GetOptimisticCollection(collection_index); } @@ -547,11 +564,11 @@ idx_t LocalStorage::Delete(DataTable &table, Vector &row_ids, idx_t count) { // delete from unique indices (if any) if (!storage->append_indexes.Empty()) { - storage->row_groups->RemoveFromIndexes(storage->append_indexes, row_ids, count); + storage->GetCollection().RemoveFromIndexes(storage->append_indexes, row_ids, count); } auto ids = FlatVector::GetData(row_ids); - idx_t delete_count = storage->row_groups->Delete(TransactionData(0, 0), table, ids, count); + idx_t delete_count = storage->GetCollection().Delete(TransactionData(0, 0), table, ids, count); storage->deleted_rows += delete_count; return delete_count; } @@ -563,27 +580,27 @@ void LocalStorage::Update(DataTable &table, Vector &row_ids, const vector(row_ids); - storage->row_groups->Update(TransactionData(0, 0), ids, column_ids, updates); + storage->GetCollection().Update(TransactionData(0, 0), ids, column_ids, updates); } void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage, optional_ptr commit_state) { if (storage.is_dropped) { return; } - if (storage.row_groups->GetTotalRows() <= storage.deleted_rows) { + if (storage.GetCollection().GetTotalRows() <= storage.deleted_rows) { // all rows that we added were deleted // rollback any partial blocks that are still outstanding storage.Rollback(); return; } - auto append_count = storage.row_groups->GetTotalRows() - storage.deleted_rows; - const auto row_group_size = storage.row_groups->GetRowGroupSize(); + auto append_count = storage.GetCollection().GetTotalRows() - storage.deleted_rows; + const auto row_group_size = storage.GetCollection().GetRowGroupSize(); TableAppendState append_state; table.AppendLock(append_state); transaction.PushAppend(table, NumericCast(append_state.row_start), append_count); - if ((append_state.row_start == 0 || storage.row_groups->GetTotalRows() >= row_group_size) && + if ((append_state.row_start == 0 || storage.GetCollection().GetTotalRows() >= row_group_size) && storage.deleted_rows == 0) { // table is currently empty OR we are bulk appending: move over the storage directly // first flush any outstanding blocks @@ -593,7 +610,7 @@ void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage, optional_ storage.AppendToIndexes(transaction, append_state, false); } // finally move over the row groups - table.MergeStorage(*storage.row_groups, storage.append_indexes, commit_state); + table.MergeStorage(storage.GetCollection(), storage.append_indexes, commit_state); } else { // check if we have written data // if we have, we cannot merge to disk after all @@ -641,7 +658,7 @@ idx_t LocalStorage::AddedRows(DataTable &table) { if (!storage) { return 0; } - return storage->row_groups->GetTotalRows() - storage->deleted_rows; + return storage->GetCollection().GetTotalRows() - storage->deleted_rows; } vector LocalStorage::GetPartitionStats(DataTable &table) const { @@ -649,7 +666,7 @@ vector LocalStorage::GetPartitionStats(DataTable &table) co if (!storage) { return vector(); } - return storage->row_groups->GetPartitionStats(); + return storage->GetCollection().GetPartitionStats(); } void LocalStorage::DropTable(DataTable &table) { @@ -710,7 +727,7 @@ void LocalStorage::FetchChunk(DataTable &table, Vector &row_ids, idx_t count, co if (!storage) { throw InternalException("LocalStorage::FetchChunk - local storage not found"); } - storage->row_groups->Fetch(transaction, chunk, col_ids, row_ids, count, fetch_state); + storage->GetCollection().Fetch(transaction, chunk, col_ids, row_ids, count, fetch_state); } bool LocalStorage::CanFetch(DataTable &table, const row_t row_id) { @@ -718,7 +735,7 @@ bool LocalStorage::CanFetch(DataTable &table, const row_t row_id) { if (!storage) { throw InternalException("LocalStorage::CanFetch - local storage not found"); } - return storage->row_groups->CanFetch(transaction, row_id); + return storage->GetCollection().CanFetch(transaction, row_id); } TableIndexList &LocalStorage::GetIndexes(ClientContext &context, DataTable &table) { @@ -735,7 +752,7 @@ void LocalStorage::VerifyNewConstraint(DataTable &parent, const BoundConstraint if (!storage) { return; } - storage->row_groups->VerifyNewConstraint(parent, constraint); + storage->GetCollection().VerifyNewConstraint(parent, constraint); } } // namespace duckdb diff --git a/src/duckdb/src/storage/metadata/metadata_manager.cpp b/src/duckdb/src/storage/metadata/metadata_manager.cpp index 55b8790e4..8674f742d 100644 --- a/src/duckdb/src/storage/metadata/metadata_manager.cpp +++ b/src/duckdb/src/storage/metadata/metadata_manager.cpp @@ -54,6 +54,8 @@ MetadataManager::~MetadataManager() { MetadataHandle MetadataManager::AllocateHandle() { // check if there is any free space left in an existing block // if not allocate a new block + MetadataPointer pointer; + unique_lock guard(block_lock); block_id_t free_block = INVALID_BLOCK; for (auto &kv : blocks) { auto &block = kv.second; @@ -63,13 +65,16 @@ MetadataHandle MetadataManager::AllocateHandle() { break; } } + guard.unlock(); if (free_block == INVALID_BLOCK || free_block > PeekNextBlockId()) { - free_block = AllocateNewBlock(); + free_block = AllocateNewBlock(guard); + } else { + guard.lock(); } + D_ASSERT(guard.owns_lock()); D_ASSERT(free_block != INVALID_BLOCK); // select the first free metadata block we can find - MetadataPointer pointer; pointer.block_index = UnsafeNumericCast(free_block); auto &block = blocks[free_block]; // the block is now dirty @@ -77,7 +82,7 @@ MetadataHandle MetadataManager::AllocateHandle() { if (block.block->BlockId() < MAXIMUM_BLOCK) { // this block is a disk-backed block, yet we are planning to write to it // we need to convert it into a transient block before we can write to it - ConvertToTransient(block); + ConvertToTransient(guard, block); D_ASSERT(block.block->BlockId() >= MAXIMUM_BLOCK); } D_ASSERT(!block.free_blocks.empty()); @@ -85,6 +90,7 @@ MetadataHandle MetadataManager::AllocateHandle() { // mark the block as used block.free_blocks.pop_back(); D_ASSERT(pointer.index < METADATA_BLOCK_COUNT); + guard.unlock(); // pin the block return Pin(pointer); } @@ -95,25 +101,34 @@ MetadataHandle MetadataManager::Pin(const MetadataPointer &pointer) { MetadataHandle MetadataManager::Pin(QueryContext context, const MetadataPointer &pointer) { D_ASSERT(pointer.index < METADATA_BLOCK_COUNT); - auto &block = blocks[UnsafeNumericCast(pointer.block_index)]; + shared_ptr block_handle; + { + lock_guard guard(block_lock); + auto &block = blocks[UnsafeNumericCast(pointer.block_index)]; #ifdef DEBUG - for (auto &free_block : block.free_blocks) { - if (free_block == pointer.index) { - throw InternalException("Pinning block %d.%d but it is marked as a free block", block.block_id, free_block); + for (auto &free_block : block.free_blocks) { + if (free_block == pointer.index) { + throw InternalException("Pinning block %d.%d but it is marked as a free block", block.block_id, + free_block); + } } - } #endif + block_handle = block.block; + } MetadataHandle handle; handle.pointer.block_index = pointer.block_index; handle.pointer.index = pointer.index; - handle.handle = buffer_manager.Pin(block.block); + handle.handle = buffer_manager.Pin(block_handle); return handle; } -void MetadataManager::ConvertToTransient(MetadataBlock &metadata_block) { +void MetadataManager::ConvertToTransient(unique_lock &block_lock, MetadataBlock &metadata_block) { + D_ASSERT(block_lock.owns_lock()); + auto old_block = metadata_block.block; + block_lock.unlock(); // pin the old block - auto old_buffer = buffer_manager.Pin(metadata_block.block); + auto old_buffer = buffer_manager.Pin(old_block); // allocate a new transient block to replace it auto new_buffer = buffer_manager.Allocate(MemoryTag::METADATA, &block_manager, false); @@ -121,14 +136,17 @@ void MetadataManager::ConvertToTransient(MetadataBlock &metadata_block) { // copy the data to the transient block memcpy(new_buffer.Ptr(), old_buffer.Ptr(), block_manager.GetBlockSize()); - metadata_block.block = std::move(new_block); - metadata_block.dirty = true; // unregister the old block block_manager.UnregisterBlock(metadata_block.block_id); + + block_lock.lock(); + metadata_block.block = std::move(new_block); + metadata_block.dirty = true; } -block_id_t MetadataManager::AllocateNewBlock() { +block_id_t MetadataManager::AllocateNewBlock(unique_lock &block_lock) { + D_ASSERT(!block_lock.owns_lock()); auto new_block_id = GetNextBlockId(); MetadataBlock new_block; @@ -141,11 +159,14 @@ block_id_t MetadataManager::AllocateNewBlock() { new_block.dirty = true; // zero-initialize the handle memset(handle.Ptr(), 0, block_manager.GetBlockSize()); - AddBlock(std::move(new_block)); + + block_lock.lock(); + AddBlock(block_lock, std::move(new_block)); return new_block_id; } -void MetadataManager::AddBlock(MetadataBlock new_block, bool if_exists) { +void MetadataManager::AddBlock(unique_lock &block_lock, MetadataBlock new_block, bool if_exists) { + D_ASSERT(block_lock.owns_lock()); if (blocks.find(new_block.block_id) != blocks.end()) { if (if_exists) { return; @@ -155,15 +176,17 @@ void MetadataManager::AddBlock(MetadataBlock new_block, bool if_exists) { blocks[new_block.block_id] = std::move(new_block); } -void MetadataManager::AddAndRegisterBlock(MetadataBlock block) { +void MetadataManager::AddAndRegisterBlock(unique_lock &block_lock, MetadataBlock block) { if (block.block) { throw InternalException("Calling AddAndRegisterBlock on block that already exists"); } if (block.block_id >= MAXIMUM_BLOCK) { throw InternalException("AddAndRegisterBlock called with a transient block id"); } + block_lock.unlock(); block.block = block_manager.RegisterBlock(block.block_id); - AddBlock(std::move(block), true); + block_lock.lock(); + AddBlock(block_lock, std::move(block), true); } MetaBlockPointer MetadataManager::GetDiskPointer(const MetadataPointer &pointer, uint32_t offset) { @@ -181,8 +204,14 @@ uint32_t MetaBlockPointer::GetBlockIndex() const { } MetadataPointer MetadataManager::FromDiskPointer(MetaBlockPointer pointer) { + unique_lock guard(block_lock); + return FromDiskPointerInternal(guard, pointer); +} + +MetadataPointer MetadataManager::FromDiskPointerInternal(unique_lock &block_lock, MetaBlockPointer pointer) { auto block_id = pointer.GetBlockId(); auto index = pointer.GetBlockIndex(); + auto entry = blocks.find(block_id); if (entry == blocks.end()) { // LCOV_EXCL_START throw InternalException("Failed to load metadata pointer (id %llu, idx %llu, ptr %llu)\n", block_id, index, @@ -195,11 +224,13 @@ MetadataPointer MetadataManager::FromDiskPointer(MetaBlockPointer pointer) { } MetadataPointer MetadataManager::RegisterDiskPointer(MetaBlockPointer pointer) { + unique_lock guard(block_lock); + auto block_id = pointer.GetBlockId(); MetadataBlock block; block.block_id = block_id; - AddAndRegisterBlock(std::move(block)); - return FromDiskPointer(pointer); + AddAndRegisterBlock(guard, std::move(block)); + return FromDiskPointerInternal(guard, pointer); } BlockPointer MetadataManager::ToBlockPointer(MetaBlockPointer meta_pointer, const idx_t metadata_block_size) { @@ -232,6 +263,7 @@ void MetadataManager::Flush() { // Write the blocks of the metadata manager to disk. const idx_t total_metadata_size = GetMetadataBlockSize() * METADATA_BLOCK_COUNT; + unique_lock guard(block_lock, std::defer_lock); for (auto &kv : blocks) { auto &block = kv.second; if (!block.dirty) { @@ -245,9 +277,13 @@ void MetadataManager::Flush() { memset(handle.Ptr() + total_metadata_size, 0, block_manager.GetBlockSize() - total_metadata_size); D_ASSERT(kv.first == block.block_id); if (block.block->BlockId() >= MAXIMUM_BLOCK) { + auto new_block = + block_manager.ConvertToPersistent(QueryContext(), kv.first, block.block, std::move(handle)); + // Convert the temporary block to a persistent block. - block.block = - block_manager.ConvertToPersistent(QueryContext(), kv.first, std::move(block.block), std::move(handle)); + guard.lock(); + block.block = std::move(new_block); + guard.unlock(); } else { // Already a persistent block, so we only need to write it. D_ASSERT(block.block->BlockId() == block.block_id); @@ -269,10 +305,12 @@ void MetadataManager::Read(ReadStream &source) { auto block_count = source.Read(); for (idx_t i = 0; i < block_count; i++) { auto block = MetadataBlock::Read(source); + + unique_lock guard(block_lock); auto entry = blocks.find(block.block_id); if (entry == blocks.end()) { // block does not exist yet - AddAndRegisterBlock(std::move(block)); + AddAndRegisterBlock(guard, std::move(block)); } else { // block was already created - only copy over the free list entry->second.free_blocks = std::move(block.free_blocks); @@ -349,6 +387,7 @@ void MetadataManager::MarkBlocksAsModified() { } modified_blocks.clear(); + for (auto &kv : blocks) { auto &block = kv.second; idx_t free_list = block.FreeBlocksToInteger(); @@ -361,6 +400,7 @@ void MetadataManager::ClearModifiedBlocks(const vector &pointe if (pointers.empty()) { return; } + unique_lock guard(block_lock); for (auto &pointer : pointers) { auto block_id = pointer.GetBlockId(); auto block_index = pointer.GetBlockIndex(); @@ -376,6 +416,7 @@ void MetadataManager::ClearModifiedBlocks(const vector &pointe vector MetadataManager::GetMetadataInfo() const { vector result; + unique_lock guard(block_lock); for (auto &block : blocks) { MetadataBlockInfo block_info; block_info.block_id = block.second.block_id; @@ -393,17 +434,18 @@ vector MetadataManager::GetMetadataInfo() const { vector> MetadataManager::GetBlocks() const { vector> result; + unique_lock guard(block_lock); for (auto &entry : blocks) { result.push_back(entry.second.block); } return result; } -block_id_t MetadataManager::PeekNextBlockId() { +block_id_t MetadataManager::PeekNextBlockId() const { return block_manager.PeekFreeBlockId(); } -block_id_t MetadataManager::GetNextBlockId() { +block_id_t MetadataManager::GetNextBlockId() const { return block_manager.GetFreeBlockId(); } diff --git a/src/duckdb/src/storage/optimistic_data_writer.cpp b/src/duckdb/src/storage/optimistic_data_writer.cpp index 94dd5210e..4f595223f 100644 --- a/src/duckdb/src/storage/optimistic_data_writer.cpp +++ b/src/duckdb/src/storage/optimistic_data_writer.cpp @@ -2,6 +2,7 @@ #include "duckdb/storage/table/column_segment.hpp" #include "duckdb/storage/partial_block_manager.hpp" #include "duckdb/storage/table/column_checkpoint_state.hpp" +#include "duckdb/main/settings.hpp" namespace duckdb { @@ -33,30 +34,55 @@ bool OptimisticDataWriter::PrepareWrite() { return true; } -void OptimisticDataWriter::WriteNewRowGroup(RowGroupCollection &row_groups) { +unique_ptr OptimisticDataWriter::CreateCollection(DataTable &storage, + const vector &insert_types) { + auto table_info = storage.GetDataTableInfo(); + auto &io_manager = TableIOManager::Get(storage); + + // Create the local row group collection. + auto max_row_id = NumericCast(MAX_ROW_ID); + auto row_groups = make_shared_ptr(std::move(table_info), io_manager, insert_types, max_row_id); + + auto result = make_uniq(); + result->collection = std::move(row_groups); + return result; +} + +void OptimisticDataWriter::WriteNewRowGroup(OptimisticWriteCollection &row_groups) { // we finished writing a complete row group if (!PrepareWrite()) { return; } - // flush second-to-last row group - auto row_group = row_groups.GetRowGroup(-2); - FlushToDisk(*row_group); + + row_groups.complete_row_groups++; + auto unflushed_row_groups = row_groups.complete_row_groups - row_groups.last_flushed; + if (unflushed_row_groups >= DBConfig::GetSetting(context)) { + // we have crossed our flush threshold - flush any unwritten row groups to disk + vector> to_flush; + for (idx_t i = row_groups.last_flushed; i < row_groups.complete_row_groups; i++) { + to_flush.push_back(*row_groups.collection->GetRowGroup(NumericCast(i))); + } + FlushToDisk(to_flush); + row_groups.last_flushed = row_groups.complete_row_groups; + } } -void OptimisticDataWriter::WriteLastRowGroup(RowGroupCollection &row_groups) { +void OptimisticDataWriter::WriteLastRowGroup(OptimisticWriteCollection &row_groups) { // we finished writing a complete row group if (!PrepareWrite()) { return; } - // flush second-to-last row group - auto row_group = row_groups.GetRowGroup(-1); - if (!row_group) { - return; + // flush the last batch of row groups + vector> to_flush; + for (idx_t i = row_groups.last_flushed; i < row_groups.complete_row_groups; i++) { + to_flush.push_back(*row_groups.collection->GetRowGroup(NumericCast(i))); } - FlushToDisk(*row_group); + // add the last (incomplete) row group + to_flush.push_back(*row_groups.collection->GetRowGroup(-1)); + FlushToDisk(to_flush); } -void OptimisticDataWriter::FlushToDisk(RowGroup &row_group) { +void OptimisticDataWriter::FlushToDisk(const vector> &row_groups) { //! The set of column compression types (if any) vector compression_types; D_ASSERT(compression_types.empty()); @@ -64,7 +90,7 @@ void OptimisticDataWriter::FlushToDisk(RowGroup &row_group) { compression_types.push_back(column.CompressionType()); } RowGroupWriteInfo info(*partial_manager, compression_types); - row_group.WriteToDisk(info); + RowGroup::WriteToDisk(info, row_groups); } void OptimisticDataWriter::Merge(OptimisticDataWriter &other) { diff --git a/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp b/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp index 83c37ec4f..4c70c25c1 100644 --- a/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +++ b/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp @@ -290,7 +290,6 @@ void LogicalCTERef::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault(201, "cte_index", cte_index); serializer.WritePropertyWithDefault>(202, "chunk_types", chunk_types); serializer.WritePropertyWithDefault>(203, "bound_columns", bound_columns); - serializer.WriteProperty(204, "materialized_cte", materialized_cte); serializer.WritePropertyWithDefault(205, "is_recurring", is_recurring); } @@ -299,8 +298,7 @@ unique_ptr LogicalCTERef::Deserialize(Deserializer &deserialize auto cte_index = deserializer.ReadPropertyWithDefault(201, "cte_index"); auto chunk_types = deserializer.ReadPropertyWithDefault>(202, "chunk_types"); auto bound_columns = deserializer.ReadPropertyWithDefault>(203, "bound_columns"); - auto materialized_cte = deserializer.ReadProperty(204, "materialized_cte"); - auto result = duckdb::unique_ptr(new LogicalCTERef(table_index, cte_index, std::move(chunk_types), std::move(bound_columns), materialized_cte)); + auto result = duckdb::unique_ptr(new LogicalCTERef(table_index, cte_index, std::move(chunk_types), std::move(bound_columns))); deserializer.ReadPropertyWithDefault(205, "is_recurring", result->is_recurring); return std::move(result); } diff --git a/src/duckdb/src/storage/serialization/serialize_query_node.cpp b/src/duckdb/src/storage/serialization/serialize_query_node.cpp index 807d8da71..50ab535d2 100644 --- a/src/duckdb/src/storage/serialization/serialize_query_node.cpp +++ b/src/duckdb/src/storage/serialization/serialize_query_node.cpp @@ -111,10 +111,12 @@ unique_ptr SelectNode::Deserialize(Deserializer &deserializer) { void SetOperationNode::Serialize(Serializer &serializer) const { QueryNode::Serialize(serializer); serializer.WriteProperty(200, "setop_type", setop_type); - serializer.WritePropertyWithDefault>(201, "left", left); - serializer.WritePropertyWithDefault>(202, "right", right); + serializer.WritePropertyWithDefault>(201, "left", SerializeChildNode(serializer, 0)); + serializer.WritePropertyWithDefault>(202, "right", SerializeChildNode(serializer, 1)); serializer.WritePropertyWithDefault(203, "setop_all", setop_all, true); - serializer.WritePropertyWithDefault>>(204, "children", SerializeChildNodes()); + if (serializer.ShouldSerialize(7)) { + serializer.WritePropertyWithDefault>>(204, "children", children); + } } unique_ptr SetOperationNode::Deserialize(Deserializer &deserializer) { diff --git a/src/duckdb/src/storage/serialization/serialize_storage.cpp b/src/duckdb/src/storage/serialization/serialize_storage.cpp index 29ac3038c..a6964ed2b 100644 --- a/src/duckdb/src/storage/serialization/serialize_storage.cpp +++ b/src/duckdb/src/storage/serialization/serialize_storage.cpp @@ -25,7 +25,9 @@ BlockPointer BlockPointer::Deserialize(Deserializer &deserializer) { } void DataPointer::Serialize(Serializer &serializer) const { - serializer.WritePropertyWithDefault(100, "row_start", row_start); + if (!serializer.ShouldSerialize(7)) { + serializer.WritePropertyWithDefault(100, "row_start", row_start); + } serializer.WritePropertyWithDefault(101, "tuple_count", tuple_count); serializer.WriteProperty(102, "block_pointer", block_pointer); serializer.WriteProperty(103, "compression_type", compression_type); @@ -34,13 +36,12 @@ void DataPointer::Serialize(Serializer &serializer) const { } DataPointer DataPointer::Deserialize(Deserializer &deserializer) { - auto row_start = deserializer.ReadPropertyWithDefault(100, "row_start"); + deserializer.ReadDeletedProperty(100, "row_start"); auto tuple_count = deserializer.ReadPropertyWithDefault(101, "tuple_count"); auto block_pointer = deserializer.ReadProperty(102, "block_pointer"); auto compression_type = deserializer.ReadProperty(103, "compression_type"); auto statistics = deserializer.ReadProperty(104, "statistics"); DataPointer result(std::move(statistics)); - result.row_start = row_start; result.tuple_count = tuple_count; result.block_pointer = block_pointer; result.compression_type = compression_type; diff --git a/src/duckdb/src/storage/single_file_block_manager.cpp b/src/duckdb/src/storage/single_file_block_manager.cpp index 42ebe4491..6d22ff423 100644 --- a/src/duckdb/src/storage/single_file_block_manager.cpp +++ b/src/duckdb/src/storage/single_file_block_manager.cpp @@ -866,6 +866,20 @@ bool SingleFileBlockManager::IsRemote() { return !handle->OnDiskFile(); } +bool SingleFileBlockManager::Prefetch() { + switch (DBConfig::GetSetting(db.GetDatabase())) { + case StorageBlockPrefetch::NEVER: + return false; + case StorageBlockPrefetch::DEBUG_FORCE_ALWAYS: + case StorageBlockPrefetch::ALWAYS_PREFETCH: + return !InMemory(); + case StorageBlockPrefetch::REMOTE_ONLY: + return IsRemote(); + default: + throw InternalException("Unknown StorageBlockPrefetch type"); + } +} + unique_ptr SingleFileBlockManager::ConvertBlock(block_id_t block_id, FileBuffer &source_buffer) { D_ASSERT(source_buffer.AllocSize() == GetBlockAllocSize()); // FIXME; maybe we should pass the block header size explicitly diff --git a/src/duckdb/src/storage/standard_buffer_manager.cpp b/src/duckdb/src/storage/standard_buffer_manager.cpp index d7e51f3dc..e15986e1c 100644 --- a/src/duckdb/src/storage/standard_buffer_manager.cpp +++ b/src/duckdb/src/storage/standard_buffer_manager.cpp @@ -12,6 +12,7 @@ #include "duckdb/storage/temporary_file_manager.hpp" #include "duckdb/storage/temporary_memory_manager.hpp" #include "duckdb/common/encryption_functions.hpp" +#include "duckdb/main/settings.hpp" namespace duckdb { @@ -246,14 +247,14 @@ void StandardBufferManager::BatchRead(vector> &handles, block_id_t first_block, block_id_t last_block) { auto &block_manager = handles[0]->block_manager; idx_t block_count = NumericCast(last_block - first_block + 1); -#ifndef DUCKDB_ALTERNATIVE_VERIFY if (block_count == 1) { - // prefetching with block_count == 1 has no performance impact since we can't batch reads - // skip the prefetch in this case - // we do it anyway if alternative_verify is on for extra testing - return; + if (DBConfig::GetSetting(db) != StorageBlockPrefetch::DEBUG_FORCE_ALWAYS) { + // prefetching with block_count == 1 has no performance impact since we can't batch reads + // skip the prefetch in this case + // we do it anyway if alternative_verify is on for extra testing + return; + } } -#endif // allocate a buffer to hold the data of all of the blocks auto total_block_size = block_count * block_manager.GetBlockAllocSize(); diff --git a/src/duckdb/src/storage/storage_info.cpp b/src/duckdb/src/storage/storage_info.cpp index b4a7422e9..616aa3039 100644 --- a/src/duckdb/src/storage/storage_info.cpp +++ b/src/duckdb/src/storage/storage_info.cpp @@ -83,13 +83,14 @@ static const StorageVersionInfo storage_version_info[] = { {"v1.3.1", 66}, {"v1.3.2", 66}, {"v1.4.0", 67}, + {"v1.5.0", 67}, {nullptr, 0} }; // END OF STORAGE VERSION INFO static_assert(DEFAULT_STORAGE_VERSION_INFO == VERSION_NUMBER, "Check on VERSION_INFO"); // START OF SERIALIZATION VERSION INFO -const uint64_t LATEST_SERIALIZATION_VERSION_INFO = 6; +const uint64_t LATEST_SERIALIZATION_VERSION_INFO = 7; const uint64_t DEFAULT_SERIALIZATION_VERSION_INFO = 1; static const SerializationVersionInfo serialization_version_info[] = { {"v0.10.0", 1}, @@ -108,7 +109,8 @@ static const SerializationVersionInfo serialization_version_info[] = { {"v1.3.1", 5}, {"v1.3.2", 5}, {"v1.4.0", 6}, - {"latest", 6}, + {"v1.5.0", 7}, + {"latest", 7}, {nullptr, 0} }; // END OF SERIALIZATION VERSION INFO diff --git a/src/duckdb/src/storage/storage_manager.cpp b/src/duckdb/src/storage/storage_manager.cpp index ef90d5535..d82905452 100644 --- a/src/duckdb/src/storage/storage_manager.cpp +++ b/src/duckdb/src/storage/storage_manager.cpp @@ -13,6 +13,9 @@ #include "duckdb/storage/storage_extension.hpp" #include "duckdb/storage/table/column_data.hpp" #include "duckdb/storage/table/in_memory_checkpoint.hpp" +#include "duckdb/catalog/duck_catalog.hpp" +#include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp" +#include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" #include "mbedtls_wrapper.hpp" namespace duckdb { @@ -142,6 +145,9 @@ bool StorageManager::InMemory() const { return path == IN_MEMORY_PATH; } +void StorageManager::Destroy() { +} + void StorageManager::Initialize(QueryContext context) { bool in_memory = InMemory(); if (in_memory && read_only) { @@ -490,6 +496,33 @@ void SingleFileStorageManager::CreateCheckpoint(QueryContext context, Checkpoint } } +void SingleFileStorageManager::Destroy() { + if (!load_complete) { + return; + } + vector> schemas; + // we scan the set of committed schemas + auto &catalog = Catalog::GetCatalog(db).Cast(); + catalog.ScanSchemas([&](SchemaCatalogEntry &entry) { schemas.push_back(entry); }); + + vector> tables; + for (auto &schema : schemas) { + schema.get().Scan(CatalogType::TABLE_ENTRY, [&](CatalogEntry &entry) { + if (entry.internal) { + return; + } + if (entry.type == CatalogType::TABLE_ENTRY) { + tables.push_back(entry.Cast()); + } + }); + } + + for (auto &table : tables) { + auto &data_table = table.get().GetStorage(); + data_table.Destroy(); + } +} + DatabaseSize SingleFileStorageManager::GetDatabaseSize() { // All members default to zero DatabaseSize ds; diff --git a/src/duckdb/src/storage/table/array_column_data.cpp b/src/duckdb/src/storage/table/array_column_data.cpp index 05964339a..7c8a12f13 100644 --- a/src/duckdb/src/storage/table/array_column_data.cpp +++ b/src/duckdb/src/storage/table/array_column_data.cpp @@ -97,7 +97,7 @@ idx_t ArrayColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t c void ArrayColumnData::Select(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result, SelectionVector &sel, idx_t sel_count) { - bool is_supported = !child_column->type.IsNested(); + bool is_supported = !child_column->type.IsNested() && child_column->type.InternalType() != PhysicalType::VARCHAR; if (!is_supported) { ColumnData::Select(transaction, vector_index, state, result, sel, sel_count); return; diff --git a/src/duckdb/src/storage/table/column_data.cpp b/src/duckdb/src/storage/table/column_data.cpp index 9200d7bef..c212fcb18 100644 --- a/src/duckdb/src/storage/table/column_data.cpp +++ b/src/duckdb/src/storage/table/column_data.cpp @@ -873,6 +873,17 @@ PersistentColumnData ColumnData::Serialize() { return result; } +void RealignColumnData(PersistentColumnData &column_data, idx_t new_start) { + idx_t current_start = new_start; + for (auto &pointer : column_data.pointers) { + pointer.row_start = current_start; + current_start += pointer.tuple_count; + } + for (auto &child : column_data.child_columns) { + RealignColumnData(child, new_start); + } +} + shared_ptr ColumnData::Deserialize(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row, ReadStream &source, const LogicalType &type) { auto entry = ColumnData::CreateColumn(block_manager, info, column_index, start_row, type, nullptr); @@ -890,6 +901,9 @@ shared_ptr ColumnData::Deserialize(BlockManager &block_manager, Data deserializer.Unset(); deserializer.End(); + // re-align data segments, in case our start_row has changed + RealignColumnData(persistent_column_data, start_row); + // initialize the column entry->InitializeColumn(persistent_column_data, entry->stats->statistics); return entry; diff --git a/src/duckdb/src/storage/table/column_data_checkpointer.cpp b/src/duckdb/src/storage/table/column_data_checkpointer.cpp index 1cf1d7de2..68c35f842 100644 --- a/src/duckdb/src/storage/table/column_data_checkpointer.cpp +++ b/src/duckdb/src/storage/table/column_data_checkpointer.cpp @@ -376,8 +376,22 @@ void ColumnDataCheckpointer::WritePersistentSegments(ColumnCheckpointState &stat for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) { auto segment = nodes[segment_idx].node.get(); if (segment->start != current_row) { + string extra_info; + for (auto &s : nodes) { + extra_info += "\n"; + extra_info += StringUtil::Format("Start %d, count %d", s.node->start, s.node->count.load()); + } + const_reference root = col_data; + while (root.get().HasParent()) { + root = root.get().Parent(); + } throw InternalException( - "Failure in RowGroup::Checkpoint - column data pointer is unaligned with row group start"); + "Failure in RowGroup::Checkpoint - column data pointer is unaligned with row group " + "start\nRow group start: %d\nRow group count %d\nCurrent row: %d\nSegment start: %d\nColumn index: " + "%d\nColumn type: %s\nRoot type: %s\nTable: %s.%s\nAll segments:%s", + row_group.start, row_group.count.load(), current_row, segment->start, root.get().column_index, + col_data.type, root.get().type, root.get().info.GetSchemaName(), root.get().info.GetTableName(), + extra_info); } current_row += segment->count; auto pointer = segment->GetDataPointer(); diff --git a/src/duckdb/src/storage/table/row_group.cpp b/src/duckdb/src/storage/table/row_group.cpp index f9b7e03de..40e20d2d4 100644 --- a/src/duckdb/src/storage/table/row_group.cpp +++ b/src/duckdb/src/storage/table/row_group.cpp @@ -551,14 +551,7 @@ void RowGroup::TemplatedScan(TransactionData transaction, CollectionScanState &s count = max_count; } auto &block_manager = GetBlockManager(); -#ifndef DUCKDB_ALTERNATIVE_VERIFY - // // in regular operation we only prefetch from remote file systems - // // when alternative verify is set, we always prefetch for testing purposes - if (block_manager.IsRemote()) -#else - if (!block_manager.InMemory()) -#endif - { + if (block_manager.Prefetch()) { PrefetchState prefetch_state; for (idx_t i = 0; i < column_ids.size(); i++) { const auto &column = column_ids[i]; @@ -925,38 +918,62 @@ CompressionType ColumnCheckpointInfo::GetCompressionType() { return info.compression_types[column_idx]; } -RowGroupWriteData RowGroup::WriteToDisk(RowGroupWriteInfo &info) { - RowGroupWriteData result; - result.states.reserve(columns.size()); - result.statistics.reserve(columns.size()); +vector RowGroup::WriteToDisk(RowGroupWriteInfo &info, + const vector> &row_groups) { + vector result; + if (row_groups.empty()) { + return result; + } - // Checkpoint the individual columns of the row group - // Here we're iterating over columns. Each column can have multiple segments. + idx_t column_count = row_groups[0].get().GetColumnCount(); + for (auto &row_group : row_groups) { + D_ASSERT(column_count == row_group.get().GetColumnCount()); + RowGroupWriteData write_data; + write_data.states.reserve(column_count); + write_data.statistics.reserve(column_count); + result.push_back(std::move(write_data)); + } + + // Checkpoint the row groups + // In order to co-locate columns across different row groups, we write column-at-a-time + // i.e. we first write column #0 of all row groups, then column #1, ... + + // Each column can have multiple segments. // (Some columns will be wider than others, and require different numbers // of blocks to encode.) Segments cannot span blocks. // // Some of these columns are composite (list, struct). The data is written // first sequentially, and the pointers are written later, so that the // pointers all end up densely packed, and thus more cache-friendly. - for (idx_t column_idx = 0; column_idx < GetColumnCount(); column_idx++) { - auto &column = GetColumn(column_idx); - if (column.start != start) { - throw InternalException("RowGroup::WriteToDisk - child-column is unaligned with row group"); - } - ColumnCheckpointInfo checkpoint_info(info, column_idx); - auto checkpoint_state = column.Checkpoint(*this, checkpoint_info); - D_ASSERT(checkpoint_state); + for (idx_t column_idx = 0; column_idx < column_count; column_idx++) { + for (idx_t row_group_idx = 0; row_group_idx < row_groups.size(); row_group_idx++) { + auto &row_group = row_groups[row_group_idx].get(); + auto &row_group_write_data = result[row_group_idx]; + auto &column = row_group.GetColumn(column_idx); + if (column.start != row_group.start) { + throw InternalException("RowGroup::WriteToDisk - child-column is unaligned with row group"); + } + ColumnCheckpointInfo checkpoint_info(info, column_idx); + auto checkpoint_state = column.Checkpoint(row_group, checkpoint_info); + D_ASSERT(checkpoint_state); - auto stats = checkpoint_state->GetStatistics(); - D_ASSERT(stats); + auto stats = checkpoint_state->GetStatistics(); + D_ASSERT(stats); - result.statistics.push_back(stats->Copy()); - result.states.push_back(std::move(checkpoint_state)); + row_group_write_data.statistics.push_back(stats->Copy()); + row_group_write_data.states.push_back(std::move(checkpoint_state)); + } } - D_ASSERT(result.states.size() == result.statistics.size()); return result; } +RowGroupWriteData RowGroup::WriteToDisk(RowGroupWriteInfo &info) { + vector> row_groups; + row_groups.push_back(*this); + auto result = WriteToDisk(info, row_groups); + return std::move(result[0]); +} + idx_t RowGroup::GetCommittedRowCount() { auto vinfo = GetVersionInfo(); if (!vinfo) { diff --git a/src/duckdb/src/storage/table/row_group_collection.cpp b/src/duckdb/src/storage/table/row_group_collection.cpp index 7e195cdc1..d44ca0544 100644 --- a/src/duckdb/src/storage/table/row_group_collection.cpp +++ b/src/duckdb/src/storage/table/row_group_collection.cpp @@ -132,8 +132,8 @@ void RowGroupCollection::AppendRowGroup(SegmentLock &l, idx_t start_row) { requires_new_row_group = false; } -RowGroup *RowGroupCollection::GetRowGroup(int64_t index) { - return (RowGroup *)row_groups->GetSegmentByIndex(index); +optional_ptr RowGroupCollection::GetRowGroup(int64_t index) { + return row_groups->GetSegmentByIndex(index); } void RowGroupCollection::Verify() { @@ -1008,8 +1008,8 @@ bool RowGroupCollection::ScheduleVacuumTasks(CollectionCheckpointState &checkpoi } idx_t merge_rows; idx_t next_idx = 0; - idx_t merge_count; - idx_t target_count; + idx_t merge_count = 0; + idx_t target_count = 0; bool perform_merge = false; // check if we can merge row groups adjacent to the current segment_idx // we try merging row groups into batches of 1-3 row groups @@ -1061,6 +1061,8 @@ bool RowGroupCollection::ScheduleVacuumTasks(CollectionCheckpointState &checkpoi return false; } // schedule the vacuum task + DUCKDB_LOG(checkpoint_state.writer.GetDatabase(), CheckpointLogType, GetAttached(), *info, segment_idx, merge_count, + target_count, merge_rows, state.row_start); auto vacuum_task = make_uniq(checkpoint_state, state, segment_idx, merge_count, target_count, merge_rows, state.row_start); checkpoint_state.executor->ScheduleTask(std::move(vacuum_task)); @@ -1107,6 +1109,8 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl // schedule a checkpoint task for this row group entry.node->MoveToCollection(*this, vacuum_state.row_start); if (writer.GetCheckpointType() != CheckpointType::VACUUM_ONLY) { + DUCKDB_LOG(checkpoint_state.writer.GetDatabase(), CheckpointLogType, GetAttached(), *info, segment_idx, + *entry.node); auto checkpoint_task = GetCheckpointTask(checkpoint_state, segment_idx); checkpoint_state.executor->ScheduleTask(std::move(checkpoint_task)); } @@ -1185,6 +1189,36 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl Verify(); } +//===--------------------------------------------------------------------===// +// Destroy +//===--------------------------------------------------------------------===// + +class DestroyTask : public BaseExecutorTask { +public: + DestroyTask(TaskExecutor &executor, unique_ptr row_group_p) + : BaseExecutorTask(executor), row_group(std::move(row_group_p)) { + } + + void ExecuteTask() override { + row_group.reset(); + } + +private: + unique_ptr row_group; +}; + +void RowGroupCollection::Destroy() { + auto l = row_groups->Lock(); + auto &segments = row_groups->ReferenceLoadedSegmentsMutable(l); + + TaskExecutor executor(TaskScheduler::GetScheduler(GetAttached().GetDatabase())); + for (auto &segment : segments) { + auto destroy_task = make_uniq(executor, std::move(segment.node)); + executor.ScheduleTask(std::move(destroy_task)); + } + executor.WorkOnTasks(); +} + //===--------------------------------------------------------------------===// // CommitDrop //===--------------------------------------------------------------------===// diff --git a/src/duckdb/src/transaction/duck_transaction_manager.cpp b/src/duckdb/src/transaction/duck_transaction_manager.cpp index 018cfbd2c..eace5283c 100644 --- a/src/duckdb/src/transaction/duck_transaction_manager.cpp +++ b/src/duckdb/src/transaction/duck_transaction_manager.cpp @@ -425,7 +425,6 @@ unique_ptr DuckTransactionManager::RemoveTransaction(DuckTransa } lowest_active_start = lowest_start_time; lowest_active_id = lowest_transaction_id; - auto lowest_stored_query = lowest_start_time; D_ASSERT(t_index != active_transactions.size()); // Decide if we need to store the transaction, or if we can schedule it for cleanup. @@ -458,7 +457,6 @@ unique_ptr DuckTransactionManager::RemoveTransaction(DuckTransa idx_t i = 0; for (; i < recently_committed_transactions.size(); i++) { D_ASSERT(recently_committed_transactions[i]); - lowest_stored_query = MinValue(recently_committed_transactions[i]->start_time, lowest_stored_query); if (recently_committed_transactions[i]->commit_id >= lowest_start_time) { // recently_committed_transactions is ordered on commit_id. // Thus, if the current commit_id is greater than diff --git a/src/duckdb/src/transaction/meta_transaction.cpp b/src/duckdb/src/transaction/meta_transaction.cpp index 92485584d..6fee5d96b 100644 --- a/src/duckdb/src/transaction/meta_transaction.cpp +++ b/src/duckdb/src/transaction/meta_transaction.cpp @@ -194,6 +194,16 @@ optional_ptr MetaTransaction::GetReferencedDatabase(const stri return nullptr; } +shared_ptr MetaTransaction::GetReferencedDatabaseOwning(const string &name) { + lock_guard guard(referenced_database_lock); + for (auto &entry : referenced_databases) { + if (StringUtil::CIEquals(entry.first.get().name, name)) { + return entry.second; + } + } + return nullptr; +} + void MetaTransaction::DetachDatabase(AttachedDatabase &database) { lock_guard guard(referenced_database_lock); used_databases.erase(database.GetName()); diff --git a/src/duckdb/src/verification/deserialized_statement_verifier.cpp b/src/duckdb/src/verification/deserialized_statement_verifier.cpp index 345828ea5..1ade815d7 100644 --- a/src/duckdb/src/verification/deserialized_statement_verifier.cpp +++ b/src/duckdb/src/verification/deserialized_statement_verifier.cpp @@ -17,7 +17,9 @@ DeserializedStatementVerifier::Create(const SQLStatement &statement, auto &select_stmt = statement.Cast(); Allocator allocator; MemoryStream stream(allocator); - BinarySerializer::Serialize(select_stmt, stream); + SerializationOptions options; + options.serialization_compatibility = SerializationCompatibility::FromString("latest"); + BinarySerializer::Serialize(select_stmt, stream, options); stream.Rewind(); auto result = BinaryDeserializer::Deserialize(stream); diff --git a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp index f59aac815..fc4104f94 100644 --- a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +++ b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp @@ -348,17 +348,17 @@ #include "extension/icu/third_party/icu/i18n/wintzimpl.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp" - -#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp" #include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp" #include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp" + +#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp" diff --git a/src/duckdb/ub_src_function_table.cpp b/src/duckdb/ub_src_function_table.cpp index d68316c8d..a9e44c2b5 100644 --- a/src/duckdb/ub_src_function_table.cpp +++ b/src/duckdb/ub_src_function_table.cpp @@ -20,6 +20,8 @@ #include "src/function/table/read_csv.cpp" +#include "src/function/table/read_duckdb.cpp" + #include "src/function/table/sniff_csv.cpp" #include "src/function/table/read_file.cpp" diff --git a/src/duckdb/ub_src_main_capi.cpp b/src/duckdb/ub_src_main_capi.cpp index eb1e182ee..30ba6a200 100644 --- a/src/duckdb/ub_src_main_capi.cpp +++ b/src/duckdb/ub_src_main_capi.cpp @@ -20,6 +20,8 @@ #include "src/main/capi/expression-c.cpp" +#include "src/main/capi/file_system-c.cpp" + #include "src/main/capi/helper-c.cpp" #include "src/main/capi/hugeint-c.cpp"