From 1dd57281db6e3e79dfc384ccc38a09d665c39357 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Thu, 2 Oct 2025 05:20:56 +0000 Subject: [PATCH] Update vendored DuckDB sources to 76a3383248 --- CMakeLists.txt | 78 ++++++++-------- .../scalar/generic/current_setting.cpp | 5 +- src/duckdb/extension/icu/icu-strptime.cpp | 3 +- .../extension/json/include/json_common.hpp | 6 +- src/duckdb/extension/json/json_functions.cpp | 6 +- .../extension/parquet/column_writer.cpp | 37 +++++--- .../parquet/include/column_writer.hpp | 2 +- .../include/reader/string_column_reader.hpp | 13 +++ .../include/writer/array_column_writer.hpp | 4 + .../extension/parquet/parquet_reader.cpp | 5 +- .../extension/parquet/parquet_statistics.cpp | 12 +-- .../parquet/reader/string_column_reader.cpp | 21 ++++- .../parquet/writer/array_column_writer.cpp | 50 +++++----- .../parquet/writer/struct_column_writer.cpp | 5 +- .../src/catalog/catalog_search_path.cpp | 4 +- src/duckdb/src/common/enum_util.cpp | 20 ++++ src/duckdb/src/common/string_util.cpp | 24 +++++ .../src/execution/index/art/art_merger.cpp | 3 - src/duckdb/src/execution/index/art/prefix.cpp | 4 + .../scanner/string_value_scanner.cpp | 2 +- .../operator/helper/physical_reset.cpp | 4 +- .../operator/schema/physical_attach.cpp | 2 +- .../physical_plan/plan_asof_join.cpp | 6 +- .../table/system/duckdb_connection_count.cpp | 45 +++++++++ .../function/table/system/duckdb_settings.cpp | 12 ++- .../src/function/table/system_functions.cpp | 1 + .../function/table/version/pragma_version.cpp | 6 +- .../src/include/duckdb/common/enum_util.hpp | 8 ++ .../src/include/duckdb/common/string_util.hpp | 2 + .../function/scalar/variant_functions.hpp | 2 +- .../function/table/system_functions.hpp | 4 + .../include/duckdb/logging/log_storage.hpp | 12 +-- .../src/include/duckdb/logging/log_type.hpp | 29 +++++- .../include/duckdb/main/attached_database.hpp | 4 + .../include/duckdb/main/client_context.hpp | 2 + .../src/include/duckdb/main/connection.hpp | 1 - .../duckdb/main/connection_manager.hpp | 1 - .../main/database_file_path_manager.hpp | 13 ++- .../include/duckdb/main/database_manager.hpp | 3 + .../src/include/duckdb/planner/binder.hpp | 65 ++++++++++++- .../expression_binder/lateral_binder.hpp | 4 +- .../duckdb/planner/operator/logical_cte.hpp | 2 +- .../operator/logical_dependent_join.hpp | 6 +- .../subquery/flatten_dependent_join.hpp | 4 +- .../subquery/has_correlated_expressions.hpp | 4 +- .../planner/subquery/rewrite_cte_scan.hpp | 4 +- .../duckdb/planner/tableref/bound_joinref.hpp | 2 +- .../storage/compression/alp/alp_analyze.hpp | 7 +- .../compression/alprd/alprd_analyze.hpp | 6 +- .../storage/metadata/metadata_manager.hpp | 16 ++-- .../storage/statistics/string_stats.hpp | 2 + src/duckdb/src/logging/log_types.cpp | 63 +++++++++++++ src/duckdb/src/main/attached_database.cpp | 19 +++- src/duckdb/src/main/client_context.cpp | 46 ++++++---- src/duckdb/src/main/connection.cpp | 7 +- .../src/main/database_file_path_manager.cpp | 29 ++++-- src/duckdb/src/main/database_manager.cpp | 21 ++++- src/duckdb/src/main/http/http_util.cpp | 4 +- .../rule/comparison_simplification.cpp | 10 +- .../statement/transform_create_function.cpp | 2 + src/duckdb/src/planner/binder.cpp | 4 +- .../binder/query_node/plan_subquery.cpp | 14 +-- .../planner/binder/statement/bind_create.cpp | 6 +- .../binder/statement/bind_merge_into.cpp | 12 ++- .../planner/binder/tableref/bind_pivot.cpp | 11 ++- .../binder/tableref/bind_table_function.cpp | 11 ++- src/duckdb/src/planner/expression_binder.cpp | 4 +- .../expression_binder/lateral_binder.cpp | 22 ++--- .../table_function_binder.cpp | 4 + .../operator/logical_dependent_join.cpp | 4 +- .../subquery/flatten_dependent_join.cpp | 26 +++--- .../subquery/has_correlated_expressions.cpp | 2 +- .../src/planner/subquery/rewrite_cte_scan.cpp | 4 +- .../src/storage/compression/bitpacking.cpp | 3 +- .../src/storage/metadata/metadata_manager.cpp | 92 ++++++++++++++----- .../src/storage/statistics/string_stats.cpp | 8 ++ .../src/storage/table/array_column_data.cpp | 2 +- src/duckdb/src/storage/table/column_data.cpp | 14 +++ .../table/column_data_checkpointer.cpp | 16 +++- .../storage/table/row_group_collection.cpp | 8 +- src/duckdb/third_party/httplib/httplib.hpp | 7 +- .../ub_extension_icu_third_party_icu_i18n.cpp | 12 +-- src/duckdb/ub_src_function_table_system.cpp | 2 + 83 files changed, 777 insertions(+), 295 deletions(-) create mode 100644 src/duckdb/src/function/table/system/duckdb_connection_count.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index dd8af71b2..671bbb98f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -347,48 +347,48 @@ set(DUCKDB_SRC_FILES src/duckdb/third_party/zstd/dict/divsufsort.cpp src/duckdb/third_party/zstd/dict/fastcover.cpp src/duckdb/third_party/zstd/dict/zdict.cpp - src/duckdb/extension/core_functions/core_functions_extension.cpp - src/duckdb/extension/core_functions/function_list.cpp src/duckdb/extension/core_functions/lambda_functions.cpp - src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp + src/duckdb/extension/core_functions/function_list.cpp + src/duckdb/extension/core_functions/core_functions_extension.cpp + src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp src/duckdb/ub_extension_core_functions_aggregate_nested.cpp - src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp src/duckdb/ub_extension_core_functions_aggregate_regression.cpp - src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp - src/duckdb/ub_extension_core_functions_scalar_generic.cpp - src/duckdb/ub_extension_core_functions_scalar_array.cpp - src/duckdb/ub_extension_core_functions_scalar_random.cpp - src/duckdb/ub_extension_core_functions_scalar_list.cpp - src/duckdb/ub_extension_core_functions_scalar_struct.cpp - src/duckdb/ub_extension_core_functions_scalar_date.cpp - src/duckdb/ub_extension_core_functions_scalar_enum.cpp + src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp + src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp + src/duckdb/ub_extension_core_functions_scalar_string.cpp + src/duckdb/ub_extension_core_functions_scalar_bit.cpp src/duckdb/ub_extension_core_functions_scalar_operators.cpp + src/duckdb/ub_extension_core_functions_scalar_enum.cpp + src/duckdb/ub_extension_core_functions_scalar_map.cpp + src/duckdb/ub_extension_core_functions_scalar_random.cpp src/duckdb/ub_extension_core_functions_scalar_math.cpp - src/duckdb/ub_extension_core_functions_scalar_string.cpp + src/duckdb/ub_extension_core_functions_scalar_union.cpp + src/duckdb/ub_extension_core_functions_scalar_generic.cpp + src/duckdb/ub_extension_core_functions_scalar_struct.cpp + src/duckdb/ub_extension_core_functions_scalar_list.cpp + src/duckdb/ub_extension_core_functions_scalar_array.cpp src/duckdb/ub_extension_core_functions_scalar_debug.cpp - src/duckdb/ub_extension_core_functions_scalar_bit.cpp src/duckdb/ub_extension_core_functions_scalar_blob.cpp - src/duckdb/ub_extension_core_functions_scalar_union.cpp - src/duckdb/ub_extension_core_functions_scalar_map.cpp - src/duckdb/extension/parquet/parquet_crypto.cpp - src/duckdb/extension/parquet/parquet_reader.cpp - src/duckdb/extension/parquet/parquet_metadata.cpp + src/duckdb/ub_extension_core_functions_scalar_date.cpp src/duckdb/extension/parquet/parquet_writer.cpp src/duckdb/extension/parquet/zstd_file_system.cpp + src/duckdb/extension/parquet/parquet_crypto.cpp + src/duckdb/extension/parquet/parquet_reader.cpp src/duckdb/extension/parquet/parquet_timestamp.cpp + src/duckdb/extension/parquet/parquet_float16.cpp + src/duckdb/extension/parquet/parquet_statistics.cpp src/duckdb/extension/parquet/parquet_multi_file_info.cpp - src/duckdb/extension/parquet/column_writer.cpp + src/duckdb/extension/parquet/column_reader.cpp src/duckdb/extension/parquet/geo_parquet.cpp - src/duckdb/extension/parquet/parquet_file_metadata_cache.cpp - src/duckdb/extension/parquet/parquet_statistics.cpp src/duckdb/extension/parquet/parquet_extension.cpp - src/duckdb/extension/parquet/parquet_float16.cpp + src/duckdb/extension/parquet/column_writer.cpp + src/duckdb/extension/parquet/parquet_file_metadata_cache.cpp src/duckdb/extension/parquet/serialize_parquet.cpp - src/duckdb/extension/parquet/column_reader.cpp + src/duckdb/extension/parquet/parquet_metadata.cpp src/duckdb/ub_extension_parquet_decoder.cpp - src/duckdb/ub_extension_parquet_writer.cpp src/duckdb/ub_extension_parquet_reader.cpp src/duckdb/ub_extension_parquet_reader_variant.cpp + src/duckdb/ub_extension_parquet_writer.cpp src/duckdb/third_party/parquet/parquet_types.cpp src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp src/duckdb/third_party/thrift/thrift/transport/TTransportException.cpp @@ -427,32 +427,32 @@ set(DUCKDB_SRC_FILES src/duckdb/third_party/brotli/enc/metablock.cpp src/duckdb/third_party/brotli/enc/static_dict.cpp src/duckdb/third_party/brotli/enc/utf8_util.cpp - src/duckdb/extension/icu/./icu-datetrunc.cpp - src/duckdb/extension/icu/./icu-timezone.cpp - src/duckdb/extension/icu/./icu-current.cpp - src/duckdb/extension/icu/./icu-list-range.cpp - src/duckdb/extension/icu/./icu-datefunc.cpp - src/duckdb/extension/icu/./icu-strptime.cpp - src/duckdb/extension/icu/./icu-dateadd.cpp src/duckdb/extension/icu/./icu_extension.cpp - src/duckdb/extension/icu/./icu-timebucket.cpp - src/duckdb/extension/icu/./icu-table-range.cpp + src/duckdb/extension/icu/./icu-strptime.cpp src/duckdb/extension/icu/./icu-datepart.cpp src/duckdb/extension/icu/./icu-makedate.cpp + src/duckdb/extension/icu/./icu-datefunc.cpp + src/duckdb/extension/icu/./icu-current.cpp + src/duckdb/extension/icu/./icu-timezone.cpp + src/duckdb/extension/icu/./icu-table-range.cpp + src/duckdb/extension/icu/./icu-dateadd.cpp + src/duckdb/extension/icu/./icu-timebucket.cpp src/duckdb/extension/icu/./icu-datesub.cpp + src/duckdb/extension/icu/./icu-datetrunc.cpp + src/duckdb/extension/icu/./icu-list-range.cpp src/duckdb/ub_extension_icu_third_party_icu_common.cpp src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp + src/duckdb/extension/json/json_reader.cpp src/duckdb/extension/json/json_functions.cpp + src/duckdb/extension/json/json_extension.cpp + src/duckdb/extension/json/json_scan.cpp src/duckdb/extension/json/json_multi_file_info.cpp + src/duckdb/extension/json/json_enums.cpp src/duckdb/extension/json/serialize_json.cpp src/duckdb/extension/json/json_common.cpp - src/duckdb/extension/json/json_scan.cpp - src/duckdb/extension/json/json_enums.cpp - src/duckdb/extension/json/json_reader.cpp - src/duckdb/extension/json/json_serializer.cpp - src/duckdb/extension/json/json_extension.cpp src/duckdb/extension/json/json_deserializer.cpp + src/duckdb/extension/json/json_serializer.cpp src/duckdb/ub_extension_json_json_functions.cpp) set(JEMALLOC_SRC_FILES diff --git a/src/duckdb/extension/core_functions/scalar/generic/current_setting.cpp b/src/duckdb/extension/core_functions/scalar/generic/current_setting.cpp index 4464f0544..31e1afe17 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/current_setting.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/current_setting.cpp @@ -53,10 +53,7 @@ unique_ptr CurrentSettingBind(ClientContext &context, ScalarFuncti if (!context.TryGetCurrentSetting(key, val)) { auto extension_name = Catalog::AutoloadExtensionByConfigName(context, key); // If autoloader didn't throw, the config is now available - if (!context.TryGetCurrentSetting(key, val)) { - throw InternalException("Extension %s did not provide the '%s' config setting", - extension_name.ToStdString(), key); - } + context.TryGetCurrentSetting(key, val); } bound_function.return_type = val.type(); diff --git a/src/duckdb/extension/icu/icu-strptime.cpp b/src/duckdb/extension/icu/icu-strptime.cpp index d7e1283c9..63e383dcd 100644 --- a/src/duckdb/extension/icu/icu-strptime.cpp +++ b/src/duckdb/extension/icu/icu-strptime.cpp @@ -221,8 +221,9 @@ struct ICUStrptime : public ICUDateFunc { if (!error.empty()) { throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error); } - // If any format has UTC offsets, then we have to produce TSTZ + // If any format has UTC offsets or names, then we have to produce TSTZ has_tz = has_tz || format.HasFormatSpecifier(StrTimeSpecifier::TZ_NAME); + has_tz = has_tz || format.HasFormatSpecifier(StrTimeSpecifier::UTC_OFFSET); formats.emplace_back(format); } if (has_tz) { diff --git a/src/duckdb/extension/json/include/json_common.hpp b/src/duckdb/extension/json/include/json_common.hpp index f6dd78f05..81bbd6868 100644 --- a/src/duckdb/extension/json/include/json_common.hpp +++ b/src/duckdb/extension/json/include/json_common.hpp @@ -13,6 +13,7 @@ #include "duckdb/common/operator/string_cast.hpp" #include "duckdb/planner/expression/bound_function_expression.hpp" #include "yyjson.hpp" +#include "duckdb/common/types/blob.hpp" using namespace duckdb_yyjson; // NOLINT @@ -228,11 +229,8 @@ struct JSONCommon { static string FormatParseError(const char *data, idx_t length, yyjson_read_err &error, const string &extra = "") { D_ASSERT(error.code != YYJSON_READ_SUCCESS); - // Go to blob so we can have a better error message for weird strings - auto blob = Value::BLOB(string(data, length)); // Truncate, so we don't print megabytes worth of JSON - string input = blob.ToString(); - input = input.length() > 50 ? string(input.c_str(), 47) + "..." : input; + auto input = length > 50 ? string(data, 47) + "..." : string(data, length); // Have to replace \r, otherwise output is unreadable input = StringUtil::Replace(input, "\r", "\\r"); return StringUtil::Format("Malformed JSON at byte %lld of input: %s. %s Input: \"%s\"", error.pos, error.msg, diff --git a/src/duckdb/extension/json/json_functions.cpp b/src/duckdb/extension/json/json_functions.cpp index 2d09828c3..2d0ef11f5 100644 --- a/src/duckdb/extension/json/json_functions.cpp +++ b/src/duckdb/extension/json/json_functions.cpp @@ -394,7 +394,11 @@ void JSONFunctions::RegisterSimpleCastFunctions(ExtensionLoader &loader) { loader.RegisterCastFunction(LogicalType::LIST(LogicalType::JSON()), LogicalTypeId::VARCHAR, CastJSONListToVarchar, json_list_to_varchar_cost); - // VARCHAR to JSON[] (also needs a special case otherwise get a VARCHAR -> VARCHAR[] cast first) + // JSON[] to JSON is allowed implicitly + loader.RegisterCastFunction(LogicalType::LIST(LogicalType::JSON()), LogicalType::JSON(), CastJSONListToVarchar, + 100); + + // VARCHAR to JSON[] (also needs a special case otherwise we get a VARCHAR -> VARCHAR[] cast first) const auto varchar_to_json_list_cost = CastFunctionSet::ImplicitCastCost(db, LogicalType::VARCHAR, LogicalType::LIST(LogicalType::JSON())) - 1; BoundCastInfo varchar_to_json_list_info(CastVarcharToJSONList, nullptr, JSONFunctionLocalState::InitCastLocalState); diff --git a/src/duckdb/extension/parquet/column_writer.cpp b/src/duckdb/extension/parquet/column_writer.cpp index 90d800162..7cdd51bc5 100644 --- a/src/duckdb/extension/parquet/column_writer.cpp +++ b/src/duckdb/extension/parquet/column_writer.cpp @@ -187,9 +187,12 @@ void ColumnWriter::HandleRepeatLevels(ColumnWriterState &state, ColumnWriterStat // no repeat levels without a parent node return; } - while (state.repetition_levels.size() < parent->repetition_levels.size()) { - state.repetition_levels.push_back(parent->repetition_levels[state.repetition_levels.size()]); + if (state.repetition_levels.size() >= parent->repetition_levels.size()) { + return; } + state.repetition_levels.insert(state.repetition_levels.end(), + parent->repetition_levels.begin() + state.repetition_levels.size(), + parent->repetition_levels.end()); } void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, const ValidityMask &validity, @@ -200,36 +203,41 @@ void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterStat while (state.definition_levels.size() < parent->definition_levels.size()) { idx_t current_index = state.definition_levels.size(); if (parent->definition_levels[current_index] != PARQUET_DEFINE_VALID) { + //! Inherit nulls from parent state.definition_levels.push_back(parent->definition_levels[current_index]); state.parent_null_count++; } else if (validity.RowIsValid(vector_index)) { + //! Produce a non-null define state.definition_levels.push_back(define_value); } else { + //! Produce a null define if (!can_have_nulls) { throw IOException("Parquet writer: map key column is not allowed to contain NULL values"); } state.null_count++; state.definition_levels.push_back(null_value); } + D_ASSERT(parent->is_empty.empty() || current_index < parent->is_empty.size()); if (parent->is_empty.empty() || !parent->is_empty[current_index]) { vector_index++; } } + return; + } + + // no parent: set definition levels only from this validity mask + if (validity.AllValid()) { + state.definition_levels.insert(state.definition_levels.end(), count, define_value); } else { - // no parent: set definition levels only from this validity mask - if (validity.AllValid()) { - state.definition_levels.insert(state.definition_levels.end(), count, define_value); - } else { - for (idx_t i = 0; i < count; i++) { - const auto is_null = !validity.RowIsValid(i); - state.definition_levels.emplace_back(is_null ? null_value : define_value); - state.null_count += is_null; - } - } - if (!can_have_nulls && state.null_count != 0) { - throw IOException("Parquet writer: map key column is not allowed to contain NULL values"); + for (idx_t i = 0; i < count; i++) { + const auto is_null = !validity.RowIsValid(i); + state.definition_levels.emplace_back(is_null ? null_value : define_value); + state.null_count += is_null; } } + if (!can_have_nulls && state.null_count != 0) { + throw IOException("Parquet writer: map key column is not allowed to contain NULL values"); + } } //===--------------------------------------------------------------------===// @@ -368,6 +376,7 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector definition_levels; unsafe_vector repetition_levels; - vector is_empty; + unsafe_vector is_empty; idx_t parent_null_count = 0; idx_t null_count = 0; diff --git a/src/duckdb/extension/parquet/include/reader/string_column_reader.hpp b/src/duckdb/extension/parquet/include/reader/string_column_reader.hpp index 4bc19516a..bfc0692af 100644 --- a/src/duckdb/extension/parquet/include/reader/string_column_reader.hpp +++ b/src/duckdb/extension/parquet/include/reader/string_column_reader.hpp @@ -14,12 +14,25 @@ namespace duckdb { class StringColumnReader : public ColumnReader { + enum class StringColumnType : uint8_t { VARCHAR, JSON, OTHER }; + + static StringColumnType GetStringColumnType(const LogicalType &type) { + if (type.IsJSONType()) { + return StringColumnType::JSON; + } + if (type.id() == LogicalTypeId::VARCHAR) { + return StringColumnType::VARCHAR; + } + return StringColumnType::OTHER; + } + public: static constexpr const PhysicalType TYPE = PhysicalType::VARCHAR; public: StringColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema); idx_t fixed_width_string_length; + const StringColumnType string_column_type; public: static void VerifyString(const char *str_data, uint32_t str_len, const bool isVarchar); diff --git a/src/duckdb/extension/parquet/include/writer/array_column_writer.hpp b/src/duckdb/extension/parquet/include/writer/array_column_writer.hpp index 630bfd17f..1ebb16c04 100644 --- a/src/duckdb/extension/parquet/include/writer/array_column_writer.hpp +++ b/src/duckdb/extension/parquet/include/writer/array_column_writer.hpp @@ -25,6 +25,10 @@ class ArrayColumnWriter : public ListColumnWriter { void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count, bool vector_can_span_multiple_pages) override; void Write(ColumnWriterState &state, Vector &vector, idx_t count) override; + +protected: + void WriteArrayState(ListColumnWriterState &state, idx_t array_size, uint16_t first_repeat_level, + idx_t define_value, const bool is_empty = false); }; } // namespace duckdb diff --git a/src/duckdb/extension/parquet/parquet_reader.cpp b/src/duckdb/extension/parquet/parquet_reader.cpp index cad5f3a9b..8287b7b0b 100644 --- a/src/duckdb/extension/parquet/parquet_reader.cpp +++ b/src/duckdb/extension/parquet/parquet_reader.cpp @@ -570,7 +570,10 @@ ParquetColumnSchema ParquetReader::ParseSchemaRecursive(idx_t depth, idx_t max_d auto file_meta_data = GetFileMetadata(); D_ASSERT(file_meta_data); - D_ASSERT(next_schema_idx < file_meta_data->schema.size()); + if (next_schema_idx >= file_meta_data->schema.size()) { + throw InvalidInputException("Malformed Parquet schema in file \"%s\": invalid schema index %d", file.path, + next_schema_idx); + } auto &s_ele = file_meta_data->schema[next_schema_idx]; auto this_idx = next_schema_idx; diff --git a/src/duckdb/extension/parquet/parquet_statistics.cpp b/src/duckdb/extension/parquet/parquet_statistics.cpp index 5f7d93718..a22613271 100644 --- a/src/duckdb/extension/parquet/parquet_statistics.cpp +++ b/src/duckdb/extension/parquet/parquet_statistics.cpp @@ -395,23 +395,21 @@ unique_ptr ParquetStatisticsUtils::TransformColumnStatistics(con } break; case LogicalTypeId::VARCHAR: { - auto string_stats = StringStats::CreateEmpty(type); + auto string_stats = StringStats::CreateUnknown(type); if (parquet_stats.__isset.min_value) { StringColumnReader::VerifyString(parquet_stats.min_value.c_str(), parquet_stats.min_value.size(), true); - StringStats::Update(string_stats, parquet_stats.min_value); + StringStats::SetMin(string_stats, parquet_stats.min_value); } else if (parquet_stats.__isset.min) { StringColumnReader::VerifyString(parquet_stats.min.c_str(), parquet_stats.min.size(), true); - StringStats::Update(string_stats, parquet_stats.min); + StringStats::SetMin(string_stats, parquet_stats.min); } if (parquet_stats.__isset.max_value) { StringColumnReader::VerifyString(parquet_stats.max_value.c_str(), parquet_stats.max_value.size(), true); - StringStats::Update(string_stats, parquet_stats.max_value); + StringStats::SetMax(string_stats, parquet_stats.max_value); } else if (parquet_stats.__isset.max) { StringColumnReader::VerifyString(parquet_stats.max.c_str(), parquet_stats.max.size(), true); - StringStats::Update(string_stats, parquet_stats.max); + StringStats::SetMax(string_stats, parquet_stats.max); } - StringStats::SetContainsUnicode(string_stats); - StringStats::ResetMaxStringLength(string_stats); row_group_stats = string_stats.ToUnique(); break; } diff --git a/src/duckdb/extension/parquet/reader/string_column_reader.cpp b/src/duckdb/extension/parquet/reader/string_column_reader.cpp index 6b2a3db6d..867dbb4d8 100644 --- a/src/duckdb/extension/parquet/reader/string_column_reader.cpp +++ b/src/duckdb/extension/parquet/reader/string_column_reader.cpp @@ -9,7 +9,7 @@ namespace duckdb { // String Column Reader //===--------------------------------------------------------------------===// StringColumnReader::StringColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema) - : ColumnReader(reader, schema) { + : ColumnReader(reader, schema), string_column_type(GetStringColumnType(Type())) { fixed_width_string_length = 0; if (schema.parquet_type == Type::FIXED_LEN_BYTE_ARRAY) { fixed_width_string_length = schema.type_length; @@ -26,13 +26,26 @@ void StringColumnReader::VerifyString(const char *str_data, uint32_t str_len, co size_t pos; auto utf_type = Utf8Proc::Analyze(str_data, str_len, &reason, &pos); if (utf_type == UnicodeType::INVALID) { - throw InvalidInputException("Invalid string encoding found in Parquet file: value \"" + - Blob::ToString(string_t(str_data, str_len)) + "\" is not valid UTF8!"); + throw InvalidInputException("Invalid string encoding found in Parquet file: value \"%s\" is not valid UTF8!", + Blob::ToString(string_t(str_data, str_len))); } } void StringColumnReader::VerifyString(const char *str_data, uint32_t str_len) { - VerifyString(str_data, str_len, Type().id() == LogicalTypeId::VARCHAR); + switch (string_column_type) { + case StringColumnType::VARCHAR: + VerifyString(str_data, str_len, true); + break; + case StringColumnType::JSON: { + const auto error = StringUtil::ValidateJSON(str_data, str_len); + if (!error.empty()) { + throw InvalidInputException("Invalid JSON found in Parquet file: %s", error); + } + break; + } + default: + break; + } } class ParquetStringVectorBuffer : public VectorBuffer { diff --git a/src/duckdb/extension/parquet/writer/array_column_writer.cpp b/src/duckdb/extension/parquet/writer/array_column_writer.cpp index 024dbe819..60284ff28 100644 --- a/src/duckdb/extension/parquet/writer/array_column_writer.cpp +++ b/src/duckdb/extension/parquet/writer/array_column_writer.cpp @@ -9,6 +9,22 @@ void ArrayColumnWriter::Analyze(ColumnWriterState &state_p, ColumnWriterState *p child_writer->Analyze(*state.child_state, &state_p, array_child, array_size * count); } +void ArrayColumnWriter::WriteArrayState(ListColumnWriterState &state, idx_t array_size, uint16_t first_repeat_level, + idx_t define_value, const bool is_empty) { + state.definition_levels.push_back(define_value); + state.repetition_levels.push_back(first_repeat_level); + state.is_empty.push_back(is_empty); + + if (is_empty) { + return; + } + for (idx_t k = 1; k < array_size; k++) { + state.repetition_levels.push_back(MaxRepeat() + 1); + state.definition_levels.push_back(define_value); + state.is_empty.push_back(false); + } +} + void ArrayColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count, bool vector_can_span_multiple_pages) { auto &state = state_p.Cast(); @@ -25,42 +41,20 @@ void ArrayColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *p for (idx_t i = start; i < vcount; i++) { idx_t parent_index = state.parent_index + i; if (parent && !parent->is_empty.empty() && parent->is_empty[parent_index]) { - state.definition_levels.push_back(parent->definition_levels[parent_index]); - state.repetition_levels.push_back(parent->repetition_levels[parent_index]); - state.is_empty.push_back(true); + WriteArrayState(state, array_size, parent->repetition_levels[parent_index], + parent->definition_levels[parent_index], true); continue; } auto first_repeat_level = parent && !parent->repetition_levels.empty() ? parent->repetition_levels[parent_index] : MaxRepeat(); if (parent && parent->definition_levels[parent_index] != PARQUET_DEFINE_VALID) { - state.definition_levels.push_back(parent->definition_levels[parent_index]); - state.repetition_levels.push_back(first_repeat_level); - state.is_empty.push_back(false); - for (idx_t k = 1; k < array_size; k++) { - state.repetition_levels.push_back(MaxRepeat() + 1); - state.definition_levels.push_back(parent->definition_levels[parent_index]); - state.is_empty.push_back(false); - } + WriteArrayState(state, array_size, first_repeat_level, parent->definition_levels[parent_index]); } else if (validity.RowIsValid(vector_index)) { // push the repetition levels - state.definition_levels.push_back(PARQUET_DEFINE_VALID); - state.is_empty.push_back(false); - - state.repetition_levels.push_back(first_repeat_level); - for (idx_t k = 1; k < array_size; k++) { - state.repetition_levels.push_back(MaxRepeat() + 1); - state.definition_levels.push_back(PARQUET_DEFINE_VALID); - state.is_empty.push_back(false); - } + WriteArrayState(state, array_size, first_repeat_level, PARQUET_DEFINE_VALID); } else { - state.definition_levels.push_back(MaxDefine() - 1); - state.repetition_levels.push_back(first_repeat_level); - state.is_empty.push_back(false); - for (idx_t k = 1; k < array_size; k++) { - state.repetition_levels.push_back(MaxRepeat() + 1); - state.definition_levels.push_back(MaxDefine() - 1); - state.is_empty.push_back(false); - } + //! Produce a null + WriteArrayState(state, array_size, first_repeat_level, MaxDefine() - 1); } vector_index++; } diff --git a/src/duckdb/extension/parquet/writer/struct_column_writer.cpp b/src/duckdb/extension/parquet/writer/struct_column_writer.cpp index c70c35ba2..e65515ad5 100644 --- a/src/duckdb/extension/parquet/writer/struct_column_writer.cpp +++ b/src/duckdb/extension/parquet/writer/struct_column_writer.cpp @@ -62,8 +62,9 @@ void StructColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState * auto &validity = FlatVector::Validity(vector); if (parent) { // propagate empty entries from the parent - while (state.is_empty.size() < parent->is_empty.size()) { - state.is_empty.push_back(parent->is_empty[state.is_empty.size()]); + if (state.is_empty.size() < parent->is_empty.size()) { + state.is_empty.insert(state.is_empty.end(), parent->is_empty.begin() + state.is_empty.size(), + parent->is_empty.end()); } } HandleRepeatLevels(state_p, parent, count, MaxRepeat()); diff --git a/src/duckdb/src/catalog/catalog_search_path.cpp b/src/duckdb/src/catalog/catalog_search_path.cpp index 6af56c22d..6388b9134 100644 --- a/src/duckdb/src/catalog/catalog_search_path.cpp +++ b/src/duckdb/src/catalog/catalog_search_path.cpp @@ -24,8 +24,8 @@ string CatalogSearchEntry::ToString() const { string CatalogSearchEntry::WriteOptionallyQuoted(const string &input) { for (idx_t i = 0; i < input.size(); i++) { - if (input[i] == '.' || input[i] == ',') { - return "\"" + input + "\""; + if (input[i] == '.' || input[i] == ',' || input[i] == '"') { + return "\"" + StringUtil::Replace(input, "\"", "\"\"") + "\""; } } return input; diff --git a/src/duckdb/src/common/enum_util.cpp b/src/duckdb/src/common/enum_util.cpp index da03abf6d..b982b08ac 100644 --- a/src/duckdb/src/common/enum_util.cpp +++ b/src/duckdb/src/common/enum_util.cpp @@ -117,6 +117,7 @@ #include "duckdb/function/table/arrow/enum/arrow_variable_size_type.hpp" #include "duckdb/function/table_function.hpp" #include "duckdb/function/window/window_merge_sort_tree.hpp" +#include "duckdb/logging/log_storage.hpp" #include "duckdb/logging/logging.hpp" #include "duckdb/main/appender.hpp" #include "duckdb/main/capi/capi_internal.hpp" @@ -2439,6 +2440,25 @@ LogMode EnumUtil::FromString(const char *value) { return static_cast(StringUtil::StringToEnum(GetLogModeValues(), 3, "LogMode", value)); } +const StringUtil::EnumStringLiteral *GetLoggingTargetTableValues() { + static constexpr StringUtil::EnumStringLiteral values[] { + { static_cast(LoggingTargetTable::ALL_LOGS), "ALL_LOGS" }, + { static_cast(LoggingTargetTable::LOG_ENTRIES), "LOG_ENTRIES" }, + { static_cast(LoggingTargetTable::LOG_CONTEXTS), "LOG_CONTEXTS" } + }; + return values; +} + +template<> +const char* EnumUtil::ToChars(LoggingTargetTable value) { + return StringUtil::EnumToString(GetLoggingTargetTableValues(), 3, "LoggingTargetTable", static_cast(value)); +} + +template<> +LoggingTargetTable EnumUtil::FromString(const char *value) { + return static_cast(StringUtil::StringToEnum(GetLoggingTargetTableValues(), 3, "LoggingTargetTable", value)); +} + const StringUtil::EnumStringLiteral *GetLogicalOperatorTypeValues() { static constexpr StringUtil::EnumStringLiteral values[] { { static_cast(LogicalOperatorType::LOGICAL_INVALID), "LOGICAL_INVALID" }, diff --git a/src/duckdb/src/common/string_util.cpp b/src/duckdb/src/common/string_util.cpp index 71e865880..1e6309ee0 100644 --- a/src/duckdb/src/common/string_util.cpp +++ b/src/duckdb/src/common/string_util.cpp @@ -572,6 +572,15 @@ static unique_ptr ParseJSON(const string &json, yyjson_doc *doc, yy const bool bool_val = yyjson_get_bool(root); return make_uniq(bool_val ? "true" : "false"); } + case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT: + return make_uniq(to_string(unsafe_yyjson_get_uint(root))); + case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT: + return make_uniq(to_string(unsafe_yyjson_get_sint(root))); + case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL: + case YYJSON_TYPE_RAW | YYJSON_SUBTYPE_NONE: + return make_uniq(to_string(unsafe_yyjson_get_real(root))); + case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE: + return make_uniq("null"); default: yyjson_doc_free(doc); throw SerializationException("Failed to parse JSON string: %s", json); @@ -693,6 +702,21 @@ string StringUtil::ToComplexJSONMap(const ComplexJSON &complex_json) { return ComplexJSON::GetValueRecursive(complex_json); } +string StringUtil::ValidateJSON(const char *data, const idx_t &len) { + // Same flags as in JSON extension + static constexpr auto READ_FLAG = + YYJSON_READ_ALLOW_INF_AND_NAN | YYJSON_READ_ALLOW_TRAILING_COMMAS | YYJSON_READ_BIGNUM_AS_RAW; + yyjson_read_err error; + yyjson_doc *doc = yyjson_read_opts((char *)data, len, READ_FLAG, nullptr, &error); // NOLINT: for yyjson + if (error.code != YYJSON_READ_SUCCESS) { + return StringUtil::Format("Malformed JSON at byte %lld of input: %s. Input: \"%s\"", error.pos, error.msg, + string(data, len)); + } + + yyjson_doc_free(doc); + return string(); +} + string StringUtil::ExceptionToJSONMap(ExceptionType type, const string &message, const unordered_map &map) { D_ASSERT(map.find("exception_type") == map.end()); diff --git a/src/duckdb/src/execution/index/art/art_merger.cpp b/src/duckdb/src/execution/index/art/art_merger.cpp index 70781cbfb..61d2ec317 100644 --- a/src/duckdb/src/execution/index/art/art_merger.cpp +++ b/src/duckdb/src/execution/index/art/art_merger.cpp @@ -217,9 +217,6 @@ void ARTMerger::MergeNodeAndPrefix(Node &node, Node &prefix, const GateStatus pa auto child = node.GetChildMutable(art, byte); // Reduce the prefix to the bytes after pos. - // We always reduce by at least one byte, - // thus, if the prefix was a gate, it no longer is. - prefix.SetGateStatus(GateStatus::GATE_NOT_SET); Prefix::Reduce(art, prefix, pos); if (child) { diff --git a/src/duckdb/src/execution/index/art/prefix.cpp b/src/duckdb/src/execution/index/art/prefix.cpp index 00e94967a..1d7861135 100644 --- a/src/duckdb/src/execution/index/art/prefix.cpp +++ b/src/duckdb/src/execution/index/art/prefix.cpp @@ -100,6 +100,10 @@ void Prefix::Reduce(ART &art, Node &node, const idx_t pos) { D_ASSERT(node.HasMetadata()); D_ASSERT(pos < Count(art)); + // We always reduce by at least one byte, + // thus, if the prefix was a gate, it no longer is. + node.SetGateStatus(GateStatus::GATE_NOT_SET); + Prefix prefix(art, node); if (pos == idx_t(prefix.data[Count(art)] - 1)) { auto next = *prefix.ptr; diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index f390819a6..5ed14a992 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -797,7 +797,7 @@ void StringValueResult::NullPaddingQuotedNewlineCheck() const { // If we have null_padding set, we found a quoted new line, we are scanning the file in parallel; We error. LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); auto csv_error = CSVError::NullPaddingFail(state_machine.options, lines_per_batch, path); - error_handler.Error(csv_error, try_row); + error_handler.Error(csv_error, true); } } diff --git a/src/duckdb/src/execution/operator/helper/physical_reset.cpp b/src/duckdb/src/execution/operator/helper/physical_reset.cpp index 711cc1b3b..1f5baf75d 100644 --- a/src/duckdb/src/execution/operator/helper/physical_reset.cpp +++ b/src/duckdb/src/execution/operator/helper/physical_reset.cpp @@ -65,9 +65,9 @@ SourceResultType PhysicalReset::GetData(ExecutionContext &context, DataChunk &ch } if (variable_scope == SetScope::SESSION) { auto &client_config = ClientConfig::GetConfig(context.client); - client_config.set_variables.erase(name.ToStdString()); + client_config.set_variables.erase(option->name); } else { - config.ResetGenericOption(name); + config.ResetGenericOption(option->name); } return SourceResultType::FINISHED; } diff --git a/src/duckdb/src/execution/operator/schema/physical_attach.cpp b/src/duckdb/src/execution/operator/schema/physical_attach.cpp index 691cf13b8..cfb737a30 100644 --- a/src/duckdb/src/execution/operator/schema/physical_attach.cpp +++ b/src/duckdb/src/execution/operator/schema/physical_attach.cpp @@ -36,7 +36,7 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c if (info->on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT || info->on_conflict == OnCreateConflict::REPLACE_ON_CONFLICT) { // constant-time lookup in the catalog for the db name - auto existing_db = db_manager.GetDatabase(context.client, name); + auto existing_db = db_manager.GetDatabase(name); if (existing_db) { if ((existing_db->IsReadOnly() && options.access_mode == AccessMode::READ_WRITE) || (!existing_db->IsReadOnly() && options.access_mode == AccessMode::READ_ONLY)) { diff --git a/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp b/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp index 5759583c5..c457e9826 100644 --- a/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +++ b/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp @@ -27,7 +27,7 @@ PhysicalPlanGenerator::PlanAsOfLoopJoin(LogicalComparisonJoin &op, PhysicalOpera // // ∏ * \ pk // | - // Γ pk;first(P),arg_xxx(B,inequality) + // Γ pk;first(P),arg_xxx_null(B,inequality) // | // ∏ *,inequality // | @@ -88,13 +88,13 @@ PhysicalPlanGenerator::PlanAsOfLoopJoin(LogicalComparisonJoin &op, PhysicalOpera case ExpressionType::COMPARE_GREATERTHAN: D_ASSERT(asof_idx == op.conditions.size()); asof_idx = i; - arg_min_max = "arg_max"; + arg_min_max = "arg_max_null"; break; case ExpressionType::COMPARE_LESSTHANOREQUALTO: case ExpressionType::COMPARE_LESSTHAN: D_ASSERT(asof_idx == op.conditions.size()); asof_idx = i; - arg_min_max = "arg_min"; + arg_min_max = "arg_min_null"; break; case ExpressionType::COMPARE_EQUAL: case ExpressionType::COMPARE_NOTEQUAL: diff --git a/src/duckdb/src/function/table/system/duckdb_connection_count.cpp b/src/duckdb/src/function/table/system/duckdb_connection_count.cpp new file mode 100644 index 000000000..ce7857f3b --- /dev/null +++ b/src/duckdb/src/function/table/system/duckdb_connection_count.cpp @@ -0,0 +1,45 @@ +#include "duckdb/function/table/system_functions.hpp" + +#include "duckdb/main/client_context.hpp" +#include "duckdb/main/database.hpp" +#include "duckdb/main/connection_manager.hpp" + +namespace duckdb { + +struct DuckDBConnectionCountData : public GlobalTableFunctionState { + DuckDBConnectionCountData() : count(0), finished(false) { + } + idx_t count; + bool finished; +}; + +static unique_ptr DuckDBConnectionCountBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + names.emplace_back("count"); + return_types.emplace_back(LogicalType::UBIGINT); + return nullptr; +} + +unique_ptr DuckDBConnectionCountInit(ClientContext &context, TableFunctionInitInput &input) { + auto result = make_uniq(); + auto &conn_manager = context.db->GetConnectionManager(); + result->count = conn_manager.GetConnectionCount(); + return std::move(result); +} + +void DuckDBConnectionCountFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { + auto &data = data_p.global_state->Cast(); + if (data.finished) { + return; + } + output.SetValue(0, 0, Value::UBIGINT(data.count)); + output.SetCardinality(1); + data.finished = true; +} + +void DuckDBConnectionCountFun::RegisterFunction(BuiltinFunctions &set) { + set.AddFunction(TableFunction("duckdb_connection_count", {}, DuckDBConnectionCountFunction, + DuckDBConnectionCountBind, DuckDBConnectionCountInit)); +} + +} // namespace duckdb diff --git a/src/duckdb/src/function/table/system/duckdb_settings.cpp b/src/duckdb/src/function/table/system/duckdb_settings.cpp index 3ec34d908..9908854bd 100644 --- a/src/duckdb/src/function/table/system/duckdb_settings.cpp +++ b/src/duckdb/src/function/table/system/duckdb_settings.cpp @@ -12,6 +12,10 @@ struct DuckDBSettingValue { string input_type; string scope; vector aliases; + + inline bool operator<(const DuckDBSettingValue &rhs) const { + return name < rhs.name; + }; }; struct DuckDBSettingsData : public GlobalTableFunctionState { @@ -79,7 +83,12 @@ unique_ptr DuckDBSettingsInit(ClientContext &context, if (entry != aliases.end()) { value.aliases = std::move(entry->second); } - + for (auto &alias : value.aliases) { + DuckDBSettingValue alias_value = value; + alias_value.name = StringValue::Get(alias); + alias_value.aliases.clear(); + result->settings.push_back(std::move(alias_value)); + } result->settings.push_back(std::move(value)); } for (auto &ext_param : config.extension_parameters) { @@ -98,6 +107,7 @@ unique_ptr DuckDBSettingsInit(ClientContext &context, result->settings.push_back(std::move(value)); } + std::sort(result->settings.begin(), result->settings.end()); return std::move(result); } diff --git a/src/duckdb/src/function/table/system_functions.cpp b/src/duckdb/src/function/table/system_functions.cpp index d10ec5d31..0a6a03507 100644 --- a/src/duckdb/src/function/table/system_functions.cpp +++ b/src/duckdb/src/function/table/system_functions.cpp @@ -18,6 +18,7 @@ void BuiltinFunctions::RegisterSQLiteFunctions() { PragmaDatabaseSize::RegisterFunction(*this); PragmaUserAgent::RegisterFunction(*this); + DuckDBConnectionCountFun::RegisterFunction(*this); DuckDBApproxDatabaseCountFun::RegisterFunction(*this); DuckDBColumnsFun::RegisterFunction(*this); DuckDBConstraintsFun::RegisterFunction(*this); diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 7f6562164..a7274fec9 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "0" +#define DUCKDB_PATCH_VERSION "1-dev136" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 4 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.4.0" +#define DUCKDB_VERSION "v1.4.1-dev136" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "b8a06e4a22" +#define DUCKDB_SOURCE_ID "76a3383248" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/enum_util.hpp b/src/duckdb/src/include/duckdb/common/enum_util.hpp index 415f86a2f..adf038cec 100644 --- a/src/duckdb/src/include/duckdb/common/enum_util.hpp +++ b/src/duckdb/src/include/duckdb/common/enum_util.hpp @@ -234,6 +234,8 @@ enum class LogLevel : uint8_t; enum class LogMode : uint8_t; +enum class LoggingTargetTable : uint8_t; + enum class LogicalOperatorType : uint8_t; enum class LogicalTypeId : uint8_t; @@ -748,6 +750,9 @@ const char* EnumUtil::ToChars(LogLevel value); template<> const char* EnumUtil::ToChars(LogMode value); +template<> +const char* EnumUtil::ToChars(LoggingTargetTable value); + template<> const char* EnumUtil::ToChars(LogicalOperatorType value); @@ -1367,6 +1372,9 @@ LogLevel EnumUtil::FromString(const char *value); template<> LogMode EnumUtil::FromString(const char *value); +template<> +LoggingTargetTable EnumUtil::FromString(const char *value); + template<> LogicalOperatorType EnumUtil::FromString(const char *value); diff --git a/src/duckdb/src/include/duckdb/common/string_util.hpp b/src/duckdb/src/include/duckdb/common/string_util.hpp index 8c0c19bef..1448c559a 100644 --- a/src/duckdb/src/include/duckdb/common/string_util.hpp +++ b/src/duckdb/src/include/duckdb/common/string_util.hpp @@ -318,6 +318,8 @@ class StringUtil { //! Transforms an complex JSON to a JSON string DUCKDB_API static string ToComplexJSONMap(const ComplexJSON &complex_json); + DUCKDB_API static string ValidateJSON(const char *data, const idx_t &len); + DUCKDB_API static string GetFileName(const string &file_path); DUCKDB_API static string GetFileExtension(const string &file_name); DUCKDB_API static string GetFileStem(const string &file_name); diff --git a/src/duckdb/src/include/duckdb/function/scalar/variant_functions.hpp b/src/duckdb/src/include/duckdb/function/scalar/variant_functions.hpp index 6408639ec..7c9ce455d 100644 --- a/src/duckdb/src/include/duckdb/function/scalar/variant_functions.hpp +++ b/src/duckdb/src/include/duckdb/function/scalar/variant_functions.hpp @@ -29,7 +29,7 @@ struct VariantTypeofFun { static constexpr const char *Name = "variant_typeof"; static constexpr const char *Parameters = "input_variant"; static constexpr const char *Description = "Returns the internal type of the `input_variant`."; - static constexpr const char *Example = "variant_typeof({'a': 42, 'b': [1,2,3])::VARIANT)"; + static constexpr const char *Example = "variant_typeof({'a': 42, 'b': [1,2,3]})::VARIANT)"; static constexpr const char *Categories = "variant"; static ScalarFunction GetFunction(); diff --git a/src/duckdb/src/include/duckdb/function/table/system_functions.hpp b/src/duckdb/src/include/duckdb/function/table/system_functions.hpp index e325b2f46..49c5e794c 100644 --- a/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +++ b/src/duckdb/src/include/duckdb/function/table/system_functions.hpp @@ -47,6 +47,10 @@ struct DuckDBSchemasFun { static void RegisterFunction(BuiltinFunctions &set); }; +struct DuckDBConnectionCountFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + struct DuckDBApproxDatabaseCountFun { static void RegisterFunction(BuiltinFunctions &set); }; diff --git a/src/duckdb/src/include/duckdb/logging/log_storage.hpp b/src/duckdb/src/include/duckdb/logging/log_storage.hpp index 10c06dddc..9a8a67405 100644 --- a/src/duckdb/src/include/duckdb/logging/log_storage.hpp +++ b/src/duckdb/src/include/duckdb/logging/log_storage.hpp @@ -33,7 +33,7 @@ struct CSVReaderOptions; //! Logging storage can store entries normalized or denormalized. This enum describes what a single table/file/etc //! contains -enum class LoggingTargetTable { +enum class LoggingTargetTable : uint8_t { ALL_LOGS, // Denormalized: log entries consisting of both the full log entry and the context LOG_ENTRIES, // Normalized: contains only the log entries and a context_id LOG_CONTEXTS, // Normalized: contains only the log contexts @@ -165,7 +165,7 @@ class BufferingLogStorage : public LogStorage { //! Debug option for testing buffering behaviour bool only_flush_on_full_buffer = false; //! The buffers used for each table - unordered_map> buffers; + map> buffers; //! This flag is set whenever a new context_is written to the entry buffer. It means that the next flush of //! LoggingTargetTable::LOG_ENTRIES also requires a flush of LoggingTargetTable::LOG_CONTEXTS bool flush_contexts_on_next_entry_flush = false; @@ -218,9 +218,9 @@ class CSVLogStorage : public BufferingLogStorage { void InitializeCastChunk(LoggingTargetTable table); //! The cast buffers used to cast from the original types to the VARCHAR types ready to write to CSV format - unordered_map> cast_buffers; + map> cast_buffers; //! The writers to be registered by child classes - unordered_map> writers; + map> writers; //! CSV Options to initialize the CSVWriters with. TODO: cleanup, this is now a little bit of a mixed bag of //! settings @@ -302,7 +302,7 @@ class FileLogStorage : public CSVLogStorage { }; //! The table info per table - unordered_map tables; + map tables; //! Base path to generate the file paths from string base_path; @@ -349,7 +349,7 @@ class InMemoryLogStorage : public BufferingLogStorage { //! Helper function to get the buffer ColumnDataCollection &GetBuffer(LoggingTargetTable table) const; - unordered_map> log_storage_buffers; + map> log_storage_buffers; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/logging/log_type.hpp b/src/duckdb/src/include/duckdb/logging/log_type.hpp index c843fe1b0..23d901c4e 100644 --- a/src/duckdb/src/include/duckdb/logging/log_type.hpp +++ b/src/duckdb/src/include/duckdb/logging/log_type.hpp @@ -17,6 +17,9 @@ struct FileHandle; struct BaseRequest; struct HTTPResponse; class PhysicalOperator; +class AttachedDatabase; +class RowGroup; +struct DataTableInfo; //! Log types provide some structure to the formats that the different log messages can have //! For now, this holds a type that the VARCHAR value will be auto-cast into. @@ -54,9 +57,7 @@ class QueryLogType : public LogType { QueryLogType() : LogType(NAME, LEVEL) {}; - static string ConstructLogMessage(const string &str) { - return str; - } + static string ConstructLogMessage(const string &str); }; class FileSystemLogType : public LogType { @@ -105,4 +106,26 @@ class PhysicalOperatorLogType : public LogType { const vector> &info); }; +class CheckpointLogType : public LogType { +public: + static constexpr const char *NAME = "Checkpoint"; + static constexpr LogLevel LEVEL = LogLevel::LOG_DEBUG; + + //! Construct the log type + CheckpointLogType(); + + static LogicalType GetLogType(); + + //! Vacuum + static string ConstructLogMessage(const AttachedDatabase &db, DataTableInfo &table, idx_t segment_idx, + idx_t merge_count, idx_t target_count, idx_t merge_rows, idx_t row_start); + //! Checkpoint + static string ConstructLogMessage(const AttachedDatabase &db, DataTableInfo &table, idx_t segment_idx, + RowGroup &row_group); + +private: + static string CreateLog(const AttachedDatabase &db, DataTableInfo &table, const char *op, vector map_keys, + vector map_values); +}; + } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/main/attached_database.hpp b/src/duckdb/src/include/duckdb/main/attached_database.hpp index 3a779aa3e..e55e972d4 100644 --- a/src/duckdb/src/include/duckdb/main/attached_database.hpp +++ b/src/duckdb/src/include/duckdb/main/attached_database.hpp @@ -40,6 +40,9 @@ struct StoredDatabasePath { DatabaseFilePathManager &manager; string path; + +public: + void OnDetach(); }; //! AttachOptions holds information about a database we plan to attach. These options are generalized, i.e., @@ -103,6 +106,7 @@ class AttachedDatabase : public CatalogEntry, public enable_shared_from_this { CreatePreparedStatementInternal(ClientContextLock &lock, const string &query, unique_ptr statement, optional_ptr> values); + SettingLookupResult TryGetCurrentSettingInternal(const string &key, Value &result) const; + private: //! Lock on using the ClientContext in parallel mutex context_lock; diff --git a/src/duckdb/src/include/duckdb/main/connection.hpp b/src/duckdb/src/include/duckdb/main/connection.hpp index c27d84d21..72c61209c 100644 --- a/src/duckdb/src/include/duckdb/main/connection.hpp +++ b/src/duckdb/src/include/duckdb/main/connection.hpp @@ -50,7 +50,6 @@ class Connection { DUCKDB_API ~Connection(); shared_ptr context; - warning_callback_t warning_cb; public: //! Returns query profiling information for the current query diff --git a/src/duckdb/src/include/duckdb/main/connection_manager.hpp b/src/duckdb/src/include/duckdb/main/connection_manager.hpp index 7fa5c66b5..1c647ce02 100644 --- a/src/duckdb/src/include/duckdb/main/connection_manager.hpp +++ b/src/duckdb/src/include/duckdb/main/connection_manager.hpp @@ -40,7 +40,6 @@ class ConnectionManager { mutex connections_lock; reference_map_t> connections; atomic connection_count; - atomic current_connection_id; }; diff --git a/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp b/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp index a4368d4f1..4f1e14054 100644 --- a/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp +++ b/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp @@ -19,13 +19,24 @@ struct AttachOptions; enum class InsertDatabasePathResult { SUCCESS, ALREADY_EXISTS }; +struct DatabasePathInfo { + explicit DatabasePathInfo(string name_p) : name(std::move(name_p)), is_attached(true) { + } + + string name; + bool is_attached; +}; + //! The DatabaseFilePathManager is used to ensure we only ever open a single database file once class DatabaseFilePathManager { public: idx_t ApproxDatabaseCount() const; InsertDatabasePathResult InsertDatabasePath(const string &path, const string &name, OnCreateConflict on_conflict, AttachOptions &options); + //! Erase a database path - indicating we are done with using it void EraseDatabasePath(const string &path); + //! Called when a database is detached, but before it is fully finished being used + void DetachDatabase(const string &path); private: //! The lock to add entries to the database path map @@ -33,7 +44,7 @@ class DatabaseFilePathManager { //! A set containing all attached database path //! This allows to attach many databases efficiently, and to avoid attaching the //! same file path twice - case_insensitive_map_t db_paths_to_name; + case_insensitive_map_t db_paths; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/main/database_manager.hpp b/src/duckdb/src/include/duckdb/main/database_manager.hpp index 296aca71b..d143498fd 100644 --- a/src/duckdb/src/include/duckdb/main/database_manager.hpp +++ b/src/duckdb/src/include/duckdb/main/database_manager.hpp @@ -48,6 +48,7 @@ class DatabaseManager { void FinalizeStartup(); //! Get an attached database by its name optional_ptr GetDatabase(ClientContext &context, const string &name); + shared_ptr GetDatabase(const string &name); //! Attach a new database shared_ptr AttachDatabase(ClientContext &context, AttachInfo &info, AttachOptions &options); @@ -103,6 +104,8 @@ class DatabaseManager { //! Gets a list of all attached database paths vector GetAttachedDatabasePaths(); + shared_ptr GetDatabaseInternal(const lock_guard &, const string &name); + private: //! The system database is a special database that holds system entries (e.g. functions) shared_ptr system; diff --git a/src/duckdb/src/include/duckdb/planner/binder.hpp b/src/duckdb/src/include/duckdb/planner/binder.hpp index bc81db9bd..6ed02f8e3 100644 --- a/src/duckdb/src/include/duckdb/planner/binder.hpp +++ b/src/duckdb/src/include/duckdb/planner/binder.hpp @@ -100,6 +100,65 @@ struct CorrelatedColumnInfo { } }; +struct CorrelatedColumns { +private: + using container_type = vector; + +public: + CorrelatedColumns() : delim_index(1ULL << 63) { + } + + void AddColumn(container_type::value_type info) { + // Add to beginning + correlated_columns.insert(correlated_columns.begin(), std::move(info)); + delim_index++; + } + + void SetDelimIndexToZero() { + delim_index = 0; + } + + idx_t GetDelimIndex() const { + return delim_index; + } + + const container_type::value_type &operator[](const idx_t &index) const { + return correlated_columns.at(index); + } + + idx_t size() const { // NOLINT: match stl case + return correlated_columns.size(); + } + + bool empty() const { // NOLINT: match stl case + return correlated_columns.empty(); + } + + void clear() { // NOLINT: match stl case + correlated_columns.clear(); + } + + container_type::iterator begin() { // NOLINT: match stl case + return correlated_columns.begin(); + } + + container_type::iterator end() { // NOLINT: match stl case + return correlated_columns.end(); + } + + container_type::const_iterator begin() const { // NOLINT: match stl case + return correlated_columns.begin(); + } + + container_type::const_iterator end() const { // NOLINT: match stl case + return correlated_columns.end(); + } + +private: + container_type correlated_columns; + idx_t delim_index; +}; + //! Bind the parsed query tree to the actual columns present in the catalog. /*! The binder is responsible for binding tables and columns to actual physical @@ -124,7 +183,7 @@ class Binder : public enable_shared_from_this { BindContext bind_context; //! The set of correlated columns bound by this binder (FIXME: this should probably be an unordered_set and not a //! vector) - vector correlated_columns; + CorrelatedColumns correlated_columns; //! The set of parameter expressions bound by this binder optional_ptr parameters; //! The alias for the currently processing subquery, if it exists @@ -200,7 +259,7 @@ class Binder : public enable_shared_from_this { vector> &GetActiveBinders(); - void MergeCorrelatedColumns(vector &other); + void MergeCorrelatedColumns(CorrelatedColumns &other); //! Add a correlated column to this binder (if it does not exist) void AddCorrelatedColumn(const CorrelatedColumnInfo &info); @@ -428,7 +487,7 @@ class Binder : public enable_shared_from_this { void PlanSubqueries(unique_ptr &expr, unique_ptr &root); unique_ptr PlanSubquery(BoundSubqueryExpression &expr, unique_ptr &root); unique_ptr PlanLateralJoin(unique_ptr left, unique_ptr right, - vector &correlated_columns, + CorrelatedColumns &correlated_columns, JoinType join_type = JoinType::INNER, unique_ptr condition = nullptr); diff --git a/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp b/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp index eb68a0cdf..55f046cd7 100644 --- a/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +++ b/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp @@ -24,7 +24,7 @@ class LateralBinder : public ExpressionBinder { return !correlated_columns.empty(); } - static void ReduceExpressionDepth(LogicalOperator &op, const vector &info); + static void ReduceExpressionDepth(LogicalOperator &op, const CorrelatedColumns &info); protected: BindResult BindExpression(unique_ptr &expr_ptr, idx_t depth, @@ -37,7 +37,7 @@ class LateralBinder : public ExpressionBinder { void ExtractCorrelatedColumns(Expression &expr); private: - vector correlated_columns; + CorrelatedColumns correlated_columns; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/planner/operator/logical_cte.hpp b/src/duckdb/src/include/duckdb/planner/operator/logical_cte.hpp index cd2ed3c21..0548cd4e7 100644 --- a/src/duckdb/src/include/duckdb/planner/operator/logical_cte.hpp +++ b/src/duckdb/src/include/duckdb/planner/operator/logical_cte.hpp @@ -35,6 +35,6 @@ class LogicalCTE : public LogicalOperator { string ctename; idx_t table_index; idx_t column_count; - vector correlated_columns; + CorrelatedColumns correlated_columns; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp b/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp index 724f2bc57..5e4c83919 100644 --- a/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +++ b/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp @@ -27,7 +27,7 @@ class LogicalDependentJoin : public LogicalComparisonJoin { public: explicit LogicalDependentJoin(unique_ptr left, unique_ptr right, - vector correlated_columns, JoinType type, + CorrelatedColumns correlated_columns, JoinType type, unique_ptr condition); explicit LogicalDependentJoin(JoinType type); @@ -35,7 +35,7 @@ class LogicalDependentJoin : public LogicalComparisonJoin { //! The conditions of the join unique_ptr join_condition; //! The list of columns that have correlations with the right - vector correlated_columns; + CorrelatedColumns correlated_columns; SubqueryType subquery_type = SubqueryType::INVALID; bool perform_delim = true; @@ -51,7 +51,7 @@ class LogicalDependentJoin : public LogicalComparisonJoin { public: static unique_ptr Create(unique_ptr left, unique_ptr right, - vector correlated_columns, JoinType type, + CorrelatedColumns correlated_columns, JoinType type, unique_ptr condition); }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp b/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp index 2f343e901..14ad4510c 100644 --- a/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +++ b/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp @@ -18,7 +18,7 @@ namespace duckdb { //! The FlattenDependentJoins class is responsible for pushing the dependent join down into the plan to create a //! flattened subquery struct FlattenDependentJoins { - FlattenDependentJoins(Binder &binder, const vector &correlated, bool perform_delim = true, + FlattenDependentJoins(Binder &binder, const CorrelatedColumns &correlated, bool perform_delim = true, bool any_join = false, optional_ptr parent = nullptr); static unique_ptr DecorrelateIndependent(Binder &binder, unique_ptr plan); @@ -47,7 +47,7 @@ struct FlattenDependentJoins { reference_map_t has_correlated_expressions; column_binding_map_t correlated_map; column_binding_map_t replacement_map; - const vector &correlated_columns; + const CorrelatedColumns &correlated_columns; vector delim_types; bool perform_delim; diff --git a/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp b/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp index 6b238ffcc..81a097b49 100644 --- a/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +++ b/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp @@ -16,7 +16,7 @@ namespace duckdb { //! Helper class to recursively detect correlated expressions inside a single LogicalOperator class HasCorrelatedExpressions : public LogicalOperatorVisitor { public: - explicit HasCorrelatedExpressions(const vector &correlated, bool lateral = false, + explicit HasCorrelatedExpressions(const CorrelatedColumns &correlated, bool lateral = false, idx_t lateral_depth = 0); void VisitOperator(LogicalOperator &op) override; @@ -28,7 +28,7 @@ class HasCorrelatedExpressions : public LogicalOperatorVisitor { unique_ptr VisitReplace(BoundColumnRefExpression &expr, unique_ptr *expr_ptr) override; unique_ptr VisitReplace(BoundSubqueryExpression &expr, unique_ptr *expr_ptr) override; - const vector &correlated_columns; + const CorrelatedColumns &correlated_columns; // Tracks number of nested laterals idx_t lateral_depth; }; diff --git a/src/duckdb/src/include/duckdb/planner/subquery/rewrite_cte_scan.hpp b/src/duckdb/src/include/duckdb/planner/subquery/rewrite_cte_scan.hpp index e2c507e73..72886f80e 100644 --- a/src/duckdb/src/include/duckdb/planner/subquery/rewrite_cte_scan.hpp +++ b/src/duckdb/src/include/duckdb/planner/subquery/rewrite_cte_scan.hpp @@ -17,13 +17,13 @@ namespace duckdb { //! Helper class to rewrite correlated cte scans within a single LogicalOperator class RewriteCTEScan : public LogicalOperatorVisitor { public: - RewriteCTEScan(idx_t table_index, const vector &correlated_columns); + RewriteCTEScan(idx_t table_index, const CorrelatedColumns &correlated_columns); void VisitOperator(LogicalOperator &op) override; private: idx_t table_index; - const vector &correlated_columns; + const CorrelatedColumns &correlated_columns; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/planner/tableref/bound_joinref.hpp b/src/duckdb/src/include/duckdb/planner/tableref/bound_joinref.hpp index 38c83c95f..299189624 100644 --- a/src/duckdb/src/include/duckdb/planner/tableref/bound_joinref.hpp +++ b/src/duckdb/src/include/duckdb/planner/tableref/bound_joinref.hpp @@ -47,7 +47,7 @@ class BoundJoinRef : public BoundTableRef { //! Whether or not this is a lateral join bool lateral; //! The correlated columns of the right-side with the left-side - vector correlated_columns; + CorrelatedColumns correlated_columns; //! The mark index, for mark joins generated by the relational API idx_t mark_index {}; }; diff --git a/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp b/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp index bac590d0e..13eecf42b 100644 --- a/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +++ b/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp @@ -82,7 +82,12 @@ unique_ptr AlpInitAnalyze(ColumnData &col_data, PhysicalType type) */ template bool AlpAnalyze(AnalyzeState &state, Vector &input, idx_t count) { - auto &analyze_state = (AlpAnalyzeState &)state; + if (state.info.GetBlockSize() + state.info.GetBlockHeaderSize() < DEFAULT_BLOCK_ALLOC_SIZE) { + return false; + } + + auto &analyze_state = state.Cast>(); + bool must_skip_current_vector = alp::AlpUtils::MustSkipSamplingFromCurrentVector( analyze_state.vectors_count, analyze_state.vectors_sampled_count, count); analyze_state.vectors_count += 1; diff --git a/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp b/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp index 25901667e..da7f8bda0 100644 --- a/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +++ b/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp @@ -47,8 +47,12 @@ unique_ptr AlpRDInitAnalyze(ColumnData &col_data, PhysicalType typ */ template bool AlpRDAnalyze(AnalyzeState &state, Vector &input, idx_t count) { + if (state.info.GetBlockSize() + state.info.GetBlockHeaderSize() < DEFAULT_BLOCK_ALLOC_SIZE) { + return false; + } + using EXACT_TYPE = typename FloatingToExact::TYPE; - auto &analyze_state = (AlpRDAnalyzeState &)state; + auto &analyze_state = state.Cast>(); bool must_skip_current_vector = alp::AlpUtils::MustSkipSamplingFromCurrentVector( analyze_state.vectors_count, analyze_state.vectors_sampled_count, count); diff --git a/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp b/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp index 6abfb5d5b..cd63a96b8 100644 --- a/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp @@ -92,17 +92,19 @@ class MetadataManager { protected: BlockManager &block_manager; BufferManager &buffer_manager; + mutable mutex block_lock; unordered_map blocks; unordered_map modified_blocks; protected: - block_id_t AllocateNewBlock(); - block_id_t PeekNextBlockId(); - block_id_t GetNextBlockId(); - - void AddBlock(MetadataBlock new_block, bool if_exists = false); - void AddAndRegisterBlock(MetadataBlock block); - void ConvertToTransient(MetadataBlock &block); + block_id_t AllocateNewBlock(unique_lock &block_lock); + block_id_t PeekNextBlockId() const; + block_id_t GetNextBlockId() const; + + void AddBlock(unique_lock &block_lock, MetadataBlock new_block, bool if_exists = false); + void AddAndRegisterBlock(unique_lock &block_lock, MetadataBlock block); + void ConvertToTransient(unique_lock &block_lock, MetadataBlock &block); + MetadataPointer FromDiskPointerInternal(unique_lock &block_lock, MetaBlockPointer pointer); }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp b/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp index 0982f8905..6e5814a36 100644 --- a/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +++ b/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp @@ -71,6 +71,8 @@ struct StringStats { ExpressionType comparison_type, const string &value); DUCKDB_API static void Update(BaseStatistics &stats, const string_t &value); + DUCKDB_API static void SetMin(BaseStatistics &stats, const string_t &value); + DUCKDB_API static void SetMax(BaseStatistics &stats, const string_t &value); DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other); DUCKDB_API static void Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count); diff --git a/src/duckdb/src/logging/log_types.cpp b/src/duckdb/src/logging/log_types.cpp index 6f2d4bacd..f78abae59 100644 --- a/src/duckdb/src/logging/log_types.cpp +++ b/src/duckdb/src/logging/log_types.cpp @@ -1,3 +1,4 @@ +#include "duckdb/main/attached_database.hpp" #include "duckdb/logging/file_system_logger.hpp" #include "duckdb/logging/log_type.hpp" #include "duckdb/common/file_opener.hpp" @@ -13,7 +14,18 @@ constexpr LogLevel FileSystemLogType::LEVEL; constexpr LogLevel QueryLogType::LEVEL; constexpr LogLevel HTTPLogType::LEVEL; constexpr LogLevel PhysicalOperatorLogType::LEVEL; +constexpr LogLevel CheckpointLogType::LEVEL; +//===--------------------------------------------------------------------===// +// QueryLogType +//===--------------------------------------------------------------------===// +string QueryLogType::ConstructLogMessage(const string &str) { + return str; +} + +//===--------------------------------------------------------------------===// +// FileSystemLogType +//===--------------------------------------------------------------------===// FileSystemLogType::FileSystemLogType() : LogType(NAME, LEVEL, GetLogType()) { } @@ -36,6 +48,9 @@ LogicalType FileSystemLogType::GetLogType() { return LogicalType::STRUCT(child_list); } +//===--------------------------------------------------------------------===// +// HTTPLogType +//===--------------------------------------------------------------------===// HTTPLogType::HTTPLogType() : LogType(NAME, LEVEL, GetLogType()) { } @@ -92,6 +107,9 @@ string HTTPLogType::ConstructLogMessage(BaseRequest &request, optional_ptr child_list = { + {"database", LogicalType::VARCHAR}, + {"schema", LogicalType::VARCHAR}, + {"table", LogicalType::VARCHAR}, + {"type", LogicalType::VARCHAR}, + {"info", LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR)}, + }; + return LogicalType::STRUCT(child_list); +} + +string CheckpointLogType::CreateLog(const AttachedDatabase &db, DataTableInfo &table, const char *op_name, + vector map_keys, vector map_values) { + child_list_t child_list = { + {"database", db.name}, + {"schema", table.GetSchemaName()}, + {"table", table.GetTableName()}, + {"type", op_name}, + {"info", Value::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR, std::move(map_keys), std::move(map_values))}, + }; + + return Value::STRUCT(std::move(child_list)).ToString(); +} + +string CheckpointLogType::ConstructLogMessage(const AttachedDatabase &db, DataTableInfo &table, idx_t segment_idx, + idx_t merge_count, idx_t target_count, idx_t merge_rows, + idx_t row_start) { + vector map_keys = {"segment_idx", "merge_count", "target_count", "merge_rows", "row_start"}; + vector map_values = {to_string(segment_idx), to_string(merge_count), to_string(target_count), + to_string(merge_rows), to_string(row_start)}; + return CreateLog(db, table, "vacuum", std::move(map_keys), std::move(map_values)); +} + +string CheckpointLogType::ConstructLogMessage(const AttachedDatabase &db, DataTableInfo &table, idx_t segment_idx, + RowGroup &row_group) { + vector map_keys = {"segment_idx", "start", "count"}; + vector map_values = {to_string(segment_idx), to_string(row_group.start), to_string(row_group.count.load())}; + return CreateLog(db, table, "checkpoint", std::move(map_keys), std::move(map_values)); +} } // namespace duckdb diff --git a/src/duckdb/src/main/attached_database.cpp b/src/duckdb/src/main/attached_database.cpp index 6a4180f09..aaf061e2f 100644 --- a/src/duckdb/src/main/attached_database.cpp +++ b/src/duckdb/src/main/attached_database.cpp @@ -22,6 +22,10 @@ StoredDatabasePath::~StoredDatabasePath() { manager.EraseDatabasePath(path); } +void StoredDatabasePath::OnDetach() { + manager.DetachDatabase(path); +} + //===--------------------------------------------------------------------===// // Attach Options //===--------------------------------------------------------------------===// @@ -155,6 +159,13 @@ bool AttachedDatabase::NameIsReserved(const string &name) { return name == DEFAULT_SCHEMA || name == TEMP_CATALOG || name == SYSTEM_CATALOG; } +string AttachedDatabase::StoredPath() const { + if (stored_database_path) { + return stored_database_path->path; + } + return string(); +} + static string RemoveQueryParams(const string &name) { auto vec = StringUtil::Split(name, "?"); D_ASSERT(!vec.empty()); @@ -227,10 +238,12 @@ void AttachedDatabase::SetReadOnlyDatabase() { } void AttachedDatabase::OnDetach(ClientContext &context) { - if (!catalog) { - return; + if (catalog) { + catalog->OnDetach(context); + } + if (stored_database_path) { + stored_database_path->OnDetach(); } - catalog->OnDetach(context); } void AttachedDatabase::Close() { diff --git a/src/duckdb/src/main/client_context.cpp b/src/duckdb/src/main/client_context.cpp index 46bf97b82..f52fbabdd 100644 --- a/src/duckdb/src/main/client_context.cpp +++ b/src/duckdb/src/main/client_context.cpp @@ -217,15 +217,15 @@ void ClientContext::BeginQueryInternal(ClientContextLock &lock, const string &qu state->QueryBegin(*this); } - // Flush the old Logger + // Flush the old logger. logger->Flush(); - // Refresh the logger to ensure we are in sync with global log settings - LoggingContext context(LogContextScope::CONNECTION); - context.connection_id = connection_id; - context.transaction_id = transaction.ActiveTransaction().global_transaction_id; - context.query_id = transaction.GetActiveQuery(); - logger = db->GetLogManager().CreateLogger(context, true); + // Refresh the logger to ensure we are in sync with the global log settings. + LoggingContext logging_context(LogContextScope::CONNECTION); + logging_context.connection_id = connection_id; + logging_context.transaction_id = transaction.ActiveTransaction().global_transaction_id; + logging_context.query_id = transaction.GetActiveQuery(); + logger = db->GetLogManager().CreateLogger(logging_context, true); DUCKDB_LOG(*this, QueryLogType, query); } @@ -889,6 +889,10 @@ unique_ptr ClientContext::PendingStatementOrPreparedStatemen shared_ptr &prepared, const PendingQueryParameters ¶meters) { unique_ptr pending; + // Start the profiler. + auto &profiler = QueryProfiler::Get(*this); + profiler.StartQuery(query, IsExplainAnalyze(statement ? statement.get() : prepared->unbound_statement.get())); + try { BeginQueryInternal(lock, query); } catch (std::exception &ex) { @@ -900,9 +904,6 @@ unique_ptr ClientContext::PendingStatementOrPreparedStatemen } return ErrorResult(std::move(error), query); } - // start the profiler - auto &profiler = QueryProfiler::Get(*this); - profiler.StartQuery(query, IsExplainAnalyze(statement ? statement.get() : prepared->unbound_statement.get())); bool invalidate_query = true; try { @@ -1406,15 +1407,7 @@ unique_ptr ClientContext::Execute(const shared_ptr &relat return ErrorResult(ErrorData(err_str)); } -SettingLookupResult ClientContext::TryGetCurrentSetting(const std::string &key, Value &result) const { - // first check the built-in settings - auto &db_config = DBConfig::GetConfig(*this); - auto option = db_config.GetOptionByName(key); - if (option && option->get_setting) { - result = option->get_setting(*this); - return SettingLookupResult(SettingScope::LOCAL); - } - +SettingLookupResult ClientContext::TryGetCurrentSettingInternal(const string &key, Value &result) const { // check the client session values const auto &session_config_map = config.set_variables; @@ -1428,6 +1421,21 @@ SettingLookupResult ClientContext::TryGetCurrentSetting(const std::string &key, return db->TryGetCurrentSetting(key, result); } +SettingLookupResult ClientContext::TryGetCurrentSetting(const string &key, Value &result) const { + // first check the built-in settings + auto &db_config = DBConfig::GetConfig(*this); + auto option = db_config.GetOptionByName(key); + if (option) { + if (option->get_setting) { + result = option->get_setting(*this); + return SettingLookupResult(SettingScope::LOCAL); + } + // alias - search for the default key + return TryGetCurrentSettingInternal(option->name, result); + } + return TryGetCurrentSettingInternal(key, result); +} + ParserOptions ClientContext::GetParserOptions() const { auto &client_config = ClientConfig::GetConfig(*this); ParserOptions options; diff --git a/src/duckdb/src/main/connection.cpp b/src/duckdb/src/main/connection.cpp index e561a3cb9..af76cfd17 100644 --- a/src/duckdb/src/main/connection.cpp +++ b/src/duckdb/src/main/connection.cpp @@ -19,7 +19,7 @@ namespace duckdb { Connection::Connection(DatabaseInstance &database) - : context(make_shared_ptr(database.shared_from_this())), warning_cb(nullptr) { + : context(make_shared_ptr(database.shared_from_this())) { auto &connection_manager = ConnectionManager::Get(database); connection_manager.AddConnection(*context); connection_manager.AssignConnectionId(*this); @@ -31,18 +31,15 @@ Connection::Connection(DatabaseInstance &database) } Connection::Connection(DuckDB &database) : Connection(*database.instance) { - // Initialization of warning_cb happens in the other constructor } -Connection::Connection(Connection &&other) noexcept : warning_cb(nullptr) { +Connection::Connection(Connection &&other) noexcept { std::swap(context, other.context); - std::swap(warning_cb, other.warning_cb); std::swap(connection_id, other.connection_id); } Connection &Connection::operator=(Connection &&other) noexcept { std::swap(context, other.context); - std::swap(warning_cb, other.warning_cb); std::swap(connection_id, other.connection_id); return *this; } diff --git a/src/duckdb/src/main/database_file_path_manager.cpp b/src/duckdb/src/main/database_file_path_manager.cpp index 1c553c614..f1825780e 100644 --- a/src/duckdb/src/main/database_file_path_manager.cpp +++ b/src/duckdb/src/main/database_file_path_manager.cpp @@ -7,7 +7,7 @@ namespace duckdb { idx_t DatabaseFilePathManager::ApproxDatabaseCount() const { lock_guard path_lock(db_paths_lock); - return db_paths_to_name.size(); + return db_paths.size(); } InsertDatabasePathResult DatabaseFilePathManager::InsertDatabasePath(const string &path, const string &name, @@ -18,14 +18,20 @@ InsertDatabasePathResult DatabaseFilePathManager::InsertDatabasePath(const strin } lock_guard path_lock(db_paths_lock); - auto entry = db_paths_to_name.emplace(path, name); + auto entry = db_paths.emplace(path, DatabasePathInfo(name)); if (!entry.second) { - if (on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT && entry.first->second == name) { - return InsertDatabasePathResult::ALREADY_EXISTS; + auto &existing = entry.first->second; + if (on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT && existing.name == name) { + if (existing.is_attached) { + return InsertDatabasePathResult::ALREADY_EXISTS; + } + throw BinderException("Unique file handle conflict: Cannot attach \"%s\" - the database file \"%s\" is in " + "the process of being detached", + name, path); } throw BinderException("Unique file handle conflict: Cannot attach \"%s\" - the database file \"%s\" is already " "attached by database \"%s\"", - name, path, entry.first->second); + name, path, existing.name); } options.stored_database_path = make_uniq(*this, path, name); return InsertDatabasePathResult::SUCCESS; @@ -36,7 +42,18 @@ void DatabaseFilePathManager::EraseDatabasePath(const string &path) { return; } lock_guard path_lock(db_paths_lock); - db_paths_to_name.erase(path); + db_paths.erase(path); +} + +void DatabaseFilePathManager::DetachDatabase(const string &path) { + if (path.empty() || path == IN_MEMORY_PATH) { + return; + } + lock_guard path_lock(db_paths_lock); + auto entry = db_paths.find(path); + if (entry != db_paths.end()) { + entry->second.is_attached = false; + } } } // namespace duckdb diff --git a/src/duckdb/src/main/database_manager.cpp b/src/duckdb/src/main/database_manager.cpp index 29e4e1d0a..003e440da 100644 --- a/src/duckdb/src/main/database_manager.cpp +++ b/src/duckdb/src/main/database_manager.cpp @@ -52,18 +52,33 @@ optional_ptr DatabaseManager::GetDatabase(ClientContext &conte return database; } lock_guard guard(databases_lock); + shared_ptr db; if (StringUtil::Lower(name) == TEMP_CATALOG) { - return meta_transaction.UseDatabase(context.client_data->temporary_objects); + db = context.client_data->temporary_objects; + } else { + db = GetDatabaseInternal(guard, name); } + if (!db) { + return nullptr; + } + return meta_transaction.UseDatabase(db); +} + +shared_ptr DatabaseManager::GetDatabase(const string &name) { + lock_guard guard(databases_lock); + return GetDatabaseInternal(guard, name); +} + +shared_ptr DatabaseManager::GetDatabaseInternal(const lock_guard &, const string &name) { if (StringUtil::Lower(name) == SYSTEM_CATALOG) { - return meta_transaction.UseDatabase(system); + return system; } auto entry = databases.find(name); if (entry == databases.end()) { // not found return nullptr; } - return meta_transaction.UseDatabase(entry->second); + return entry->second; } shared_ptr DatabaseManager::AttachDatabase(ClientContext &context, AttachInfo &info, diff --git a/src/duckdb/src/main/http/http_util.cpp b/src/duckdb/src/main/http/http_util.cpp index a51fb3e7f..554346489 100644 --- a/src/duckdb/src/main/http/http_util.cpp +++ b/src/duckdb/src/main/http/http_util.cpp @@ -367,7 +367,9 @@ HTTPUtil::RunRequestWithRetry(const std::function(void) try { response = on_request(); - response->url = request.url; + if (response) { + response->url = request.url; + } } catch (IOException &e) { exception_error = e.what(); caught_e = std::current_exception(); diff --git a/src/duckdb/src/optimizer/rule/comparison_simplification.cpp b/src/duckdb/src/optimizer/rule/comparison_simplification.cpp index 1e377528d..dc778cfff 100644 --- a/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +++ b/src/duckdb/src/optimizer/rule/comparison_simplification.cpp @@ -56,13 +56,8 @@ unique_ptr ComparisonSimplificationRule::Apply(LogicalOperator &op, // Is the constant cast invertible? if (!cast_constant.IsNull() && !BoundCastExpression::CastIsInvertible(cast_expression.return_type, target_type)) { - // Is it actually invertible? - Value uncast_constant; - if (!cast_constant.TryCastAs(rewriter.context, constant_value.type(), uncast_constant, &error_message, - true) || - uncast_constant != constant_value) { - return nullptr; - } + // Cast is not invertible, so we do not rewrite this expression to ensure that the cast is executed + return nullptr; } //! We can cast, now we change our column_ref_expression from an operator cast to a column reference @@ -75,6 +70,7 @@ unique_ptr ComparisonSimplificationRule::Apply(LogicalOperator &op, expr.left = std::move(new_constant_expr); expr.right = std::move(child_expression); } + changes_made = true; } return nullptr; } diff --git a/src/duckdb/src/parser/transform/statement/transform_create_function.cpp b/src/duckdb/src/parser/transform/statement/transform_create_function.cpp index 6490e29e5..1525f634d 100644 --- a/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +++ b/src/duckdb/src/parser/transform/statement/transform_create_function.cpp @@ -47,6 +47,8 @@ unique_ptr Transformer::TransformMacroFunction(duckdb_libpgquery: default_expr = make_uniq(std::move(default_value)); default_expr->SetAlias(param.name); macro_func->default_parameters[param.name] = std::move(default_expr); + } else if (!macro_func->default_parameters.empty()) { + throw ParserException("Parameter without a default follows parameter with a default"); } } diff --git a/src/duckdb/src/planner/binder.cpp b/src/duckdb/src/planner/binder.cpp index 1246f5a25..309422a5f 100644 --- a/src/duckdb/src/planner/binder.cpp +++ b/src/duckdb/src/planner/binder.cpp @@ -440,7 +440,7 @@ void Binder::MoveCorrelatedExpressions(Binder &other) { other.correlated_columns.clear(); } -void Binder::MergeCorrelatedColumns(vector &other) { +void Binder::MergeCorrelatedColumns(CorrelatedColumns &other) { for (idx_t i = 0; i < other.size(); i++) { AddCorrelatedColumn(other[i]); } @@ -449,7 +449,7 @@ void Binder::MergeCorrelatedColumns(vector &other) { void Binder::AddCorrelatedColumn(const CorrelatedColumnInfo &info) { // we only add correlated columns to the list if they are not already there if (std::find(correlated_columns.begin(), correlated_columns.end(), info) == correlated_columns.end()) { - correlated_columns.push_back(info); + correlated_columns.AddColumn(info); } } diff --git a/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp b/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp index 2664903d3..6b979bf17 100644 --- a/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +++ b/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp @@ -186,9 +186,10 @@ static unique_ptr PlanUncorrelatedSubquery(Binder &binder, BoundSubq } } -static unique_ptr -CreateDuplicateEliminatedJoin(const vector &correlated_columns, JoinType join_type, - unique_ptr original_plan, bool perform_delim) { +static unique_ptr CreateDuplicateEliminatedJoin(const CorrelatedColumns &correlated_columns, + JoinType join_type, + unique_ptr original_plan, + bool perform_delim) { auto delim_join = make_uniq(join_type); delim_join->correlated_columns = correlated_columns; delim_join->perform_delim = perform_delim; @@ -216,7 +217,7 @@ static bool PerformDelimOnType(const LogicalType &type) { return true; } -static bool PerformDuplicateElimination(Binder &binder, vector &correlated_columns) { +static bool PerformDuplicateElimination(Binder &binder, CorrelatedColumns &correlated_columns) { if (!ClientConfig::GetConfig(binder.context).enable_optimizer) { // if optimizations are disabled we always do a delim join return true; @@ -235,7 +236,8 @@ static bool PerformDuplicateElimination(Binder &binder, vector &expr_ptr, unique_ptr Binder::PlanLateralJoin(unique_ptr left, unique_ptr right, - vector &correlated, JoinType join_type, + CorrelatedColumns &correlated, JoinType join_type, unique_ptr condition) { // scan the right operator for correlated columns // correlated LATERAL JOIN diff --git a/src/duckdb/src/planner/binder/statement/bind_create.cpp b/src/duckdb/src/planner/binder/statement/bind_create.cpp index 76b43f60a..f1ffd7496 100644 --- a/src/duckdb/src/planner/binder/statement/bind_create.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_create.cpp @@ -345,11 +345,7 @@ SchemaCatalogEntry &Binder::BindCreateFunctionInfo(CreateInfo &info) { try { dummy_binder->Bind(*query_node); } catch (const std::exception &ex) { - // TODO: we would like to do something like "error = ErrorData(ex);" here, - // but that breaks macro's like "create macro m(x) as table (from query_table(x));", - // because dummy-binding these always throws an error instead of a ParameterNotResolvedException. - // So, for now, we allow macro's with bind errors to be created. - // Binding is still useful because we can create the dependencies. + error = ErrorData(ex); } } diff --git a/src/duckdb/src/planner/binder/statement/bind_merge_into.cpp b/src/duckdb/src/planner/binder/statement/bind_merge_into.cpp index 87a9726ec..b52a04cf2 100644 --- a/src/duckdb/src/planner/binder/statement/bind_merge_into.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_merge_into.cpp @@ -232,10 +232,18 @@ BoundStatement Binder::Bind(MergeIntoStatement &stmt) { auto bound_join_node = Bind(join); auto root = CreatePlan(*bound_join_node); + auto join_ref = reference(*root); + while (join_ref.get().children.size() == 1) { + join_ref = *join_ref.get().children[0]; + } + if (join_ref.get().children.size() != 2) { + throw NotImplementedException("Expected a join after binding a join operator - but got a %s", + join_ref.get().type); + } // kind of hacky, CreatePlan turns a RIGHT join into a LEFT join so the children get reversed from what we need bool inverted = join.type == JoinType::RIGHT; - auto &source = root->children[inverted ? 1 : 0]; - auto &get = root->children[inverted ? 0 : 1]->Cast(); + auto &source = join_ref.get().children[inverted ? 1 : 0]; + auto &get = join_ref.get().children[inverted ? 0 : 1]->Cast(); auto merge_into = make_uniq(table); merge_into->table_index = GenerateTableIndex(); diff --git a/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp b/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp index 2eb211530..b0d0fffcb 100644 --- a/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +++ b/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp @@ -58,10 +58,15 @@ static void ConstructPivots(PivotRef &ref, vector &pivot_valu } } -static void ExtractPivotExpressions(ParsedExpression &root_expr, case_insensitive_set_t &handled_columns) { +static void ExtractPivotExpressions(ParsedExpression &root_expr, case_insensitive_set_t &handled_columns, + optional_ptr macro_binding) { ParsedExpressionIterator::VisitExpression( root_expr, [&](const ColumnRefExpression &child_colref) { if (child_colref.IsQualified()) { + if (child_colref.column_names[0].find(DummyBinding::DUMMY_NAME) != string::npos && macro_binding && + macro_binding->HasMatchingBinding(child_colref.GetName())) { + throw ParameterNotResolvedException(); + } throw BinderException(child_colref, "PIVOT expression cannot contain qualified columns"); } handled_columns.insert(child_colref.GetColumnName()); @@ -492,7 +497,7 @@ unique_ptr Binder::BindPivot(PivotRef &ref, vector Binder::BindPivot(PivotRef &ref, vector Binder::Bind(TableFunctionRef &ref) { } } - auto get = BindTableFunctionInternal(table_function, ref, std::move(parameters), std::move(named_parameters), - std::move(input_table_types), std::move(input_table_names)); + unique_ptr get; + try { + get = BindTableFunctionInternal(table_function, ref, std::move(parameters), std::move(named_parameters), + std::move(input_table_types), std::move(input_table_names)); + } catch (std::exception &ex) { + error = ErrorData(ex); + error.AddQueryLocation(ref); + error.Throw(); + } auto table_function_ref = make_uniq(std::move(get)); table_function_ref->subquery = std::move(subquery); return std::move(table_function_ref); diff --git a/src/duckdb/src/planner/expression_binder.cpp b/src/duckdb/src/planner/expression_binder.cpp index 5141765bb..220714733 100644 --- a/src/duckdb/src/planner/expression_binder.cpp +++ b/src/duckdb/src/planner/expression_binder.cpp @@ -103,7 +103,9 @@ BindResult ExpressionBinder::BindExpression(unique_ptr &expr, case ExpressionClass::STAR: return BindResult(BinderException::Unsupported(expr_ref, "STAR expression is not supported here")); default: - throw NotImplementedException("Unimplemented expression class"); + return BindResult( + NotImplementedException("Unimplemented expression class in ExpressionBinder::BindExpression: %s", + EnumUtil::ToString(expr_ref.GetExpressionClass()))); } } diff --git a/src/duckdb/src/planner/expression_binder/lateral_binder.cpp b/src/duckdb/src/planner/expression_binder/lateral_binder.cpp index 205b644e8..d532a7a40 100644 --- a/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +++ b/src/duckdb/src/planner/expression_binder/lateral_binder.cpp @@ -17,7 +17,7 @@ void LateralBinder::ExtractCorrelatedColumns(Expression &expr) { // add the correlated column info CorrelatedColumnInfo info(bound_colref); if (std::find(correlated_columns.begin(), correlated_columns.end(), info) == correlated_columns.end()) { - correlated_columns.push_back(std::move(info)); + correlated_columns.AddColumn(std::move(info)); // TODO is adding to the front OK here? } } } @@ -54,8 +54,7 @@ string LateralBinder::UnsupportedAggregateMessage() { return "LATERAL join cannot contain aggregates!"; } -static void ReduceColumnRefDepth(BoundColumnRefExpression &expr, - const vector &correlated_columns) { +static void ReduceColumnRefDepth(BoundColumnRefExpression &expr, const CorrelatedColumns &correlated_columns) { // don't need to reduce this if (expr.depth == 0) { return; @@ -69,8 +68,7 @@ static void ReduceColumnRefDepth(BoundColumnRefExpression &expr, } } -static void ReduceColumnDepth(vector &columns, - const vector &affected_columns) { +static void ReduceColumnDepth(CorrelatedColumns &columns, const CorrelatedColumns &affected_columns) { for (auto &s_correlated : columns) { for (auto &affected : affected_columns) { if (affected == s_correlated) { @@ -83,8 +81,7 @@ static void ReduceColumnDepth(vector &columns, class ExpressionDepthReducerRecursive : public BoundNodeVisitor { public: - explicit ExpressionDepthReducerRecursive(const vector &correlated) - : correlated_columns(correlated) { + explicit ExpressionDepthReducerRecursive(const CorrelatedColumns &correlated) : correlated_columns(correlated) { } void VisitExpression(unique_ptr &expression) override { @@ -106,20 +103,19 @@ class ExpressionDepthReducerRecursive : public BoundNodeVisitor { BoundNodeVisitor::VisitBoundTableRef(ref); } - static void ReduceExpressionSubquery(BoundSubqueryExpression &expr, - const vector &correlated_columns) { + static void ReduceExpressionSubquery(BoundSubqueryExpression &expr, const CorrelatedColumns &correlated_columns) { ReduceColumnDepth(expr.binder->correlated_columns, correlated_columns); ExpressionDepthReducerRecursive recursive(correlated_columns); recursive.VisitBoundQueryNode(*expr.subquery); } private: - const vector &correlated_columns; + const CorrelatedColumns &correlated_columns; }; class ExpressionDepthReducer : public LogicalOperatorVisitor { public: - explicit ExpressionDepthReducer(const vector &correlated) : correlated_columns(correlated) { + explicit ExpressionDepthReducer(const CorrelatedColumns &correlated) : correlated_columns(correlated) { } protected: @@ -133,10 +129,10 @@ class ExpressionDepthReducer : public LogicalOperatorVisitor { return nullptr; } - const vector &correlated_columns; + const CorrelatedColumns &correlated_columns; }; -void LateralBinder::ReduceExpressionDepth(LogicalOperator &op, const vector &correlated) { +void LateralBinder::ReduceExpressionDepth(LogicalOperator &op, const CorrelatedColumns &correlated) { ExpressionDepthReducer depth_reducer(correlated); depth_reducer.VisitOperator(op); } diff --git a/src/duckdb/src/planner/expression_binder/table_function_binder.cpp b/src/duckdb/src/planner/expression_binder/table_function_binder.cpp index 198bd072b..720dbe37d 100644 --- a/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +++ b/src/duckdb/src/planner/expression_binder/table_function_binder.cpp @@ -27,9 +27,13 @@ BindResult TableFunctionBinder::BindColumnReference(unique_ptr if (lambda_ref) { return BindLambdaReference(lambda_ref->Cast(), depth); } + if (binder.macro_binding && binder.macro_binding->HasMatchingBinding(col_ref.GetName())) { throw ParameterNotResolvedException(); } + } else if (col_ref.column_names[0].find(DummyBinding::DUMMY_NAME) != string::npos && binder.macro_binding && + binder.macro_binding->HasMatchingBinding(col_ref.GetName())) { + throw ParameterNotResolvedException(); } auto query_location = col_ref.GetQueryLocation(); diff --git a/src/duckdb/src/planner/operator/logical_dependent_join.cpp b/src/duckdb/src/planner/operator/logical_dependent_join.cpp index 2e46dbc78..70af8444a 100644 --- a/src/duckdb/src/planner/operator/logical_dependent_join.cpp +++ b/src/duckdb/src/planner/operator/logical_dependent_join.cpp @@ -3,7 +3,7 @@ namespace duckdb { LogicalDependentJoin::LogicalDependentJoin(unique_ptr left, unique_ptr right, - vector correlated_columns, JoinType type, + CorrelatedColumns correlated_columns, JoinType type, unique_ptr condition) : LogicalComparisonJoin(type, LogicalOperatorType::LOGICAL_DEPENDENT_JOIN), join_condition(std::move(condition)), correlated_columns(std::move(correlated_columns)) { @@ -17,7 +17,7 @@ LogicalDependentJoin::LogicalDependentJoin(JoinType join_type) unique_ptr LogicalDependentJoin::Create(unique_ptr left, unique_ptr right, - vector correlated_columns, JoinType type, + CorrelatedColumns correlated_columns, JoinType type, unique_ptr condition) { return make_uniq(std::move(left), std::move(right), std::move(correlated_columns), type, std::move(condition)); diff --git a/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp b/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp index 0b0ddc672..a4a00f185 100644 --- a/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +++ b/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp @@ -18,9 +18,8 @@ namespace duckdb { -FlattenDependentJoins::FlattenDependentJoins(Binder &binder, const vector &correlated, - bool perform_delim, bool any_join, - optional_ptr parent) +FlattenDependentJoins::FlattenDependentJoins(Binder &binder, const CorrelatedColumns &correlated, bool perform_delim, + bool any_join, optional_ptr parent) : binder(binder), delim_offset(DConstants::INVALID_INDEX), correlated_columns(correlated), perform_delim(perform_delim), any_join(any_join), parent(parent) { for (idx_t i = 0; i < correlated_columns.size(); i++) { @@ -30,8 +29,7 @@ FlattenDependentJoins::FlattenDependentJoins(Binder &binder, const vector &correlated_columns, +static void CreateDelimJoinConditions(LogicalComparisonJoin &delim_join, const CorrelatedColumns &correlated_columns, vector bindings, idx_t base_offset, bool perform_delim) { auto col_count = perform_delim ? correlated_columns.size() : 1; for (idx_t i = 0; i < col_count; i++) { @@ -50,7 +48,7 @@ static void CreateDelimJoinConditions(LogicalComparisonJoin &delim_join, unique_ptr FlattenDependentJoins::DecorrelateIndependent(Binder &binder, unique_ptr plan) { - vector correlated; + CorrelatedColumns correlated; FlattenDependentJoins flatten(binder, correlated); return flatten.Decorrelate(std::move(plan)); } @@ -80,12 +78,12 @@ unique_ptr FlattenDependentJoins::Decorrelate(unique_ptrsecond = false; // rewrite - idx_t lateral_depth = 0; + idx_t next_lateral_depth = 0; - RewriteCorrelatedExpressions rewriter(base_binding, correlated_map, lateral_depth); + RewriteCorrelatedExpressions rewriter(base_binding, correlated_map, next_lateral_depth); rewriter.VisitOperator(*plan); - RewriteCorrelatedExpressions recursive_rewriter(base_binding, correlated_map, lateral_depth, true); + RewriteCorrelatedExpressions recursive_rewriter(base_binding, correlated_map, next_lateral_depth, true); recursive_rewriter.VisitOperator(*plan); } else { op.children[0] = Decorrelate(std::move(op.children[0])); @@ -94,8 +92,8 @@ unique_ptr FlattenDependentJoins::Decorrelate(unique_ptr(op.correlated_columns[0].binding.table_index); + const auto &op_col = op.correlated_columns[op.correlated_columns.GetDelimIndex()]; + auto window = make_uniq(op_col.binding.table_index); auto row_number = make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; @@ -114,9 +112,9 @@ unique_ptr FlattenDependentJoins::Decorrelate(unique_ptrchildren[1], op.is_lateral_join, lateral_depth); if (delim_join->children[1]->type == LogicalOperatorType::LOGICAL_MATERIALIZED_CTE) { - auto &cte = delim_join->children[1]->Cast(); + auto &cte_ref = delim_join->children[1]->Cast(); // check if the left side of the CTE has correlated expressions - auto entry = flatten.has_correlated_expressions.find(*cte.children[0]); + auto entry = flatten.has_correlated_expressions.find(*cte_ref.children[0]); if (entry != flatten.has_correlated_expressions.end()) { if (!entry->second) { // the left side of the CTE has no correlated expressions, we can push the DEPENDENT_JOIN down @@ -132,7 +130,7 @@ unique_ptr FlattenDependentJoins::Decorrelate(unique_ptrchildren[1] = flatten.PushDownDependentJoin(std::move(delim_join->children[1]), propagate_null_values, lateral_depth); data_offset = flatten.data_offset; - auto left_offset = delim_join->children[0]->GetColumnBindings().size(); + const auto left_offset = delim_join->children[0]->GetColumnBindings().size(); if (!parent) { delim_offset = left_offset + flatten.delim_offset; } diff --git a/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp b/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp index 9f1c679a1..8554f3f5b 100644 --- a/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +++ b/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp @@ -7,7 +7,7 @@ namespace duckdb { -HasCorrelatedExpressions::HasCorrelatedExpressions(const vector &correlated, bool lateral, +HasCorrelatedExpressions::HasCorrelatedExpressions(const CorrelatedColumns &correlated, bool lateral, idx_t lateral_depth) : has_correlated_expressions(false), lateral(lateral), correlated_columns(correlated), lateral_depth(lateral_depth) { diff --git a/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp b/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp index 78b3b21ec..f846d9b36 100644 --- a/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp +++ b/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp @@ -14,7 +14,7 @@ namespace duckdb { -RewriteCTEScan::RewriteCTEScan(idx_t table_index, const vector &correlated_columns) +RewriteCTEScan::RewriteCTEScan(idx_t table_index, const CorrelatedColumns &correlated_columns) : table_index(table_index), correlated_columns(correlated_columns) { } @@ -49,7 +49,7 @@ void RewriteCTEScan::VisitOperator(LogicalOperator &op) { // The correlated columns must be placed at the beginning of the // correlated_columns list. Otherwise, further column accesses // and rewrites will fail. - join.correlated_columns.emplace(join.correlated_columns.begin(), corr); + join.correlated_columns.AddColumn(std::move(corr)); } } } diff --git a/src/duckdb/src/storage/compression/bitpacking.cpp b/src/duckdb/src/storage/compression/bitpacking.cpp index fa1ffaeba..43cb27692 100644 --- a/src/duckdb/src/storage/compression/bitpacking.cpp +++ b/src/duckdb/src/storage/compression/bitpacking.cpp @@ -341,8 +341,6 @@ unique_ptr BitpackingInitAnalyze(ColumnData &col_data, PhysicalTyp template bool BitpackingAnalyze(AnalyzeState &state, Vector &input, idx_t count) { - auto &analyze_state = state.Cast>(); - // We use BITPACKING_METADATA_GROUP_SIZE tuples, which can exceed the block size. // In that case, we disable bitpacking. // we are conservative here by multiplying by 2 @@ -351,6 +349,7 @@ bool BitpackingAnalyze(AnalyzeState &state, Vector &input, idx_t count) { return false; } + auto &analyze_state = state.Cast>(); UnifiedVectorFormat vdata; input.ToUnifiedFormat(count, vdata); diff --git a/src/duckdb/src/storage/metadata/metadata_manager.cpp b/src/duckdb/src/storage/metadata/metadata_manager.cpp index 55b8790e4..8674f742d 100644 --- a/src/duckdb/src/storage/metadata/metadata_manager.cpp +++ b/src/duckdb/src/storage/metadata/metadata_manager.cpp @@ -54,6 +54,8 @@ MetadataManager::~MetadataManager() { MetadataHandle MetadataManager::AllocateHandle() { // check if there is any free space left in an existing block // if not allocate a new block + MetadataPointer pointer; + unique_lock guard(block_lock); block_id_t free_block = INVALID_BLOCK; for (auto &kv : blocks) { auto &block = kv.second; @@ -63,13 +65,16 @@ MetadataHandle MetadataManager::AllocateHandle() { break; } } + guard.unlock(); if (free_block == INVALID_BLOCK || free_block > PeekNextBlockId()) { - free_block = AllocateNewBlock(); + free_block = AllocateNewBlock(guard); + } else { + guard.lock(); } + D_ASSERT(guard.owns_lock()); D_ASSERT(free_block != INVALID_BLOCK); // select the first free metadata block we can find - MetadataPointer pointer; pointer.block_index = UnsafeNumericCast(free_block); auto &block = blocks[free_block]; // the block is now dirty @@ -77,7 +82,7 @@ MetadataHandle MetadataManager::AllocateHandle() { if (block.block->BlockId() < MAXIMUM_BLOCK) { // this block is a disk-backed block, yet we are planning to write to it // we need to convert it into a transient block before we can write to it - ConvertToTransient(block); + ConvertToTransient(guard, block); D_ASSERT(block.block->BlockId() >= MAXIMUM_BLOCK); } D_ASSERT(!block.free_blocks.empty()); @@ -85,6 +90,7 @@ MetadataHandle MetadataManager::AllocateHandle() { // mark the block as used block.free_blocks.pop_back(); D_ASSERT(pointer.index < METADATA_BLOCK_COUNT); + guard.unlock(); // pin the block return Pin(pointer); } @@ -95,25 +101,34 @@ MetadataHandle MetadataManager::Pin(const MetadataPointer &pointer) { MetadataHandle MetadataManager::Pin(QueryContext context, const MetadataPointer &pointer) { D_ASSERT(pointer.index < METADATA_BLOCK_COUNT); - auto &block = blocks[UnsafeNumericCast(pointer.block_index)]; + shared_ptr block_handle; + { + lock_guard guard(block_lock); + auto &block = blocks[UnsafeNumericCast(pointer.block_index)]; #ifdef DEBUG - for (auto &free_block : block.free_blocks) { - if (free_block == pointer.index) { - throw InternalException("Pinning block %d.%d but it is marked as a free block", block.block_id, free_block); + for (auto &free_block : block.free_blocks) { + if (free_block == pointer.index) { + throw InternalException("Pinning block %d.%d but it is marked as a free block", block.block_id, + free_block); + } } - } #endif + block_handle = block.block; + } MetadataHandle handle; handle.pointer.block_index = pointer.block_index; handle.pointer.index = pointer.index; - handle.handle = buffer_manager.Pin(block.block); + handle.handle = buffer_manager.Pin(block_handle); return handle; } -void MetadataManager::ConvertToTransient(MetadataBlock &metadata_block) { +void MetadataManager::ConvertToTransient(unique_lock &block_lock, MetadataBlock &metadata_block) { + D_ASSERT(block_lock.owns_lock()); + auto old_block = metadata_block.block; + block_lock.unlock(); // pin the old block - auto old_buffer = buffer_manager.Pin(metadata_block.block); + auto old_buffer = buffer_manager.Pin(old_block); // allocate a new transient block to replace it auto new_buffer = buffer_manager.Allocate(MemoryTag::METADATA, &block_manager, false); @@ -121,14 +136,17 @@ void MetadataManager::ConvertToTransient(MetadataBlock &metadata_block) { // copy the data to the transient block memcpy(new_buffer.Ptr(), old_buffer.Ptr(), block_manager.GetBlockSize()); - metadata_block.block = std::move(new_block); - metadata_block.dirty = true; // unregister the old block block_manager.UnregisterBlock(metadata_block.block_id); + + block_lock.lock(); + metadata_block.block = std::move(new_block); + metadata_block.dirty = true; } -block_id_t MetadataManager::AllocateNewBlock() { +block_id_t MetadataManager::AllocateNewBlock(unique_lock &block_lock) { + D_ASSERT(!block_lock.owns_lock()); auto new_block_id = GetNextBlockId(); MetadataBlock new_block; @@ -141,11 +159,14 @@ block_id_t MetadataManager::AllocateNewBlock() { new_block.dirty = true; // zero-initialize the handle memset(handle.Ptr(), 0, block_manager.GetBlockSize()); - AddBlock(std::move(new_block)); + + block_lock.lock(); + AddBlock(block_lock, std::move(new_block)); return new_block_id; } -void MetadataManager::AddBlock(MetadataBlock new_block, bool if_exists) { +void MetadataManager::AddBlock(unique_lock &block_lock, MetadataBlock new_block, bool if_exists) { + D_ASSERT(block_lock.owns_lock()); if (blocks.find(new_block.block_id) != blocks.end()) { if (if_exists) { return; @@ -155,15 +176,17 @@ void MetadataManager::AddBlock(MetadataBlock new_block, bool if_exists) { blocks[new_block.block_id] = std::move(new_block); } -void MetadataManager::AddAndRegisterBlock(MetadataBlock block) { +void MetadataManager::AddAndRegisterBlock(unique_lock &block_lock, MetadataBlock block) { if (block.block) { throw InternalException("Calling AddAndRegisterBlock on block that already exists"); } if (block.block_id >= MAXIMUM_BLOCK) { throw InternalException("AddAndRegisterBlock called with a transient block id"); } + block_lock.unlock(); block.block = block_manager.RegisterBlock(block.block_id); - AddBlock(std::move(block), true); + block_lock.lock(); + AddBlock(block_lock, std::move(block), true); } MetaBlockPointer MetadataManager::GetDiskPointer(const MetadataPointer &pointer, uint32_t offset) { @@ -181,8 +204,14 @@ uint32_t MetaBlockPointer::GetBlockIndex() const { } MetadataPointer MetadataManager::FromDiskPointer(MetaBlockPointer pointer) { + unique_lock guard(block_lock); + return FromDiskPointerInternal(guard, pointer); +} + +MetadataPointer MetadataManager::FromDiskPointerInternal(unique_lock &block_lock, MetaBlockPointer pointer) { auto block_id = pointer.GetBlockId(); auto index = pointer.GetBlockIndex(); + auto entry = blocks.find(block_id); if (entry == blocks.end()) { // LCOV_EXCL_START throw InternalException("Failed to load metadata pointer (id %llu, idx %llu, ptr %llu)\n", block_id, index, @@ -195,11 +224,13 @@ MetadataPointer MetadataManager::FromDiskPointer(MetaBlockPointer pointer) { } MetadataPointer MetadataManager::RegisterDiskPointer(MetaBlockPointer pointer) { + unique_lock guard(block_lock); + auto block_id = pointer.GetBlockId(); MetadataBlock block; block.block_id = block_id; - AddAndRegisterBlock(std::move(block)); - return FromDiskPointer(pointer); + AddAndRegisterBlock(guard, std::move(block)); + return FromDiskPointerInternal(guard, pointer); } BlockPointer MetadataManager::ToBlockPointer(MetaBlockPointer meta_pointer, const idx_t metadata_block_size) { @@ -232,6 +263,7 @@ void MetadataManager::Flush() { // Write the blocks of the metadata manager to disk. const idx_t total_metadata_size = GetMetadataBlockSize() * METADATA_BLOCK_COUNT; + unique_lock guard(block_lock, std::defer_lock); for (auto &kv : blocks) { auto &block = kv.second; if (!block.dirty) { @@ -245,9 +277,13 @@ void MetadataManager::Flush() { memset(handle.Ptr() + total_metadata_size, 0, block_manager.GetBlockSize() - total_metadata_size); D_ASSERT(kv.first == block.block_id); if (block.block->BlockId() >= MAXIMUM_BLOCK) { + auto new_block = + block_manager.ConvertToPersistent(QueryContext(), kv.first, block.block, std::move(handle)); + // Convert the temporary block to a persistent block. - block.block = - block_manager.ConvertToPersistent(QueryContext(), kv.first, std::move(block.block), std::move(handle)); + guard.lock(); + block.block = std::move(new_block); + guard.unlock(); } else { // Already a persistent block, so we only need to write it. D_ASSERT(block.block->BlockId() == block.block_id); @@ -269,10 +305,12 @@ void MetadataManager::Read(ReadStream &source) { auto block_count = source.Read(); for (idx_t i = 0; i < block_count; i++) { auto block = MetadataBlock::Read(source); + + unique_lock guard(block_lock); auto entry = blocks.find(block.block_id); if (entry == blocks.end()) { // block does not exist yet - AddAndRegisterBlock(std::move(block)); + AddAndRegisterBlock(guard, std::move(block)); } else { // block was already created - only copy over the free list entry->second.free_blocks = std::move(block.free_blocks); @@ -349,6 +387,7 @@ void MetadataManager::MarkBlocksAsModified() { } modified_blocks.clear(); + for (auto &kv : blocks) { auto &block = kv.second; idx_t free_list = block.FreeBlocksToInteger(); @@ -361,6 +400,7 @@ void MetadataManager::ClearModifiedBlocks(const vector &pointe if (pointers.empty()) { return; } + unique_lock guard(block_lock); for (auto &pointer : pointers) { auto block_id = pointer.GetBlockId(); auto block_index = pointer.GetBlockIndex(); @@ -376,6 +416,7 @@ void MetadataManager::ClearModifiedBlocks(const vector &pointe vector MetadataManager::GetMetadataInfo() const { vector result; + unique_lock guard(block_lock); for (auto &block : blocks) { MetadataBlockInfo block_info; block_info.block_id = block.second.block_id; @@ -393,17 +434,18 @@ vector MetadataManager::GetMetadataInfo() const { vector> MetadataManager::GetBlocks() const { vector> result; + unique_lock guard(block_lock); for (auto &entry : blocks) { result.push_back(entry.second.block); } return result; } -block_id_t MetadataManager::PeekNextBlockId() { +block_id_t MetadataManager::PeekNextBlockId() const { return block_manager.PeekFreeBlockId(); } -block_id_t MetadataManager::GetNextBlockId() { +block_id_t MetadataManager::GetNextBlockId() const { return block_manager.GetFreeBlockId(); } diff --git a/src/duckdb/src/storage/statistics/string_stats.cpp b/src/duckdb/src/storage/statistics/string_stats.cpp index e7d232692..3fe22ecac 100644 --- a/src/duckdb/src/storage/statistics/string_stats.cpp +++ b/src/duckdb/src/storage/statistics/string_stats.cpp @@ -170,6 +170,14 @@ void StringStats::Update(BaseStatistics &stats, const string_t &value) { } } +void StringStats::SetMin(BaseStatistics &stats, const string_t &value) { + ConstructValue(const_data_ptr_cast(value.GetData()), value.GetSize(), GetDataUnsafe(stats).min); +} + +void StringStats::SetMax(BaseStatistics &stats, const string_t &value) { + ConstructValue(const_data_ptr_cast(value.GetData()), value.GetSize(), GetDataUnsafe(stats).max); +} + void StringStats::Merge(BaseStatistics &stats, const BaseStatistics &other) { if (other.GetType().id() == LogicalTypeId::VALIDITY) { return; diff --git a/src/duckdb/src/storage/table/array_column_data.cpp b/src/duckdb/src/storage/table/array_column_data.cpp index 05964339a..7c8a12f13 100644 --- a/src/duckdb/src/storage/table/array_column_data.cpp +++ b/src/duckdb/src/storage/table/array_column_data.cpp @@ -97,7 +97,7 @@ idx_t ArrayColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t c void ArrayColumnData::Select(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result, SelectionVector &sel, idx_t sel_count) { - bool is_supported = !child_column->type.IsNested(); + bool is_supported = !child_column->type.IsNested() && child_column->type.InternalType() != PhysicalType::VARCHAR; if (!is_supported) { ColumnData::Select(transaction, vector_index, state, result, sel, sel_count); return; diff --git a/src/duckdb/src/storage/table/column_data.cpp b/src/duckdb/src/storage/table/column_data.cpp index 9200d7bef..c212fcb18 100644 --- a/src/duckdb/src/storage/table/column_data.cpp +++ b/src/duckdb/src/storage/table/column_data.cpp @@ -873,6 +873,17 @@ PersistentColumnData ColumnData::Serialize() { return result; } +void RealignColumnData(PersistentColumnData &column_data, idx_t new_start) { + idx_t current_start = new_start; + for (auto &pointer : column_data.pointers) { + pointer.row_start = current_start; + current_start += pointer.tuple_count; + } + for (auto &child : column_data.child_columns) { + RealignColumnData(child, new_start); + } +} + shared_ptr ColumnData::Deserialize(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row, ReadStream &source, const LogicalType &type) { auto entry = ColumnData::CreateColumn(block_manager, info, column_index, start_row, type, nullptr); @@ -890,6 +901,9 @@ shared_ptr ColumnData::Deserialize(BlockManager &block_manager, Data deserializer.Unset(); deserializer.End(); + // re-align data segments, in case our start_row has changed + RealignColumnData(persistent_column_data, start_row); + // initialize the column entry->InitializeColumn(persistent_column_data, entry->stats->statistics); return entry; diff --git a/src/duckdb/src/storage/table/column_data_checkpointer.cpp b/src/duckdb/src/storage/table/column_data_checkpointer.cpp index 1cf1d7de2..68c35f842 100644 --- a/src/duckdb/src/storage/table/column_data_checkpointer.cpp +++ b/src/duckdb/src/storage/table/column_data_checkpointer.cpp @@ -376,8 +376,22 @@ void ColumnDataCheckpointer::WritePersistentSegments(ColumnCheckpointState &stat for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) { auto segment = nodes[segment_idx].node.get(); if (segment->start != current_row) { + string extra_info; + for (auto &s : nodes) { + extra_info += "\n"; + extra_info += StringUtil::Format("Start %d, count %d", s.node->start, s.node->count.load()); + } + const_reference root = col_data; + while (root.get().HasParent()) { + root = root.get().Parent(); + } throw InternalException( - "Failure in RowGroup::Checkpoint - column data pointer is unaligned with row group start"); + "Failure in RowGroup::Checkpoint - column data pointer is unaligned with row group " + "start\nRow group start: %d\nRow group count %d\nCurrent row: %d\nSegment start: %d\nColumn index: " + "%d\nColumn type: %s\nRoot type: %s\nTable: %s.%s\nAll segments:%s", + row_group.start, row_group.count.load(), current_row, segment->start, root.get().column_index, + col_data.type, root.get().type, root.get().info.GetSchemaName(), root.get().info.GetTableName(), + extra_info); } current_row += segment->count; auto pointer = segment->GetDataPointer(); diff --git a/src/duckdb/src/storage/table/row_group_collection.cpp b/src/duckdb/src/storage/table/row_group_collection.cpp index 7e195cdc1..0b14bdf29 100644 --- a/src/duckdb/src/storage/table/row_group_collection.cpp +++ b/src/duckdb/src/storage/table/row_group_collection.cpp @@ -1008,8 +1008,8 @@ bool RowGroupCollection::ScheduleVacuumTasks(CollectionCheckpointState &checkpoi } idx_t merge_rows; idx_t next_idx = 0; - idx_t merge_count; - idx_t target_count; + idx_t merge_count = 0; + idx_t target_count = 0; bool perform_merge = false; // check if we can merge row groups adjacent to the current segment_idx // we try merging row groups into batches of 1-3 row groups @@ -1061,6 +1061,8 @@ bool RowGroupCollection::ScheduleVacuumTasks(CollectionCheckpointState &checkpoi return false; } // schedule the vacuum task + DUCKDB_LOG(checkpoint_state.writer.GetDatabase(), CheckpointLogType, GetAttached(), *info, segment_idx, merge_count, + target_count, merge_rows, state.row_start); auto vacuum_task = make_uniq(checkpoint_state, state, segment_idx, merge_count, target_count, merge_rows, state.row_start); checkpoint_state.executor->ScheduleTask(std::move(vacuum_task)); @@ -1107,6 +1109,8 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl // schedule a checkpoint task for this row group entry.node->MoveToCollection(*this, vacuum_state.row_start); if (writer.GetCheckpointType() != CheckpointType::VACUUM_ONLY) { + DUCKDB_LOG(checkpoint_state.writer.GetDatabase(), CheckpointLogType, GetAttached(), *info, segment_idx, + *entry.node); auto checkpoint_task = GetCheckpointTask(checkpoint_state, segment_idx); checkpoint_state.executor->ScheduleTask(std::move(checkpoint_task)); } diff --git a/src/duckdb/third_party/httplib/httplib.hpp b/src/duckdb/third_party/httplib/httplib.hpp index 4aa0458dc..409c47d0b 100644 --- a/src/duckdb/third_party/httplib/httplib.hpp +++ b/src/duckdb/third_party/httplib/httplib.hpp @@ -7077,7 +7077,12 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) { } auto location = res.get_header_value("location"); - if (location.empty()) { return false; } + if (location.empty()) { + // s3 requests will not return a location header, and instead a + // X-Amx-Region-Bucket header. Return true so all response headers + // are returned to the httpfs/calling extension + return true; + } const Regex re( R"((?:(https?):)?(?://(?:\[([\d:]+)\]|([^:/?#]+))(?::(\d+))?)?([^?#]*)(\?[^#]*)?(?:#.*)?)"); diff --git a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp index f59aac815..fc4104f94 100644 --- a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +++ b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp @@ -348,17 +348,17 @@ #include "extension/icu/third_party/icu/i18n/wintzimpl.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp" - -#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp" #include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp" #include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp" + +#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp" diff --git a/src/duckdb/ub_src_function_table_system.cpp b/src/duckdb/ub_src_function_table_system.cpp index afa17b21b..5ca818791 100644 --- a/src/duckdb/ub_src_function_table_system.cpp +++ b/src/duckdb/ub_src_function_table_system.cpp @@ -1,3 +1,5 @@ +#include "src/function/table/system/duckdb_connection_count.cpp" + #include "src/function/table/system/duckdb_approx_database_count.cpp" #include "src/function/table/system/duckdb_columns.cpp"