diff --git a/CMakeLists.txt b/CMakeLists.txt index e340e7b60..92ac9f2da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -340,46 +340,46 @@ set(DUCKDB_SRC_FILES src/duckdb/third_party/zstd/dict/divsufsort.cpp src/duckdb/third_party/zstd/dict/fastcover.cpp src/duckdb/third_party/zstd/dict/zdict.cpp + src/duckdb/extension/core_functions/function_list.cpp src/duckdb/extension/core_functions/core_functions_extension.cpp src/duckdb/extension/core_functions/lambda_functions.cpp - src/duckdb/extension/core_functions/function_list.cpp - src/duckdb/ub_extension_core_functions_aggregate_nested.cpp - src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp - src/duckdb/ub_extension_core_functions_aggregate_regression.cpp - src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp - src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp src/duckdb/ub_extension_core_functions_scalar_string.cpp - src/duckdb/ub_extension_core_functions_scalar_enum.cpp - src/duckdb/ub_extension_core_functions_scalar_blob.cpp - src/duckdb/ub_extension_core_functions_scalar_map.cpp - src/duckdb/ub_extension_core_functions_scalar_array.cpp - src/duckdb/ub_extension_core_functions_scalar_bit.cpp src/duckdb/ub_extension_core_functions_scalar_union.cpp + src/duckdb/ub_extension_core_functions_scalar_blob.cpp src/duckdb/ub_extension_core_functions_scalar_debug.cpp - src/duckdb/ub_extension_core_functions_scalar_struct.cpp src/duckdb/ub_extension_core_functions_scalar_random.cpp - src/duckdb/ub_extension_core_functions_scalar_date.cpp + src/duckdb/ub_extension_core_functions_scalar_operators.cpp src/duckdb/ub_extension_core_functions_scalar_list.cpp src/duckdb/ub_extension_core_functions_scalar_math.cpp - src/duckdb/ub_extension_core_functions_scalar_operators.cpp + src/duckdb/ub_extension_core_functions_scalar_enum.cpp + src/duckdb/ub_extension_core_functions_scalar_array.cpp + src/duckdb/ub_extension_core_functions_scalar_date.cpp + src/duckdb/ub_extension_core_functions_scalar_map.cpp src/duckdb/ub_extension_core_functions_scalar_generic.cpp + src/duckdb/ub_extension_core_functions_scalar_bit.cpp + src/duckdb/ub_extension_core_functions_scalar_struct.cpp + src/duckdb/ub_extension_core_functions_aggregate_regression.cpp + src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp + src/duckdb/ub_extension_core_functions_aggregate_nested.cpp + src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp + src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp src/duckdb/extension/parquet/parquet_extension.cpp - src/duckdb/extension/parquet/parquet_file_metadata_cache.cpp - src/duckdb/extension/parquet/serialize_parquet.cpp - src/duckdb/extension/parquet/parquet_metadata.cpp - src/duckdb/extension/parquet/parquet_crypto.cpp - src/duckdb/extension/parquet/parquet_float16.cpp + src/duckdb/extension/parquet/zstd_file_system.cpp + src/duckdb/extension/parquet/geo_parquet.cpp src/duckdb/extension/parquet/parquet_writer.cpp - src/duckdb/extension/parquet/parquet_statistics.cpp - src/duckdb/extension/parquet/column_reader.cpp - src/duckdb/extension/parquet/parquet_reader.cpp src/duckdb/extension/parquet/column_writer.cpp - src/duckdb/extension/parquet/geo_parquet.cpp src/duckdb/extension/parquet/parquet_timestamp.cpp + src/duckdb/extension/parquet/parquet_crypto.cpp + src/duckdb/extension/parquet/parquet_file_metadata_cache.cpp + src/duckdb/extension/parquet/parquet_reader.cpp src/duckdb/extension/parquet/parquet_multi_file_info.cpp - src/duckdb/extension/parquet/zstd_file_system.cpp - src/duckdb/ub_extension_parquet_decoder.cpp + src/duckdb/extension/parquet/column_reader.cpp + src/duckdb/extension/parquet/serialize_parquet.cpp + src/duckdb/extension/parquet/parquet_float16.cpp + src/duckdb/extension/parquet/parquet_metadata.cpp + src/duckdb/extension/parquet/parquet_statistics.cpp src/duckdb/ub_extension_parquet_writer.cpp + src/duckdb/ub_extension_parquet_decoder.cpp src/duckdb/ub_extension_parquet_reader.cpp src/duckdb/third_party/parquet/parquet_types.cpp src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp @@ -419,32 +419,32 @@ set(DUCKDB_SRC_FILES src/duckdb/third_party/brotli/enc/metablock.cpp src/duckdb/third_party/brotli/enc/static_dict.cpp src/duckdb/third_party/brotli/enc/utf8_util.cpp - src/duckdb/extension/icu/./icu-datesub.cpp - src/duckdb/extension/icu/./icu_extension.cpp + src/duckdb/extension/icu/./icu-makedate.cpp + src/duckdb/extension/icu/./icu-datefunc.cpp + src/duckdb/extension/icu/./icu-timezone.cpp + src/duckdb/extension/icu/./icu-current.cpp src/duckdb/extension/icu/./icu-list-range.cpp - src/duckdb/extension/icu/./icu-strptime.cpp src/duckdb/extension/icu/./icu-dateadd.cpp src/duckdb/extension/icu/./icu-table-range.cpp - src/duckdb/extension/icu/./icu-current.cpp + src/duckdb/extension/icu/./icu-datesub.cpp + src/duckdb/extension/icu/./icu-datetrunc.cpp src/duckdb/extension/icu/./icu-timebucket.cpp - src/duckdb/extension/icu/./icu-timezone.cpp src/duckdb/extension/icu/./icu-datepart.cpp - src/duckdb/extension/icu/./icu-datefunc.cpp - src/duckdb/extension/icu/./icu-makedate.cpp - src/duckdb/extension/icu/./icu-datetrunc.cpp + src/duckdb/extension/icu/./icu-strptime.cpp + src/duckdb/extension/icu/./icu_extension.cpp src/duckdb/ub_extension_icu_third_party_icu_common.cpp src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp - src/duckdb/extension/json/json_extension.cpp src/duckdb/extension/json/json_enums.cpp - src/duckdb/extension/json/json_multi_file_info.cpp - src/duckdb/extension/json/json_reader.cpp + src/duckdb/extension/json/json_scan.cpp src/duckdb/extension/json/json_serializer.cpp src/duckdb/extension/json/json_common.cpp - src/duckdb/extension/json/json_functions.cpp - src/duckdb/extension/json/serialize_json.cpp - src/duckdb/extension/json/json_scan.cpp + src/duckdb/extension/json/json_multi_file_info.cpp src/duckdb/extension/json/json_deserializer.cpp + src/duckdb/extension/json/json_reader.cpp + src/duckdb/extension/json/serialize_json.cpp + src/duckdb/extension/json/json_functions.cpp + src/duckdb/extension/json/json_extension.cpp src/duckdb/ub_extension_json_json_functions.cpp) set(JEMALLOC_SRC_FILES diff --git a/src/duckdb/extension/core_functions/scalar/random/random.cpp b/src/duckdb/extension/core_functions/scalar/random/random.cpp index 4ffaaa3eb..2be666b69 100644 --- a/src/duckdb/extension/core_functions/scalar/random/random.cpp +++ b/src/duckdb/extension/core_functions/scalar/random/random.cpp @@ -29,7 +29,9 @@ struct ExtractTimestampUuidOperator { } // UUID v7 begins with a 48 bit big-endian Unix Epoch timestamp with millisecond granularity. - const int64_t upper = input.upper; + int64_t upper = input.upper; + // flip the top byte + upper ^= NumericLimits::Minimum(); int64_t unix_ts_milli = upper; unix_ts_milli = unix_ts_milli >> 16; diff --git a/src/duckdb/extension/icu/icu-timezone.cpp b/src/duckdb/extension/icu/icu-timezone.cpp index a5029df2c..be1a8d986 100644 --- a/src/duckdb/extension/icu/icu-timezone.cpp +++ b/src/duckdb/extension/icu/icu-timezone.cpp @@ -247,6 +247,10 @@ struct ICUToNaiveTimestamp : public ICUDateFunc { if (!input.context) { throw InternalException("Missing context for TIMESTAMPTZ to TIMESTAMP cast."); } + if (input.context->config.disable_timestamptz_casts) { + throw BinderException("Casting from TIMESTAMP WITH TIME ZONE to TIMESTAMP without an explicit time zone " + "has been disabled - use \"AT TIME ZONE ...\""); + } auto cast_data = make_uniq(make_uniq(*input.context)); diff --git a/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp b/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp index 0cb5c9be6..ff2bc270e 100644 --- a/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +++ b/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp @@ -403,7 +403,7 @@ class StandardColumnWriter : public PrimitiveColumnWriter { break; } case duckdb_parquet::Encoding::BYTE_STREAM_SPLIT: { - if (page_state.bss_initialized) { + if (!page_state.bss_initialized) { page_state.bss_encoder.BeginWrite(BufferAllocator::Get(writer.GetContext())); page_state.bss_initialized = true; } diff --git a/src/duckdb/extension/parquet/parquet_float16.cpp b/src/duckdb/extension/parquet/parquet_float16.cpp index 1694d70d6..968b6533b 100644 --- a/src/duckdb/extension/parquet/parquet_float16.cpp +++ b/src/duckdb/extension/parquet/parquet_float16.cpp @@ -11,7 +11,9 @@ float Float16ToFloat32(const uint16_t &float16_value) { uint32_t sign = float16_value >> 15; uint32_t exponent = (float16_value >> 10) & 0x1F; uint32_t fraction = (float16_value & 0x3FF); - uint32_t float32_value; + // Avoid strict aliasing issues and compiler warnings + uint32_t float32_value = 0; + if (exponent == 0) { if (fraction == 0) { // zero @@ -39,7 +41,7 @@ float Float16ToFloat32(const uint16_t &float16_value) { float32_value = (sign << 31) | ((exponent + (127 - 15)) << 23) | (fraction << 13); } - return *reinterpret_cast(&float32_value); + return Load(const_data_ptr_cast(&float32_value)); } } // namespace duckdb diff --git a/src/duckdb/extension/parquet/parquet_reader.cpp b/src/duckdb/extension/parquet/parquet_reader.cpp index 618cc7a91..638a0732d 100644 --- a/src/duckdb/extension/parquet/parquet_reader.cpp +++ b/src/duckdb/extension/parquet/parquet_reader.cpp @@ -1116,7 +1116,7 @@ void ParquetReader::InitializeScan(ClientContext &context, ParquetReaderScanStat state.prefetch_mode = false; } - state.file_handle = fs.OpenFile(file_handle->GetPath(), flags); + state.file_handle = fs.OpenFile(file, flags); } state.adaptive_filter.reset(); state.scan_filters.clear(); diff --git a/src/duckdb/extension/parquet/reader/struct_column_reader.cpp b/src/duckdb/extension/parquet/reader/struct_column_reader.cpp index 334837335..996c356c9 100644 --- a/src/duckdb/extension/parquet/reader/struct_column_reader.cpp +++ b/src/duckdb/extension/parquet/reader/struct_column_reader.cpp @@ -118,12 +118,21 @@ static bool TypeHasExactRowCount(const LogicalType &type) { } idx_t StructColumnReader::GroupRowsAvailable() { - for (idx_t i = 0; i < child_readers.size(); i++) { - if (TypeHasExactRowCount(child_readers[i]->Type())) { - return child_readers[i]->GroupRowsAvailable(); + for (auto &child : child_readers) { + if (!child) { + continue; + } + if (TypeHasExactRowCount(child->Type())) { + return child->GroupRowsAvailable(); + } + } + for (auto &child : child_readers) { + if (!child) { + continue; } + return child->GroupRowsAvailable(); } - return child_readers[0]->GroupRowsAvailable(); + throw InternalException("No projected columns in struct?"); } } // namespace duckdb diff --git a/src/duckdb/src/common/local_file_system.cpp b/src/duckdb/src/common/local_file_system.cpp index 2c2f17545..1ba97c09b 100644 --- a/src/duckdb/src/common/local_file_system.cpp +++ b/src/duckdb/src/common/local_file_system.cpp @@ -838,13 +838,13 @@ static string AdditionalLockInfo(const std::wstring path) { status = RmStartSession(&session, 0, session_key); if (status != ERROR_SUCCESS) { - return ""; + return string(); } PCWSTR path_ptr = path.c_str(); status = RmRegisterResources(session, 1, &path_ptr, 0, NULL, 0, NULL); if (status != ERROR_SUCCESS) { - return ""; + return string(); } UINT process_info_size_needed, process_info_size; @@ -852,7 +852,7 @@ static string AdditionalLockInfo(const std::wstring path) { process_info_size = 0; status = RmGetList(session, &process_info_size_needed, &process_info_size, NULL, &reason); if (status != ERROR_MORE_DATA || process_info_size_needed == 0) { - return ""; + return string(); } // allocate @@ -866,8 +866,7 @@ static string AdditionalLockInfo(const std::wstring path) { return ""; } - string conflict_string = "File is already open in "; - + string conflict_string; for (UINT process_idx = 0; process_idx < process_info_size; process_idx++) { string process_name = WindowsUtil::UnicodeToUTF8(process_info[process_idx].strAppName); auto pid = process_info[process_idx].Process.dwProcessId; @@ -886,7 +885,10 @@ static string AdditionalLockInfo(const std::wstring path) { } RmEndSession(session); - return conflict_string; + if (conflict_string.empty()) { + return string(); + } + return "File is already open in " + conflict_string; } bool LocalFileSystem::IsPrivateFile(const string &path_p, FileOpener *opener) { @@ -950,12 +952,11 @@ unique_ptr LocalFileSystem::OpenFile(const string &path_p, FileOpenF } auto error = LocalFileSystem::GetLastErrorAsString(); - auto better_error = AdditionalLockInfo(unicode_path); - if (!better_error.empty()) { - throw IOException(better_error); - } else { - throw IOException("Cannot open file \"%s\": %s", path.c_str(), error); + auto extended_error = AdditionalLockInfo(unicode_path); + if (!extended_error.empty()) { + extended_error = "\n" + extended_error; } + throw IOException("Cannot open file \"%s\": %s%s", path.c_str(), error, extended_error); } auto handle = make_uniq(*this, path.c_str(), hFile, flags); if (flags.OpenForAppending()) { diff --git a/src/duckdb/src/common/multi_file/multi_file_column_mapper.cpp b/src/duckdb/src/common/multi_file/multi_file_column_mapper.cpp index b55999552..df824c22d 100644 --- a/src/duckdb/src/common/multi_file/multi_file_column_mapper.cpp +++ b/src/duckdb/src/common/multi_file/multi_file_column_mapper.cpp @@ -297,6 +297,19 @@ ColumnMapResult MapColumnList(ClientContext &context, const MultiFileColumnDefin result.column_map = Value::STRUCT(std::move(child_list)); } } + if (is_selected && child_map.default_value) { + // we have default values at a previous level wrap it in a "list" + child_list_t default_type_list; + default_type_list.emplace_back("list", child_map.default_value->return_type); + vector> default_expressions; + child_map.default_value->alias = "list"; + default_expressions.push_back(std::move(child_map.default_value)); + auto default_type = LogicalType::STRUCT(std::move(default_type_list)); + auto struct_pack_fun = StructPackFun::GetFunction(); + auto bind_data = make_uniq(default_type); + result.default_value = make_uniq(std::move(default_type), std::move(struct_pack_fun), + std::move(default_expressions), std::move(bind_data)); + } result.column_index = make_uniq(local_id.GetId(), std::move(child_indexes)); result.mapping = std::move(mapping); return result; @@ -347,7 +360,7 @@ ColumnMapResult MapColumnMap(ClientContext &context, const MultiFileColumnDefini auto nested_mapper = mapper.Create(local_key_value.children); child_list_t column_mapping; - unique_ptr default_expression; + vector> default_expressions; unordered_map> selected_children; if (global_index.HasChildren()) { //! FIXME: is this expected for maps?? @@ -378,6 +391,10 @@ ColumnMapResult MapColumnMap(ClientContext &context, const MultiFileColumnDefini // found a column mapping for the component - emplace it column_mapping.emplace_back(name, std::move(map_result.column_map)); } + if (map_result.default_value) { + map_result.default_value->alias = name; + default_expressions.push_back(std::move(map_result.default_value)); + } } ColumnMapResult result; @@ -393,6 +410,18 @@ ColumnMapResult MapColumnMap(ClientContext &context, const MultiFileColumnDefini result.column_map = Value::STRUCT(std::move(child_list)); } } + if (!default_expressions.empty()) { + // we have default values at a previous level wrap it in a "list" + child_list_t default_type_list; + for (auto &expr : default_expressions) { + default_type_list.emplace_back(expr->GetAlias(), expr->return_type); + } + auto default_type = LogicalType::STRUCT(std::move(default_type_list)); + auto struct_pack_fun = StructPackFun::GetFunction(); + auto bind_data = make_uniq(default_type); + result.default_value = make_uniq(std::move(default_type), std::move(struct_pack_fun), + std::move(default_expressions), std::move(bind_data)); + } vector map_indexes; map_indexes.emplace_back(0, std::move(child_indexes)); @@ -563,11 +592,6 @@ unique_ptr ConstructMapExpression(ClientContext &context, idx_t loca } return expr; } - // struct column - generate a remap_struct - but only if we have any columns to remap - if (mapping.column_map.IsNull()) { - // no columns to map - emit the default value directly - return std::move(mapping.default_value); - } // generate the remap_struct function call vector> children; children.push_back(std::move(expr)); diff --git a/src/duckdb/src/common/types.cpp b/src/duckdb/src/common/types.cpp index d0d5041f8..c31852891 100644 --- a/src/duckdb/src/common/types.cpp +++ b/src/duckdb/src/common/types.cpp @@ -648,6 +648,34 @@ bool LogicalType::IsIntegral() const { } } +bool LogicalType::IsSigned() const { + switch (id_) { + case LogicalTypeId::TINYINT: + case LogicalTypeId::SMALLINT: + case LogicalTypeId::INTEGER: + case LogicalTypeId::BIGINT: + case LogicalTypeId::HUGEINT: + return true; + default: + break; + } + return false; +} + +bool LogicalType::IsUnsigned() const { + switch (id_) { + case LogicalTypeId::UTINYINT: + case LogicalTypeId::USMALLINT: + case LogicalTypeId::UINTEGER: + case LogicalTypeId::UBIGINT: + case LogicalTypeId::UHUGEINT: + return true; + default: + break; + } + return false; +} + bool LogicalType::IsFloating() const { switch (id_) { case LogicalTypeId::FLOAT: diff --git a/src/duckdb/src/common/types/row/tuple_data_segment.cpp b/src/duckdb/src/common/types/row/tuple_data_segment.cpp index 1b69dbf83..73660e15a 100644 --- a/src/duckdb/src/common/types/row/tuple_data_segment.cpp +++ b/src/duckdb/src/common/types/row/tuple_data_segment.cpp @@ -26,7 +26,7 @@ static inline void SwapTupleDataChunk(TupleDataChunk &a, TupleDataChunk &b) noex std::swap(a.lock, b.lock); } -TupleDataChunk::TupleDataChunk(TupleDataChunk &&other) noexcept { +TupleDataChunk::TupleDataChunk(TupleDataChunk &&other) noexcept : count(0) { SwapTupleDataChunk(*this, other); } diff --git a/src/duckdb/src/common/types/uuid.cpp b/src/duckdb/src/common/types/uuid.cpp index 16563a66c..9ab53eef9 100644 --- a/src/duckdb/src/common/types/uuid.cpp +++ b/src/duckdb/src/common/types/uuid.cpp @@ -210,7 +210,10 @@ hugeint_t UUIDv7::GenerateRandomUUID(RandomEngine &engine) { // Fill in variant field. bytes[8] = (bytes[8] & 0x3f) | 0x80; - return Convert(bytes); + // Flip the top byte + auto result = Convert(bytes); + result.upper ^= NumericLimits::Minimum(); + return result; } hugeint_t UUIDv7::GenerateRandomUUID() { diff --git a/src/duckdb/src/execution/column_binding_resolver.cpp b/src/duckdb/src/execution/column_binding_resolver.cpp index f561051c3..ff4c23afc 100644 --- a/src/duckdb/src/execution/column_binding_resolver.cpp +++ b/src/duckdb/src/execution/column_binding_resolver.cpp @@ -44,40 +44,32 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) { } // finally update the bindings with the result bindings of the join bindings = op.GetColumnBindings(); + types = op.types; return; } case LogicalOperatorType::LOGICAL_DELIM_JOIN: { auto &comp_join = op.Cast(); - // depending on whether the delim join has been flipped, get the appropriate bindings - if (comp_join.delim_flipped) { - VisitOperator(*comp_join.children[1]); - for (auto &cond : comp_join.conditions) { - VisitExpression(&cond.right); - } - } else { - VisitOperator(*comp_join.children[0]); - for (auto &cond : comp_join.conditions) { - VisitExpression(&cond.left); - } + // get bindings from the duplicate-eliminated side + auto &delim_side = comp_join.delim_flipped ? *comp_join.children[1] : *comp_join.children[0]; + VisitOperator(delim_side); + for (auto &cond : comp_join.conditions) { + auto &expr = comp_join.delim_flipped ? cond.right : cond.left; + VisitExpression(&expr); } // visit the duplicate eliminated columns for (auto &expr : comp_join.duplicate_eliminated_columns) { VisitExpression(&expr); } - // now get the other side - if (comp_join.delim_flipped) { - VisitOperator(*comp_join.children[0]); - for (auto &cond : comp_join.conditions) { - VisitExpression(&cond.left); - } - } else { - VisitOperator(*comp_join.children[1]); - for (auto &cond : comp_join.conditions) { - VisitExpression(&cond.right); - } + // now the other side + auto &other_side = comp_join.delim_flipped ? *comp_join.children[0] : *comp_join.children[1]; + VisitOperator(other_side); + for (auto &cond : comp_join.conditions) { + auto &expr = comp_join.delim_flipped ? cond.left : cond.right; + VisitExpression(&expr); } // finally update the bindings with the result bindings of the join bindings = op.GetColumnBindings(); + types = op.types; return; } case LogicalOperatorType::LOGICAL_ANY_JOIN: { @@ -86,10 +78,13 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) { // this operator VisitOperatorChildren(op); bindings = op.GetColumnBindings(); + types = op.types; auto &any_join = op.Cast(); if (any_join.join_type == JoinType::SEMI || any_join.join_type == JoinType::ANTI) { auto right_bindings = op.children[1]->GetColumnBindings(); bindings.insert(bindings.end(), right_bindings.begin(), right_bindings.end()); + auto &right_types = op.children[1]->types; + types.insert(types.end(), right_types.begin(), right_types.end()); } if (any_join.join_type == JoinType::RIGHT_SEMI || any_join.join_type == JoinType::RIGHT_ANTI) { throw InternalException("RIGHT SEMI/ANTI any join not supported yet"); @@ -102,12 +97,15 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) { // afterwards bind the expressions of the CREATE INDEX statement auto &create_index = op.Cast(); bindings = LogicalOperator::GenerateColumnBindings(0, create_index.table.GetColumns().LogicalColumnCount()); + // TODO: fill types in too (clearing skips type checks) + types.clear(); VisitOperatorExpressions(op); return; } case LogicalOperatorType::LOGICAL_GET: { //! We first need to update the current set of bindings and then visit operator expressions bindings = op.GetColumnBindings(); + types = op.types; VisitOperatorExpressions(op); return; } @@ -123,6 +121,8 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) { // Now insert our dummy bindings at the start of the bindings, // so the first 'column_count' indices of the chunk are reserved for our 'excluded' columns bindings.insert(bindings.begin(), dummy_bindings.begin(), dummy_bindings.end()); + // TODO: fill types in too (clearing skips type checks) + types.clear(); if (insert_op.on_conflict_condition) { VisitExpression(&insert_op.on_conflict_condition); } @@ -131,20 +131,25 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) { } VisitOperatorExpressions(op); bindings = op.GetColumnBindings(); + types = op.types; return; } break; } case LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR: { auto &ext_op = op.Cast(); + // Just to be very sure, we clear before and after resolving extension operator column bindings + // This skips checks, but makes sure we don't break any extension operators with type verification + types.clear(); ext_op.ResolveColumnBindings(*this, bindings); + types.clear(); return; } case LogicalOperatorType::LOGICAL_RECURSIVE_CTE: { auto &rec = op.Cast(); VisitOperatorChildren(op); bindings = op.GetColumnBindings(); - + types = op.types; for (auto &expr : rec.key_targets) { VisitExpression(&expr); } @@ -161,6 +166,7 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) { VisitOperatorExpressions(op); // finally update the current set of bindings to the current set of column bindings bindings = op.GetColumnBindings(); + types = op.types; } unique_ptr ColumnBindingResolver::VisitReplace(BoundColumnRefExpression &expr, @@ -169,6 +175,19 @@ unique_ptr ColumnBindingResolver::VisitReplace(BoundColumnRefExpress // check the current set of column bindings to see which index corresponds to the column reference for (idx_t i = 0; i < bindings.size(); i++) { if (expr.binding == bindings[i]) { + if (!types.empty()) { + if (bindings.size() != types.size()) { + throw InternalException( + "Failed to bind column reference \"%s\" [%d.%d]: inequal num bindings/types (%llu != %llu)", + expr.GetAlias(), expr.binding.table_index, expr.binding.column_index, bindings.size(), + types.size()); + } + if (expr.return_type != types[i]) { + throw InternalException("Failed to bind column reference \"%s\" [%d.%d]: inequal types (%s != %s)", + expr.GetAlias(), expr.binding.table_index, expr.binding.column_index, + expr.return_type.ToString(), types[i].ToString()); + } + } if (verify_only) { // in verification mode return nullptr; @@ -210,6 +229,7 @@ unordered_set ColumnBindingResolver::VerifyInternal(LogicalOperator &op) void ColumnBindingResolver::Verify(LogicalOperator &op) { #ifdef DEBUG + op.ResolveOperatorTypes(); ColumnBindingResolver resolver(true); resolver.VisitOperator(op); VerifyInternal(op); diff --git a/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp b/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp index 7ec754d4e..920613a6c 100644 --- a/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +++ b/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp @@ -4,7 +4,6 @@ #include "duckdb/execution/expression_executor.hpp" #include "duckdb/function/aggregate_function.hpp" #include "duckdb/parallel/thread_context.hpp" -#include "duckdb/planner/expression/bound_reference_expression.hpp" #include "duckdb/planner/expression/bound_window_expression.hpp" namespace duckdb { @@ -413,6 +412,7 @@ void StreamingWindowState::AggregateState::Execute(ExecutionContext &context, Da // Compute the arguments auto &arg_chunk = aggr_state.arg_chunk; + arg_chunk.Reset(); executor.Execute(input, arg_chunk); arg_chunk.Flatten(); diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index cbed64993..086b6145b 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -26,7 +26,9 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m : ScannerResult(states, state_machine, result_size_p), number_of_columns(NumericCast(state_machine.dialect_options.num_cols)), null_padding(state_machine.options.null_padding), ignore_errors(state_machine.options.ignore_errors.GetValue()), - extra_delimiter_bytes(state_machine.dialect_options.state_machine_options.delimiter.GetValue().size() - 1), + extra_delimiter_bytes(state_machine.dialect_options.state_machine_options.delimiter.GetValue().empty() + ? 0 + : state_machine.dialect_options.state_machine_options.delimiter.GetValue().size() - 1), error_handler(error_hander_p), iterator(iterator_p), store_line_size(store_line_size_p), csv_file_scan(std::move(csv_file_scan_p)), lines_read(lines_read_p), current_errors(scan_id, state_machine.options.IgnoreErrors()), sniffing(sniffing_p), path(std::move(path_p)) { @@ -565,6 +567,22 @@ void StringValueResult::AddPossiblyEscapedValue(StringValueResult &result, const result.cur_col_id++; result.chunk_col_id++; } else { + if (result.parse_chunk.data[result.chunk_col_id].GetType() != LogicalType::VARCHAR) { + // We cant have escapes on non varchar columns + result.current_errors.Insert(CAST_ERROR, result.cur_col_id, result.chunk_col_id, + result.last_position); + if (!result.state_machine.options.IgnoreErrors()) { + // We have to write the cast error message. + std::ostringstream error; + // Casting Error Message + error << "Could not convert string \"" << std::string(value_ptr, length) << "\" to \'" + << LogicalTypeIdToString(result.parse_types[result.chunk_col_id].type_id) << "\'"; + auto error_string = error.str(); + FullLinePosition::SanitizeError(error_string); + result.current_errors.ModifyErrorMessageOfLastError(error_string); + } + return; + } auto value = StringValueScanner::RemoveEscape( value_ptr, length, result.state_machine.dialect_options.state_machine_options.escape.GetValue(), result.state_machine.dialect_options.state_machine_options.quote.GetValue(), @@ -1446,6 +1464,23 @@ void StringValueScanner::ProcessOverBufferValue() { if (result.escaped) { if (!result.HandleTooManyColumnsError(over_buffer_string.c_str(), over_buffer_string.size())) { const auto str_ptr = over_buffer_string.c_str() + result.quoted_position; + if (result.parse_chunk.data[result.chunk_col_id].GetType() != LogicalType::VARCHAR) { + // We cant have escapes on non varchar columns + result.current_errors.Insert(CAST_ERROR, result.cur_col_id, result.chunk_col_id, + result.last_position); + if (!result.state_machine.options.IgnoreErrors()) { + // We have to write the cast error message. + std::ostringstream error; + // Casting Error Message + error << "Could not convert string \"" + << std::string(over_buffer_string.c_str(), over_buffer_string.size()) << "\" to \'" + << LogicalTypeIdToString(result.parse_types[result.chunk_col_id].type_id) << "\'"; + auto error_string = error.str(); + FullLinePosition::SanitizeError(error_string); + result.current_errors.ModifyErrorMessageOfLastError(error_string); + } + return; + } value = RemoveEscape(str_ptr, over_buffer_string.size() - 2, state_machine->dialect_options.state_machine_options.escape.GetValue(), @@ -1457,6 +1492,23 @@ void StringValueScanner::ProcessOverBufferValue() { } else { value = string_t(over_buffer_string.c_str(), UnsafeNumericCast(over_buffer_string.size())); if (result.escaped) { + if (result.parse_chunk.data[result.chunk_col_id].GetType() != LogicalType::VARCHAR) { + // We cant have escapes on non varchar columns + result.current_errors.Insert(CAST_ERROR, result.cur_col_id, result.chunk_col_id, + result.last_position); + if (!result.state_machine.options.IgnoreErrors()) { + // We have to write the cast error message. + std::ostringstream error; + // Casting Error Message + error << "Could not convert string \"" + << std::string(over_buffer_string.c_str(), over_buffer_string.size()) << "\" to \'" + << LogicalTypeIdToString(result.parse_types[result.chunk_col_id].type_id) << "\'"; + auto error_string = error.str(); + FullLinePosition::SanitizeError(error_string); + result.current_errors.ModifyErrorMessageOfLastError(error_string); + } + return; + } if (!result.HandleTooManyColumnsError(over_buffer_string.c_str(), over_buffer_string.size())) { value = RemoveEscape(over_buffer_string.c_str(), over_buffer_string.size(), diff --git a/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp b/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp index 69ba3d252..0dac4bf55 100644 --- a/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +++ b/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp @@ -118,6 +118,7 @@ class FixedBatchCopyGlobalState : public GlobalSinkState { op.function.copy_to_get_written_statistics(context, *op.bind_data, *global_state, *written_file_info->file_stats); } + initialized = true; } void AddBatchData(idx_t batch_index, unique_ptr new_batch, idx_t memory_usage) { diff --git a/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp b/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp index 33117db51..ede8495be 100644 --- a/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +++ b/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp @@ -408,12 +408,9 @@ void CheckDirectory(FileSystem &fs, const string &file_path, CopyOverwriteMode o unique_ptr PhysicalCopyToFile::GetGlobalSinkState(ClientContext &context) const { if (partition_output || per_thread_output || rotate) { auto &fs = FileSystem::GetFileSystem(context); - if (fs.FileExists(file_path)) { - // the target file exists AND is a file (not a directory) - if (fs.IsRemoteFile(file_path)) { - // for remote files we cannot do anything - as we cannot delete the file - throw IOException("Cannot write to \"%s\" - it exists and is a file, not a directory!", file_path); - } else { + if (!fs.IsRemoteFile(file_path)) { + if (fs.FileExists(file_path)) { + // the target file exists AND is a file (not a directory) // for local files we can remove the file if OVERWRITE_OR_IGNORE is enabled if (overwrite_mode == CopyOverwriteMode::COPY_OVERWRITE) { fs.RemoveFile(file_path); diff --git a/src/duckdb/src/execution/physical_plan_generator.cpp b/src/duckdb/src/execution/physical_plan_generator.cpp index eb14ba813..9f5f66dca 100644 --- a/src/duckdb/src/execution/physical_plan_generator.cpp +++ b/src/duckdb/src/execution/physical_plan_generator.cpp @@ -28,17 +28,17 @@ unique_ptr PhysicalPlanGenerator::Plan(unique_ptr PhysicalOperator &PhysicalPlanGenerator::ResolveAndPlan(unique_ptr op) { auto &profiler = QueryProfiler::Get(context); + // Resolve the types of each operator. + profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_RESOLVE_TYPES); + op->ResolveOperatorTypes(); + profiler.EndPhase(); + // Resolve the column references. profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_COLUMN_BINDING); ColumnBindingResolver resolver; resolver.VisitOperator(*op); profiler.EndPhase(); - // Resolve the types of each operator. - profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_RESOLVE_TYPES); - op->ResolveOperatorTypes(); - profiler.EndPhase(); - // Create the main physical plan. profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_CREATE_PLAN); physical_plan = PlanInternal(*op); diff --git a/src/duckdb/src/function/cast/vector_cast_helpers.cpp b/src/duckdb/src/function/cast/vector_cast_helpers.cpp index 444c37c5e..b8071f6b8 100644 --- a/src/duckdb/src/function/cast/vector_cast_helpers.cpp +++ b/src/duckdb/src/function/cast/vector_cast_helpers.cpp @@ -124,7 +124,8 @@ static string_t HandleString(Vector &vec, const char *buf, idx_t start, idx_t en bool escaped = false; bool quoted = false; - char quote_char; + // Satisfy GCC warning about uninitialized variable + char quote_char = '\0'; stack scopes; for (idx_t i = 0; i < length; i++) { auto current_char = buf[start + i]; diff --git a/src/duckdb/src/function/function.cpp b/src/duckdb/src/function/function.cpp index f434a1b84..7c9ea0cd7 100644 --- a/src/duckdb/src/function/function.cpp +++ b/src/duckdb/src/function/function.cpp @@ -36,6 +36,10 @@ bool TableFunctionData::Equals(const FunctionData &other) const { return false; } +bool FunctionData::SupportStatementCache() const { + return true; +} + Function::Function(string name_p) : name(std::move(name_p)) { } Function::~Function() { diff --git a/src/duckdb/src/function/scalar/struct/remap_struct.cpp b/src/duckdb/src/function/scalar/struct/remap_struct.cpp index be60c0fda..ad4ffb55b 100644 --- a/src/duckdb/src/function/scalar/struct/remap_struct.cpp +++ b/src/duckdb/src/function/scalar/struct/remap_struct.cpp @@ -401,6 +401,9 @@ struct RemapEntry { auto &child_types = StructType::GetChildTypes(default_type); for (idx_t child_idx = 0; child_idx < child_types.size(); child_idx++) { auto &child_default = child_types[child_idx]; + if (!result_entry->second.child_remaps || !entry->second.child_map) { + throw BinderException("No child remaps found"); + } HandleDefault(child_idx, child_default.first, child_default.second, *entry->second.child_map, *result_entry->second.child_remaps); } @@ -542,6 +545,10 @@ static unique_ptr RemapStructBind(ClientContext &context, ScalarFu if (arg->return_type.id() == LogicalTypeId::UNKNOWN) { throw ParameterNotResolvedException(); } + if (arg->return_type.id() == LogicalTypeId::SQLNULL && arg_idx == 2) { + // remap target can be NULL + continue; + } if (!arg->return_type.IsNested()) { throw BinderException("Struct remap can only remap nested types, not '%s'", arg->return_type.ToString()); } else if (arg->return_type.id() == LogicalTypeId::STRUCT && StructType::IsUnnamed(arg->return_type)) { @@ -571,11 +578,11 @@ static unique_ptr RemapStructBind(ClientContext &context, ScalarFu auto target_map = RemapIndex::GetMap(to_type); Value remap_val = ExpressionExecutor::EvaluateScalar(context, *arguments[2]); - auto &remap_types = StructType::GetChildTypes(arguments[2]->return_type); // (recursively) generate the remap entries case_insensitive_map_t remap_map; if (!remap_val.IsNull()) { + auto &remap_types = StructType::GetChildTypes(arguments[2]->return_type); auto &remap_values = StructValue::GetChildren(remap_val); for (idx_t remap_idx = 0; remap_idx < remap_values.size(); remap_idx++) { auto &remap_val = remap_values[remap_idx]; diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index dbbe6f6bc..0905aac15 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "0" +#define DUCKDB_PATCH_VERSION "1-dev69" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 3 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.3.0" +#define DUCKDB_VERSION "v1.3.1-dev69" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "71c5c07cdd" +#define DUCKDB_SOURCE_ID "51070f2fb6" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/multi_file/multi_file_states.hpp b/src/duckdb/src/include/duckdb/common/multi_file/multi_file_states.hpp index a8b755ffe..681abbd57 100644 --- a/src/duckdb/src/include/duckdb/common/multi_file/multi_file_states.hpp +++ b/src/duckdb/src/include/duckdb/common/multi_file/multi_file_states.hpp @@ -84,6 +84,9 @@ struct MultiFileBindData : public TableFunctionData { void Initialize(ClientContext &, BaseUnionData &union_data) { Initialize(std::move(union_data.reader)); } + bool SupportStatementCache() const override { + return false; + } unique_ptr Copy() const override; }; diff --git a/src/duckdb/src/include/duckdb/common/types.hpp b/src/duckdb/src/include/duckdb/common/types.hpp index ef7db7f74..75f48668d 100644 --- a/src/duckdb/src/include/duckdb/common/types.hpp +++ b/src/duckdb/src/include/duckdb/common/types.hpp @@ -349,6 +349,9 @@ struct LogicalType { DUCKDB_API void Verify() const; + DUCKDB_API bool IsSigned() const; + DUCKDB_API bool IsUnsigned() const; + DUCKDB_API bool IsValid() const; DUCKDB_API bool IsComplete() const; diff --git a/src/duckdb/src/include/duckdb/execution/column_binding_resolver.hpp b/src/duckdb/src/include/duckdb/execution/column_binding_resolver.hpp index f98aeb2cf..f24dfd96f 100644 --- a/src/duckdb/src/include/duckdb/execution/column_binding_resolver.hpp +++ b/src/duckdb/src/include/duckdb/execution/column_binding_resolver.hpp @@ -26,6 +26,7 @@ class ColumnBindingResolver : public LogicalOperatorVisitor { protected: vector bindings; + vector types; bool verify_only; unique_ptr VisitReplace(BoundColumnRefExpression &expr, unique_ptr *expr_ptr) override; diff --git a/src/duckdb/src/include/duckdb/function/function.hpp b/src/duckdb/src/include/duckdb/function/function.hpp index cdb407f10..587216421 100644 --- a/src/duckdb/src/include/duckdb/function/function.hpp +++ b/src/duckdb/src/include/duckdb/function/function.hpp @@ -61,6 +61,7 @@ struct FunctionData { DUCKDB_API virtual unique_ptr Copy() const = 0; DUCKDB_API virtual bool Equals(const FunctionData &other) const = 0; DUCKDB_API static bool Equals(const FunctionData *left, const FunctionData *right); + DUCKDB_API virtual bool SupportStatementCache() const; template TARGET &Cast() { diff --git a/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp b/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp index 644c45aeb..4366d6cd9 100644 --- a/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +++ b/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp @@ -222,6 +222,7 @@ class DefaultSecretGenerator : public DefaultGenerator { unique_ptr CreateDefaultEntryInternal(const string &entry_name); SecretManager &secret_manager; + mutex lock; case_insensitive_set_t persistent_secrets; }; diff --git a/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp b/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp index aab077005..29a4dbeaa 100644 --- a/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +++ b/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp @@ -8,6 +8,7 @@ #pragma once +#include "duckdb/main/client_context.hpp" #include "duckdb/common/constants.hpp" namespace duckdb { @@ -17,13 +18,18 @@ class Optimizer; class TopN { public: + explicit TopN(ClientContext &context); + //! Optimize ORDER BY + LIMIT to TopN unique_ptr Optimize(unique_ptr op); //! Whether we can perform the optimization on this operator - static bool CanOptimize(LogicalOperator &op); + static bool CanOptimize(LogicalOperator &op, optional_ptr context = nullptr); private: void PushdownDynamicFilters(LogicalTopN &op); + +private: + ClientContext &context; }; } // namespace duckdb diff --git a/src/duckdb/src/main/secret/secret_manager.cpp b/src/duckdb/src/main/secret/secret_manager.cpp index 67246be18..0a1c6c49a 100644 --- a/src/duckdb/src/main/secret/secret_manager.cpp +++ b/src/duckdb/src/main/secret/secret_manager.cpp @@ -646,6 +646,7 @@ DefaultSecretGenerator::DefaultSecretGenerator(Catalog &catalog, SecretManager & } unique_ptr DefaultSecretGenerator::CreateDefaultEntryInternal(const string &entry_name) { + lock_guard guard(lock); auto secret_lu = persistent_secrets.find(entry_name); if (secret_lu == persistent_secrets.end()) { return nullptr; @@ -718,6 +719,7 @@ unique_ptr DefaultSecretGenerator::CreateDefaultEntry(ClientContex vector DefaultSecretGenerator::GetDefaultEntries() { vector ret; + lock_guard guard(lock); for (const auto &res : persistent_secrets) { ret.push_back(res); } diff --git a/src/duckdb/src/main/settings/custom_settings.cpp b/src/duckdb/src/main/settings/custom_settings.cpp index 2f3d90b8f..5f779908c 100644 --- a/src/duckdb/src/main/settings/custom_settings.cpp +++ b/src/duckdb/src/main/settings/custom_settings.cpp @@ -38,6 +38,18 @@ const string GetDefaultUserAgent() { return StringUtil::Format("duckdb/%s(%s)", DuckDB::LibraryVersion(), DuckDB::Platform()); } +namespace { + +template +static DatabaseInstance &GetDB(DatabaseInstance *db) { + if (!db) { + throw InvalidInputException("Cannot change/set %s before the database is started", T::Name); + } + return *db; +} + +} // namespace + //===----------------------------------------------------------------------===// // Access Mode //===----------------------------------------------------------------------===// @@ -554,20 +566,16 @@ Value DisabledCompressionMethodsSetting::GetSetting(const ClientContext &context //===----------------------------------------------------------------------===// // Disabled Filesystems //===----------------------------------------------------------------------===// -void DisabledFilesystemsSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) { - if (!db) { - throw InternalException("disabled_filesystems can only be set in an active database"); - } - auto &fs = FileSystem::GetFileSystem(*db); +void DisabledFilesystemsSetting::SetGlobal(DatabaseInstance *db_p, DBConfig &config, const Value &input) { + auto &db = GetDB(db_p); + auto &fs = FileSystem::GetFileSystem(db); auto list = StringUtil::Split(input.ToString(), ","); fs.SetDisabledFileSystems(list); } -void DisabledFilesystemsSetting::ResetGlobal(DatabaseInstance *db, DBConfig &config) { - if (!db) { - throw InternalException("disabled_filesystems can only be set in an active database"); - } - auto &fs = FileSystem::GetFileSystem(*db); +void DisabledFilesystemsSetting::ResetGlobal(DatabaseInstance *db_p, DBConfig &config) { + auto &db = GetDB(db_p); + auto &fs = FileSystem::GetFileSystem(db); fs.SetDisabledFileSystems(vector()); } @@ -692,12 +700,14 @@ Value EnableExternalFileCacheSetting::GetSetting(const ClientContext &context) { Value EnableLogging::GetSetting(const ClientContext &context) { return context.db->GetLogManager().GetConfig().enabled; } -void EnableLogging::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value ¶meter) { - db->GetLogManager().SetEnableLogging(parameter.GetValue()); +void EnableLogging::SetGlobal(DatabaseInstance *db_p, DBConfig &config, const Value ¶meter) { + auto &db = GetDB(db_p); + db.GetLogManager().SetEnableLogging(parameter.GetValue()); } -void EnableLogging::ResetGlobal(DatabaseInstance *db, DBConfig &config) { - db->GetLogManager().SetEnableLogging(false); +void EnableLogging::ResetGlobal(DatabaseInstance *db_p, DBConfig &config) { + auto &db = GetDB(db_p); + db.GetLogManager().SetEnableLogging(false); } //===----------------------------------------------------------------------===// @@ -706,12 +716,14 @@ void EnableLogging::ResetGlobal(DatabaseInstance *db, DBConfig &config) { Value LoggingMode::GetSetting(const ClientContext &context) { return EnumUtil::ToString(context.db->GetLogManager().GetConfig().mode); } -void LoggingMode::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value ¶meter) { - db->GetLogManager().SetLogMode(EnumUtil::FromString(parameter.GetValue())); +void LoggingMode::SetGlobal(DatabaseInstance *db_p, DBConfig &config, const Value ¶meter) { + auto &db = GetDB(db_p); + db.GetLogManager().SetLogMode(EnumUtil::FromString(parameter.GetValue())); } -void LoggingMode::ResetGlobal(DatabaseInstance *db, DBConfig &config) { - db->GetLogManager().SetLogMode(LogMode::LEVEL_ONLY); +void LoggingMode::ResetGlobal(DatabaseInstance *db_p, DBConfig &config) { + auto &db = GetDB(db_p); + db.GetLogManager().SetLogMode(LogMode::LEVEL_ONLY); } //===----------------------------------------------------------------------===// @@ -720,12 +732,14 @@ void LoggingMode::ResetGlobal(DatabaseInstance *db, DBConfig &config) { Value LoggingLevel::GetSetting(const ClientContext &context) { return EnumUtil::ToString(context.db->GetLogManager().GetConfig().level); } -void LoggingLevel::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value ¶meter) { - db->GetLogManager().SetLogLevel(EnumUtil::FromString(parameter.GetValue())); +void LoggingLevel::SetGlobal(DatabaseInstance *db_p, DBConfig &config, const Value ¶meter) { + auto &db = GetDB(db_p); + db.GetLogManager().SetLogLevel(EnumUtil::FromString(parameter.GetValue())); } -void LoggingLevel::ResetGlobal(DatabaseInstance *db, DBConfig &config) { - db->GetLogManager().SetLogLevel(LogConfig::DEFAULT_LOG_LEVEL); +void LoggingLevel::ResetGlobal(DatabaseInstance *db_p, DBConfig &config) { + auto &db = GetDB(db_p); + db.GetLogManager().SetLogLevel(LogConfig::DEFAULT_LOG_LEVEL); } //===----------------------------------------------------------------------===// @@ -734,12 +748,14 @@ void LoggingLevel::ResetGlobal(DatabaseInstance *db, DBConfig &config) { Value LoggingStorage::GetSetting(const ClientContext &context) { return context.db->GetLogManager().GetConfig().storage; } -void LoggingStorage::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value ¶meter) { - db->GetLogManager().SetLogStorage(*db, parameter.GetValue()); +void LoggingStorage::SetGlobal(DatabaseInstance *db_p, DBConfig &config, const Value ¶meter) { + auto &db = GetDB(db_p); + db.GetLogManager().SetLogStorage(db, parameter.GetValue()); } -void LoggingStorage::ResetGlobal(DatabaseInstance *db, DBConfig &config) { - db->GetLogManager().SetLogStorage(*db, LogConfig::DEFAULT_LOG_STORAGE); +void LoggingStorage::ResetGlobal(DatabaseInstance *db_p, DBConfig &config) { + auto &db = GetDB(db_p); + db.GetLogManager().SetLogStorage(db, LogConfig::DEFAULT_LOG_STORAGE); } //===----------------------------------------------------------------------===// @@ -752,18 +768,21 @@ Value EnabledLogTypes::GetSetting(const ClientContext &context) { } return StringUtil::Join(loggers, ","); } -void EnabledLogTypes::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value ¶meter) { +void EnabledLogTypes::SetGlobal(DatabaseInstance *db_p, DBConfig &config, const Value ¶meter) { + auto &db = GetDB(db_p); + auto values = StringUtil::Split(parameter.GetValue(), ","); unordered_set set; for (const auto &value : values) { set.insert(value); } - db->GetLogManager().SetEnabledLogTypes(set); + db.GetLogManager().SetEnabledLogTypes(set); } -void EnabledLogTypes::ResetGlobal(DatabaseInstance *db, DBConfig &config) { +void EnabledLogTypes::ResetGlobal(DatabaseInstance *db_p, DBConfig &config) { + auto &db = GetDB(db_p); unordered_set set; - db->GetLogManager().SetEnabledLogTypes(set); + db.GetLogManager().SetEnabledLogTypes(set); } //===----------------------------------------------------------------------===// @@ -776,18 +795,20 @@ Value DisabledLogTypes::GetSetting(const ClientContext &context) { } return StringUtil::Join(loggers, ","); } -void DisabledLogTypes::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value ¶meter) { +void DisabledLogTypes::SetGlobal(DatabaseInstance *db_p, DBConfig &config, const Value ¶meter) { + auto &db = GetDB(db_p); auto values = StringUtil::Split(parameter.GetValue(), ","); unordered_set set; for (const auto &value : values) { set.insert(value); } - db->GetLogManager().SetDisabledLogTypes(set); + db.GetLogManager().SetDisabledLogTypes(set); } -void DisabledLogTypes::ResetGlobal(DatabaseInstance *db, DBConfig &config) { +void DisabledLogTypes::ResetGlobal(DatabaseInstance *db_p, DBConfig &config) { + auto &db = GetDB(db_p); unordered_set set; - db->GetLogManager().SetDisabledLogTypes(set); + db.GetLogManager().SetDisabledLogTypes(set); } //===----------------------------------------------------------------------===// diff --git a/src/duckdb/src/optimizer/optimizer.cpp b/src/duckdb/src/optimizer/optimizer.cpp index 8c16e83a6..2e10efeba 100644 --- a/src/duckdb/src/optimizer/optimizer.cpp +++ b/src/duckdb/src/optimizer/optimizer.cpp @@ -224,7 +224,7 @@ void Optimizer::RunBuiltInOptimizers() { // transform ORDER BY + LIMIT to TopN RunOptimizer(OptimizerType::TOP_N, [&]() { - TopN topn; + TopN topn(context); plan = topn.Optimize(std::move(plan)); }); diff --git a/src/duckdb/src/optimizer/topn_optimizer.cpp b/src/duckdb/src/optimizer/topn_optimizer.cpp index bc5f75ae0..e42c748cb 100644 --- a/src/duckdb/src/optimizer/topn_optimizer.cpp +++ b/src/duckdb/src/optimizer/topn_optimizer.cpp @@ -14,7 +14,10 @@ namespace duckdb { -bool TopN::CanOptimize(LogicalOperator &op) { +TopN::TopN(ClientContext &context_p) : context(context_p) { +} + +bool TopN::CanOptimize(LogicalOperator &op, optional_ptr context) { if (op.type == LogicalOperatorType::LOGICAL_LIMIT) { auto &limit = op.Cast(); @@ -28,14 +31,21 @@ bool TopN::CanOptimize(LogicalOperator &op) { } auto child_op = op.children[0].get(); + if (context) { + // estimate child cardinality if the context is available + child_op->EstimateCardinality(*context); + } - auto constant_limit = static_cast(limit.limit_val.GetConstantValue()); - auto child_card = static_cast(child_op->estimated_cardinality); + if (child_op->has_estimated_cardinality) { + // only check if we should switch to full sorting if we have estimated cardinality + auto constant_limit = static_cast(limit.limit_val.GetConstantValue()); + auto child_card = static_cast(child_op->estimated_cardinality); - // if the child cardinality is not 98 times more than the - bool limit_is_large = constant_limit > 5000; - if (constant_limit > child_card * 0.007 && limit_is_large) { - return false; + // if the limit is > 0.7% of the child cardinality, sorting the whole table is faster + bool limit_is_large = constant_limit > 5000; + if (constant_limit > child_card * 0.007 && limit_is_large) { + return false; + } } while (child_op->type == LogicalOperatorType::LOGICAL_PROJECTION) { @@ -116,7 +126,7 @@ void TopN::PushdownDynamicFilters(LogicalTopN &op) { } unique_ptr TopN::Optimize(unique_ptr op) { - if (CanOptimize(*op)) { + if (CanOptimize(*op, &context)) { vector> projections; diff --git a/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp b/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp index 75b476c6f..5f3af038b 100644 --- a/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +++ b/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp @@ -298,6 +298,9 @@ unique_ptr Binder::Bind(BaseTableRef &ref) { unique_ptr bind_data; auto scan_function = table.GetScanFunction(context, bind_data, table_lookup); + if (bind_data && !bind_data->SupportStatementCache()) { + SetAlwaysRequireRebind(); + } // TODO: bundle the type and name vector in a struct (e.g PackedColumnMetadata) vector table_types; vector table_names; diff --git a/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp b/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp index 0864d95c7..905998854 100644 --- a/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +++ b/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp @@ -240,6 +240,9 @@ unique_ptr Binder::BindTableFunctionInternal(TableFunction &tab throw InvalidInputException("Cannot call function \"%s\" directly - it has no bind function", table_function.name); } + if (bind_data && !bind_data->SupportStatementCache()) { + SetAlwaysRequireRebind(); + } if (return_types.size() != return_names.size()) { throw InternalException("Failed to bind \"%s\": return_types/names must have same size", table_function.name); } diff --git a/src/duckdb/src/planner/expression/bound_cast_expression.cpp b/src/duckdb/src/planner/expression/bound_cast_expression.cpp index 4168bf3e5..9419a8db0 100644 --- a/src/duckdb/src/planner/expression/bound_cast_expression.cpp +++ b/src/duckdb/src/planner/expression/bound_cast_expression.cpp @@ -191,6 +191,9 @@ bool BoundCastExpression::CastIsInvertible(const LogicalType &source_type, const return false; } } + if (source_type.IsSigned() && target_type.IsUnsigned()) { + return false; + } return true; } diff --git a/src/duckdb/src/planner/expression/bound_columnref_expression.cpp b/src/duckdb/src/planner/expression/bound_columnref_expression.cpp index 1c7b15365..36ac805a3 100644 --- a/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +++ b/src/duckdb/src/planner/expression/bound_columnref_expression.cpp @@ -37,7 +37,7 @@ bool BoundColumnRefExpression::Equals(const BaseExpression &other_p) const { string BoundColumnRefExpression::GetName() const { #ifdef DEBUG if (DBConfigOptions::debug_print_bindings) { - return binding.ToString(); + return StringUtil::Format("%s (%s)", binding.ToString(), return_type.ToString()); } #endif return Expression::GetName(); diff --git a/src/duckdb/src/planner/expression/bound_reference_expression.cpp b/src/duckdb/src/planner/expression/bound_reference_expression.cpp index d96221e9b..a163b7c35 100644 --- a/src/duckdb/src/planner/expression/bound_reference_expression.cpp +++ b/src/duckdb/src/planner/expression/bound_reference_expression.cpp @@ -17,7 +17,7 @@ BoundReferenceExpression::BoundReferenceExpression(LogicalType type, storage_t i string BoundReferenceExpression::ToString() const { #ifdef DEBUG if (DBConfigOptions::debug_print_bindings) { - return "#" + to_string(index); + return StringUtil::Format("#%llu (%s)", index, return_type.ToString()); } #endif if (!alias.empty()) { diff --git a/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp b/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp index 607a0d065..58b2b5474 100644 --- a/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +++ b/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp @@ -423,7 +423,8 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoinInternal } base_binding.table_index = proj.table_index; - this->delim_offset = base_binding.column_index = plan->expressions.size() - correlated_columns.size(); + base_binding.column_index = plan->expressions.size() - correlated_columns.size(); + this->delim_offset = base_binding.column_index; this->data_offset = 0; return plan; } @@ -656,6 +657,7 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoinInternal // push into both sides plan->children[0] = PushDownDependentJoinInternal(std::move(plan->children[0]), parent_propagate_null_values, lateral_depth); + auto left_delim_offset = delim_offset; auto left_binding = this->base_binding; plan->children[1] = PushDownDependentJoinInternal(std::move(plan->children[1]), parent_propagate_null_values, lateral_depth); @@ -665,6 +667,7 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoinInternal // because the RIGHT binding might contain NULL values if (join.join_type == JoinType::LEFT) { this->base_binding = left_binding; + delim_offset = left_delim_offset; } else if (join.join_type == JoinType::RIGHT) { this->base_binding = right_binding; delim_offset += plan->children[0]->GetColumnBindings().size(); diff --git a/src/duckdb/src/storage/caching_file_system.cpp b/src/duckdb/src/storage/caching_file_system.cpp index d6781f34a..0fea0b017 100644 --- a/src/duckdb/src/storage/caching_file_system.cpp +++ b/src/duckdb/src/storage/caching_file_system.cpp @@ -113,7 +113,7 @@ BufferHandle CachingFileHandle::Read(data_ptr_t &buffer, idx_t &nr_bytes) { // If we can't seek, we can't use the cache for these calls, // because we won't be able to seek over any parts we skipped by reading from the cache - if (!external_file_cache.IsEnabled() || !GetFileHandle().CanSeek()) { + if (!external_file_cache.IsEnabled() || !CanSeek()) { result = external_file_cache.GetBufferManager().Allocate(MemoryTag::EXTERNAL_FILE_CACHE, nr_bytes); buffer = result.Ptr(); nr_bytes = NumericCast(GetFileHandle().Read(buffer, nr_bytes)); @@ -285,11 +285,16 @@ BufferHandle CachingFileHandle::TryInsertFileRange(BufferHandle &pin, data_ptr_t auto &ranges = cached_file.Ranges(guard); // Start at lower_bound (first range with location not less than location of newly created range) + const auto this_end = location + nr_bytes; auto it = ranges.lower_bound(location); if (it != ranges.begin()) { --it; } while (it != ranges.end()) { + if (it->second->location >= this_end) { + // We're past the requested location + break; + } if (it->second->GetOverlap(*new_file_range) == CachedFileRangeOverlap::FULL) { // Another thread has read a range that fully contains the requested range in the meantime auto other_pin = TryReadFromFileRange(guard, *it->second, buffer, nr_bytes, location); @@ -300,11 +305,9 @@ BufferHandle CachingFileHandle::TryInsertFileRange(BufferHandle &pin, data_ptr_t continue; } // Check if the new range overlaps with a cached one - bool break_loop = false; switch (new_file_range->GetOverlap(*it->second)) { case CachedFileRangeOverlap::NONE: - break_loop = true; // We iterated past potential overlaps - break; + break; // No overlap, still useful case CachedFileRangeOverlap::PARTIAL: break; // The newly created range does not fully contain this range, so it is still useful case CachedFileRangeOverlap::FULL: @@ -315,9 +318,6 @@ BufferHandle CachingFileHandle::TryInsertFileRange(BufferHandle &pin, data_ptr_t default: throw InternalException("Unknown CachedFileRangeOverlap"); } - if (break_loop) { - break; - } ++it; } diff --git a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp index 27e73a2cc..997d11b33 100644 --- a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +++ b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp @@ -350,15 +350,15 @@ #include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp" + #include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp" #include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp" #include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp" - -#include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp"