diff --git a/src/duckdb/extension/icu/icu-timezone.cpp b/src/duckdb/extension/icu/icu-timezone.cpp index be1a8d986..b17853c95 100644 --- a/src/duckdb/extension/icu/icu-timezone.cpp +++ b/src/duckdb/extension/icu/icu-timezone.cpp @@ -185,16 +185,20 @@ struct ICUFromNaiveTimestamp : public ICUDateFunc { } } + static void AddCast(CastFunctionSet &casts, const LogicalType &source, const LogicalType &target) { + const auto implicit_cost = CastRules::ImplicitCast(source, target); + casts.RegisterCastFunction(source, target, BindCastFromNaive, implicit_cost); + } + static void AddCasts(DatabaseInstance &db) { auto &config = DBConfig::GetConfig(db); auto &casts = config.GetCastFunctions(); - const auto implicit_cost = CastRules::ImplicitCast(LogicalType::TIMESTAMP, LogicalType::TIMESTAMP_TZ); - casts.RegisterCastFunction(LogicalType::TIMESTAMP, LogicalType::TIMESTAMP_TZ, BindCastFromNaive, implicit_cost); - casts.RegisterCastFunction(LogicalType::TIMESTAMP_MS, LogicalType::TIMESTAMP_TZ, BindCastFromNaive); - casts.RegisterCastFunction(LogicalType::TIMESTAMP_NS, LogicalType::TIMESTAMP_TZ, BindCastFromNaive); - casts.RegisterCastFunction(LogicalType::TIMESTAMP_S, LogicalType::TIMESTAMP_TZ, BindCastFromNaive); - casts.RegisterCastFunction(LogicalType::DATE, LogicalType::TIMESTAMP_TZ, BindCastFromNaive); + AddCast(casts, LogicalType::TIMESTAMP, LogicalType::TIMESTAMP_TZ); + AddCast(casts, LogicalType::TIMESTAMP_MS, LogicalType::TIMESTAMP_TZ); + AddCast(casts, LogicalType::TIMESTAMP_NS, LogicalType::TIMESTAMP_TZ); + AddCast(casts, LogicalType::TIMESTAMP_S, LogicalType::TIMESTAMP_TZ); + AddCast(casts, LogicalType::DATE, LogicalType::TIMESTAMP_TZ); } }; diff --git a/src/duckdb/extension/parquet/column_reader.cpp b/src/duckdb/extension/parquet/column_reader.cpp index e74912c80..6b6568407 100644 --- a/src/duckdb/extension/parquet/column_reader.cpp +++ b/src/duckdb/extension/parquet/column_reader.cpp @@ -763,6 +763,8 @@ unique_ptr CreateDecimalReader(ParquetReader &reader, const Parque return make_uniq>>(reader, schema); case PhysicalType::INT64: return make_uniq>>(reader, schema); + case PhysicalType::INT128: + return make_uniq>>(reader, schema); default: throw NotImplementedException("Unimplemented internal type for CreateDecimalReader"); } diff --git a/src/duckdb/extension/parquet/include/parquet_writer.hpp b/src/duckdb/extension/parquet/include/parquet_writer.hpp index 66aecf8c1..b5292aad1 100644 --- a/src/duckdb/extension/parquet/include/parquet_writer.hpp +++ b/src/duckdb/extension/parquet/include/parquet_writer.hpp @@ -82,8 +82,9 @@ class ParquetWriter { vector names, duckdb_parquet::CompressionCodec::type codec, ChildFieldIDs field_ids, const vector> &kv_metadata, shared_ptr encryption_config, idx_t dictionary_size_limit, - idx_t string_dictionary_page_size_limit, double bloom_filter_false_positive_ratio, - int64_t compression_level, bool debug_use_openssl, ParquetVersion parquet_version); + idx_t string_dictionary_page_size_limit, bool enable_bloom_filters, + double bloom_filter_false_positive_ratio, int64_t compression_level, bool debug_use_openssl, + ParquetVersion parquet_version); ~ParquetWriter(); public: @@ -122,6 +123,9 @@ class ParquetWriter { idx_t StringDictionaryPageSizeLimit() const { return string_dictionary_page_size_limit; } + double EnableBloomFilters() const { + return enable_bloom_filters; + } double BloomFilterFalsePositiveRatio() const { return bloom_filter_false_positive_ratio; } @@ -164,6 +168,7 @@ class ParquetWriter { shared_ptr encryption_config; idx_t dictionary_size_limit; idx_t string_dictionary_page_size_limit; + bool enable_bloom_filters; double bloom_filter_false_positive_ratio; int64_t compression_level; bool debug_use_openssl; diff --git a/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp b/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp index ff2bc270e..392c2d815 100644 --- a/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +++ b/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp @@ -284,15 +284,19 @@ class StandardColumnWriter : public PrimitiveColumnWriter { auto &state = state_p.Cast>(); D_ASSERT(state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY); - state.bloom_filter = - make_uniq(state.dictionary.GetSize(), writer.BloomFilterFalsePositiveRatio()); + if (writer.EnableBloomFilters()) { + state.bloom_filter = + make_uniq(state.dictionary.GetSize(), writer.BloomFilterFalsePositiveRatio()); + } state.dictionary.IterateValues([&](const SRC &src_value, const TGT &tgt_value) { // update the statistics OP::template HandleStats(stats, tgt_value); - // update the bloom filter - auto hash = OP::template XXHash64(tgt_value); - state.bloom_filter->FilterInsert(hash); + if (state.bloom_filter) { + // update the bloom filter + auto hash = OP::template XXHash64(tgt_value); + state.bloom_filter->FilterInsert(hash); + } }); // flush the dictionary page and add it to the to-be-written pages diff --git a/src/duckdb/extension/parquet/parquet_extension.cpp b/src/duckdb/extension/parquet/parquet_extension.cpp index 66c7c9a40..234b58768 100644 --- a/src/duckdb/extension/parquet/parquet_extension.cpp +++ b/src/duckdb/extension/parquet/parquet_extension.cpp @@ -227,6 +227,7 @@ struct ParquetWriteBindData : public TableFunctionData { //! This is huge but we grow it starting from 1 MB idx_t string_dictionary_page_size_limit = PrimitiveColumnWriter::MAX_UNCOMPRESSED_DICT_PAGE_SIZE; + bool enable_bloom_filters = true; //! What false positive rate are we willing to accept for bloom filters double bloom_filter_false_positive_ratio = 0.01; @@ -373,6 +374,8 @@ unique_ptr ParquetWriteBind(ClientContext &context, CopyFunctionBi PrimitiveColumnWriter::MAX_UNCOMPRESSED_DICT_PAGE_SIZE); } bind_data->string_dictionary_page_size_limit = val; + } else if (loption == "write_bloom_filter") { + bind_data->enable_bloom_filters = BooleanValue::Get(option.second[0].DefaultCastAs(LogicalType::BOOLEAN)); } else if (loption == "bloom_filter_false_positive_ratio") { auto val = option.second[0].GetValue(); if (val <= 0) { @@ -436,8 +439,8 @@ unique_ptr ParquetWriteInitializeGlobal(ClientContext &conte context, fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec, parquet_bind.field_ids.Copy(), parquet_bind.kv_metadata, parquet_bind.encryption_config, parquet_bind.dictionary_size_limit, parquet_bind.string_dictionary_page_size_limit, - parquet_bind.bloom_filter_false_positive_ratio, parquet_bind.compression_level, parquet_bind.debug_use_openssl, - parquet_bind.parquet_version); + parquet_bind.enable_bloom_filters, parquet_bind.bloom_filter_false_positive_ratio, + parquet_bind.compression_level, parquet_bind.debug_use_openssl, parquet_bind.parquet_version); return std::move(global_state); } diff --git a/src/duckdb/extension/parquet/parquet_writer.cpp b/src/duckdb/extension/parquet/parquet_writer.cpp index e1ac44b9d..d07f92eb1 100644 --- a/src/duckdb/extension/parquet/parquet_writer.cpp +++ b/src/duckdb/extension/parquet/parquet_writer.cpp @@ -345,12 +345,14 @@ ParquetWriter::ParquetWriter(ClientContext &context, FileSystem &fs, string file vector names_p, CompressionCodec::type codec, ChildFieldIDs field_ids_p, const vector> &kv_metadata, shared_ptr encryption_config_p, idx_t dictionary_size_limit_p, - idx_t string_dictionary_page_size_limit_p, double bloom_filter_false_positive_ratio_p, - int64_t compression_level_p, bool debug_use_openssl_p, ParquetVersion parquet_version) + idx_t string_dictionary_page_size_limit_p, bool enable_bloom_filters_p, + double bloom_filter_false_positive_ratio_p, int64_t compression_level_p, + bool debug_use_openssl_p, ParquetVersion parquet_version) : context(context), file_name(std::move(file_name_p)), sql_types(std::move(types_p)), column_names(std::move(names_p)), codec(codec), field_ids(std::move(field_ids_p)), encryption_config(std::move(encryption_config_p)), dictionary_size_limit(dictionary_size_limit_p), string_dictionary_page_size_limit(string_dictionary_page_size_limit_p), + enable_bloom_filters(enable_bloom_filters_p), bloom_filter_false_positive_ratio(bloom_filter_false_positive_ratio_p), compression_level(compression_level_p), debug_use_openssl(debug_use_openssl_p), parquet_version(parquet_version), total_written(0), num_row_groups(0) { diff --git a/src/duckdb/src/catalog/catalog.cpp b/src/duckdb/src/catalog/catalog.cpp index 196940ca7..bb895d4d7 100644 --- a/src/duckdb/src/catalog/catalog.cpp +++ b/src/duckdb/src/catalog/catalog.cpp @@ -662,7 +662,7 @@ CatalogException Catalog::CreateMissingEntryException(CatalogEntryRetriever &ret break; } auto &catalog = database.get().GetCatalog(); - auto current_schemas = catalog.GetAllSchemas(context); + auto current_schemas = catalog.GetSchemas(context); for (auto ¤t_schema : current_schemas) { if (unseen_schemas.size() >= max_schema_count) { break; diff --git a/src/duckdb/src/common/arrow/arrow_type_extension.cpp b/src/duckdb/src/common/arrow/arrow_type_extension.cpp index dee514cad..9e50e7c25 100644 --- a/src/duckdb/src/common/arrow/arrow_type_extension.cpp +++ b/src/duckdb/src/common/arrow/arrow_type_extension.cpp @@ -358,7 +358,9 @@ struct ArrowBool8 { auto source_ptr = reinterpret_cast(format.data); auto result_ptr = reinterpret_cast(FlatVector::GetData(result)); for (idx_t i = 0; i < count; i++) { - result_ptr[i] = static_cast(source_ptr[i]); + if (format.validity.RowIsValid(i)) { + result_ptr[i] = static_cast(source_ptr[i]); + } } } }; @@ -380,7 +382,7 @@ void ArrowTypeExtensionSet::Initialize(const DBConfig &config) { // Types that are 1:n config.RegisterArrowExtension({"arrow.json", &ArrowJson::PopulateSchema, &ArrowJson::GetType, - make_shared_ptr(LogicalType::VARCHAR)}); + make_shared_ptr(LogicalType::JSON())}); config.RegisterArrowExtension({"DuckDB", "bit", &ArrowBit::PopulateSchema, &ArrowBit::GetType, make_shared_ptr(LogicalType::BIT), nullptr, nullptr}); diff --git a/src/duckdb/src/common/error_data.cpp b/src/duckdb/src/common/error_data.cpp index c1e564217..de6685ce9 100644 --- a/src/duckdb/src/common/error_data.cpp +++ b/src/duckdb/src/common/error_data.cpp @@ -65,6 +65,13 @@ string ErrorData::ConstructFinalMessage() const { error += "\nThis error signals an assertion failure within DuckDB. This usually occurs due to " "unexpected conditions or errors in the program's logic.\nFor more information, see " "https://duckdb.org/docs/stable/dev/internal_errors"; + + // Ensure that we print the stack trace for internal exceptions. + auto entry = extra_info.find("stack_trace_pointers"); + if (entry != extra_info.end()) { + auto stack_trace = StackTrace::ResolveStacktraceSymbols(entry->second); + error += "\n\nStack Trace:\n" + stack_trace; + } } return error; } diff --git a/src/duckdb/src/common/operator/string_cast.cpp b/src/duckdb/src/common/operator/string_cast.cpp index e152c71bf..911a966af 100644 --- a/src/duckdb/src/common/operator/string_cast.cpp +++ b/src/duckdb/src/common/operator/string_cast.cpp @@ -161,6 +161,9 @@ duckdb::string_t StringFromTimestamp(timestamp_t input, Vector &vector) { idx_t nano_length = 0; if (picos) { // If there are ps, we need all the µs + if (!time[3]) { + TimeToStringCast::FormatMicros(time[3], micro_buffer); + } time_length = 15; nano_length = 6; nano_length -= NumericCast(TimeToStringCast::FormatMicros(picos, nano_buffer)); diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp index bdb324d61..9daf97755 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp @@ -41,6 +41,11 @@ idx_t CSVIterator::BytesPerThread(const CSVReaderOptions &reader_options) { // If we are setting up the buffer size directly, we must make sure each thread will read the full buffer. return max_row_size; } + if (bytes_per_thread == 0) { + // Bytes per thread can never be zero, but it might happen if max_row_size = 0 + // Not sure why a human being would do that... + return 1; + } return bytes_per_thread; } diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 086b6145b..6a6ad454b 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -1373,7 +1373,7 @@ void StringValueScanner::ProcessOverBufferValue() { result.escaped = true; } if (states.IsComment()) { - result.comment = true; + result.SetComment(result, j); } if (states.IsInvalid()) { result.InvalidState(result); @@ -1435,7 +1435,7 @@ void StringValueScanner::ProcessOverBufferValue() { result.SetQuoted(result, j); } if (states.IsComment()) { - result.comment = true; + result.SetComment(result, j); } if (states.IsEscaped() && result.state_machine.dialect_options.state_machine_options.escape != '\0') { result.escaped = true; diff --git a/src/duckdb/src/execution/operator/join/physical_iejoin.cpp b/src/duckdb/src/execution/operator/join/physical_iejoin.cpp index 71c0e285c..384f7358a 100644 --- a/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +++ b/src/duckdb/src/execution/operator/join/physical_iejoin.cpp @@ -166,6 +166,10 @@ SinkResultType PhysicalIEJoin::Sink(ExecutionContext &context, DataChunk &chunk, auto &gstate = input.global_state.Cast(); auto &lstate = input.local_state.Cast(); + if (gstate.child == 0 && gstate.tables[1]->global_sort_state.sorted_blocks.empty() && EmptyResultIfRHSIsEmpty()) { + return SinkResultType::FINISHED; + } + gstate.Sink(chunk, lstate); if (filter_pushdown && !gstate.skip_filter_pushdown) { @@ -207,15 +211,19 @@ SinkFinalizeType PhysicalIEJoin::Finalize(Pipeline &pipeline, Event &event, Clie // for FULL/LEFT/RIGHT OUTER JOIN, initialize found_match to false for every tuple table.IntializeMatches(); } + + SinkFinalizeType res; if (gstate.child == 1 && global_sort_state.sorted_blocks.empty() && EmptyResultIfRHSIsEmpty()) { // Empty input! - return SinkFinalizeType::NO_OUTPUT_POSSIBLE; + res = SinkFinalizeType::NO_OUTPUT_POSSIBLE; + } else { + res = SinkFinalizeType::READY; } // Move to the next input child gstate.Finalize(pipeline, event); - return SinkFinalizeType::READY; + return res; } //===--------------------------------------------------------------------===// diff --git a/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp b/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp index 14cb7d4e8..c8c4077a6 100644 --- a/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +++ b/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp @@ -95,8 +95,13 @@ PhysicalPlanGenerator::PlanAsOfLoopJoin(LogicalComparisonJoin &op, PhysicalOpera asof_idx = i; arg_min_max = "arg_min"; break; - default: + case ExpressionType::COMPARE_EQUAL: + case ExpressionType::COMPARE_NOTEQUAL: + case ExpressionType::COMPARE_DISTINCT_FROM: break; + default: + // Unsupported NLJ comparison + return nullptr; } } @@ -271,7 +276,7 @@ PhysicalOperator &PhysicalPlanGenerator::PlanAsOfJoin(LogicalComparisonJoin &op) auto &config = ClientConfig::GetConfig(context); if (!config.force_asof_iejoin) { - if (op.children[0]->has_estimated_cardinality && lhs_cardinality <= config.asof_loop_join_threshold) { + if (op.children[0]->has_estimated_cardinality && lhs_cardinality < config.asof_loop_join_threshold) { auto result = PlanAsOfLoopJoin(op, left, right); if (result) { return *result; diff --git a/src/duckdb/src/function/function_list.cpp b/src/duckdb/src/function/function_list.cpp index 60b1b098c..2c77fcadb 100644 --- a/src/duckdb/src/function/function_list.cpp +++ b/src/duckdb/src/function/function_list.cpp @@ -53,8 +53,8 @@ static const StaticFunctionDefinition function[] = { DUCKDB_SCALAR_FUNCTION_SET(InternalCompressIntegralUintegerFun), DUCKDB_SCALAR_FUNCTION_SET(InternalCompressIntegralUsmallintFun), DUCKDB_SCALAR_FUNCTION_SET(InternalCompressIntegralUtinyintFun), - DUCKDB_SCALAR_FUNCTION(InternalCompressStringHugeintFun), DUCKDB_SCALAR_FUNCTION(InternalCompressStringUbigintFun), + DUCKDB_SCALAR_FUNCTION(InternalCompressStringUhugeintFun), DUCKDB_SCALAR_FUNCTION(InternalCompressStringUintegerFun), DUCKDB_SCALAR_FUNCTION(InternalCompressStringUsmallintFun), DUCKDB_SCALAR_FUNCTION(InternalCompressStringUtinyintFun), diff --git a/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp b/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp index 14576b12a..38a8b7200 100644 --- a/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +++ b/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp @@ -93,8 +93,8 @@ static scalar_function_t GetStringCompressFunctionSwitch(const LogicalType &resu return GetStringCompressFunction(result_type); case LogicalTypeId::UBIGINT: return GetStringCompressFunction(result_type); - case LogicalTypeId::HUGEINT: - return GetStringCompressFunction(result_type); + case LogicalTypeId::UHUGEINT: + return GetStringCompressFunction(result_type); default: throw InternalException("Unexpected type in GetStringCompressFunctionSwitch"); } @@ -189,8 +189,8 @@ static scalar_function_t GetStringDecompressFunctionSwitch(const LogicalType &in return GetStringDecompressFunction(input_type); case LogicalTypeId::UBIGINT: return GetStringDecompressFunction(input_type); - case LogicalTypeId::HUGEINT: - return GetStringDecompressFunction(input_type); + case LogicalTypeId::UHUGEINT: + return GetStringDecompressFunction(input_type); default: throw InternalException("Unexpected type in GetStringDecompressFunctionSwitch"); } @@ -262,8 +262,8 @@ ScalarFunction InternalCompressStringUbigintFun::GetFunction() { return CMStringCompressFun::GetFunction(LogicalType(LogicalTypeId::UBIGINT)); } -ScalarFunction InternalCompressStringHugeintFun::GetFunction() { - return CMStringCompressFun::GetFunction(LogicalType(LogicalTypeId::HUGEINT)); +ScalarFunction InternalCompressStringUhugeintFun::GetFunction() { + return CMStringCompressFun::GetFunction(LogicalType(LogicalTypeId::UHUGEINT)); } ScalarFunctionSet InternalDecompressStringFun::GetFunctions() { diff --git a/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp b/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp index 2d09a7e7f..3076027c7 100644 --- a/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp +++ b/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp @@ -8,7 +8,7 @@ const vector CMUtils::IntegralTypes() { const vector CMUtils::StringTypes() { return {LogicalType::UTINYINT, LogicalType::USMALLINT, LogicalType::UINTEGER, LogicalType::UBIGINT, - LogicalType::HUGEINT}; + LogicalType::UHUGEINT}; } // LCOV_EXCL_START diff --git a/src/duckdb/src/function/table/table_scan.cpp b/src/duckdb/src/function/table/table_scan.cpp index 23c3134f5..1cfd8de96 100644 --- a/src/duckdb/src/function/table/table_scan.cpp +++ b/src/duckdb/src/function/table/table_scan.cpp @@ -26,6 +26,8 @@ #include "duckdb/planner/filter/conjunction_filter.hpp" #include "duckdb/common/types/value_map.hpp" +#include + namespace duckdb { struct TableScanLocalState : public LocalTableFunctionState { @@ -384,75 +386,40 @@ unique_ptr DuckIndexScanInitGlobal(ClientContext &cont return std::move(g_state); } -void ExtractExpressionsFromValues(value_set_t &unique_values, BoundColumnRefExpression &bound_ref, - vector> &expressions) { - for (const auto &value : unique_values) { - auto bound_constant = make_uniq(value); - auto filter_expr = make_uniq(ExpressionType::COMPARE_EQUAL, bound_ref.Copy(), - std::move(bound_constant)); - expressions.push_back(std::move(filter_expr)); +bool ExtractComparisonsAndInFilters(TableFilter &filter, vector> &comparisons, + vector> &in_filters) { + switch (filter.filter_type) { + case TableFilterType::CONSTANT_COMPARISON: { + auto &comparison = filter.Cast(); + comparisons.push_back(comparison); + return true; } -} - -void ExtractIn(InFilter &filter, BoundColumnRefExpression &bound_ref, vector> &expressions) { - // Eliminate any duplicates. - value_set_t unique_values; - for (const auto &value : filter.values) { - if (unique_values.find(value) == unique_values.end()) { - unique_values.insert(value); + case TableFilterType::OPTIONAL_FILTER: { + auto &optional_filter = filter.Cast(); + if (!optional_filter.child_filter) { + return true; // No child filters, always OK } + return ExtractComparisonsAndInFilters(*optional_filter.child_filter, comparisons, in_filters); } - ExtractExpressionsFromValues(unique_values, bound_ref, expressions); -} - -void ExtractConjunctionAnd(ConjunctionAndFilter &filter, BoundColumnRefExpression &bound_ref, - vector> &expressions) { - if (filter.child_filters.empty()) { - return; + case TableFilterType::IN_FILTER: { + in_filters.push_back(filter.Cast()); + return true; } - - // Extract the CONSTANT_COMPARISON and IN_FILTER children. - vector> comparisons; - vector> in_filters; - - for (idx_t i = 0; i < filter.child_filters.size(); i++) { - auto &child_filter = *filter.child_filters[i]; - switch (child_filter.filter_type) { - case TableFilterType::CONSTANT_COMPARISON: { - auto &comparison = child_filter.Cast(); - comparisons.push_back(comparison); - break; - } - case TableFilterType::CONJUNCTION_AND: { - auto &conjunction = child_filter.Cast(); - ExtractConjunctionAnd(conjunction, bound_ref, expressions); - break; - } - case TableFilterType::OPTIONAL_FILTER: { - auto &optional_filter = child_filter.Cast(); - if (!optional_filter.child_filter) { - return; - } - if (optional_filter.child_filter->filter_type != TableFilterType::IN_FILTER) { - // No support for other optional filter types yet. - return; + case TableFilterType::CONJUNCTION_AND: { + auto &conjunction_and = filter.Cast(); + for (idx_t i = 0; i < conjunction_and.child_filters.size(); i++) { + if (!ExtractComparisonsAndInFilters(*conjunction_and.child_filters[i], comparisons, in_filters)) { + return false; } - auto &in_filter = optional_filter.child_filter->Cast(); - in_filters.push_back(in_filter); - break; - } - default: - // Not yet supported: filter types than CONSTANT_COMPARISON/IN_FILTER/CONJUNCTION_AND in CONJUNCTION_AND. - expressions.clear(); - return; } + return true; } - - // No support for other CONJUNCTION_AND cases yet. - if (in_filters.empty()) { - return; + default: + return false; } +} +value_set_t GetUniqueValues(vector> &comparisons, vector> &in_filters) { // Get the combined unique values of the IN filters. value_set_t unique_values; for (idx_t filter_idx = 0; filter_idx < in_filters.size(); filter_idx++) { @@ -481,31 +448,16 @@ void ExtractConjunctionAnd(ConjunctionAndFilter &filter, BoundColumnRefExpressio } } - ExtractExpressionsFromValues(unique_values, bound_ref, expressions); + return unique_values; } -void ExtractFilter(TableFilter &filter, BoundColumnRefExpression &bound_ref, - vector> &expressions) { - switch (filter.filter_type) { - case TableFilterType::OPTIONAL_FILTER: { - auto &optional_filter = filter.Cast(); - if (!optional_filter.child_filter) { - return; - } - return ExtractFilter(*optional_filter.child_filter, bound_ref, expressions); - } - case TableFilterType::IN_FILTER: { - auto &in_filter = filter.Cast(); - ExtractIn(in_filter, bound_ref, expressions); - return; - } - case TableFilterType::CONJUNCTION_AND: { - auto &conjunction_and = filter.Cast(); - ExtractConjunctionAnd(conjunction_and, bound_ref, expressions); - return; - } - default: - return; +void ExtractExpressionsFromValues(const value_set_t &unique_values, BoundColumnRefExpression &bound_ref, + vector> &expressions) { + for (const auto &value : unique_values) { + auto bound_constant = make_uniq(value); + auto filter_expr = make_uniq(ExpressionType::COMPARE_EQUAL, bound_ref.Copy(), + std::move(bound_constant)); + expressions.push_back(std::move(filter_expr)); } } @@ -514,14 +466,21 @@ vector> ExtractFilterExpressions(const ColumnDefinition & ColumnBinding binding(0, storage_idx); auto bound_ref = make_uniq(col.Name(), col.Type(), binding); + // Extract all comparisons and IN filters from nested filters vector> expressions; - ExtractFilter(*filter, *bound_ref, expressions); + vector> comparisons; + vector> in_filters; + if (ExtractComparisonsAndInFilters(*filter, comparisons, in_filters)) { + // Deduplicate/deal with conflicting filters, then convert to expressions + ExtractExpressionsFromValues(GetUniqueValues(comparisons, in_filters), *bound_ref, expressions); + } // Attempt matching the top-level filter to the index expression. if (expressions.empty()) { auto filter_expr = filter->ToExpression(*bound_ref); expressions.push_back(std::move(filter_expr)); } + return expressions; } diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 561950a2c..645b7bfcd 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "1" +#define DUCKDB_PATCH_VERSION "2-dev156" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 3 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.3.1" +#define DUCKDB_VERSION "v1.3.2-dev156" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "2063dda3e6" +#define DUCKDB_SOURCE_ID "0b83e5d2f6" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/file_opener.hpp b/src/duckdb/src/include/duckdb/common/file_opener.hpp index ff7aa9103..349dfbd92 100644 --- a/src/duckdb/src/include/duckdb/common/file_opener.hpp +++ b/src/duckdb/src/include/duckdb/common/file_opener.hpp @@ -35,6 +35,7 @@ class FileOpener { virtual SettingLookupResult TryGetCurrentSetting(const string &key, Value &result) = 0; virtual optional_ptr TryGetClientContext() = 0; virtual optional_ptr TryGetDatabase() = 0; + virtual shared_ptr &GetHTTPUtil() = 0; DUCKDB_API virtual Logger &GetLogger() const = 0; DUCKDB_API static unique_ptr TryGetCatalogTransaction(optional_ptr opener); diff --git a/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp b/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp index fac7a262a..5e5612a17 100644 --- a/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +++ b/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp @@ -9,9 +9,11 @@ #pragma once #include "duckdb/common/common.hpp" -#include "duckdb/common/types.hpp" #include "duckdb/common/exception.hpp" +#include "duckdb/common/types.hpp" +#include "duckdb/common/types/date.hpp" #include "duckdb/common/types/string_type.hpp" +#include "duckdb/common/types/timestamp.hpp" namespace duckdb { diff --git a/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp b/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp index 918888594..ac042a906 100644 --- a/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +++ b/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp @@ -95,8 +95,8 @@ struct InternalCompressStringUbigintFun { static ScalarFunction GetFunction(); }; -struct InternalCompressStringHugeintFun { - static constexpr const char *Name = "__internal_compress_string_hugeint"; +struct InternalCompressStringUhugeintFun { + static constexpr const char *Name = "__internal_compress_string_uhugeint"; static constexpr const char *Parameters = ""; static constexpr const char *Description = ""; static constexpr const char *Example = ""; diff --git a/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp b/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp index 189d7dfd8..373c4c405 100644 --- a/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +++ b/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp @@ -29,6 +29,7 @@ class ClientContextFileOpener : public FileOpener { return &context; } optional_ptr TryGetDatabase() override; + shared_ptr &GetHTTPUtil() override; private: ClientContext &context; diff --git a/src/duckdb/src/include/duckdb/main/database_file_opener.hpp b/src/duckdb/src/include/duckdb/main/database_file_opener.hpp index ae61f9272..950f1f11e 100644 --- a/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +++ b/src/duckdb/src/include/duckdb/main/database_file_opener.hpp @@ -36,6 +36,9 @@ class DatabaseFileOpener : public FileOpener { optional_ptr TryGetDatabase() override { return &db; } + shared_ptr &GetHTTPUtil() override { + return TryGetDatabase()->config.http_util; + } private: DatabaseInstance &db; diff --git a/src/duckdb/src/include/duckdb/main/extension_entries.hpp b/src/duckdb/src/include/duckdb/main/extension_entries.hpp index 8aec6ca7b..8f730b5fc 100644 --- a/src/duckdb/src/include/duckdb/main/extension_entries.hpp +++ b/src/duckdb/src/include/duckdb/main/extension_entries.hpp @@ -221,6 +221,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"iceberg_metadata", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, {"iceberg_scan", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, {"iceberg_snapshots", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, + {"iceberg_to_ducklake", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY}, {"icu_calendar_names", "icu", CatalogType::TABLE_FUNCTION_ENTRY}, {"icu_collate_af", "icu", CatalogType::SCALAR_FUNCTION_ENTRY}, {"icu_collate_am", "icu", CatalogType::SCALAR_FUNCTION_ENTRY}, @@ -624,6 +625,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"st_linestring2dfromwkb", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_linesubstring", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_m", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, + {"st_makebox2d", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_makeenvelope", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_makeline", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_makepolygon", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, @@ -996,6 +998,9 @@ static constexpr ExtensionEntry EXTENSION_SETTINGS[] = { {"ca_cert_file", "httpfs"}, {"calendar", "icu"}, {"disable_parquet_prefetching", "parquet"}, + {"ducklake_max_retry_count", "ducklake"}, + {"ducklake_retry_backoff", "ducklake"}, + {"ducklake_retry_wait_ms", "ducklake"}, {"enable_geoparquet_conversion", "parquet"}, {"enable_server_cert_verification", "httpfs"}, {"force_download", "httpfs"}, diff --git a/src/duckdb/src/include/duckdb/main/settings.hpp b/src/duckdb/src/include/duckdb/main/settings.hpp index a49a155e9..5eb91b63f 100644 --- a/src/duckdb/src/include/duckdb/main/settings.hpp +++ b/src/duckdb/src/include/duckdb/main/settings.hpp @@ -560,7 +560,7 @@ struct EnableFSSTVectorsSetting { struct EnableHTTPLoggingSetting { using RETURN_TYPE = bool; static constexpr const char *Name = "enable_http_logging"; - static constexpr const char *Description = "Enables HTTP logging"; + static constexpr const char *Description = "(deprecated) Enables HTTP logging"; static constexpr const char *InputType = "BOOLEAN"; static void SetLocal(ClientContext &context, const Value ¶meter); static void ResetLocal(ClientContext &context); @@ -750,7 +750,7 @@ struct HTTPLoggingOutputSetting { using RETURN_TYPE = string; static constexpr const char *Name = "http_logging_output"; static constexpr const char *Description = - "The file to which HTTP logging output should be saved, or empty to print to the terminal"; + "(deprecated) The file to which HTTP logging output should be saved, or empty to print to the terminal"; static constexpr const char *InputType = "VARCHAR"; static void SetLocal(ClientContext &context, const Value ¶meter); static void ResetLocal(ClientContext &context); diff --git a/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp b/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp index 921e03a21..527ccf94d 100644 --- a/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +++ b/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp @@ -60,6 +60,7 @@ class JoinOrderOptimizer { unordered_map materialized_cte_stats; //! Stats of Delim Scans of the Delim Join that is currently being optimized optional_ptr delim_scan_stats; + idx_t depth; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/parallel/concurrentqueue.hpp b/src/duckdb/src/include/duckdb/parallel/concurrentqueue.hpp index 7c277ac49..4ab2a8165 100644 --- a/src/duckdb/src/include/duckdb/parallel/concurrentqueue.hpp +++ b/src/duckdb/src/include/duckdb/parallel/concurrentqueue.hpp @@ -84,6 +84,14 @@ class ConcurrentQueue { } return max; } + + template + bool enqueue_bulk(It itemFirst, size_t count) { + for (size_t i = 0; i < count; i++) { + q.push(std::move(*itemFirst++)); + } + return true; + } }; } // namespace duckdb_moodycamel diff --git a/src/duckdb/src/include/duckdb/planner/filter/expression_filter.hpp b/src/duckdb/src/include/duckdb/planner/filter/expression_filter.hpp index 2d947376b..62284b045 100644 --- a/src/duckdb/src/include/duckdb/planner/filter/expression_filter.hpp +++ b/src/duckdb/src/include/duckdb/planner/filter/expression_filter.hpp @@ -35,6 +35,8 @@ class ExpressionFilter : public TableFilter { unique_ptr ToExpression(const Expression &column) const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); + static void ReplaceExpressionRecursive(unique_ptr &expr, const Expression &column, + ExpressionType replace_type = ExpressionType::BOUND_REF); }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp b/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp index 1a461ac37..647ba06ea 100644 --- a/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp @@ -198,7 +198,7 @@ class SegmentTree { error += StringUtil::Format("Node %lld: Start %lld, Count %lld", i, nodes[i].row_start, nodes[i].node->count.load()); } - throw InternalException("Could not find node in column segment tree!\n%s%s", error, Exception::GetStackTrace()); + throw InternalException("Could not find node in column segment tree!\n%s", error); } bool TryGetSegmentIndex(SegmentLock &l, idx_t row_number, idx_t &result) { diff --git a/src/duckdb/src/main/client_context_file_opener.cpp b/src/duckdb/src/main/client_context_file_opener.cpp index f7a85dc7c..24ed2aa66 100644 --- a/src/duckdb/src/main/client_context_file_opener.cpp +++ b/src/duckdb/src/main/client_context_file_opener.cpp @@ -24,6 +24,10 @@ optional_ptr ClientContextFileOpener::TryGetDatabase() { return context.db.get(); } +shared_ptr &ClientContextFileOpener::GetHTTPUtil() { + return TryGetDatabase()->config.http_util; +} + unique_ptr FileOpener::TryGetCatalogTransaction(optional_ptr opener) { if (!opener) { return nullptr; diff --git a/src/duckdb/src/main/database_manager.cpp b/src/duckdb/src/main/database_manager.cpp index 604fe06d0..88e2c6dc6 100644 --- a/src/duckdb/src/main/database_manager.cpp +++ b/src/duckdb/src/main/database_manager.cpp @@ -256,8 +256,13 @@ vector> DatabaseManager::GetDatabases(ClientContext return true; }); - result.push_back(*system); - result.push_back(*context.client_data->temporary_objects); + if (!max_db_count.IsValid() || max_db_count.GetIndex() >= 1) { + result.push_back(*system); + } + if (!max_db_count.IsValid() || max_db_count.GetIndex() >= 2) { + result.push_back(*context.client_data->temporary_objects); + } + return result; } diff --git a/src/duckdb/src/main/database_path_and_type.cpp b/src/duckdb/src/main/database_path_and_type.cpp index 80cc5a03c..3cae9b54f 100644 --- a/src/duckdb/src/main/database_path_and_type.cpp +++ b/src/duckdb/src/main/database_path_and_type.cpp @@ -26,7 +26,7 @@ void DBPathAndType::CheckMagicBytes(FileSystem &fs, string &path, string &db_typ case DataFileType::PARQUET_FILE: case DataFileType::UNKNOWN_FILE: { // FIXME: we should get this from the registered replacement scans instead of hardcoding it here - vector supported_suffixes {"parquet", "csv", "json", "jsonl", "ndjson"}; + vector supported_suffixes {"parquet", "csv", "tsv", "json", "jsonl", "ndjson"}; if (ReplacementScan::CanReplace(path, supported_suffixes)) { db_type = "__open_file__"; break; diff --git a/src/duckdb/src/main/settings/autogenerated_settings.cpp b/src/duckdb/src/main/settings/autogenerated_settings.cpp index 06efddb76..859a0fccd 100644 --- a/src/duckdb/src/main/settings/autogenerated_settings.cpp +++ b/src/duckdb/src/main/settings/autogenerated_settings.cpp @@ -505,23 +505,6 @@ Value EnableFSSTVectorsSetting::GetSetting(const ClientContext &context) { return Value::BOOLEAN(config.options.enable_fsst_vectors); } -//===----------------------------------------------------------------------===// -// Enable H T T P Logging -//===----------------------------------------------------------------------===// -void EnableHTTPLoggingSetting::SetLocal(ClientContext &context, const Value &input) { - auto &config = ClientConfig::GetConfig(context); - config.enable_http_logging = input.GetValue(); -} - -void EnableHTTPLoggingSetting::ResetLocal(ClientContext &context) { - ClientConfig::GetConfig(context).enable_http_logging = ClientConfig().enable_http_logging; -} - -Value EnableHTTPLoggingSetting::GetSetting(const ClientContext &context) { - auto &config = ClientConfig::GetConfig(context); - return Value::BOOLEAN(config.enable_http_logging); -} - //===----------------------------------------------------------------------===// // Enable H T T P Metadata Cache //===----------------------------------------------------------------------===// @@ -678,23 +661,6 @@ Value HomeDirectorySetting::GetSetting(const ClientContext &context) { return Value(config.home_directory); } -//===----------------------------------------------------------------------===// -// H T T P Logging Output -//===----------------------------------------------------------------------===// -void HTTPLoggingOutputSetting::SetLocal(ClientContext &context, const Value &input) { - auto &config = ClientConfig::GetConfig(context); - config.http_logging_output = input.GetValue(); -} - -void HTTPLoggingOutputSetting::ResetLocal(ClientContext &context) { - ClientConfig::GetConfig(context).http_logging_output = ClientConfig().http_logging_output; -} - -Value HTTPLoggingOutputSetting::GetSetting(const ClientContext &context) { - auto &config = ClientConfig::GetConfig(context); - return Value(config.http_logging_output); -} - //===----------------------------------------------------------------------===// // H T T P Proxy //===----------------------------------------------------------------------===// diff --git a/src/duckdb/src/main/settings/custom_settings.cpp b/src/duckdb/src/main/settings/custom_settings.cpp index ba1805331..03e7c6839 100644 --- a/src/duckdb/src/main/settings/custom_settings.cpp +++ b/src/duckdb/src/main/settings/custom_settings.cpp @@ -1088,6 +1088,55 @@ void HomeDirectorySetting::SetLocal(ClientContext &context, const Value &input) config.home_directory = input.IsNull() ? string() : input.ToString(); } +//===----------------------------------------------------------------------===// +// Enable H T T P Logging +//===----------------------------------------------------------------------===// +void EnableHTTPLoggingSetting::SetLocal(ClientContext &context, const Value &input) { + auto &config = ClientConfig::GetConfig(context); + config.enable_http_logging = input.GetValue(); + + // NOTE: this is a deprecated setting: we mimick the old behaviour by setting the log storage output to STDOUT and + // enabling logging for http only. Note that this behaviour is slightly wonky in that it sets all sorts of logging + // config + auto &log_manager = LogManager::Get(context); + if (config.enable_http_logging) { + log_manager.SetEnableLogging(true); + log_manager.SetLogLevel(HTTPLogType::LEVEL); + unordered_set enabled_log_types = {HTTPLogType::NAME}; + log_manager.SetEnabledLogTypes(enabled_log_types); + log_manager.SetLogStorage(*context.db, LogConfig::STDOUT_STORAGE_NAME); + } else { + log_manager.SetEnableLogging(false); + } +} + +void EnableHTTPLoggingSetting::ResetLocal(ClientContext &context) { + ClientConfig::GetConfig(context).enable_http_logging = ClientConfig().enable_http_logging; +} + +Value EnableHTTPLoggingSetting::GetSetting(const ClientContext &context) { + auto &config = ClientConfig::GetConfig(context); + return Value::BOOLEAN(config.enable_http_logging); +} + +//===----------------------------------------------------------------------===// +// H T T P Logging Output +//===----------------------------------------------------------------------===// +void HTTPLoggingOutputSetting::SetLocal(ClientContext &context, const Value &input) { + throw NotImplementedException("This setting is deprecated and can no longer be used. Check out the DuckDB docs on " + "logging for more information"); +} + +void HTTPLoggingOutputSetting::ResetLocal(ClientContext &context) { + throw NotImplementedException("This setting is deprecated and can no longer be used. Check out the DuckDB docs on " + "logging for more information"); +} + +Value HTTPLoggingOutputSetting::GetSetting(const ClientContext &context) { + auto &config = ClientConfig::GetConfig(context); + return Value(config.http_logging_output); +} + //===----------------------------------------------------------------------===// // Index Scan Percentage //===----------------------------------------------------------------------===// diff --git a/src/duckdb/src/optimizer/compressed_materialization.cpp b/src/duckdb/src/optimizer/compressed_materialization.cpp index 0245110b5..316c701a0 100644 --- a/src/duckdb/src/optimizer/compressed_materialization.cpp +++ b/src/duckdb/src/optimizer/compressed_materialization.cpp @@ -330,7 +330,7 @@ static Value GetIntegralRangeValue(ClientContext &context, const LogicalType &ty auto min = NumericStats::Min(stats); auto max = NumericStats::Max(stats); if (max < min) { - return Value::HUGEINT(NumericLimits::Maximum()); + return Value::UHUGEINT(NumericLimits::Maximum()); } vector> arguments; @@ -342,8 +342,8 @@ static Value GetIntegralRangeValue(ClientContext &context, const LogicalType &ty if (ExpressionExecutor::TryEvaluateScalar(context, sub, result)) { return result; } else { - // Couldn't evaluate: Return max hugeint as range so GetIntegralCompress will return nullptr - return Value::HUGEINT(NumericLimits::Maximum()); + // Couldn't evaluate: Return max uhugeint as range so GetIntegralCompress will return nullptr + return Value::UHUGEINT(NumericLimits::Maximum()); } } @@ -354,7 +354,7 @@ unique_ptr CompressedMaterialization::GetIntegralCompress(un return nullptr; } - // Get range and cast to UBIGINT (might fail for HUGEINT, in which case we just return) + // Get range and cast to UBIGINT (might fail for UHUGEINT, in which case we just return) Value range_value = GetIntegralRangeValue(context, type, stats); if (!range_value.DefaultTryCastAs(LogicalType::UBIGINT)) { return nullptr; diff --git a/src/duckdb/src/optimizer/filter_combiner.cpp b/src/duckdb/src/optimizer/filter_combiner.cpp index 9742d2368..96c40c1f7 100644 --- a/src/duckdb/src/optimizer/filter_combiner.cpp +++ b/src/duckdb/src/optimizer/filter_combiner.cpp @@ -242,7 +242,7 @@ bool FilterCombiner::IsDenseRange(vector &in_list) { if (in_list.empty()) { return true; } - if (!in_list[0].type().IsIntegral()) { + if (!in_list[0].type().IsIntegral() || in_list[0].type() == LogicalType::UHUGEINT) { return false; } // sort the input list diff --git a/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp b/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp index e49798ac4..5bf3f6a98 100644 --- a/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +++ b/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp @@ -10,19 +10,27 @@ namespace duckdb { -JoinOrderOptimizer::JoinOrderOptimizer(ClientContext &context) : context(context), query_graph_manager(context) { +JoinOrderOptimizer::JoinOrderOptimizer(ClientContext &context) + : context(context), query_graph_manager(context), depth(1) { } JoinOrderOptimizer JoinOrderOptimizer::CreateChildOptimizer() { JoinOrderOptimizer child_optimizer(context); child_optimizer.materialized_cte_stats = materialized_cte_stats; child_optimizer.delim_scan_stats = delim_scan_stats; + child_optimizer.depth = depth + 1; return child_optimizer; } unique_ptr JoinOrderOptimizer::Optimize(unique_ptr plan, optional_ptr stats) { + if (depth > query_graph_manager.context.config.max_expression_depth) { + // Very deep plans will eventually consume quite some stack space + // Returning the current plan is always a valid choice + return plan; + } + // make sure query graph manager has not extracted a relation graph already LogicalOperator *op = plan.get(); diff --git a/src/duckdb/src/optimizer/remove_unused_columns.cpp b/src/duckdb/src/optimizer/remove_unused_columns.cpp index a601dce55..9f7aa4a09 100644 --- a/src/duckdb/src/optimizer/remove_unused_columns.cpp +++ b/src/duckdb/src/optimizer/remove_unused_columns.cpp @@ -256,6 +256,9 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) { ColumnBinding filter_binding(get.table_index, index.GetIndex()); auto column_ref = make_uniq(std::move(column_type), filter_binding); auto filter_expr = filter.second->ToExpression(*column_ref); + if (filter_expr->IsScalar()) { + filter_expr = std::move(column_ref); + } VisitExpression(&filter_expr); filter_expressions.push_back(std::move(filter_expr)); } diff --git a/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp b/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp index 2ff9be3ea..b01f7d704 100644 --- a/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +++ b/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp @@ -1,26 +1,51 @@ #include "duckdb/common/helper.hpp" #include "duckdb/optimizer/statistics_propagator.hpp" #include "duckdb/planner/expression/bound_columnref_expression.hpp" +#include "duckdb/planner/expression_iterator.hpp" #include "duckdb/planner/filter/conjunction_filter.hpp" #include "duckdb/planner/filter/constant_filter.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckdb/planner/expression/bound_reference_expression.hpp" #include "duckdb/planner/filter/expression_filter.hpp" #include "duckdb/planner/filter/null_filter.hpp" #include "duckdb/planner/operator/logical_get.hpp" #include "duckdb/planner/table_filter.hpp" +#include "duckdb/function/scalar/generic_common.hpp" +#include "duckdb/function/scalar/generic_functions.hpp" namespace duckdb { +static void GetColumnIndex(unique_ptr &expr, idx_t &index) { + if (expr->type == ExpressionType::BOUND_REF) { + auto &bound_ref = expr->Cast(); + index = bound_ref.index; + return; + } + ExpressionIterator::EnumerateChildren(*expr, [&](unique_ptr &child) { GetColumnIndex(child, index); }); +} + FilterPropagateResult StatisticsPropagator::PropagateTableFilter(ColumnBinding stats_binding, BaseStatistics &stats, TableFilter &filter) { if (filter.filter_type == TableFilterType::EXPRESSION_FILTER) { auto &expr_filter = filter.Cast(); + + // get physical storage index of the filter + // since it is a table filter, every storage index is the same + idx_t physical_index = DConstants::INVALID_INDEX; + GetColumnIndex(expr_filter.expr, physical_index); + D_ASSERT(physical_index != DConstants::INVALID_INDEX); + auto column_ref = make_uniq(stats.GetType(), stats_binding); auto filter_expr = expr_filter.ToExpression(*column_ref); // handle the filter before updating the statistics // otherwise the filter can be pruned by the updated statistics - auto copy_expr = filter_expr->Copy(); auto propagate_result = HandleFilter(filter_expr); - UpdateFilterStatistics(*copy_expr); + auto colref = make_uniq(stats.GetType(), physical_index); + UpdateFilterStatistics(*filter_expr); + + // replace BoundColumnRefs with BoundRefs + ExpressionFilter::ReplaceExpressionRecursive(filter_expr, *colref, ExpressionType::BOUND_COLUMN_REF); + expr_filter.expr = std::move(filter_expr); return propagate_result; } return filter.CheckStatistics(stats); @@ -46,6 +71,41 @@ void StatisticsPropagator::UpdateFilterStatistics(BaseStatistics &input, TableFi } } +static bool IsConstantOrNullFilter(TableFilter &table_filter) { + if (table_filter.filter_type != TableFilterType::EXPRESSION_FILTER) { + return false; + } + auto &expr_filter = table_filter.Cast(); + if (expr_filter.expr->type != ExpressionType::BOUND_FUNCTION) { + return false; + } + auto &func = expr_filter.expr->Cast(); + return ConstantOrNull::IsConstantOrNull(func, Value::BOOLEAN(true)); +} + +static bool CanReplaceConstantOrNull(TableFilter &table_filter) { + if (!IsConstantOrNullFilter(table_filter)) { + throw InternalException("CanReplaceConstantOrNull() called on unexepected Table Filter"); + } + D_ASSERT(table_filter.filter_type == TableFilterType::EXPRESSION_FILTER); + auto &expr_filter = table_filter.Cast(); + auto &func = expr_filter.expr->Cast(); + if (ConstantOrNull::IsConstantOrNull(func, Value::BOOLEAN(true))) { + for (auto child = ++func.children.begin(); child != func.children.end(); child++) { + switch (child->get()->type) { + case ExpressionType::BOUND_REF: + case ExpressionType::VALUE_CONSTANT: + continue; + default: + // expression type could be a function like Coalesce + return false; + } + } + } + // all children of constant or null are bound refs to the table filter column + return true; +} + unique_ptr StatisticsPropagator::PropagateStatistics(LogicalGet &get, unique_ptr &node_ptr) { if (get.function.cardinality) { @@ -99,10 +159,15 @@ unique_ptr StatisticsPropagator::PropagateStatistics(LogicalGet // erase this condition get.table_filters.filters.erase(table_filter_column); break; - case FilterPropagateResult::FILTER_TRUE_OR_NULL: + case FilterPropagateResult::FILTER_TRUE_OR_NULL: { + if (IsConstantOrNullFilter(*get.table_filters.filters[table_filter_column]) && + !CanReplaceConstantOrNull(*get.table_filters.filters[table_filter_column])) { + break; + } // filter is true or null; we can replace this with a not null filter get.table_filters.filters[table_filter_column] = make_uniq(); break; + } case FilterPropagateResult::FILTER_FALSE_OR_NULL: case FilterPropagateResult::FILTER_ALWAYS_FALSE: // filter is always false; this entire filter should be replaced by an empty result block diff --git a/src/duckdb/src/parser/statement/set_statement.cpp b/src/duckdb/src/parser/statement/set_statement.cpp index c0cd75a52..6c4138643 100644 --- a/src/duckdb/src/parser/statement/set_statement.cpp +++ b/src/duckdb/src/parser/statement/set_statement.cpp @@ -14,7 +14,7 @@ SetVariableStatement::SetVariableStatement(string name_p, unique_ptrCopy(), other.scope) { + : SetStatement(other), value(other.value->Copy()) { } unique_ptr SetVariableStatement::Copy() const { diff --git a/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp b/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp index 5731bf3a1..026226221 100644 --- a/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +++ b/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp @@ -445,10 +445,9 @@ unique_ptr ExpressionBinder::QualifyColumnNameWithManyDots(Col } unique_ptr ExpressionBinder::QualifyColumnName(ColumnRefExpression &col_ref, ErrorData &error) { - - // try binding as a lambda parameter if (!col_ref.IsQualified()) { - auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetName()); + // Try binding as a lambda parameter. + auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetColumnName()); if (lambda_ref) { return lambda_ref; } diff --git a/src/duckdb/src/planner/expression_binder/alter_binder.cpp b/src/duckdb/src/planner/expression_binder/alter_binder.cpp index 797e37111..128ba6541 100644 --- a/src/duckdb/src/planner/expression_binder/alter_binder.cpp +++ b/src/duckdb/src/planner/expression_binder/alter_binder.cpp @@ -38,10 +38,9 @@ string AlterBinder::UnsupportedAggregateMessage() { } BindResult AlterBinder::BindColumnReference(ColumnRefExpression &col_ref, idx_t depth) { - - // try binding as a lambda parameter if (!col_ref.IsQualified()) { - auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetName()); + // Try binding as a lambda parameter. + auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetColumnName()); if (lambda_ref) { return BindLambdaReference(lambda_ref->Cast(), depth); } diff --git a/src/duckdb/src/planner/expression_binder/having_binder.cpp b/src/duckdb/src/planner/expression_binder/having_binder.cpp index 6f0c96a06..902add5e2 100644 --- a/src/duckdb/src/planner/expression_binder/having_binder.cpp +++ b/src/duckdb/src/planner/expression_binder/having_binder.cpp @@ -44,9 +44,9 @@ BindResult HavingBinder::BindColumnRef(unique_ptr &expr_ptr, i auto col_ref = expr_ptr->Cast(); const auto &column_name = col_ref.GetColumnName(); - // Try binding as a lambda parameter if (!col_ref.IsQualified()) { - auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetName()); + // Try binding as a lambda parameter. + auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetColumnName()); if (lambda_ref) { return BindLambdaReference(lambda_ref->Cast(), depth); } diff --git a/src/duckdb/src/planner/expression_binder/table_function_binder.cpp b/src/duckdb/src/planner/expression_binder/table_function_binder.cpp index 71ccdefe4..3c3c1491c 100644 --- a/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +++ b/src/duckdb/src/planner/expression_binder/table_function_binder.cpp @@ -18,18 +18,18 @@ BindResult TableFunctionBinder::BindLambdaReference(LambdaRefExpression &expr, i BindResult TableFunctionBinder::BindColumnReference(unique_ptr &expr_ptr, idx_t depth, bool root_expression) { - // try binding as a lambda parameter auto &col_ref = expr_ptr->Cast(); if (!col_ref.IsQualified()) { - auto column_name = col_ref.GetName(); - auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, column_name); + // Try binding as a lambda parameter. + auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetColumnName()); if (lambda_ref) { return BindLambdaReference(lambda_ref->Cast(), depth); } - if (binder.macro_binding && binder.macro_binding->HasMatchingBinding(column_name)) { + if (binder.macro_binding && binder.macro_binding->HasMatchingBinding(col_ref.GetName())) { throw ParameterNotResolvedException(); } } + auto query_location = col_ref.GetQueryLocation(); auto column_names = col_ref.column_names; auto result_name = StringUtil::Join(column_names, "."); diff --git a/src/duckdb/src/planner/filter/expression_filter.cpp b/src/duckdb/src/planner/filter/expression_filter.cpp index e2ff7d442..8e9b3299f 100644 --- a/src/duckdb/src/planner/filter/expression_filter.cpp +++ b/src/duckdb/src/planner/filter/expression_filter.cpp @@ -36,13 +36,14 @@ string ExpressionFilter::ToString(const string &column_name) const { return ToExpression(*name_expr)->ToString(); } -static void ReplaceExpressionRecursive(unique_ptr &expr, const Expression &column) { - if (expr->type == ExpressionType::BOUND_REF) { +void ExpressionFilter::ReplaceExpressionRecursive(unique_ptr &expr, const Expression &column, + ExpressionType replace_type) { + if (expr->type == replace_type) { expr = column.Copy(); return; } ExpressionIterator::EnumerateChildren( - *expr, [&](unique_ptr &child) { ReplaceExpressionRecursive(child, column); }); + *expr, [&](unique_ptr &child) { ReplaceExpressionRecursive(child, column, replace_type); }); } unique_ptr ExpressionFilter::ToExpression(const Expression &column) const { diff --git a/src/duckdb/src/planner/table_filter.cpp b/src/duckdb/src/planner/table_filter.cpp index ee6a20ac0..df6ca7eee 100644 --- a/src/duckdb/src/planner/table_filter.cpp +++ b/src/duckdb/src/planner/table_filter.cpp @@ -58,7 +58,8 @@ bool DynamicTableFilterSet::HasFilters() const { unique_ptr DynamicTableFilterSet::GetFinalTableFilters(const PhysicalTableScan &scan, optional_ptr existing_filters) const { - D_ASSERT(HasFilters()); + lock_guard l(lock); + D_ASSERT(!filters.empty()); auto result = make_uniq(); if (existing_filters) { for (auto &entry : existing_filters->filters) { diff --git a/src/duckdb/src/storage/buffer/buffer_pool.cpp b/src/duckdb/src/storage/buffer/buffer_pool.cpp index d62788abf..f2e35ef1a 100644 --- a/src/duckdb/src/storage/buffer/buffer_pool.cpp +++ b/src/duckdb/src/storage/buffer/buffer_pool.cpp @@ -211,7 +211,7 @@ void EvictionQueue::PurgeIteration(const idx_t purge_size) { } // bulk purge - idx_t actually_dequeued = q.try_dequeue_bulk(purge_nodes.begin(), purge_size); + const idx_t actually_dequeued = q.try_dequeue_bulk(purge_nodes.begin(), purge_size); // retrieve all alive nodes that have been wrongly dequeued idx_t alive_nodes = 0; @@ -219,11 +219,13 @@ void EvictionQueue::PurgeIteration(const idx_t purge_size) { auto &node = purge_nodes[i]; auto handle = node.TryGetBlockHandle(); if (handle) { - q.enqueue(std::move(node)); - alive_nodes++; + purge_nodes[alive_nodes++] = std::move(node); } } + // bulk re-add (TODO order them by timestamp to better retain the LRU behavior) + q.enqueue_bulk(purge_nodes.begin(), alive_nodes); + total_dead_nodes -= actually_dequeued - alive_nodes; } diff --git a/src/duckdb/src/storage/compression/fsst.cpp b/src/duckdb/src/storage/compression/fsst.cpp index a008c612f..800079a80 100644 --- a/src/duckdb/src/storage/compression/fsst.cpp +++ b/src/duckdb/src/storage/compression/fsst.cpp @@ -64,7 +64,7 @@ struct FSSTStorage { static char *FetchStringPointer(StringDictionaryContainer dict, data_ptr_t baseptr, int32_t dict_offset); static bp_delta_offsets_t CalculateBpDeltaOffsets(int64_t last_known_row, idx_t start, idx_t scan_count); static bool ParseFSSTSegmentHeader(data_ptr_t base_ptr, duckdb_fsst_decoder_t *decoder_out, - bitpacking_width_t *width_out); + bitpacking_width_t *width_out, const idx_t block_size); static bp_delta_offsets_t StartScan(FSSTScanState &scan_state, data_ptr_t base_data, idx_t start, idx_t vector_count); static void EndScan(FSSTScanState &scan_state, bp_delta_offsets_t &offsets, idx_t start, idx_t scan_count); @@ -335,14 +335,15 @@ class FSSTCompressionState : public CompressionState { idx_t Finalize() { auto &buffer_manager = BufferManager::GetBufferManager(current_segment->db); auto handle = buffer_manager.Pin(current_segment->block); - D_ASSERT(current_dictionary.end == info.GetBlockSize()); + if (current_dictionary.end != info.GetBlockSize()) { + throw InternalException("dictionary end does not match the block size in FSSTCompressionState::Finalize"); + } // calculate sizes auto compressed_index_buffer_size = BitpackingPrimitives::GetRequiredSize(current_segment->count, current_width); auto total_size = sizeof(fsst_compression_header_t) + compressed_index_buffer_size + current_dictionary.size + fsst_serialized_symbol_table_size; - if (total_size != last_fitting_size) { throw InternalException("FSST string compression failed due to incorrect size calculation"); } @@ -365,8 +366,12 @@ class FSSTCompressionState : public CompressionState { memset(base_ptr + symbol_table_offset, 0, fsst_serialized_symbol_table_size); } - Store(NumericCast(symbol_table_offset), - data_ptr_cast(&header_ptr->fsst_symbol_table_offset)); + auto cast_symbol_table_offset = NumericCast(symbol_table_offset); + if (cast_symbol_table_offset > info.GetBlockSize()) { + throw InternalException("invalid fsst_symbol_table_offset in FSSTCompressionState::Finalize"); + } + + Store(cast_symbol_table_offset, data_ptr_cast(&header_ptr->fsst_symbol_table_offset)); Store((uint32_t)current_width, data_ptr_cast(&header_ptr->bitpacking_width)); if (total_size >= info.GetCompactionFlushLimit()) { @@ -563,15 +568,16 @@ struct FSSTScanState : public StringScanState { }; unique_ptr FSSTStorage::StringInitScan(ColumnSegment &segment) { - auto string_block_limit = StringUncompressed::GetStringBlockLimit(segment.GetBlockManager().GetBlockSize()); + auto block_size = segment.GetBlockManager().GetBlockSize(); + auto string_block_limit = StringUncompressed::GetStringBlockLimit(block_size); auto state = make_uniq(string_block_limit); auto &buffer_manager = BufferManager::GetBufferManager(segment.db); state->handle = buffer_manager.Pin(segment.block); auto base_ptr = state->handle.Ptr() + segment.GetBlockOffset(); state->duckdb_fsst_decoder = make_buffer(); - auto retval = ParseFSSTSegmentHeader( - base_ptr, reinterpret_cast(state->duckdb_fsst_decoder.get()), &state->current_width); + auto decoder = reinterpret_cast(state->duckdb_fsst_decoder.get()); + auto retval = ParseFSSTSegmentHeader(base_ptr, decoder, &state->current_width, block_size); if (!retval) { state->duckdb_fsst_decoder = nullptr; } @@ -736,7 +742,8 @@ void FSSTStorage::StringFetchRow(ColumnSegment &segment, ColumnFetchState &state duckdb_fsst_decoder_t decoder; bitpacking_width_t width; - auto have_symbol_table = ParseFSSTSegmentHeader(base_ptr, &decoder, &width); + auto block_size = segment.GetBlockManager().GetBlockSize(); + auto have_symbol_table = ParseFSSTSegmentHeader(base_ptr, &decoder, &width, block_size); auto result_data = FlatVector::GetData(result); if (!have_symbol_table) { @@ -814,9 +821,12 @@ char *FSSTStorage::FetchStringPointer(StringDictionaryContainer dict, data_ptr_t // Returns false if no symbol table was found. This means all strings are either empty or null bool FSSTStorage::ParseFSSTSegmentHeader(data_ptr_t base_ptr, duckdb_fsst_decoder_t *decoder_out, - bitpacking_width_t *width_out) { + bitpacking_width_t *width_out, const idx_t block_size) { auto header_ptr = reinterpret_cast(base_ptr); auto fsst_symbol_table_offset = Load(data_ptr_cast(&header_ptr->fsst_symbol_table_offset)); + if (fsst_symbol_table_offset > block_size) { + throw InternalException("invalid fsst_symbol_table_offset in FSSTStorage::ParseFSSTSegmentHeader"); + } *width_out = (bitpacking_width_t)(Load(data_ptr_cast(&header_ptr->bitpacking_width))); return duckdb_fsst_import(decoder_out, base_ptr + fsst_symbol_table_offset); } diff --git a/src/duckdb/src/storage/compression/roaring/compress.cpp b/src/duckdb/src/storage/compression/roaring/compress.cpp index 172cc331a..fc2ba3625 100644 --- a/src/duckdb/src/storage/compression/roaring/compress.cpp +++ b/src/duckdb/src/storage/compression/roaring/compress.cpp @@ -308,24 +308,30 @@ void RoaringCompressState::FlushSegment() { base_ptr += sizeof(idx_t); // Size of the 'd' part - idx_t data_size = NumericCast(data_ptr - base_ptr); - data_size = AlignValue(data_size); + auto unaligned_data_size = NumericCast(data_ptr - base_ptr); + auto data_size = AlignValue(unaligned_data_size); + data_ptr += data_size - unaligned_data_size; // Size of the 'm' part - idx_t metadata_size = metadata_collection.GetMetadataSizeForSegment(); - + auto metadata_size = metadata_collection.GetMetadataSizeForSegment(); if (current_segment->count.load() == 0) { D_ASSERT(metadata_size == 0); return; } - idx_t serialized_metadata_size = metadata_collection.Serialize(data_ptr); + auto serialized_metadata_size = metadata_collection.Serialize(data_ptr); + if (metadata_size != serialized_metadata_size) { + throw InternalException("mismatch in metadata size during RoaringCompressState::FlushSegment"); + } + metadata_collection.FlushSegment(); - (void)serialized_metadata_size; - D_ASSERT(metadata_size == serialized_metadata_size); - idx_t metadata_start = static_cast(data_ptr - base_ptr); + auto metadata_start = static_cast(data_ptr - base_ptr); + if (metadata_start > info.GetBlockSize()) { + throw InternalException("metadata start outside of block size during RoaringCompressState::FlushSegment"); + } + Store(metadata_start, handle.Ptr()); - idx_t total_segment_size = sizeof(idx_t) + data_size + metadata_size; + auto total_segment_size = sizeof(idx_t) + data_size + metadata_size; state.FlushSegment(std::move(current_segment), std::move(handle), total_segment_size); } diff --git a/src/duckdb/src/storage/compression/roaring/scan.cpp b/src/duckdb/src/storage/compression/roaring/scan.cpp index cefc4df72..24c9a43c9 100644 --- a/src/duckdb/src/storage/compression/roaring/scan.cpp +++ b/src/duckdb/src/storage/compression/roaring/scan.cpp @@ -203,11 +203,20 @@ void BitsetContainerScanState::Verify() const { RoaringScanState::RoaringScanState(ColumnSegment &segment) : segment(segment) { auto &buffer_manager = BufferManager::GetBufferManager(segment.db); handle = buffer_manager.Pin(segment.block); - auto base_ptr = handle.Ptr() + segment.GetBlockOffset(); + auto segment_size = segment.SegmentSize(); + auto segment_block_offset = segment.GetBlockOffset(); + if (segment_block_offset >= segment_size) { + throw InternalException("invalid segment_block_offset in RoaringScanState constructor"); + } + + auto base_ptr = handle.Ptr() + segment_block_offset; data_ptr = base_ptr + sizeof(idx_t); // Deserialize the container metadata for this segment auto metadata_offset = Load(base_ptr); + if (metadata_offset >= segment_size) { + throw InternalException("invalid metadata offset in RoaringScanState constructor"); + } auto metadata_ptr = data_ptr + metadata_offset; auto segment_count = segment.count.load(); diff --git a/src/duckdb/src/storage/data_table.cpp b/src/duckdb/src/storage/data_table.cpp index 8ec2831df..ec65f9fb7 100644 --- a/src/duckdb/src/storage/data_table.cpp +++ b/src/duckdb/src/storage/data_table.cpp @@ -415,7 +415,7 @@ TableStorageInfo DataTable::GetStorageInfo() { //===--------------------------------------------------------------------===// void DataTable::Fetch(DuckTransaction &transaction, DataChunk &result, const vector &column_ids, const Vector &row_identifiers, idx_t fetch_count, ColumnFetchState &state) { - auto lock = info->checkpoint_lock.GetSharedLock(); + auto lock = transaction.SharedLockTable(*info); row_groups->Fetch(transaction, result, column_ids, row_identifiers, fetch_count, state); } diff --git a/src/duckdb/src/storage/statistics/numeric_stats.cpp b/src/duckdb/src/storage/statistics/numeric_stats.cpp index 3e30e4d9a..803c21f12 100644 --- a/src/duckdb/src/storage/statistics/numeric_stats.cpp +++ b/src/duckdb/src/storage/statistics/numeric_stats.cpp @@ -375,7 +375,8 @@ Value NumericValueUnionToValue(const LogicalType &type, const NumericValueUnion } bool NumericStats::HasMinMax(const BaseStatistics &stats) { - return NumericStats::HasMin(stats) && NumericStats::HasMax(stats); + return NumericStats::HasMin(stats) && NumericStats::HasMax(stats) && + NumericStats::Min(stats) <= NumericStats::Max(stats); } bool NumericStats::HasMin(const BaseStatistics &stats) { diff --git a/src/duckdb/src/storage/storage_info.cpp b/src/duckdb/src/storage/storage_info.cpp index a59474e2b..bfa36e92b 100644 --- a/src/duckdb/src/storage/storage_info.cpp +++ b/src/duckdb/src/storage/storage_info.cpp @@ -81,6 +81,7 @@ static const StorageVersionInfo storage_version_info[] = { {"v1.2.2", 65}, {"v1.3.0", 66}, {"v1.3.1", 66}, + {"v1.3.2", 66}, {nullptr, 0} }; // END OF STORAGE VERSION INFO @@ -104,6 +105,7 @@ static const SerializationVersionInfo serialization_version_info[] = { {"v1.2.2", 4}, {"v1.3.0", 5}, {"v1.3.1", 5}, + {"v1.3.2", 5}, {"latest", 5}, {nullptr, 0} }; diff --git a/src/duckdb/src/storage/table/column_checkpoint_state.cpp b/src/duckdb/src/storage/table/column_checkpoint_state.cpp index 57ce46dce..9ceac7ab6 100644 --- a/src/duckdb/src/storage/table/column_checkpoint_state.cpp +++ b/src/duckdb/src/storage/table/column_checkpoint_state.cpp @@ -117,7 +117,9 @@ void ColumnCheckpointState::FlushSegment(unique_ptr segment, Buff void ColumnCheckpointState::FlushSegmentInternal(unique_ptr segment, idx_t segment_size) { auto block_size = partial_block_manager.GetBlockManager().GetBlockSize(); - D_ASSERT(segment_size <= block_size); + if (segment_size > block_size) { + throw InternalException("segment size exceeds block size in ColumnCheckpointState::FlushSegmentInternal"); + } auto tuple_count = segment->count.load(); if (tuple_count == 0) { // LCOV_EXCL_START diff --git a/src/duckdb/src/transaction/duck_transaction_manager.cpp b/src/duckdb/src/transaction/duck_transaction_manager.cpp index a114bdc2d..dc4997b50 100644 --- a/src/duckdb/src/transaction/duck_transaction_manager.cpp +++ b/src/duckdb/src/transaction/duck_transaction_manager.cpp @@ -269,14 +269,17 @@ ErrorData DuckTransactionManager::CommitTransaction(ClientContext &context, Tran if (!error.HasError()) { error = transaction.Commit(db, commit_id, std::move(commit_state)); } + if (error.HasError()) { - // commit unsuccessful: rollback the transaction instead + // COMMIT not successful: ROLLBACK. checkpoint_decision = CheckpointDecision(error.Message()); transaction.commit_id = 0; + auto rollback_error = transaction.Rollback(); if (rollback_error.HasError()) { - throw FatalException("Failed to rollback transaction. Cannot continue operation.\nError: %s", - rollback_error.Message()); + throw FatalException( + "Failed to rollback transaction. Cannot continue operation.\nOriginal Error: %s\nRollback Error: %s", + error.Message(), rollback_error.Message()); } } else { // check if catalog changes were made