diff --git a/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp b/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp index 31fb26cc9..775160215 100644 --- a/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +++ b/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp @@ -18,7 +18,7 @@ class DbpDecoder { : buffer_(buffer, buffer_len), // block_size_in_values(ParquetDecodeUtils::VarintDecode(buffer_)), - number_of_miniblocks_per_block(ParquetDecodeUtils::VarintDecode(buffer_)), + number_of_miniblocks_per_block(DecodeNumberOfMiniblocksPerBlock(buffer_)), number_of_values_in_a_miniblock(block_size_in_values / number_of_miniblocks_per_block), total_value_count(ParquetDecodeUtils::VarintDecode(buffer_)), previous_value(ParquetDecodeUtils::ZigzagToInt(ParquetDecodeUtils::VarintDecode(buffer_))), @@ -31,7 +31,7 @@ class DbpDecoder { number_of_values_in_a_miniblock % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE == 0)) { throw InvalidInputException("Parquet file has invalid block sizes for DELTA_BINARY_PACKED"); } - }; + } ByteBuffer BufferPtr() const { return buffer_; @@ -68,6 +68,15 @@ class DbpDecoder { } private: + static idx_t DecodeNumberOfMiniblocksPerBlock(ByteBuffer &buffer) { + auto res = ParquetDecodeUtils::VarintDecode(buffer); + if (res == 0) { + throw InvalidInputException( + "Parquet file has invalid number of miniblocks per block for DELTA_BINARY_PACKED"); + } + return res; + } + template void GetBatchInternal(const data_ptr_t target_values_ptr, const idx_t batch_size) { if (batch_size == 0) { diff --git a/src/duckdb/extension/parquet/parquet_reader.cpp b/src/duckdb/extension/parquet/parquet_reader.cpp index 8287b7b0b..5b5ab57c7 100644 --- a/src/duckdb/extension/parquet/parquet_reader.cpp +++ b/src/duckdb/extension/parquet/parquet_reader.cpp @@ -740,7 +740,9 @@ unique_ptr ParquetReader::ParseSchema(ClientContext &contex throw InvalidInputException("Root element of Parquet file must be a struct"); } D_ASSERT(next_schema_idx == file_meta_data->schema.size() - 1); - D_ASSERT(file_meta_data->row_groups.empty() || next_file_idx == file_meta_data->row_groups[0].columns.size()); + if (!file_meta_data->row_groups.empty() && next_file_idx != file_meta_data->row_groups[0].columns.size()) { + throw InvalidInputException("Parquet reader: row group does not have enough columns"); + } if (parquet_options.file_row_number) { for (auto &column : root.children) { auto &name = column.name; diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 63e285e80..7998c12ea 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "2-dev31" +#define DUCKDB_PATCH_VERSION "2-dev54" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 4 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.4.2-dev31" +#define DUCKDB_VERSION "v1.4.2-dev54" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "46beeea72f" +#define DUCKDB_SOURCE_ID "abd077cd1e" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp b/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp index c935a713a..acf60f80c 100644 --- a/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +++ b/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp @@ -26,6 +26,7 @@ applications, and to alter it and redistribute it freely, subject to the followi #include "duckdb/common/helper.hpp" #include "duckdb/common/types.hpp" #include "duckdb/common/unique_ptr.hpp" +#include "duckdb/common/operator/numeric_cast.hpp" #include #include diff --git a/src/duckdb/src/include/duckdb/main/attached_database.hpp b/src/duckdb/src/include/duckdb/main/attached_database.hpp index e55e972d4..6ddf489ca 100644 --- a/src/duckdb/src/include/duckdb/main/attached_database.hpp +++ b/src/duckdb/src/include/duckdb/main/attached_database.hpp @@ -35,9 +35,10 @@ enum class AttachedDatabaseType { class DatabaseFilePathManager; struct StoredDatabasePath { - StoredDatabasePath(DatabaseFilePathManager &manager, string path, const string &name); + StoredDatabasePath(DatabaseManager &db_manager, DatabaseFilePathManager &manager, string path, const string &name); ~StoredDatabasePath(); + DatabaseManager &db_manager; DatabaseFilePathManager &manager; string path; diff --git a/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp b/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp index f84b496dd..3c51efa2e 100644 --- a/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp +++ b/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp @@ -13,21 +13,21 @@ #include "duckdb/common/case_insensitive_map.hpp" #include "duckdb/common/enums/on_create_conflict.hpp" #include "duckdb/common/enums/access_mode.hpp" +#include "duckdb/common/reference_map.hpp" namespace duckdb { struct AttachInfo; struct AttachOptions; +class DatabaseManager; enum class InsertDatabasePathResult { SUCCESS, ALREADY_EXISTS }; struct DatabasePathInfo { - explicit DatabasePathInfo(string name_p, AccessMode access_mode) - : name(std::move(name_p)), access_mode(access_mode), is_attached(true) { - } + DatabasePathInfo(DatabaseManager &manager, string name_p, AccessMode access_mode); string name; AccessMode access_mode; - bool is_attached; + reference_set_t attached_databases; idx_t reference_count = 1; }; @@ -35,12 +35,12 @@ struct DatabasePathInfo { class DatabaseFilePathManager { public: idx_t ApproxDatabaseCount() const; - InsertDatabasePathResult InsertDatabasePath(const string &path, const string &name, OnCreateConflict on_conflict, - AttachOptions &options); + InsertDatabasePathResult InsertDatabasePath(DatabaseManager &manager, const string &path, const string &name, + OnCreateConflict on_conflict, AttachOptions &options); //! Erase a database path - indicating we are done with using it void EraseDatabasePath(const string &path); //! Called when a database is detached, but before it is fully finished being used - void DetachDatabase(const string &path); + void DetachDatabase(DatabaseManager &manager, const string &path); private: //! The lock to add entries to the database path map diff --git a/src/duckdb/src/main/attached_database.cpp b/src/duckdb/src/main/attached_database.cpp index aaf061e2f..0e4e8529c 100644 --- a/src/duckdb/src/main/attached_database.cpp +++ b/src/duckdb/src/main/attached_database.cpp @@ -14,8 +14,9 @@ namespace duckdb { -StoredDatabasePath::StoredDatabasePath(DatabaseFilePathManager &manager, string path_p, const string &name) - : manager(manager), path(std::move(path_p)) { +StoredDatabasePath::StoredDatabasePath(DatabaseManager &db_manager, DatabaseFilePathManager &manager, string path_p, + const string &name) + : db_manager(db_manager), manager(manager), path(std::move(path_p)) { } StoredDatabasePath::~StoredDatabasePath() { @@ -23,7 +24,7 @@ StoredDatabasePath::~StoredDatabasePath() { } void StoredDatabasePath::OnDetach() { - manager.DetachDatabase(path); + manager.DetachDatabase(db_manager, path); } //===--------------------------------------------------------------------===// diff --git a/src/duckdb/src/main/database_file_path_manager.cpp b/src/duckdb/src/main/database_file_path_manager.cpp index f9e991827..2e107210a 100644 --- a/src/duckdb/src/main/database_file_path_manager.cpp +++ b/src/duckdb/src/main/database_file_path_manager.cpp @@ -5,28 +5,43 @@ namespace duckdb { +DatabasePathInfo::DatabasePathInfo(DatabaseManager &manager, string name_p, AccessMode access_mode) + : name(std::move(name_p)), access_mode(access_mode) { + attached_databases.insert(manager); +} + idx_t DatabaseFilePathManager::ApproxDatabaseCount() const { lock_guard path_lock(db_paths_lock); return db_paths.size(); } -InsertDatabasePathResult DatabaseFilePathManager::InsertDatabasePath(const string &path, const string &name, - OnCreateConflict on_conflict, +InsertDatabasePathResult DatabaseFilePathManager::InsertDatabasePath(DatabaseManager &manager, const string &path, + const string &name, OnCreateConflict on_conflict, AttachOptions &options) { if (path.empty() || path == IN_MEMORY_PATH) { return InsertDatabasePathResult::SUCCESS; } lock_guard path_lock(db_paths_lock); - auto entry = db_paths.emplace(path, DatabasePathInfo(name, options.access_mode)); + auto entry = db_paths.emplace(path, DatabasePathInfo(manager, name, options.access_mode)); if (!entry.second) { auto &existing = entry.first->second; + bool already_exists = false; + bool attached_in_this_system = false; + if (on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT && existing.name == name) { + already_exists = true; + attached_in_this_system = existing.attached_databases.find(manager) != existing.attached_databases.end(); + } if (options.access_mode == AccessMode::READ_ONLY && existing.access_mode == AccessMode::READ_ONLY) { + if (attached_in_this_system) { + return InsertDatabasePathResult::ALREADY_EXISTS; + } // all attaches are in read-only mode - there is no conflict, just increase the reference count + existing.attached_databases.insert(manager); existing.reference_count++; } else { - if (on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT && existing.name == name) { - if (existing.is_attached) { + if (already_exists) { + if (attached_in_this_system) { return InsertDatabasePathResult::ALREADY_EXISTS; } throw BinderException( @@ -40,7 +55,7 @@ InsertDatabasePathResult DatabaseFilePathManager::InsertDatabasePath(const strin name, path, existing.name); } } - options.stored_database_path = make_uniq(*this, path, name); + options.stored_database_path = make_uniq(manager, *this, path, name); return InsertDatabasePathResult::SUCCESS; } @@ -59,14 +74,14 @@ void DatabaseFilePathManager::EraseDatabasePath(const string &path) { } } -void DatabaseFilePathManager::DetachDatabase(const string &path) { +void DatabaseFilePathManager::DetachDatabase(DatabaseManager &manager, const string &path) { if (path.empty() || path == IN_MEMORY_PATH) { return; } lock_guard path_lock(db_paths_lock); auto entry = db_paths.find(path); if (entry != db_paths.end()) { - entry->second.is_attached = false; + entry->second.attached_databases.erase(manager); } } diff --git a/src/duckdb/src/main/database_manager.cpp b/src/duckdb/src/main/database_manager.cpp index 003e440da..848f080fc 100644 --- a/src/duckdb/src/main/database_manager.cpp +++ b/src/duckdb/src/main/database_manager.cpp @@ -199,7 +199,7 @@ idx_t DatabaseManager::ApproxDatabaseCount() { } InsertDatabasePathResult DatabaseManager::InsertDatabasePath(const AttachInfo &info, AttachOptions &options) { - return path_manager->InsertDatabasePath(info.path, info.name, info.on_conflict, options); + return path_manager->InsertDatabasePath(*this, info.path, info.name, info.on_conflict, options); } vector DatabaseManager::GetAttachedDatabasePaths() {