Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/duckdb/extension/json/json_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,13 @@ void JsonExtension::Load(DuckDB &db) {

// JSON copy function
auto copy_fun = JSONFunctions::GetJSONCopyFunction();
ExtensionUtil::RegisterFunction(db_instance, std::move(copy_fun));
ExtensionUtil::RegisterFunction(db_instance, copy_fun);
copy_fun.extension = "ndjson";
copy_fun.name = "ndjson";
ExtensionUtil::RegisterFunction(db_instance, copy_fun);
copy_fun.extension = "jsonl";
copy_fun.name = "jsonl";
ExtensionUtil::RegisterFunction(db_instance, copy_fun);

// JSON macros
for (idx_t index = 0; json_macros[index].name != nullptr; index++) {
Expand Down
5 changes: 5 additions & 0 deletions src/duckdb/src/catalog/catalog_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,11 @@ void CatalogSet::Scan(const std::function<void(CatalogEntry &)> &callback) {
}
}

//! Replaces the default generator of this catalog set with the one passed in.
//! The previous generator (if any) is released when the new one is assigned.
void CatalogSet::SetDefaultGenerator(unique_ptr<DefaultGenerator> defaults_p) {
    // Hold the catalog lock for the duration of the replacement
    lock_guard<mutex> guard(catalog_lock);
    // Take ownership of the supplied generator
    defaults = std::move(defaults_p);
}

void CatalogSet::Verify(Catalog &catalog_p) {
D_ASSERT(&catalog_p == &catalog);
vector<reference<CatalogEntry>> entries;
Expand Down
1 change: 1 addition & 0 deletions src/duckdb/src/catalog/duck_catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ void DuckCatalog::Initialize(bool load_builtin) {
CreateSchemaInfo info;
info.schema = DEFAULT_SCHEMA;
info.internal = true;
info.on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT;
CreateSchema(data, info);

if (load_builtin) {
Expand Down
7 changes: 4 additions & 3 deletions src/duckdb/src/common/enum_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1032,19 +1032,20 @@ const StringUtil::EnumStringLiteral *GetDataFileTypeValues() {
{ static_cast<uint32_t>(DataFileType::FILE_DOES_NOT_EXIST), "FILE_DOES_NOT_EXIST" },
{ static_cast<uint32_t>(DataFileType::DUCKDB_FILE), "DUCKDB_FILE" },
{ static_cast<uint32_t>(DataFileType::SQLITE_FILE), "SQLITE_FILE" },
{ static_cast<uint32_t>(DataFileType::PARQUET_FILE), "PARQUET_FILE" }
{ static_cast<uint32_t>(DataFileType::PARQUET_FILE), "PARQUET_FILE" },
{ static_cast<uint32_t>(DataFileType::UNKNOWN_FILE), "UNKNOWN_FILE" }
};
return values;
}

//! Converts a DataFileType value into its string name by looking it up in the
//! table returned by GetDataFileTypeValues().
template<>
const char* EnumUtil::ToChars<DataFileType>(DataFileType value) {
    // The entry count must cover the whole table, including the UNKNOWN_FILE entry;
    // a stale count of 4 would leave UNKNOWN_FILE outside the searched range.
    return StringUtil::EnumToString(GetDataFileTypeValues(), 5, "DataFileType", static_cast<uint32_t>(value));
}

//! Parses a string name into the matching DataFileType value by looking it up in
//! the table returned by GetDataFileTypeValues().
template<>
DataFileType EnumUtil::FromString<DataFileType>(const char *value) {
    // The entry count must cover the whole table, including the UNKNOWN_FILE entry;
    // a stale count of 4 would make "UNKNOWN_FILE" unparseable.
    return static_cast<DataFileType>(StringUtil::StringToEnum(GetDataFileTypeValues(), 5, "DataFileType", value));
}

const StringUtil::EnumStringLiteral *GetDateCastResultValues() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ bool LineError::HandleErrors(StringValueResult &result) {
default:
throw InvalidInputException("CSV Error not allowed when inserting row");
}
result.error_handler.Error(csv_error);
result.error_handler.Error(csv_error, result.try_row);
}
if (is_error_in_line && scan_id != StringValueScanner::LINE_FINDER_ID) {
if (result.sniffing) {
Expand Down Expand Up @@ -777,7 +777,7 @@ void StringValueResult::NullPaddingQuotedNewlineCheck() const {
// If null_padding is set, we found a quoted newline, and we are scanning the file in parallel, we raise an error.
LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
auto csv_error = CSVError::NullPaddingFail(state_machine.options, lines_per_batch, path);
error_handler.Error(csv_error);
error_handler.Error(csv_error, try_row);
}
}

Expand Down Expand Up @@ -847,13 +847,13 @@ bool StringValueResult::AddRowInternal() {
state_machine.options, cur_col_id - 1, lines_per_batch, borked_line,
current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
last_position.GetGlobalPosition(requested_size, first_nl), path);
error_handler.Error(csv_error);
error_handler.Error(csv_error, try_row);
} else {
auto csv_error = CSVError::IncorrectColumnAmountError(
state_machine.options, cur_col_id - 1, lines_per_batch, borked_line,
current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
last_position.GetGlobalPosition(requested_size), path);
error_handler.Error(csv_error);
error_handler.Error(csv_error, try_row);
}
}
// If we are here we ignore_errors, so we delete this line
Expand Down Expand Up @@ -966,6 +966,7 @@ StringValueScanner::StringValueScanner(idx_t scanner_idx_p, const shared_ptr<CSV
lines_read += csv_file_scan->skipped_rows;
}
iterator.buffer_size = state_machine->options.buffer_size_option.GetValue();
result.try_row = scanner_idx == LINE_FINDER_ID;
}

StringValueScanner::StringValueScanner(const shared_ptr<CSVBufferManager> &buffer_manager,
Expand Down Expand Up @@ -1710,19 +1711,24 @@ bool StringValueScanner::IsRowValid(CSVIterator &current_iterator) const {
return false;
}
constexpr idx_t result_size = 1;
auto scan_finder = make_uniq<StringValueScanner>(StringValueScanner::LINE_FINDER_ID, buffer_manager,
state_machine_strict, make_shared_ptr<CSVErrorHandler>(),
csv_file_scan, false, current_iterator, result_size);
auto &tuples = scan_finder->ParseChunk();
current_iterator.pos = scan_finder->GetIteratorPosition();
bool has_error = false;
if (tuples.current_errors.HasError()) {
if (tuples.current_errors.Size() != 1 || !tuples.current_errors.HasErrorType(MAXIMUM_LINE_SIZE)) {
// We ignore maximum line size errors
has_error = true;
}
}
return (tuples.number_of_rows == 1 || tuples.first_line_is_comment) && !has_error && tuples.borked_rows.empty();
auto scan_finder = make_uniq<StringValueScanner>(LINE_FINDER_ID, buffer_manager, state_machine_strict,
make_shared_ptr<CSVErrorHandler>(), csv_file_scan, false,
current_iterator, result_size);
try {
auto &tuples = scan_finder->ParseChunk();
current_iterator.pos = scan_finder->GetIteratorPosition();
bool has_error = false;
if (tuples.current_errors.HasError()) {
if (tuples.current_errors.Size() != 1 || !tuples.current_errors.HasErrorType(MAXIMUM_LINE_SIZE)) {
// We ignore maximum line size errors
has_error = true;
}
}
return (tuples.number_of_rows == 1 || tuples.first_line_is_comment) && !has_error && tuples.borked_rows.empty();
} catch (const Exception &e) {
return false;
}
return true;
}

ValidRowInfo StringValueScanner::TryRow(CSVState state, idx_t start_pos, idx_t end_pos) const {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ void CSVErrorHandler::ThrowError(const CSVError &csv_error) {

void CSVErrorHandler::Error(const CSVError &csv_error, bool force_error) {
lock_guard<mutex> parallel_lock(main_mutex);
if ((ignore_errors && !force_error) || (PrintLineNumber(csv_error) && !CanGetLine(csv_error.GetBoundaryIndex()))) {
if (!force_error && (ignore_errors || (PrintLineNumber(csv_error) && !CanGetLine(csv_error.GetBoundaryIndex())))) {
// We store this error instead of throwing it: either we are ignoring errors, or we can't throw it yet
errors.push_back(csv_error);
return;
Expand Down
14 changes: 0 additions & 14 deletions src/duckdb/src/execution/operator/schema/physical_attach.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,20 +62,6 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c
}
}

string extension = "";
if (FileSystem::IsRemoteFile(path, extension)) {
if (!ExtensionHelper::TryAutoLoadExtension(context.client, extension)) {
throw MissingExtensionException("Attaching path '%s' requires extension '%s' to be loaded", path,
extension);
}
if (options.access_mode == AccessMode::AUTOMATIC) {
// Attaching of remote files gets bumped to READ_ONLY
// This is due to the fact that on most (all?) remote files writes to DB are not available
// and having this raised later is not super helpful
options.access_mode = AccessMode::READ_ONLY;
}
}

// Get the database type and attach the database.
db_manager.GetDatabaseType(context.client, *info, config, options);
auto attached_db = db_manager.AttachDatabase(context.client, *info, options);
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "0-dev3309"
#define DUCKDB_PATCH_VERSION "0-dev3365"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 3
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.3.0-dev3309"
#define DUCKDB_VERSION "v1.3.0-dev3365"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "027bc16ee8"
#define DUCKDB_SOURCE_ID "fda0ba6a7a"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/catalog/catalog_entry/dschema_catalog_entry.hpp
// duckdb/catalog/catalog_entry/duck_schema_entry.hpp
//
//
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -70,11 +70,10 @@ class DuckSchemaEntry : public SchemaCatalogEntry {

void Verify(Catalog &catalog) override;

private:
void OnDropEntry(CatalogTransaction transaction, CatalogEntry &entry);

private:
//! Get the catalog set for the specified type
CatalogSet &GetCatalogSet(CatalogType type);

private:
void OnDropEntry(CatalogTransaction transaction, CatalogEntry &entry);
};
} // namespace duckdb
3 changes: 3 additions & 0 deletions src/duckdb/src/include/duckdb/catalog/catalog_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ class CatalogSet {

void Verify(Catalog &catalog);

//! Override the default generator - this should not be used after the catalog set has been used
void SetDefaultGenerator(unique_ptr<DefaultGenerator> defaults);

private:
bool DropDependencies(CatalogTransaction transaction, const string &name, bool cascade,
bool allow_drop_internal = false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,9 @@ class StringValueResult : public ScannerResult {
bool added_last_line = false;
bool quoted_new_line = false;

//! Whether we are trying out a row while figuring out the next row to start from.
bool try_row = false;

unsafe_unique_array<ParseTypeInfo> parse_types;
vector<string> names;

Expand Down Expand Up @@ -376,7 +379,7 @@ class StringValueScanner : public BaseScanner {
idx_t start_pos;
//! Pointer to the previous buffer handle, necessary for over-buffer values
shared_ptr<CSVBufferHandle> previous_buffer_handle;
//! Strict state machine, is basically a state machine with rfc 4180 set to true, used to figure out new line.
//! Strict state machine is basically a state machine with rfc 4180 set to true, used to figure out a new line.
shared_ptr<CSVStateMachine> state_machine_strict;
};

Expand Down
5 changes: 2 additions & 3 deletions src/duckdb/src/include/duckdb/main/attached_database.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,10 @@ class AttachedDatabase : public CatalogEntry {
//! Create the built-in system database (without storage).
explicit AttachedDatabase(DatabaseInstance &db, AttachedDatabaseType type = AttachedDatabaseType::SYSTEM_DATABASE);
//! Create an attached database instance with the specified name and storage.
AttachedDatabase(DatabaseInstance &db, Catalog &catalog, string name, string file_path,
const AttachOptions &options);
AttachedDatabase(DatabaseInstance &db, Catalog &catalog, string name, string file_path, AttachOptions &options);
//! Create an attached database instance with the specified storage extension.
AttachedDatabase(DatabaseInstance &db, Catalog &catalog, StorageExtension &ext, ClientContext &context, string name,
const AttachInfo &info, const AttachOptions &options);
AttachInfo &info, AttachOptions &options);
~AttachedDatabase() override;

//! Initializes the catalog and storage of the attached database.
Expand Down
4 changes: 2 additions & 2 deletions src/duckdb/src/include/duckdb/main/database.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ class DatabaseInstance : public enable_shared_from_this<DatabaseInstance> {

DUCKDB_API SettingLookupResult TryGetCurrentSetting(const string &key, Value &result) const;

unique_ptr<AttachedDatabase> CreateAttachedDatabase(ClientContext &context, const AttachInfo &info,
const AttachOptions &options);
unique_ptr<AttachedDatabase> CreateAttachedDatabase(ClientContext &context, AttachInfo &info,
AttachOptions &options);

void AddExtensionInfo(const string &name, const ExtensionLoadedInfo &info);

Expand Down
3 changes: 1 addition & 2 deletions src/duckdb/src/include/duckdb/main/database_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ class DatabaseManager {
//! Get an attached database by its name
optional_ptr<AttachedDatabase> GetDatabase(ClientContext &context, const string &name);
//! Attach a new database
optional_ptr<AttachedDatabase> AttachDatabase(ClientContext &context, const AttachInfo &info,
const AttachOptions &options);
optional_ptr<AttachedDatabase> AttachDatabase(ClientContext &context, AttachInfo &info, AttachOptions &options);
//! Detach an existing database
void DetachDatabase(ClientContext &context, const string &name, OnEntryNotFound if_not_found);
//! Returns a reference to the system catalog
Expand Down
10 changes: 7 additions & 3 deletions src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "duckdb/common/unordered_map.hpp"
#include "duckdb/common/types/value.hpp"
#include "duckdb/common/case_insensitive_map.hpp"
#include "duckdb/parser/query_node.hpp"

namespace duckdb {

Expand All @@ -23,7 +24,7 @@ struct CopyInfo : public ParseInfo {
static constexpr const ParseInfoType TYPE = ParseInfoType::COPY_INFO;

public:
CopyInfo() : ParseInfo(TYPE), catalog(INVALID_CATALOG), schema(DEFAULT_SCHEMA) {
CopyInfo() : ParseInfo(TYPE), catalog(INVALID_CATALOG), schema(DEFAULT_SCHEMA), is_format_auto_detected(true) {
}

//! The catalog name to copy to/from
Expand All @@ -38,15 +39,18 @@ struct CopyInfo : public ParseInfo {
bool is_from;
//! The file format of the external file
string format;
//! Whether the format was discovered by inspecting the file path (true) or was manually set, i.e., via the format parameter (false)
bool is_format_auto_detected;
//! The file path to copy to/from
string file_path;
//! Set of (key, value) options
case_insensitive_map_t<vector<Value>> options;
// The SQL statement used instead of a table when copying data out to a file
//! The SQL statement used instead of a table when copying data out to a file
unique_ptr<QueryNode> select_statement;

public:
static string CopyOptionsToString(const string &format, const case_insensitive_map_t<vector<Value>> &options);
static string CopyOptionsToString(const string &format, bool is_format_auto_detected,
const case_insensitive_map_t<vector<Value>> &options);

public:
unique_ptr<CopyInfo> Copy() const;
Expand Down
3 changes: 2 additions & 1 deletion src/duckdb/src/include/duckdb/storage/magic_bytes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ enum class DataFileType : uint8_t {
FILE_DOES_NOT_EXIST, // file does not exist
DUCKDB_FILE, // duckdb database file
SQLITE_FILE, // sqlite database file
PARQUET_FILE // parquet file
PARQUET_FILE, // parquet file
UNKNOWN_FILE // unknown file type
};

class MagicBytes {
Expand Down
4 changes: 4 additions & 0 deletions src/duckdb/src/include/duckdb/storage/storage_extension.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,8 @@ class StorageExtension {
}
};

//! Factory holder for a StorageExtension instance.
//! NOTE(review): the name suggests this is the storage extension used when attaching by
//! opening a plain file - confirm against the implementation of Create().
struct OpenFileStorageExtension {
//! Returns an owning pointer to a StorageExtension; the caller takes ownership
static unique_ptr<StorageExtension> Create();
};

} // namespace duckdb
10 changes: 4 additions & 6 deletions src/duckdb/src/main/attached_database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ AttachedDatabase::AttachedDatabase(DatabaseInstance &db, AttachedDatabaseType ty
}

AttachedDatabase::AttachedDatabase(DatabaseInstance &db, Catalog &catalog_p, string name_p, string file_path_p,
const AttachOptions &options)
AttachOptions &options)
: CatalogEntry(CatalogType::DATABASE_ENTRY, catalog_p, std::move(name_p)), db(db), parent_catalog(&catalog_p) {

if (options.access_mode == AccessMode::READ_ONLY) {
Expand Down Expand Up @@ -116,19 +116,17 @@ AttachedDatabase::AttachedDatabase(DatabaseInstance &db, Catalog &catalog_p, str
}

AttachedDatabase::AttachedDatabase(DatabaseInstance &db, Catalog &catalog_p, StorageExtension &storage_extension_p,
ClientContext &context, string name_p, const AttachInfo &info,
const AttachOptions &options)
ClientContext &context, string name_p, AttachInfo &info, AttachOptions &options)
: CatalogEntry(CatalogType::DATABASE_ENTRY, catalog_p, std::move(name_p)), db(db), parent_catalog(&catalog_p),
storage_extension(&storage_extension_p) {
StorageExtensionInfo *storage_info = storage_extension->storage_info.get();
catalog = storage_extension->attach(storage_info, context, *this, name, info, options.access_mode);

if (options.access_mode == AccessMode::READ_ONLY) {
type = AttachedDatabaseType::READ_ONLY_DATABASE;
} else {
type = AttachedDatabaseType::READ_WRITE_DATABASE;
}

StorageExtensionInfo *storage_info = storage_extension->storage_info.get();
catalog = storage_extension->attach(storage_info, context, *this, name, *info.Copy(), options.access_mode);
if (!catalog) {
throw InternalException("AttachedDatabase - attach function did not return a catalog");
}
Expand Down
Loading