From d30417356f4cebaeba1b4404ebb6a2a8306abf8e Mon Sep 17 00:00:00 2001 From: Alex Kasko Date: Sun, 1 Jun 2025 15:18:42 +0100 Subject: [PATCH] Use default alias and DB pinning for DuckLake This PR adds the following changes to DuckLake init: - if `ducklake_alias` option is not specified, then default value `ducklake` is used for it - when default URL `jdbc:duckdb:` or explicit memory URL `jdbc:duckdb:memory:` is used, then this URL is changed to `jdbc:duckdb:memory:ducklakemem` to be able to share this DB across multiple connections - DB pinning is enabled automatically (unless disabled by user) - Result set streaming is enabeld automatically (unless disabled by user) - DuckLake initialization is done only once for each DB (and each DuckLake path), `ATTACH IF NOT EXISTS` call is changed to `ATTACH` Testing: test coverage pending, DuckLake is not yet available in `main` branch. --- src/main/java/org/duckdb/DuckDBDriver.java | 82 +++++++++++++--------- src/main/java/org/duckdb/JdbcUtils.java | 18 ++++- 2 files changed, 65 insertions(+), 35 deletions(-) diff --git a/src/main/java/org/duckdb/DuckDBDriver.java b/src/main/java/org/duckdb/DuckDBDriver.java index 9117d00ae..94bdc9ad3 100644 --- a/src/main/java/org/duckdb/DuckDBDriver.java +++ b/src/main/java/org/duckdb/DuckDBDriver.java @@ -18,12 +18,15 @@ public class DuckDBDriver implements java.sql.Driver { public static final String JDBC_PIN_DB = "jdbc_pin_db"; static final String DUCKDB_URL_PREFIX = "jdbc:duckdb:"; + static final String MEMORY_DB = ":memory:"; private static final String DUCKLAKE_OPTION = "ducklake"; private static final String DUCKLAKE_ALIAS_OPTION = "ducklake_alias"; private static final Pattern DUCKLAKE_ALIAS_OPTION_PATTERN = Pattern.compile("[a-zA-Z0-9_]+"); private static final String DUCKLAKE_URL_PREFIX = "ducklake:"; - private static final ReentrantLock DUCKLAKE_INIT_LOCK = new ReentrantLock(); + private static final String DUCKLAKE_DEFAULT_DBNAME = MEMORY_DB + "ducklakemem"; + private static final LinkedHashSet ducklakeInstances = new LinkedHashSet<>(); + private static final ReentrantLock ducklakeInitLock = new ReentrantLock(); private static final LinkedHashMap pinnedDbRefs = new LinkedHashMap<>(); private static final ReentrantLock pinnedDbRefsLock = new ReentrantLock(); @@ -42,39 +45,52 @@ public Connection connect(String url, Properties info) throws SQLException { if (!acceptsURL(url)) { return null; } + final Properties props; if (info == null) { - info = new Properties(); + props = new Properties(); } else { // make a copy because we're removing the read only property below - info = (Properties) info.clone(); + props = (Properties) info.clone(); } ParsedProps pp = parsePropsFromUrl(url); for (Map.Entry en : pp.props.entrySet()) { - info.put(en.getKey(), en.getValue()); + props.put(en.getKey(), en.getValue()); } - url = pp.shortUrl; - String readOnlyStr = removeOption(info, DUCKDB_READONLY_PROPERTY); + String readOnlyStr = removeOption(props, DUCKDB_READONLY_PROPERTY); boolean readOnly = isStringTruish(readOnlyStr, false); - info.put("duckdb_api", "jdbc"); + props.put("duckdb_api", "jdbc"); // Apache Spark passes this option when SELECT on a JDBC DataSource // table is performed. It is the internal Spark option and is likely // passed by mistake, so we need to ignore it to allow the connection // to be established. - info.remove("path"); + props.remove("path"); - String pinDbOptStr = removeOption(info, JDBC_PIN_DB); - boolean pinDBOpt = isStringTruish(pinDbOptStr, false); + String ducklake = removeOption(props, DUCKLAKE_OPTION); + String ducklakeAlias = removeOption(props, DUCKLAKE_ALIAS_OPTION, DUCKLAKE_OPTION); + final String shortUrl; + if (null != ducklake) { + setDefaultOptionValue(props, JDBC_PIN_DB, true); + setDefaultOptionValue(props, JDBC_STREAM_RESULTS, true); + String dbName = dbNameFromUrl(pp.shortUrl); + if (MEMORY_DB.equals(dbName)) { + shortUrl = DUCKDB_URL_PREFIX + DUCKLAKE_DEFAULT_DBNAME; + } else { + shortUrl = pp.shortUrl; + } + } else { + shortUrl = pp.shortUrl; + } - String ducklake = removeOption(info, DUCKLAKE_OPTION); - String ducklakeAlias = removeOption(info, DUCKLAKE_ALIAS_OPTION); + String pinDbOptStr = removeOption(props, JDBC_PIN_DB); + boolean pinDBOpt = isStringTruish(pinDbOptStr, false); - DuckDBConnection conn = DuckDBConnection.newConnection(url, readOnly, info); + DuckDBConnection conn = DuckDBConnection.newConnection(shortUrl, readOnly, props); - pinDB(pinDBOpt, url, conn); + pinDB(pinDBOpt, shortUrl, conn); - initDucklake(conn, ducklake, ducklakeAlias); + initDucklake(conn, shortUrl, ducklake, ducklakeAlias); return conn; } @@ -104,23 +120,29 @@ public Logger getParentLogger() throws SQLFeatureNotSupportedException { throw new SQLFeatureNotSupportedException("no logger"); } - private static void initDucklake(Connection conn, String ducklake, String ducklakeAlias) throws SQLException { + private static void initDucklake(Connection conn, String url, String ducklake, String ducklakeAlias) + throws SQLException { if (null == ducklake) { return; } - DUCKLAKE_INIT_LOCK.lock(); + ducklakeInitLock.lock(); try { - String attachQuery = createAttachQuery(ducklake, ducklakeAlias); - try (Statement stmt = conn.createStatement()) { - stmt.execute("INSTALL ducklake"); - stmt.execute("LOAD ducklake"); - stmt.execute(attachQuery); - if (null != ducklakeAlias) { - stmt.execute("USE " + ducklakeAlias); + String dbName = dbNameFromUrl(url); + String key = dbName + "#" + ducklake; + if (!ducklakeInstances.contains(key)) { + String attachQuery = createAttachQuery(ducklake, ducklakeAlias); + try (Statement stmt = conn.createStatement()) { + stmt.execute("INSTALL ducklake"); + stmt.execute("LOAD ducklake"); + stmt.execute(attachQuery); } + ducklakeInstances.add(key); } } finally { - DUCKLAKE_INIT_LOCK.unlock(); + ducklakeInitLock.unlock(); + } + try (Statement stmt = conn.createStatement()) { + stmt.execute("USE " + ducklakeAlias); } } @@ -129,14 +151,10 @@ private static String createAttachQuery(String ducklake, String ducklakeAlias) t if (!ducklake.startsWith(DUCKLAKE_URL_PREFIX)) { ducklake = DUCKLAKE_URL_PREFIX + ducklake; } - String query = "ATTACH IF NOT EXISTS '" + ducklake + "'"; - if (null != ducklakeAlias) { - if (!DUCKLAKE_ALIAS_OPTION_PATTERN.matcher(ducklakeAlias).matches()) { - throw new SQLException("Invalid DuckLake alias specified: " + ducklakeAlias); - } - query += " AS " + ducklakeAlias; + if (!DUCKLAKE_ALIAS_OPTION_PATTERN.matcher(ducklakeAlias).matches()) { + throw new SQLException("Invalid DuckLake alias specified: " + ducklakeAlias); } - return query; + return "ATTACH '" + ducklake + "' AS " + ducklakeAlias; } private static ParsedProps parsePropsFromUrl(String url) throws SQLException { diff --git a/src/main/java/org/duckdb/JdbcUtils.java b/src/main/java/org/duckdb/JdbcUtils.java index 8186e1857..536c22a58 100644 --- a/src/main/java/org/duckdb/JdbcUtils.java +++ b/src/main/java/org/duckdb/JdbcUtils.java @@ -1,6 +1,7 @@ package org.duckdb; import static org.duckdb.DuckDBDriver.DUCKDB_URL_PREFIX; +import static org.duckdb.DuckDBDriver.MEMORY_DB; import java.sql.SQLException; import java.util.Properties; @@ -19,11 +20,22 @@ static T unwrap(Object obj, Class iface) throws SQLException { } static String removeOption(Properties props, String opt) { + return removeOption(props, opt, null); + } + + static String removeOption(Properties props, String opt, String defaultVal) { Object obj = props.remove(opt); if (null != obj) { return obj.toString().trim(); } - return null; + return defaultVal; + } + + static void setDefaultOptionValue(Properties props, String opt, Object value) { + if (props.contains(opt)) { + return; + } + props.put(opt, value); } static boolean isStringTruish(String val, boolean defaultVal) throws SQLException { @@ -56,9 +68,9 @@ static String dbNameFromUrl(String url) throws SQLException { } String dbName = shortUrl.substring(DUCKDB_URL_PREFIX.length()).trim(); if (dbName.length() == 0) { - dbName = ":memory:"; + dbName = MEMORY_DB; } - if (dbName.startsWith("memory:")) { + if (dbName.startsWith(MEMORY_DB.substring(1))) { dbName = ":" + dbName; } return dbName;