Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 63 additions & 4 deletions .github/workflows/Java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ env:
AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_DUCKDB_STAGING_KEY }}

jobs:

format-check:
name: Format Check
runs-on: ubuntu-latest
Expand All @@ -31,13 +32,29 @@ jobs:
needs: format-check
env:
MANYLINUX_IMAGE: quay.io/pypa/manylinux_2_28_x86_64
# Spark testing env vars below
DUCKDB_JDBC_JAR: ${{ github.workspace }}/build/release/duckdb_jdbc.jar
SPARK_SQL_EXE: ${{ github.workspace }}/sparktest/spark-3.5.3-bin-hadoop3/bin/spark-sql
POSTGRES_HOST: 127.0.0.1
POSTGRES_PORT: 5432
POSTGRES_MAINTENANCE_DB: postgres
POSTGRES_USERNAME: postgres
POSTGRES_PASSWORD: postgres
DUCKLAKE_CATALOG_DB: lake_test
PARQUET_FILE_URL: https://blobs.duckdb.org/data/taxi_2019_04.parquet
SESSION_INIT_SQL_FILE: ${{ github.workspace }}/sparktest/spark-session-init.sql
MINIO_EXE: ${{ github.workspace }}/sparktest/minio
MINIO_PID: ${{ github.workspace }}/sparktest/minio.pid
MC_EXE: ${{ github.workspace }}/sparktest/mc
MINIO_DATA: ${{ github.workspace }}/sparktest/minio_data
MINIO_HOST: 127.0.0.1
MINIO_PORT: 9000
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Build
shell: bash
run: |
docker run \
-v.:/duckdb \
Expand All @@ -56,7 +73,6 @@ jobs:
"

- name: JDBC Tests EL8
shell: bash
if: ${{ inputs.skip_tests != 'true' }}
run: |
docker run \
Expand All @@ -76,7 +92,6 @@ jobs:
"

- name: JDBC Tests
shell: bash
if: ${{ inputs.skip_tests != 'true' }}
run: |
cat /etc/os-release
Expand All @@ -102,7 +117,6 @@ jobs:

- name: CTS tests
if: ${{ inputs.skip_tests != 'true' }}
shell: bash
run: |
docker run \
-v.:/duckdb \
Expand All @@ -118,6 +132,51 @@ jobs:
make -C /duckdb/jdbc_compatibility_test_suite_runner test
"

- name: Spark Test Resources
run: |
mkdir sparktest
cd sparktest
cmake ..

- name: Setup Postgres
uses: ikalnytskyi/action-setup-postgres@v7
with:
postgres-version: '17'
username: ${{ env.POSTGRES_USERNAME }}
password: ${{ env.POSTGRES_PASSWORD }}
database: ${{ env.POSTGRES_MAINTENANCE_DB }}
port: ${{ env.POSTGRES_PORT }}
ssl: true

- name: Setup Minio
working-directory: sparktest
run: |
wget -nv https://dl.min.io/server/minio/release/linux-amd64/minio
chmod +x minio
./minio --version
wget -nv https://dl.min.io/client/mc/release/linux-amd64/mc
chmod +x mc
./mc --version
java -version
java ${{ github.workspace }}/src/test/external/SetupMinio.java

- name: Setup DuckLake
working-directory: sparktest
run: |
cat ${{ env.SESSION_INIT_SQL_FILE }}
wget -nv https://github.com/pgjdbc/pgjdbc/releases/download/REL42.7.7/postgresql-42.7.7.jar
java -version
java -cp ${{ env.DUCKDB_JDBC_JAR }}:postgresql-42.7.7.jar ${{ github.workspace }}/src/test/external/SetupDuckLake.java

- name: Spark Tests
working-directory: sparktest
run: |
wget -nv https://blobs.duckdb.org/ci/spark-3.5.3-bin-hadoop3.tgz
tar xf spark-3.5.3-bin-hadoop3.tgz
cat spark-test.sql
java -version
java ${{ github.workspace }}/src/test/external/RunSpark.java spark-test.sql

- name: Deploy
shell: bash
run: |
Expand Down
9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -658,3 +658,12 @@ add_custom_command(
$<TARGET_FILE_DIR:duckdb_java> $<TARGET_FILE_NAME:duckdb_java>)

add_custom_target(jdbc ALL DEPENDS dummy_jdbc_target)

# test resources

configure_file(
src/test/external/spark-session-init.sql
spark-session-init.sql)
configure_file(
src/test/external/spark-test.sql
spark-test.sql)
9 changes: 9 additions & 0 deletions CMakeLists.txt.in
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,12 @@ add_custom_command(
$<TARGET_FILE_DIR:duckdb_java> $<TARGET_FILE_NAME:duckdb_java>)

add_custom_target(jdbc ALL DEPENDS dummy_jdbc_target)

# test resources

configure_file(
src/test/external/spark-session-init.sql
spark-session-init.sql)
configure_file(
src/test/external/spark-test.sql
spark-test.sql)
37 changes: 37 additions & 0 deletions src/test/external/RunSpark.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import static java.lang.ProcessBuilder.Redirect.INHERIT;
import static java.nio.charset.StandardCharsets.UTF_8;

/**
 * Launches spark-sql with the DuckDB JDBC driver on the driver classpath,
 * runs the specified SQL script and validates the script's printed results.
 */
public class RunSpark {

    // Path to the DuckDB JDBC driver jar placed on the Spark driver classpath.
    static final String DUCKDB_JDBC_JAR = fromEnv("DUCKDB_JDBC_JAR", "./build/release/duckdb_jdbc.jar");
    // Path to the spark-sql launcher executable.
    static final String SPARK_SQL_EXE = fromEnv("SPARK_SQL_EXE", "../spark/spark-3.5.5-bin-hadoop3/bin/spark-sql");

    // First two stdout lines expected from the test script:
    // the COUNT(*) and SUM(total_amount) results over the loaded data set.
    static final String EXPECTED_LINE_1 = "7433139";
    static final String EXPECTED_LINE_2 = "1.429378704487457E8";

    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            throw new RuntimeException("Path to Spark SQL script must be specified as a first and only argument");
        }
        Process ps = new ProcessBuilder(SPARK_SQL_EXE, "--driver-class-path", DUCKDB_JDBC_JAR, "-f", args[0])
                         .redirectInput(INHERIT)
                         .redirectError(INHERIT)
                         .start();
        // Drain stdout to EOF before waiting, to avoid a pipe-buffer deadlock.
        String output = new String(ps.getInputStream().readAllBytes(), UTF_8);
        System.out.print(output);
        int status = ps.waitFor();
        if (status != 0) {
            // Check the exit status first: a failed run usually produces
            // empty/partial output, and an "output check failed" message
            // would mask the real cause.
            throw new RuntimeException("Spark SQL process failed, exit status: " + status);
        }
        String[] lines = output.split("\n");
        if (lines.length < 2 || !EXPECTED_LINE_1.equals(lines[0]) || !EXPECTED_LINE_2.equals(lines[1])) {
            throw new RuntimeException("Spark SQL test output check failed");
        }
        System.out.println("Success");
    }

    /**
     * Returns the value of the specified environment variable,
     * or the provided default when the variable is not set.
     */
    static String fromEnv(String envVarName, String defaultValue) {
        String env = System.getenv(envVarName);
        if (null != env) {
            return env;
        }
        return defaultValue;
    }
}
66 changes: 66 additions & 0 deletions src/test/external/SetupDuckLake.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

/**
 * Prepares the DuckLake test environment: re-creates the catalog database
 * in Postgres, attaches a DuckLake instance backed by it, loads the taxi
 * test data set from a Parquet URL, and verifies the loaded row count.
 */
class SetupDuckLake {

    // Postgres connection details; the env var names match the CI workflow env block.
    static final String POSTGRES_HOST = fromEnv("POSTGRES_HOST", "127.0.0.1");
    static final String POSTGRES_PORT = fromEnv("POSTGRES_PORT", "5432");
    static final String POSTGRES_MAINTENANCE_DB = fromEnv("POSTGRES_MAINTENANCE_DB", "postgres");
    static final String POSTGRES_USERNAME = fromEnv("POSTGRES_USERNAME", "postgres");
    static final String POSTGRES_PASSWORD = fromEnv("POSTGRES_PASSWORD", "postgres");
    static final String POSTGRES_URL = fromEnv("POSTGRES_URL", "jdbc:postgresql://" + POSTGRES_HOST + ":" +
                                                                   POSTGRES_PORT + "/" + POSTGRES_MAINTENANCE_DB);
    // Fixed: read 'DUCKLAKE_CATALOG_DB' — the variable the CI workflow exports.
    // Previously this read 'DUCKLAKE_CATALOG_DB_NAME' and silently fell back to the default.
    static final String DUCKLAKE_CATALOG_DB = fromEnv("DUCKLAKE_CATALOG_DB", "lake_test");
    static final String DUCKLAKE_URL =
        fromEnv("DUCKLAKE_URL", "ducklake:postgres:postgresql://" + POSTGRES_USERNAME + ":" + POSTGRES_PASSWORD + "@" +
                                    POSTGRES_HOST + ":" + POSTGRES_PORT + "/" + DUCKLAKE_CATALOG_DB);
    // Fixed: read 'PARQUET_FILE_URL' — the variable the CI workflow exports.
    // Previously this read 'DUCKLAKE_DATA_PATH' and silently fell back to the default.
    static final String PARQUET_FILE_URL =
        fromEnv("PARQUET_FILE_URL", "https://blobs.duckdb.org/data/taxi_2019_04.parquet");
    static final String SESSION_INIT_SQL_FILE =
        fromEnv("SESSION_INIT_SQL_FILE", "./src/test/external/spark-session-init.sql");

    public static void main(String[] args) throws Exception {
        setupPostgres();
        setupDuckLake();
        System.out.println("Success");
    }

    /** Drops and re-creates the DuckLake catalog database on the Postgres server. */
    static void setupPostgres() throws Exception {
        System.out.println("Creating Postgres database ...");
        try (Connection conn = DriverManager.getConnection(POSTGRES_URL, POSTGRES_USERNAME, POSTGRES_PASSWORD);
             Statement stmt = conn.createStatement()) {
            // Database names cannot be bound as statement parameters;
            // DUCKLAKE_CATALOG_DB comes from trusted CI configuration only.
            stmt.execute("DROP DATABASE IF EXISTS " + DUCKLAKE_CATALOG_DB);
            stmt.execute("CREATE DATABASE " + DUCKLAKE_CATALOG_DB);
        }
    }

    /** Attaches DuckLake, loads the Parquet data, then re-connects and reports the row count. */
    static void setupDuckLake() throws Exception {
        System.out.println("Creating DuckLake instance ...");
        try (Connection conn =
                 DriverManager.getConnection("jdbc:duckdb:;session_init_sql_file=" + SESSION_INIT_SQL_FILE + ";");
             Statement stmt = conn.createStatement()) {
            stmt.execute("ATTACH '" + DUCKLAKE_URL + "' AS lake (DATA_PATH 's3://bucket1')");
            stmt.execute("USE lake");
            System.out.println("Loading data from URL: '" + PARQUET_FILE_URL + "' ...");
            stmt.execute("CREATE TABLE tab1 AS FROM '" + PARQUET_FILE_URL + "'");
        }
        // Open a fresh connection directly to the DuckLake URL to confirm the loaded data is visible.
        try (Connection conn = DriverManager.getConnection("jdbc:duckdb:" + DUCKLAKE_URL +
                                                           ";session_init_sql_file=" + SESSION_INIT_SQL_FILE + ";");
             Statement stmt = conn.createStatement()) {
            try (ResultSet rs = stmt.executeQuery("SELECT count(*) FROM tab1")) {
                rs.next();
                System.out.println("Records loaded: " + rs.getLong(1));
            }
        }
    }

    /**
     * Returns the value of the specified environment variable,
     * or the provided default when the variable is not set.
     */
    static String fromEnv(String envVarName, String defaultValue) {
        String env = System.getenv(envVarName);
        if (null != env) {
            return env;
        }
        return defaultValue;
    }
}
98 changes: 98 additions & 0 deletions src/test/external/SetupMinio.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import static java.lang.Integer.parseInt;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.file.Files.readString;

import java.io.IOException;
import java.net.Socket;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Comparator;
import java.util.stream.Stream;

/**
 * (Re-)initializes the local Minio server used by the Spark/DuckLake tests:
 * kills a previously started instance (via its PID file), wipes its data
 * directory, starts a fresh server and creates the test bucket with 'mc'.
 */
public class SetupMinio {

    static final String MINIO_EXE_PATH = fromEnv("MINIO_EXE", "../minio/minio");
    static final String PID_PATH = fromEnv("MINIO_PID", "../minio/minio.pid");
    static final String MC_EXE_PATH = fromEnv("MC_EXE", "../minio/mc");
    static final String DATA_PATH = fromEnv("MINIO_DATA", "./build/data1");
    static final String MINIO_HOST = fromEnv("MINIO_HOST", "127.0.0.1");
    static final String MINIO_PORT = fromEnv("MINIO_PORT", "9000");

    public static void main(String[] args) throws Exception {
        killMinioServer();
        deleteMinioData();
        setupMinio();
    }

    /** Terminates a previously started Minio server recorded in the PID file, if any. */
    static void killMinioServer() throws Exception {
        Path pidPath = Paths.get(PID_PATH);
        if (!Files.exists(pidPath)) {
            return;
        }
        long pid = Long.parseLong(readString(pidPath, UTF_8));
        System.out.println("Killing Minio server process, pid: " + pid + " ...");
        // Use ProcessHandle instead of spawning /usr/bin/kill: portable,
        // and a clean no-op when the recorded PID no longer exists.
        ProcessHandle.of(pid).ifPresent(ProcessHandle::destroy);
        Files.delete(pidPath);
    }

    /** Recursively deletes the Minio data directory, if present. */
    static void deleteMinioData() throws Exception {
        Path minioDataPath = Paths.get(DATA_PATH);
        if (!Files.exists(minioDataPath)) {
            return;
        }
        System.out.println("Deleting Minio data: " + minioDataPath + " ...");
        // Files.walk returns a lazily-populated Stream backed by open
        // directory handles — it must be closed (try-with-resources).
        try (Stream<Path> walk = Files.walk(minioDataPath)) {
            // Reverse order deletes children before their parent directories.
            walk.sorted(Comparator.reverseOrder()).forEach(p -> {
                try {
                    Files.delete(p);
                } catch (IOException e) {
                    throw new RuntimeException("Failed to delete " + p, e);
                }
            });
        }
    }

    /** Starts the Minio server, waits for it to accept connections, and creates the test bucket. */
    static void setupMinio() throws Exception {
        System.out.println("Starting Minio server ...");
        Process minioServerProcess =
            new ProcessBuilder(MINIO_EXE_PATH, "server", "--address", MINIO_HOST + ":" + MINIO_PORT, DATA_PATH)
                .inheritIO()
                .start();
        Files.write(Paths.get(PID_PATH), String.valueOf(minioServerProcess.pid()).getBytes(UTF_8));
        // Poll the listen port for up to ~16 seconds until the server accepts connections.
        boolean minioServerStarted = false;
        for (int i = 0; i < 16; i++) {
            try (Socket sock = new Socket(MINIO_HOST, parseInt(MINIO_PORT))) {
                minioServerStarted = true;
                break;
            } catch (IOException e) {
                Thread.sleep(1000);
            }
        }
        if (!minioServerStarted) {
            throw new RuntimeException("Cannot start Minio");
        }
        Thread.sleep(2000); // improve log output
        System.out.println("Minio server started, pid: " + minioServerProcess.pid() + ", creating bucket ...");
        int mcAliasStatus = new ProcessBuilder(MC_EXE_PATH, "alias", "set", "local",
                                               "http://" + MINIO_HOST + ":" + MINIO_PORT, "minioadmin", "minioadmin")
                                .inheritIO()
                                .start()
                                .waitFor();
        if (mcAliasStatus != 0) {
            killMinioServer();
            throw new RuntimeException("Minio mc alias set error, status: " + mcAliasStatus);
        }
        int mcMbStatus = new ProcessBuilder(MC_EXE_PATH, "mb", "local/bucket1").inheritIO().start().waitFor();
        if (mcMbStatus != 0) {
            killMinioServer();
            // Fixed: report mcMbStatus here (previously reported mcAliasStatus — copy-paste slip).
            throw new RuntimeException("Minio mc mb error, status: " + mcMbStatus);
        }
        System.out.println("Minio server set up successfully");
    }

    /**
     * Returns the value of the specified environment variable,
     * or the provided default when the variable is not set.
     */
    static String fromEnv(String envVarName, String defaultValue) {
        String env = System.getenv(envVarName);
        if (null != env) {
            return env;
        }
        return defaultValue;
    }
}
9 changes: 9 additions & 0 deletions src/test/external/spark-session-init.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Session-initialization SQL executed by DuckDB via the
-- 'session_init_sql_file' JDBC connection option.
-- Registers an S3 secret pointing at the local Minio server so that
-- DuckLake data files under s3://bucket1 can be read and written.
-- NOTE(review): the $ENV{...} placeholders appear intended to be substituted
-- by CMake configure_file at build time -- confirm; DuckDB does not expand them.
CREATE OR REPLACE TEMPORARY SECRET secret (
TYPE s3,
ENDPOINT '$ENV{MINIO_HOST}:$ENV{MINIO_PORT}',
PROVIDER config,
KEY_ID 'minioadmin',
SECRET 'minioadmin',
URL_STYLE 'path',
USE_SSL false
)
13 changes: 13 additions & 0 deletions src/test/external/spark-test.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Spark SQL test script: exposes the DuckLake-backed 'tab1' table through
-- the DuckDB JDBC driver as a partitioned JDBC view (7 parallel range reads
-- over 'pickup_at'), then runs aggregates whose printed results are checked
-- line-by-line by RunSpark.java.
-- NOTE(review): the $ENV{...} placeholders appear intended to be substituted
-- by CMake configure_file at build time -- confirm.
CREATE OR REPLACE TEMPORARY VIEW tab1 USING jdbc OPTIONS (
url "jdbc:duckdb:ducklake:postgres:postgresql://$ENV{POSTGRES_USERNAME}:$ENV{POSTGRES_PASSWORD}@$ENV{POSTGRES_HOST}:$ENV{POSTGRES_PORT}/$ENV{DUCKLAKE_CATALOG_DB};session_init_sql_file=$ENV{SESSION_INIT_SQL_FILE};",
dbtable "tab1",

partitionColumn "pickup_at",
lowerBound "2008-08-08 09:13:28",
upperBound "2033-04-27 13:08:32",
numPartitions "7"
);

-- Expected by RunSpark.java as output line 1 (row count) and line 2 (sum).
SELECT COUNT(*) FROM tab1;
SELECT SUM(total_amount) FROM tab1;
SELECT * FROM tab1 ORDER BY pickup_at LIMIT 4;
Loading