Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
b0ddd47
Refactor CI workflow to use external test matrix file
nathanwasson Sep 26, 2025
ec298db
Add Dockerfile to set up environment with cloudflared and r2-downloader
nathanwasson Sep 26, 2025
cb60e9d
Add GitHub Actions workflow for building and testing Docker image
nathanwasson Sep 26, 2025
8585a06
Add container image section to README.md
nathanwasson Sep 26, 2025
e55f1c4
Update GitHub Actions workflow to include testing environment and imp…
nathanwasson Sep 26, 2025
2783226
Enhance Dockerfile to support architecture-specific cloudflared insta…
nathanwasson Sep 26, 2025
dc40e73
Add QEMU setup step in GitHub Actions workflow for multi-architecture…
nathanwasson Sep 26, 2025
6ee7d46
Set default architecture to amd64 in Dockerfile for local builds
nathanwasson Sep 26, 2025
531b4b6
Specify arm64 in setup-qemu step
nathanwasson Sep 26, 2025
ab71d90
Add verbose output to wget command for cloudflared installation for d…
nathanwasson Sep 26, 2025
94057c2
Remove pipefail from Dockerfile
nathanwasson Sep 26, 2025
305b564
Remove debugging verbose flag for cloudflared wget
nathanwasson Sep 26, 2025
2545e43
Add concurrency control to GitHub Actions workflows to prevent simult…
nathanwasson Sep 26, 2025
d62b7ec
Tweak README.md headings
nathanwasson Sep 26, 2025
864c4bb
Add arm64 testing in GitHub Actions workflow.
nathanwasson Sep 26, 2025
605f143
Update IMAGE_REF in GitHub Actions workflow to use version tag instea…
nathanwasson Sep 26, 2025
e24d4c5
Update GitHub Actions workflows to change environments
nathanwasson Sep 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 217 additions & 0 deletions .github/workflows/build-test-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
name: Build, Push, and Test Container Image

# Triggers: pushes and pull requests targeting main, manual dispatch, and a
# monthly scheduled run.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
    types: [opened, synchronize, reopened]
  workflow_dispatch:
  schedule:
    # 04:44 on the 4th day of every month (GitHub evaluates cron in UTC).
    - cron: '44 4 4 * *'

# Allow only one active run per workflow and pull request. Events that are not
# pull requests fall back to the git ref as the group key. A newer run in the
# same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

# Shared by the login, metadata, and test steps to address the image.
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  # Builds the multi-platform image, pushes it to the registry, then runs the
  # shared test matrix against the pushed image once per platform.
  build-push-test:
    runs-on: ubuntu-latest
    environment: ghcr
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      # Registers arm64 emulation so the arm64 image variant can be built and
      # executed on this runner.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
        with:
          platforms: arm64

      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Produces the tag list and labels consumed by the build step:
      # a PR-ref tag for pull requests, and `latest` on the default branch.
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=pr
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=min

      # Resolves the op:// references below; the resolved values are read by
      # the test steps through this step's outputs.
      - name: Load secrets
        id: op-load-secrets
        uses: 1password/load-secrets-action@v3
        env:
          OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
          CF_ACCESS_CLIENT_ID: "op://x4h33jlflygxmifnxfrizew4oa/7g2fhcv3tpsogr2vhkielewvsm/wsyxxjrdsyj36zsaanbh3kybfe"
          CF_ACCESS_CLIENT_SECRET: "op://x4h33jlflygxmifnxfrizew4oa/7g2fhcv3tpsogr2vhkielewvsm/5x3sr7bry5a3ealsemvx5vcbuy"

      # Runs every case from test-matrix.json inside the pushed image and
      # compares the container's exit status with the case's `expect_exit`.
      - name: Run test matrix in container (linux/amd64)
        env:
          IMAGE_REPOSITORY: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          IMAGE_TAG: ${{ steps.meta.outputs.version }}
          CF_ACCESS_CLIENT_ID: ${{ steps.op-load-secrets.outputs.CF_ACCESS_CLIENT_ID }}
          CF_ACCESS_CLIENT_SECRET: ${{ steps.op-load-secrets.outputs.CF_ACCESS_CLIENT_SECRET }}
        run: |
          # Keep going after a failing test; failures are tallied and
          # reported at the end instead of aborting the step.
          set +e

          # Docker rejects uppercase characters in repository names, while
          # github.repository keeps the owner/repo casing, so lowercase the
          # repository part of the reference before using it.
          IMAGE_REF="${IMAGE_REPOSITORY,,}:${IMAGE_TAG}"
          matrix_file=.github/workflows/test-matrix.json

          total_tests=0
          passed_tests=0
          failed_tests=0

          while IFS= read -r test_case; do
            total_tests=$((total_tests + 1))

            test_name=$(echo "$test_case" | jq -r '.name')
            test_desc=$(echo "$test_case" | jq -r '.description')
            test_args=$(echo "$test_case" | jq -r '.args')
            test_url=$(echo "$test_case" | jq -r '.url')
            expected_exit=$(echo "$test_case" | jq -r '.expect_exit')
            skip_url=$(echo "$test_case" | jq -r '.skip_url')
            skip_creds=$(echo "$test_case" | jq -r '.skip_creds // false')

            echo ""
            echo "--- Test: $test_name ---"
            echo "Description: $test_desc"
            echo "Expected exit: $expected_exit"
            echo "Platform: linux/amd64"

            docker_cmd=(docker run --rm --pull always --platform linux/amd64)

            if [ "$skip_creds" != "true" ]; then
              # `-e NAME` without a value forwards the variable from this
              # step's environment, keeping the secret out of the argv.
              docker_cmd+=(
                -e CF_ACCESS_CLIENT_ID
                -e CF_ACCESS_CLIENT_SECRET
              )
            fi

            docker_cmd+=("$IMAGE_REF" bash -lc)

            if [ "$skip_url" = "true" ]; then
              test_command="r2-downloader $test_args"
            else
              test_command="r2-downloader $test_args $test_url"
            fi

            echo "Running: docker run --rm ... bash -lc \"$test_command\""

            "${docker_cmd[@]}" "$test_command"
            actual_exit=$?

            if [ "$actual_exit" -eq "$expected_exit" ]; then
              echo "✅ [PASS] $test_name"
              passed_tests=$((passed_tests + 1))
            else
              echo "❌ [FAIL] $test_name - expected $expected_exit, got $actual_exit"
              failed_tests=$((failed_tests + 1))
            fi

          done < <(jq -c '.include[]' "$matrix_file")

          echo ""
          echo "=== Container Test Summary ==="
          echo "Total: $total_tests"
          echo "Passed: $passed_tests"
          echo "Failed: $failed_tests"

          # A missing or unparsable matrix file yields zero cases; treat that
          # as a failure rather than a vacuous pass.
          if [ "$total_tests" -eq 0 ]; then
            echo "::error::No test cases were read from $matrix_file"
            exit 1
          fi

          if [ "$failed_tests" -gt 0 ]; then
            exit 1
          fi

      # Same matrix as the amd64 step, executed against the arm64 variant of
      # the pushed image.
      - name: Run test matrix in container (linux/arm64)
        env:
          IMAGE_REPOSITORY: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          IMAGE_TAG: ${{ steps.meta.outputs.version }}
          CF_ACCESS_CLIENT_ID: ${{ steps.op-load-secrets.outputs.CF_ACCESS_CLIENT_ID }}
          CF_ACCESS_CLIENT_SECRET: ${{ steps.op-load-secrets.outputs.CF_ACCESS_CLIENT_SECRET }}
        run: |
          # Keep going after a failing test; failures are tallied and
          # reported at the end instead of aborting the step.
          set +e

          # Docker rejects uppercase characters in repository names, while
          # github.repository keeps the owner/repo casing, so lowercase the
          # repository part of the reference before using it.
          IMAGE_REF="${IMAGE_REPOSITORY,,}:${IMAGE_TAG}"
          matrix_file=.github/workflows/test-matrix.json

          total_tests=0
          passed_tests=0
          failed_tests=0

          while IFS= read -r test_case; do
            total_tests=$((total_tests + 1))

            test_name=$(echo "$test_case" | jq -r '.name')
            test_desc=$(echo "$test_case" | jq -r '.description')
            test_args=$(echo "$test_case" | jq -r '.args')
            test_url=$(echo "$test_case" | jq -r '.url')
            expected_exit=$(echo "$test_case" | jq -r '.expect_exit')
            skip_url=$(echo "$test_case" | jq -r '.skip_url')
            skip_creds=$(echo "$test_case" | jq -r '.skip_creds // false')

            echo ""
            echo "--- Test: $test_name ---"
            echo "Description: $test_desc"
            echo "Expected exit: $expected_exit"
            echo "Platform: linux/arm64"

            docker_cmd=(docker run --rm --pull always --platform linux/arm64)

            if [ "$skip_creds" != "true" ]; then
              # `-e NAME` without a value forwards the variable from this
              # step's environment, keeping the secret out of the argv.
              docker_cmd+=(
                -e CF_ACCESS_CLIENT_ID
                -e CF_ACCESS_CLIENT_SECRET
              )
            fi

            docker_cmd+=("$IMAGE_REF" bash -lc)

            if [ "$skip_url" = "true" ]; then
              test_command="r2-downloader $test_args"
            else
              test_command="r2-downloader $test_args $test_url"
            fi

            echo "Running: docker run --rm ... bash -lc \"$test_command\""

            "${docker_cmd[@]}" "$test_command"
            actual_exit=$?

            if [ "$actual_exit" -eq "$expected_exit" ]; then
              echo "✅ [PASS] $test_name"
              passed_tests=$((passed_tests + 1))
            else
              echo "❌ [FAIL] $test_name - expected $expected_exit, got $actual_exit"
              failed_tests=$((failed_tests + 1))
            fi

          done < <(jq -c '.include[]' "$matrix_file")

          echo ""
          echo "=== Container Test Summary (arm64) ==="
          echo "Total: $total_tests"
          echo "Passed: $passed_tests"
          echo "Failed: $failed_tests"

          # A missing or unparsable matrix file yields zero cases; treat that
          # as a failure rather than a vacuous pass.
          if [ "$total_tests" -eq 0 ]; then
            echo "::error::No test cases were read from $matrix_file"
            exit 1
          fi

          if [ "$failed_tests" -gt 0 ]; then
            exit 1
          fi
106 changes: 5 additions & 101 deletions .github/workflows/test-downloader.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:
- 'cygwin-wget-installer.bat'
- '.github/workflows/test-downloader.yml'

# Prevent multiple runs from the same PR
# Prevent simultaneous runs from the same PR
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
Expand All @@ -22,113 +22,19 @@ jobs:
outputs:
test-matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: Checkout repository
uses: actions/checkout@v5

- id: set-matrix
run: |
echo "matrix<<EOF" >> $GITHUB_OUTPUT
cat << 'MATRIX_EOF' >> $GITHUB_OUTPUT
{
"include": [
{
"name": "help_flag",
"description": "Help flag should work without URL",
"args": "-h",
"url": "",
"expect_exit": 0,
"needs_auth": false,
"skip_url": true
},
{
"name": "debug_mode",
"description": "Debug mode with service account",
"args": "-xs",
"url": "https://auth-test.mlcommons-storage.org/metadata/dataset.uri",
"expect_exit": 0,
"needs_auth": true,
"skip_url": false
},
{
"name": "missing_url_service_account",
"description": "Service account flag without URL should fail",
"args": "-s",
"url": "",
"expect_exit": 1,
"needs_auth": false,
"skip_url": true
},
{
"name": "missing_url_multiple_flags",
"description": "Multiple flags without URL should fail",
"args": "-s -t",
"url": "",
"expect_exit": 1,
"needs_auth": false,
"skip_url": true
},
{
"name": "missing_download_dir_arg",
"description": "Download directory flag without directory argument should fail",
"args": "-d",
"url": "",
"expect_exit": 1,
"needs_auth": false,
"skip_url": true
},
{
"name": "service_account_auth",
"description": "Service account with testing mode (auth required)",
"args": "-st -d test/dataset",
"url": "https://auth-test.mlcommons-storage.org/metadata/dataset.uri",
"expect_exit": 0,
"needs_auth": true,
"skip_url": false
},
{
"name": "no_auth_required",
"description": "No authentication required dataset",
"args": "-st -d test/dataset",
"url": "https://no-auth-test.mlcommons-storage.org/metadata/dataset.uri",
"expect_exit": 0,
"needs_auth": false,
"skip_url": false
},
{
"name": "single_file_dataset",
"description": "Single file dataset download",
"args": "",
"url": "https://no-auth-test.mlcommons-storage.org/metadata/single-file-dataset.uri",
"expect_exit": 0,
"needs_auth": false,
"skip_url": false
},
{
"name": "invalid_url",
"description": "Invalid URL format should fail",
"args": "-st",
"url": "not-a-valid-url",
"expect_exit": 1,
"needs_auth": false,
"skip_url": false
},
{
"name": "missing_credentials",
"description": "Service account without credentials should fail",
"args": "-s",
"url": "https://auth-test.mlcommons-storage.org/metadata/dataset.uri",
"expect_exit": 1,
"needs_auth": false,
"skip_url": false,
"skip_creds": true
}
]
}
MATRIX_EOF
cat .github/workflows/test-matrix.json >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT

# Unix platforms (Linux and macOS)
test-unix:
needs: define-tests
runs-on: ${{ matrix.os }}
environment: testing
timeout-minutes: 10
strategy:
fail-fast: false
Expand Down Expand Up @@ -242,7 +148,6 @@ jobs:
test-windows-wsl:
needs: define-tests
runs-on: windows-latest
environment: testing
timeout-minutes: 10

steps:
Expand Down Expand Up @@ -352,7 +257,6 @@ jobs:
test-windows-cygwin:
needs: define-tests
runs-on: windows-latest
environment: testing
timeout-minutes: 10

steps:
Expand Down
Loading
Loading