Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ jobs:
cat /etc/*-release
DEBIAN_FRONTEND=noninteractive sudo apt-get update && sudo apt-get install -yq tzdata curl lsb-core lsb-release # > /dev/null
. ./scripts/cloud_build_test_ubuntu.sh "3.8.12"
gsutil cp clouddq_patched.zip gs://${{ secrets.GCS_BUCKET_NAME }}/build-artifacts/${{matrix.os}}/python3.8/`date -I'minutes'`/${{ steps.vars.outputs.branch }}_${{ steps.vars.outputs.sha_short }}/clouddq-executable.zip
gsutil cp clouddq_patched.zip.hashsum gs://${{ secrets.GCS_BUCKET_NAME }}/build-artifacts/${{matrix.os}}/python3.8/`date -I'minutes'`/${{ steps.vars.outputs.branch }}_${{ steps.vars.outputs.sha_short }}/clouddq-executable.zip.hashsum
gsutil cp clouddq/integration/clouddq_pyspark_driver.py gs://${{ secrets.GCS_BUCKET_NAME }}/build-artifacts/${{matrix.os}}/python3.8/`date -I'minutes'`/${{ steps.vars.outputs.branch }}_${{ steps.vars.outputs.sha_short }}/clouddq_pyspark_driver.py
gcloud storage cp clouddq_patched.zip gs://${{ secrets.GCS_BUCKET_NAME }}/build-artifacts/${{matrix.os}}/python3.8/`date -I'minutes'`/${{ steps.vars.outputs.branch }}_${{ steps.vars.outputs.sha_short }}/clouddq-executable.zip
gcloud storage cp clouddq_patched.zip.hashsum gs://${{ secrets.GCS_BUCKET_NAME }}/build-artifacts/${{matrix.os}}/python3.8/`date -I'minutes'`/${{ steps.vars.outputs.branch }}_${{ steps.vars.outputs.sha_short }}/clouddq-executable.zip.hashsum
gcloud storage cp clouddq/integration/clouddq_pyspark_driver.py gs://${{ secrets.GCS_BUCKET_NAME }}/build-artifacts/${{matrix.os}}/python3.8/`date -I'minutes'`/${{ steps.vars.outputs.branch }}_${{ steps.vars.outputs.sha_short }}/clouddq_pyspark_driver.py
shell: bash
14 changes: 7 additions & 7 deletions cloudbuild-release-debian11.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,29 +41,29 @@ steps:

source scripts/install_gcloud.sh

gsutil ls gs://$_GCS_RELEASE_BUCKET
gcloud storage ls gs://$_GCS_RELEASE_BUCKET

gsutil cp clouddq_patched.zip
gcloud storage cp clouddq_patched.zip
gs://${_GCS_RELEASE_BUCKET}/build-artifacts/debian11/python3.9/main/`date
-I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip

gsutil cp clouddq_patched.zip.hashsum
gcloud storage cp clouddq_patched.zip.hashsum
gs://${_GCS_RELEASE_BUCKET}/build-artifacts/debian11/python3.9/main/`date
-I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip.hashsum

gsutil cp clouddq/integration/clouddq_pyspark_driver.py
gcloud storage cp clouddq/integration/clouddq_pyspark_driver.py
gs://${_GCS_RELEASE_BUCKET}/build-artifacts/debian11/python3.9/main/`date
-I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq_pyspark_driver.py

gsutil cp clouddq_patched.zip
gcloud storage cp clouddq_patched.zip
gs://${_GCS_BUCKET_NAME}/build-artifacts/debian11/python3.9/main/`date
-I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip

gsutil cp clouddq_patched.zip.hashsum
gcloud storage cp clouddq_patched.zip.hashsum
gs://${_GCS_BUCKET_NAME}/build-artifacts/debian11/python3.9/main/`date
-I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip.hashsum

gsutil cp clouddq/integration/clouddq_pyspark_driver.py
gcloud storage cp clouddq/integration/clouddq_pyspark_driver.py
gs://${_GCS_BUCKET_NAME}/build-artifacts/debian11/python3.9/main/`date
-I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq_pyspark_driver.py
entrypoint: /bin/bash
Expand Down
14 changes: 7 additions & 7 deletions cloudbuild-release-ubuntu18.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,29 +37,29 @@ steps:

source scripts/install_gcloud.sh

gsutil ls gs://$_GCS_RELEASE_BUCKET
gcloud storage ls gs://$_GCS_RELEASE_BUCKET

gsutil cp clouddq_patched.zip
gcloud storage cp clouddq_patched.zip
gs://${_GCS_RELEASE_BUCKET}/build-artifacts/ubuntu18.04/python3.8/main/`date
-I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip

gsutil cp clouddq_patched.zip.hashsum
gcloud storage cp clouddq_patched.zip.hashsum
gs://${_GCS_RELEASE_BUCKET}/build-artifacts/ubuntu18.04/python3.8/main/`date
-I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip.hashsum

gsutil cp clouddq/integration/clouddq_pyspark_driver.py
gcloud storage cp clouddq/integration/clouddq_pyspark_driver.py
gs://${_GCS_RELEASE_BUCKET}/build-artifacts/ubuntu18.04/python3.8/main/`date
-I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq_pyspark_driver.py

gsutil cp clouddq_patched.zip
gcloud storage cp clouddq_patched.zip
gs://${_GCS_BUCKET_NAME}/build-artifacts/ubuntu18.04/python3.8/main/`date
-I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip

gsutil cp clouddq_patched.zip.hashsum
gcloud storage cp clouddq_patched.zip.hashsum
gs://${_GCS_BUCKET_NAME}/build-artifacts/ubuntu18.04/python3.8/main/`date
-I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip.hashsum

gsutil cp clouddq/integration/clouddq_pyspark_driver.py
gcloud storage cp clouddq/integration/clouddq_pyspark_driver.py
gs://${_GCS_BUCKET_NAME}/build-artifacts/ubuntu18.04/python3.8/main/`date
-I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq_pyspark_driver.py
entrypoint: /bin/bash
Expand Down
4 changes: 2 additions & 2 deletions docs/clouddq-as-dataproc-workflow-composer-dag.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ sed -i s/\<your_bigquery_dataset_id\>/${CLOUDDQ_BIGQUERY_DATASET}/g ./configs/en
This is the bucket where the Python executables and the configuration files will be pulled from for the Dataproc PySpark job.

```bash
gcloud storage buckets create gs://${GCS_BUCKET_NAME} --project=${PROJECT_ID} --location=${REGION} --uniform-bucket-level-access
```

Ensure you have sufficient IAM privileges to create Cloud Storage Buckets in your project.
Expand Down Expand Up @@ -182,7 +182,7 @@ sed -i s/\<template_id\>/${DATAPROC_WORKFLOW_NAME}/g ${DAG_PY_FILE}
export DAG_BUCKET=$(gcloud composer environments describe --format="value(config.dagGcsPrefix)" \
--project ${PROJECT_ID} --location ${REGION} ${COMPOSER_ENVIRONMENT_NAME})

gcloud storage cp ${DAG_PY_FILE} ${DAG_BUCKET}
```

## 8. Check Airflow job status
Expand Down
10 changes: 5 additions & 5 deletions scripts/dataproc-workflow-composer/upload_clouddq_to_gcs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,20 @@ TARGET_PYTHON_INTERPRETER="${TARGET_PYTHON_INTERPRETER}" || err "Environment var

#######################################
# Zip the local ./configs directory and move the archive into the GCS bucket.
# Also uploads the PySpark driver, but only if it is not already present.
# Globals:   GCS_BUCKET_NAME (read)
# Outputs:   creates and uploads clouddq-configs.zip; may upload clouddq_pyspark_driver.py
# Returns:   non-zero if zip or any gcloud storage command fails
#######################################
function zip_configs_directory_and_upload_to_gcs() {
  zip -r clouddq-configs.zip ./configs
  # Migrated from gsutil to the gcloud storage CLI; mv removes the local zip after upload.
  gcloud storage mv clouddq-configs.zip gs://"${GCS_BUCKET_NAME}"/clouddq-configs.zip
  # Upload the driver only when it does not already exist in the bucket
  # (the ls succeeds iff the object is present).
  gcloud storage ls gs://"${GCS_BUCKET_NAME}"/clouddq_pyspark_driver.py || gcloud storage cp ./clouddq/integration/clouddq_pyspark_driver.py gs://"${GCS_BUCKET_NAME}"
}

#######################################
# Download the released CloudDQ executable zip and its sha256 hashsum from
# GitHub Releases, then upload both to the GCS bucket under a versioned name.
# Globals:   CLOUDDQ_RELEASE_VERSION, TARGET_OS, TARGET_PYTHON_INTERPRETER,
#            GCS_BUCKET_NAME (all read)
# Outputs:   uploads clouddq_executable_v<version>.zip and .zip.hashsum to GCS
# Returns:   non-zero if any wget or gcloud storage command fails
#######################################
function upload_clouddq_zip_executable_to_gcs() {
  wget -O clouddq_executable.zip https://github.com/GoogleCloudPlatform/cloud-data-quality/releases/download/v"${CLOUDDQ_RELEASE_VERSION}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}"_"${TARGET_OS}"_python"${TARGET_PYTHON_INTERPRETER}".zip
  # NOTE: the release asset uses the .sha256sum suffix; it is renamed to
  # .hashsum locally to match the name the Dataproc job expects.
  wget -O clouddq_executable.zip.hashsum https://github.com/GoogleCloudPlatform/cloud-data-quality/releases/download/v"${CLOUDDQ_RELEASE_VERSION}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}"_"${TARGET_OS}"_python"${TARGET_PYTHON_INTERPRETER}".zip.sha256sum
  gcloud storage cp clouddq_executable.zip gs://"${GCS_BUCKET_NAME}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}".zip
  gcloud storage cp clouddq_executable.zip.hashsum gs://"${GCS_BUCKET_NAME}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}".zip.hashsum
}

#######################################
# Entry point: always (re)upload the configs archive, then download and
# upload the executable only if either the zip or its hashsum is missing
# from the bucket (the ls fails unless both objects exist).
# Globals:   GCS_BUCKET_NAME, CLOUDDQ_RELEASE_VERSION (read)
#######################################
function main() {
  zip_configs_directory_and_upload_to_gcs
  gcloud storage ls gs://"${GCS_BUCKET_NAME}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}".zip gs://"${GCS_BUCKET_NAME}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}".zip.hashsum || upload_clouddq_zip_executable_to_gcs
}

main "$@"
Loading