Skip to content
Draft
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
cf9653f
initial commit
candicehou07 May 11, 2022
fbca216
create sample dataset in BigQuery
candicehou07 May 11, 2022
313418f
add cloudbuild.yaml
candicehou07 May 19, 2022
c1ccc83
table json schema
candicehou07 May 22, 2022
1a69c02
Delete bq_schema.json
candicehou07 Jun 6, 2022
feb176f
Delete BatchControlLog.json
candicehou07 Jun 6, 2022
7c42756
Merge branch 'GoogleCloudPlatform:master' into terraform_sync_tool
candicehou07 Jun 8, 2022
d94c55b
terraform sync tool
candicehou07 Jun 8, 2022
345ab63
change dir
candicehou07 Jun 8, 2022
3cfcbc2
specify dir from yaml file
candicehou07 Jun 8, 2022
8ae5b48
add step1 to run python scripts
candicehou07 Jun 8, 2022
4d5ac73
Delete auto generated contents
candicehou07 Jun 8, 2022
7c8e1d1
Update tools/terraform_sync_tool/deploy.sh output json file
candicehou07 Jun 9, 2022
8b690c3
Update tools/terraform_sync_tool/qa/terragrunt.hcl
candicehou07 Jun 9, 2022
c2fc146
Delete state.json
candicehou07 Jun 9, 2022
dd75e1f
Update schema JSON
candicehou07 Jun 9, 2022
2a354f0
Rename module to bq-setup
candicehou07 Jun 9, 2022
376b7fd
Rename JSON output file
candicehou07 Jun 9, 2022
7a33b29
Refactor python scripts
candicehou07 Jun 13, 2022
9a5fe09
Move Converting table names -> table ids in Main()
candicehou07 Jun 13, 2022
5c36be7
Accept user-provided arguments
candicehou07 Jun 13, 2022
d330bb9
Update yaml file
candicehou07 Jun 13, 2022
252dcab
Clean up yaml file
candicehou07 Jun 13, 2022
abcf98f
Update identifiers and refactor terraform_sync.py
candicehou07 Jun 13, 2022
2422a88
Add comments to get_drifted_tables()
candicehou07 Jun 13, 2022
3fca695
Add developer's TODOs: Update project ID and dataset ID
candicehou07 Jun 13, 2022
485bde1
Add README file
candicehou07 Jun 14, 2022
52467af
Update README: add folder structure section
candicehou07 Jun 14, 2022
9211697
Update README: add Cloud Build setup instruction
candicehou07 Jun 14, 2022
5dabf46
Add Comments to terraform_sync.py
candicehou07 Jun 15, 2022
e85bd21
Rename module to bigquery & provide default datasetID
candicehou07 Jun 26, 2022
073e192
Rename prefix path
candicehou07 Jun 26, 2022
f699fdd
Use provided bucket name
candicehou07 Jun 26, 2022
c89fee7
Clean out variables unused
candicehou07 Jun 26, 2022
1e65ae7
Test with multiple tables and update python scripts to handle multiple tables
candicehou07 Jun 26, 2022
ec2d139
Update python scripts - Add check resource_condensed exists condition
candicehou07 Jun 26, 2022
90cb7da
Initiate BigQuery client in main()
candicehou07 Jun 28, 2022
23f1701
Use regex to convert id_value to fully-qualified table_id
candicehou07 Jul 1, 2022
993bffa
Update Prerequisite in README
candicehou07 Jul 11, 2022
d31e25f
Update README
candicehou07 Jul 11, 2022
1e7a6fc
Derive project ID from default credentials
candicehou07 Jul 11, 2022
67505be
Update user-provided arguments description
candicehou07 Jul 11, 2022
69a2df3
Update terraform_sync.py
candicehou07 Jul 11, 2022
9cd0da7
Allow users to provide project_id to ArgumentParser
candicehou07 Jul 12, 2022
1c4fc9f
Formatting: add new line at end of files
candicehou07 Jul 18, 2022
d2e5e0e
Formatting schema json files
candicehou07 Jul 18, 2022
815bee4
Update README: provide more details on setup
candicehou07 Jul 19, 2022
2bf25ea
Update README
candicehou07 Jul 19, 2022
4e1bb73
Update cloudbuild.yaml: provide project_id
candicehou07 Jul 19, 2022
2b20ed5
Reorder and update README
candicehou07 Jul 25, 2022
7f469e4
Update README Introduction
candicehou07 Aug 30, 2022
5496102
Update README: update structure
candicehou07 Aug 30, 2022
34f1461
Update README: fix
candicehou07 Aug 30, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions tools/terraform_sync_tool/cloudbuild.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
steps:
# Step 0: run the terraform/terragrunt commands in deploy.sh to detect drifts.
# Arguments: environment folder ("qa") and tool folder ("terraform-sync-tool");
# deploy.sh writes the JSON plan output to state.json for step 1 to consume.
- name: 'alpine/terragrunt'
entrypoint: 'bash'
dir: './tools/terraform_sync_tool/'
args: ['deploy.sh', 'qa', 'terraform-sync-tool']

# Step 1: run the python script to investigate the terraform plan output
# produced by step 0 and report any drifted tables.
- name: python:3.7
entrypoint: 'bash'
dir: './tools/terraform_sync_tool/'
args:
- -c
- 'pip install -r ./requirements.txt && python terraform_sync.py'
7 changes: 7 additions & 0 deletions tools/terraform_sync_tool/deploy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Runs `terragrunt run-all plan` for the given environment/tool directory and
# captures the machine-readable (newline-delimited JSON) plan output in
# state.json, which terraform_sync.py later parses to detect drift.
#
# Usage: deploy.sh <env> <tool>
#   env  - environment folder, e.g. "qa"
#   tool - tool folder under the environment, e.g. "terraform-sync-tool"

# Fail fast on errors, unset variables, and failures inside pipelines.
set -euo pipefail

# Require both positional arguments; print usage and exit non-zero otherwise.
env="${1:?Usage: $0 <env> <tool>}"
tool="${2:?Usage: $0 <env> <tool>}"

# -json emits one JSON event per line; redirect everything into state.json.
terragrunt run-all plan -json --terragrunt-non-interactive \
  --terragrunt-working-dir="${env}/${tool}" > state.json
96 changes: 96 additions & 0 deletions tools/terraform_sync_tool/modules/terraform-sync-tool/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Re-key the input lists by their unique IDs so each resource below can use
# for_each with stable, human-readable instance keys.
locals {
datasets = { for dataset in var.datasets : dataset["dataset_id"] => dataset }
tables = { for table in var.tables : table["table_id"] => table }
views = { for view in var.views : view["view_id"] => view }

# Mapping of IAM roles to the legacy primitive roles the BigQuery API
# actually stores; used when rendering dataset access blocks.
iam_to_primitive = {
"roles/bigquery.dataOwner" : "OWNER"
"roles/bigquery.dataEditor" : "WRITER"
"roles/bigquery.dataViewer" : "READER"
}
}

# Create one BigQuery dataset per entry in var.datasets (keyed by dataset_id).
resource "google_bigquery_dataset" "bq_dataset" {
for_each = local.datasets
friendly_name = each.value["friendly_name"]
dataset_id = each.key
location = each.value["location"]
project = var.project_id

# Apply a customer-managed encryption key only when one is provided;
# otherwise fall back to Google-managed encryption.
dynamic "default_encryption_configuration" {
for_each = var.encryption_key == null ? [] : [var.encryption_key]
content {
kms_key_name = var.encryption_key
}
}

# Render one access block per entry in var.access.
dynamic "access" {
for_each = var.access

content {
# BigQuery API converts IAM to primitive roles in its backend.
# This causes Terraform to show a diff on every plan that uses IAM equivalent roles.
# Thus, do the conversion between IAM to primitive role here to prevent the diff.
role = lookup(local.iam_to_primitive, access.value.role, access.value.role)

# Exactly one of these entity fields is expected per access entry;
# the others default to null.
domain = lookup(access.value, "domain", null)
group_by_email = lookup(access.value, "group_by_email", null)
user_by_email = lookup(access.value, "user_by_email", null)
special_group = lookup(access.value, "special_group", null)
}
}
}

# Create one BigQuery table per entry in var.tables (keyed by table_id).
# Each entry's dataset_id selects which dataset the table lands in, so a
# single module call can manage tables across multiple datasets.
resource "google_bigquery_table" "bq_table" {
for_each = local.tables
dataset_id = each.value["dataset_id"]
friendly_name = each.key
table_id = each.key
labels = each.value["labels"]
# Schema is read from the JSON file path supplied in the table object.
schema = file(each.value["schema"])
clustering = each.value["clustering"]
expiration_time = each.value["expiration_time"]
project = var.project_id
deletion_protection = each.value["deletion_protection"]
# Ensure target datasets exist before creating tables.
depends_on = [google_bigquery_dataset.bq_dataset]

# Emit a time_partitioning block only when the table object defines one.
dynamic "time_partitioning" {
for_each = each.value["time_partitioning"] != null ? [each.value["time_partitioning"]] : []
content {
type = time_partitioning.value["type"]
expiration_ms = time_partitioning.value["expiration_ms"]
field = time_partitioning.value["field"]
require_partition_filter = time_partitioning.value["require_partition_filter"]
}
}

# Emit a range_partitioning block only when the table object defines one.
dynamic "range_partitioning" {
for_each = each.value["range_partitioning"] != null ? [each.value["range_partitioning"]] : []
content {
field = range_partitioning.value["field"]
range {
start = range_partitioning.value["range"].start
end = range_partitioning.value["range"].end
interval = range_partitioning.value["range"].interval
}
}
}

}

# Create one BigQuery view per entry in var.views (keyed by view_id).
# Views are google_bigquery_table resources with a view {} block.
resource "google_bigquery_table" "bq_view" {
for_each = local.views
dataset_id = each.value["dataset_id"]
friendly_name = each.key
table_id = each.key
labels = each.value["labels"]
project = var.project_id
deletion_protection = each.value["deletion_protection"]
# Views may reference the managed tables, so create tables first.
depends_on = [google_bigquery_table.bq_table]

view {
query = each.value["query"]
use_legacy_sql = each.value["use_legacy_sql"]
}
}
113 changes: 113 additions & 0 deletions tools/terraform_sync_tool/modules/terraform-sync-tool/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# ---------------------------------------------------------------------------
# Dataset-level configuration. All of these are optional except project_id.
# ---------------------------------------------------------------------------

variable "description" {
description = "Dataset description."
type = string
default = null
}

variable "location" {
description = "The regional location for the dataset only US and EU are allowed in module"
type = string
default = "US"
}

variable "delete_contents_on_destroy" {
description = "(Optional) If set to true, delete all the tables in the dataset when destroying the resource; otherwise, destroying the resource will fail if tables are present."
type = bool
default = null
}

variable "deletion_protection" {
description = "Whether or not to allow Terraform to destroy the instance. Unless this field is set to false in Terraform state, a terraform destroy or terraform apply that would delete the instance will fail."
type = bool
default = true
}

variable "default_table_expiration_ms" {
description = "TTL of tables using the dataset in MS"
type = number
default = null
}

# Required: the GCP project that owns every resource this module creates.
variable "project_id" {
description = "Project where the dataset and table are created"
type = string
}

# Customer-managed encryption key; null means Google-managed encryption.
variable "encryption_key" {
description = "Default encryption key to apply to the dataset. Defaults to null (Google-managed)."
type = string
default = null
}

variable "dataset_labels" {
description = "Key value pairs in a map for dataset labels"
type = map(string)
default = {}
}

# Format: list(object), each object holding a role plus exactly one entity:
#   domain:         a domain to grant access to.
#   group_by_email: an email address of a Google Group to grant access to.
#   user_by_email:  an email address of a user to grant access to.
#   special_group:  a special group to grant access to.

variable "access" {
description = "An array of objects that define dataset access for one or more entities."
type = any

# At least one owner access is required.
default = [{
role = "roles/bigquery.dataOwner"
special_group = "projectOwners"
}]
}
# Datasets to create. Each object supplies the dataset ID (used as the
# for_each key in main.tf), a display name, and the dataset location.
variable "datasets" {
  description = "A list of objects defining the BigQuery datasets to create, each with dataset_id, friendly_name and location."
  default     = []
  type = list(object({
    dataset_id    = string
    friendly_name = string
    location      = string
  }))
}
# Tables to create. dataset_id selects the target dataset per table, so one
# module call can manage tables in multiple datasets. schema is a path to a
# JSON schema file (read with file() in main.tf); time_partitioning and
# range_partitioning may be null to skip those blocks.
variable "tables" {
  description = "A list of objects which include table_id, dataset_id, schema, clustering, time_partitioning, range_partitioning, expiration_time, deletion_protection and labels."
  default     = []
  type = list(object({
    table_id            = string,
    dataset_id          = string,
    schema              = string,
    clustering          = list(string),
    deletion_protection = bool,
    time_partitioning = object({
      expiration_ms            = string,
      field                    = string,
      type                     = string,
      require_partition_filter = bool,
    }),
    range_partitioning = object({
      field = string,
      range = object({
        start    = string,
        end      = string,
        interval = string,
      }),
    }),
    expiration_time = string,
    labels          = map(string),
  }))
}
# Views to create. view_id is used as the for_each key and becomes both the
# table_id and friendly_name of the resulting view resource.
variable "views" {
  description = "A list of objects which include view_id, dataset_id, the view query, deletion_protection, use_legacy_sql and labels."
  default     = []
  type = list(object({
    view_id             = string,
    dataset_id          = string,
    query               = string,
    deletion_protection = bool,
    use_legacy_sql      = bool,
    labels              = map(string),
  }))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
[
{
"description": "Col1",
"mode": "NULLABLE",
"name": "Col1",
"type": "STRING"
},
{
"description": "Col2",
"mode": "NULLABLE",
"name": "Col2",
"type": "STRING"
},
{
"description": "Col3",
"mode": "NULLABLE",
"name": "Col3",
"type": "STRING"
},
{
"description": "Col4",
"mode": "NULLABLE",
"name": "Col4",
"type": "STRING"
},
{
"description": "Col5",
"mode": "NULLABLE",
"name": "Col5",
"type": "STRING"
},
{
"description": "Col6",
"mode": "NULLABLE",
"name": "Col6",
"type": "STRING"
},
{
"description": "Col7",
"mode": "NULLABLE",
"name": "Col7",
"type": "STRING"
},
{
"description": "Col8",
"mode": "NULLABLE",
"name": "Col8",
"type": "STRING"
},
{
"description": "Col9",
"mode": "NULLABLE",
"name": "Col9",
"type": "STRING"
}
]
Copy link
Collaborator Author

@danieldeleo danieldeleo Jun 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
]
]

39 changes: 39 additions & 0 deletions tools/terraform_sync_tool/qa/terraform-sync-tool/terragrunt.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Environment-level terragrunt configuration for the terraform-sync-tool
# BigQuery resources in the "qa" environment. Sources the shared module and
# inherits project-level settings from the root terragrunt.hcl.
terraform {
  source = "../../modules/terraform-sync-tool"
}

include "root" {
  path   = find_in_parent_folders()
  expose = true
}

locals {
  # Dataset managed by this configuration; shared by the datasets and
  # tables inputs below so they stay in sync.
  dataset_id = "tf_test_sync_tool"
}

inputs = {
  # The ID of the project in which the resource belongs. If it is not provided, the provider project is used.
  project_id = include.root.inputs.project_id

  datasets = [
    {
      dataset_id    = local.dataset_id
      friendly_name = "Dataset for Terraform Sync Tool"
      location      = "US"
      labels        = {}
    }
  ]

  tables = [
    {
      table_id            = "TableForTest"
      dataset_id          = local.dataset_id
      schema              = "json_schemas/TableForTest.json"
      clustering          = []
      expiration_time     = null
      deletion_protection = true
      range_partitioning  = null
      time_partitioning   = null
      labels              = {}
    }
  ]
}
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
}
}

42 changes: 42 additions & 0 deletions tools/terraform_sync_tool/qa/terragrunt.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Root terragrunt configuration for the "qa" environment: defines the GCP
# project, generates the google provider block for every child configuration,
# and stores Terraform state in a GCS bucket.

locals {
  # TODO(developer): replace with your own GCP project ID.
  gcp_project_id = "candicehou-terraform-sync-tool"
}

inputs = {
  project_id = local.gcp_project_id
  gcp_region = "us-central1"
}

# Generate provider.tf in each child module so they all target the same project.
generate "provider" {
  path      = "provider.tf"
  if_exists = "overwrite"
  contents  = <<EOF
provider "google" {
  project = "${local.gcp_project_id}"
}
EOF
}

# Remote state in GCS; terragrunt creates the bucket (with these labels) if it
# does not already exist.
remote_state {
  backend = "gcs"
  config = {
    project  = local.gcp_project_id
    location = "us"
    # TODO(developer): bucket names must be globally unique across all of GCP;
    # provide your own bucket name here.
    bucket = "synctooltest"
    prefix = "qa/${path_relative_to_include()}/terraform.tfstate"
    gcs_bucket_labels = {
      owner = "terragrunt_test"
      name  = "terraform_state_storage"
    }
  }
  generate = {
    path      = "backend.tf"
    if_exists = "overwrite_terragrunt"
  }
}
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
}
}

1 change: 1 addition & 0 deletions tools/terraform_sync_tool/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
google-cloud-bigquery
6 changes: 6 additions & 0 deletions tools/terraform_sync_tool/state.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{"@level":"info","@message":"Terraform 1.1.9","@module":"terraform.ui","@timestamp":"2022-06-08T13:22:12.564592-04:00","terraform":"1.1.9","type":"version","ui":"1.0"}
{"@level":"info","@message":"google_bigquery_dataset.bq_dataset[\"tf_test_sync_tool\"]: Refreshing state... [id=projects/candicehou-terraform-sync-tool/datasets/tf_test_sync_tool]","@module":"terraform.ui","@timestamp":"2022-06-08T13:22:14.589057-04:00","hook":{"resource":{"addr":"google_bigquery_dataset.bq_dataset[\"tf_test_sync_tool\"]","module":"","resource":"google_bigquery_dataset.bq_dataset[\"tf_test_sync_tool\"]","implied_provider":"google","resource_type":"google_bigquery_dataset","resource_name":"bq_dataset","resource_key":"tf_test_sync_tool"},"id_key":"id","id_value":"projects/candicehou-terraform-sync-tool/datasets/tf_test_sync_tool"},"type":"refresh_start"}
{"@level":"info","@message":"google_bigquery_dataset.bq_dataset[\"tf_test_sync_tool\"]: Refresh complete [id=projects/candicehou-terraform-sync-tool/datasets/tf_test_sync_tool]","@module":"terraform.ui","@timestamp":"2022-06-08T13:22:15.112837-04:00","hook":{"resource":{"addr":"google_bigquery_dataset.bq_dataset[\"tf_test_sync_tool\"]","module":"","resource":"google_bigquery_dataset.bq_dataset[\"tf_test_sync_tool\"]","implied_provider":"google","resource_type":"google_bigquery_dataset","resource_name":"bq_dataset","resource_key":"tf_test_sync_tool"},"id_key":"id","id_value":"projects/candicehou-terraform-sync-tool/datasets/tf_test_sync_tool"},"type":"refresh_complete"}
{"@level":"info","@message":"google_bigquery_table.bq_table[\"TableForTest\"]: Refreshing state... [id=projects/candicehou-terraform-sync-tool/datasets/tf_test_sync_tool/tables/TableForTest]","@module":"terraform.ui","@timestamp":"2022-06-08T13:22:15.119610-04:00","hook":{"resource":{"addr":"google_bigquery_table.bq_table[\"TableForTest\"]","module":"","resource":"google_bigquery_table.bq_table[\"TableForTest\"]","implied_provider":"google","resource_type":"google_bigquery_table","resource_name":"bq_table","resource_key":"TableForTest"},"id_key":"id","id_value":"projects/candicehou-terraform-sync-tool/datasets/tf_test_sync_tool/tables/TableForTest"},"type":"refresh_start"}
{"@level":"info","@message":"google_bigquery_table.bq_table[\"TableForTest\"]: Refresh complete [id=projects/candicehou-terraform-sync-tool/datasets/tf_test_sync_tool/tables/TableForTest]","@module":"terraform.ui","@timestamp":"2022-06-08T13:22:15.275101-04:00","hook":{"resource":{"addr":"google_bigquery_table.bq_table[\"TableForTest\"]","module":"","resource":"google_bigquery_table.bq_table[\"TableForTest\"]","implied_provider":"google","resource_type":"google_bigquery_table","resource_name":"bq_table","resource_key":"TableForTest"},"id_key":"id","id_value":"projects/candicehou-terraform-sync-tool/datasets/tf_test_sync_tool/tables/TableForTest"},"type":"refresh_complete"}
{"@level":"info","@message":"Plan: 0 to add, 0 to change, 0 to destroy.","@module":"terraform.ui","@timestamp":"2022-06-08T13:22:15.289118-04:00","changes":{"add":0,"change":0,"remove":0,"operation":"plan"},"type":"change_summary"}
Loading