diff --git a/.github/workflows/deploy-gke.yml b/.github/workflows/deploy-gke.yml new file mode 100644 index 0000000..6d2f3f6 --- /dev/null +++ b/.github/workflows/deploy-gke.yml @@ -0,0 +1,135 @@ +name: CD - Deploy to GKE + +on: + workflow_dispatch: + inputs: + source_tag: + description: 'The Git SHA or Dev tag to promote (e.g. sha-a1b2c or dev)' + required: true + default: 'dev' + target_tag: + description: 'The new version tag (e.g. v1.0.0)' + required: true + default: 'v1.0.0' + environment: + description: 'Target Environment' + required: true + type: choice + options: + - dev + - prod + +permissions: + id-token: write + contents: read + +env: + PROJECT_ID: "gcp-capstone-481414" + REGION: "us-central1" + REPO_NAME: "bookshelf-docker-repo" + IMAGE_NAME: "fastapi-app" + GKE_CLUSTER: "bookshelf-dev-cluster" + +jobs: + deploy: + runs-on: ubuntu-latest + environment: ${{ github.event.inputs.environment }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: ${{ secrets.WIF_PROVIDER }} + service_account: ${{ secrets.WIF_SERVICE_ACCOUNT }} + project_id: ${{ env.PROJECT_ID }} + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + with: + project_id: ${{ env.PROJECT_ID }} + + - name: Install kubectl + run: | + gcloud components install kubectl + + - name: Set Cluster Name for prod + id: set_cluster + shell: bash + run: | + # Read the environment input (dev or prod) + ENV_INPUT="${{ github.event.inputs.environment }}" + + # Construct the name: bookshelf-dev-cluster or bookshelf-prod-cluster + CLUSTER_NAME="bookshelf-${ENV_INPUT}-cluster" + + # Export it to the GITHUB_ENV so subsequent steps can see it + echo "GKE_CLUSTER=${CLUSTER_NAME}" >> $GITHUB_ENV + + echo "Target Cluster set to: ${CLUSTER_NAME}" + + - name: Get GKE Credentials + run: | + gcloud container clusters get-credentials ${{ env.GKE_CLUSTER }} --region ${{ 
env.REGION }} + + - name: Create Namespace + run: | + kubectl create namespace my-cool-app --dry-run=client -o yaml | kubectl apply -f - + + - name: Ensure Secrets Exist + env: + SECRET_USER: ${{ secrets.DB_USERNAME }} + SECRET_PASS: ${{ secrets.DB_PASSWORD }} + SECRET_NAME: ${{ secrets.DB_NAME }} + run: | + kubectl create secret generic fastapi-secret \ + --from-literal=POSTGRES_USER=$SECRET_USER \ + --from-literal=POSTGRES_PASSWORD=$SECRET_PASS \ + --from-literal=POSTGRES_DB=$SECRET_NAME \ + --namespace=my-cool-app --dry-run=client -o yaml | kubectl apply -f - + + - name: Retag Image in Artifact Registry + env: + SOURCE_TAG: ${{ github.event.inputs.source_tag }} + TARGET_TAG: ${{ github.event.inputs.target_tag }} + IMAGE_URL: ${{ env.REGION }}-docker.pkg.dev/${{ env.PROJECT_ID }}/${{ env.REPO_NAME }}/${{ env.IMAGE_NAME }} + run: | + echo "Promoting ${IMAGE_URL}:${SOURCE_TAG} to ${IMAGE_URL}:${TARGET_TAG}..." + gcloud artifacts docker tags add \ + ${IMAGE_URL}:${SOURCE_TAG} \ + ${IMAGE_URL}:${TARGET_TAG} + + - name: Create DB Init ConfigMap + run: | + kubectl create configmap db-init-script \ + --from-file=server/db/init.sh \ + --namespace=my-cool-app --dry-run=client -o yaml | kubectl apply -f - + + - name: Deploy Application + env: + TARGET_TAG: ${{ github.event.inputs.target_tag }} + run: | + # Use sed to replace the placeholder in the YAML with the actual version + sed -i "s|__IMAGE_TAG__|${TARGET_TAG}|g" kubernetes/fastapi-app.yaml + + # Apply both App and DB + kubectl apply -f kubernetes/postgres-db.yaml + kubectl apply -f kubernetes/fastapi-app.yaml + + - name: Inject Database Connection String + env: + DB_USER: ${{ secrets.DB_USERNAME }} + DB_PASS: ${{ secrets.DB_PASSWORD }} + DB_NAME: ${{ secrets.DB_NAME }} + DB_HOST: "db" + DB_PORT: "5432" + run: | + DB_URL="postgresql://${DB_USER}:${DB_PASS}@${DB_HOST}:${DB_PORT}/${DB_NAME}" + echo "Injecting DOCKER_DATABASE_URL into deployment..." 
+ kubectl set env deployment/fastapi-deployment DOCKER_DATABASE_URL="${DB_URL}" -n my-cool-app + + - name: Verify Deployment + run: | + kubectl rollout status deployment/fastapi-deployment -n my-cool-app diff --git a/.github/workflows/docker-build-gcp.yml b/.github/workflows/docker-build-gcp.yml new file mode 100644 index 0000000..27a4e7b --- /dev/null +++ b/.github/workflows/docker-build-gcp.yml @@ -0,0 +1,53 @@ +name: CI - Build & Push Docker Image + +on: + push: + branches: [ "main" ] + paths: + - 'server/**' + - 'Dockerfile' + workflow_dispatch: + +permissions: + id-token: write + contents: read + +env: + PROJECT_ID: "gcp-capstone-481414" + REGION: "us-central1" + REPO_NAME: "bookshelf-docker-repo" + IMAGE_NAME: "fastapi-app" + +jobs: + build-and-push: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Authenticate to Google Cloud + id: auth + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: ${{ secrets.WIF_PROVIDER }} + service_account: ${{ secrets.WIF_SERVICE_ACCOUNT }} + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + with: + project_id: ${{ env.PROJECT_ID }} + + - name: Configure Docker Authentication + run: gcloud auth configure-docker ${{ env.REGION }}-docker.pkg.dev + + - name: Build and Push Docker Image + env: + IMAGE_URL: ${{ env.REGION }}-docker.pkg.dev/${{ env.PROJECT_ID }}/${{ env.REPO_NAME }}/${{ env.IMAGE_NAME }} + run: | + # Build the image + docker build -t $IMAGE_URL:dev -t $IMAGE_URL:${{ github.sha }} . 
+ + # Push both tags + docker push $IMAGE_URL:dev + docker push $IMAGE_URL:${{ github.sha }} diff --git a/.github/workflows/infra-destroy.yml b/.github/workflows/infra-destroy.yml new file mode 100644 index 0000000..a1bab74 --- /dev/null +++ b/.github/workflows/infra-destroy.yml @@ -0,0 +1,69 @@ +name: Infrastructure Destroy + +on: + workflow_dispatch: + inputs: + environment: + description: 'Select Environment to Destroy' + required: true + type: choice + options: + - dev + - prod + +permissions: + id-token: write + contents: read + +env: + TF_VERSION: "1.14.2" + GCP_REGION: "us-central1" + +jobs: + destroy: + runs-on: ubuntu-latest + environment: ${{ github.event.inputs.environment }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: ${{ secrets.WIF_PROVIDER }} + service_account: ${{ secrets.WIF_SERVICE_ACCOUNT }} + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + with: + terraform_version: ${{ env.TF_VERSION }} + + - name: Set Target Directory + id: set_dir + run: | + if [ "${{ github.event.inputs.environment }}" == "dev" ]; then + echo "target_dir=infra/envs/dev" >> $GITHUB_OUTPUT + echo "var_file=values.tfvars" >> $GITHUB_OUTPUT + else + echo "target_dir=infra/envs/prod" >> $GITHUB_OUTPUT + echo "var_file=values.tfvars" >> $GITHUB_OUTPUT + fi + + # - name: Manual Approval + # if: github.event.inputs.environment == 'prod' + # uses: trstringer/manual-approval@v1 + # with: + # secret: ${{ secrets.GITHUB_TOKEN }} + # approvers: nizamra + # minimum-approvals: 1 + + - name: Terraform Init + working-directory: ${{ steps.set_dir.outputs.target_dir }} + run: terraform init + + - name: Terraform Destroy + working-directory: ${{ steps.set_dir.outputs.target_dir }} + run: | + terraform plan -destroy -var-file="${{ steps.set_dir.outputs.var_file }}" + terraform destroy -auto-approve -var-file="${{ steps.set_dir.outputs.var_file 
}}"
diff --git a/.github/workflows/infra-pipeline.yml b/.github/workflows/infra-pipeline.yml
new file mode 100644
index 0000000..047331c
--- /dev/null
+++ b/.github/workflows/infra-pipeline.yml
@@ -0,0 +1,81 @@
+name: Infrastructure (GKE & Registry)
+
+on:
+  push:
+    branches: [ "main" ]
+    paths:
+      - 'infra/**'
+  workflow_dispatch:
+
+permissions:
+  id-token: write
+  contents: read
+
+env:
+  TF_VERSION: "1.14.2"
+  GCP_REGION: "us-central1"
+
+jobs:
+  deploy-infra-dev:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Authenticate to Google Cloud
+        uses: google-github-actions/auth@v2
+        with:
+          workload_identity_provider: ${{ secrets.WIF_PROVIDER }}
+          service_account: ${{ secrets.WIF_SERVICE_ACCOUNT }}
+
+      - name: Setup Terraform
+        uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: ${{ env.TF_VERSION }}
+
+      - name: Terraform Init
+        working-directory: infra/envs/dev
+        run: terraform init
+
+      - name: Terraform Apply
+        working-directory: infra/envs/dev
+        run: terraform apply -auto-approve -var-file="values.tfvars"
+
+
+  deploy-infra-prod:
+    runs-on: ubuntu-latest
+    if: github.ref == 'refs/heads/main'
+    needs: deploy-infra-dev
+    # Manual approval gate: configure required reviewers on the 'prod'
+    # environment (Settings > Environments) to pause this job until approved.
+    environment: prod
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # Required: the GCS state backend and the google provider need
+      # credentials; without this step `terraform init` fails.
+      - name: Authenticate to Google Cloud
+        uses: google-github-actions/auth@v2
+        with:
+          workload_identity_provider: ${{ secrets.WIF_PROVIDER }}
+          service_account: ${{ secrets.WIF_SERVICE_ACCOUNT }}
+
+      - name: Setup Terraform
+        uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: ${{ env.TF_VERSION }}
+
+      - name: Initialize Terraform
+        working-directory: infra/envs/prod
+        run: terraform init
+
+      - name: Plan Terraform for Prod
+        working-directory: infra/envs/prod
+        run: terraform plan -var-file="values.tfvars"
+
+      - name: Apply Terraform for Prod
+        working-directory: infra/envs/prod
+        run: terraform apply -auto-approve -var-file="values.tfvars"
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index cf3410f..43a7e19 100644
--- a/.gitignore
+++ b/.gitignore
@@ -166,3 +166,43 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. 
For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + + + +# Local .terraform directories +.terraform/ +.vscode/ + + +# .tfstate files +**/.terraform/ +**/*.tfstate +**/*.tfstate.* +**/.terraform.lock.hcl + +# Crash log files +crash.log +crash.*.log + +# Ignore override files as they are usually used to override resources locally and so +# are not checked in +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Ignore transient lock info files created by terraform apply +.terraform.tfstate.lock.info + +# Include override files you do wish to add to version control using negated pattern +# !example_override.tf + +# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan +# example: *tfplan* + +# Ignore CLI configuration files +.terraformrc +terraform.rc + +*.private +meta.txt diff --git a/AI_USAGE.md b/AI_USAGE.md new file mode 100644 index 0000000..b68fd29 --- /dev/null +++ b/AI_USAGE.md @@ -0,0 +1,78 @@ +# AI Usage Documentation + +## AI Tools Used +- **github Copilot (GPT-4o), Z.ai (https://chat.z.ai/)** + +## Prompts and Instructions + +I used the AI primarily as a **Code Generator**. Below are the specific prompts and instructions used to generate the project structure and code. + +### Example 1: Project Structure and Documentation + +**Prompt:** +> "I need to generate a complete folder structure for a project deployed on GCP. +> Create a `docs` folder with files: RELEASE_PROCESS.md, ARCHITECTURE_GCP.md, DEPLOYMENT.md, ROLLBACK.md, GKE_RUNBOOK.md." + +**Result:** +- Generated the file tree structure. +- Created basic filler content for `docs/GKE_RUNBOOK.md` and `docs/ROLLBACK.md` (see the respective markdown files in the repo). + +### Example 2: Bootstrapping and CI/CD + +**Prompt:** +> "Create a folder for `bootstrap` with full Terraform code for creating a GCS state bucket and opening APIs (container, artifact registry, storage). 
+> +> Also, generate GitHub Actions YAML for building a docker image and uploading to GCP artifact registry. Use Workload Identity Federation for authentication." + +**Result:** +- Created `bootstrap/main.tf`, `bootstrap/apis.tf`, `bootstrap/bucket.tf`. +- Created `.github/workflows/docker-build-gcp.yml` with `google-github-actions/auth@v2` and `gcloud auth configure-docker`. + +### Example 3: Infrastructure and Modules + +**Prompt:** +> "Generate a folder structure for `infra` with: +> 1. `envs` subfolder: dev, stag and prod, holding files for main, variables, outputs, and values.tfvars. +> 2. `modules` subfolder. +> 3. Include `backend.tf` and `providers.tf` configurations. +> +> Ensure the Terraform code uses a GCS backend for remote state." + +**Result:** +- Created `infra/envs/dev/`, `infra/envs/prod/` directory structures. +- Created `infra/modules/gke-cluster/` and `infra/modules/artifact-registry/`. +- Configured `terraform { backend "gcs" { ... } }` blocks. + +### Example 4: Deployment Pipelines + +**Prompt:** +> "Generate a pipeline for infrastructure with multiple jobs: one for dev, another for prod. The prod job should only run on main branch and require manual approval. +> +> Generate a separate pipeline for taking the image from registry and deploying in GKE. It should support retagging images (e.g., from SHA to v1.0.0) and using kubectl to apply manifests." + +**Result:** +- Created `.github/workflows/infra-pipeline.yml` with `jobs: deploy-infra-dev` and `deploy-infra-prod` using `needs` and `environment: prod`. +- Created `.github/workflows/cd-deploy.yml` with logic for `gcloud artifacts docker tags add` and `kubectl apply`. + +## Parts Influenced by AI + +- **Terraform**: + - Module files creation (`modules/gke-cluster`, `modules/artifact-registry`). + - `main.tf` wiring for environments. + - Backend configuration and provider code. +- **GitHub Actions**: + - Complete logic for `.github/workflows/docker-build-gcp.yml` (authentication, tagging). 
+ - Complete logic for `.github/workflows/cd-deploy.yml` (promotion, manual approval, kubectl patch). +- **Kubernetes**: + - Fixing `StatefulSet` configurations for GKE Autopilot. + - Creating `ConfigMap` strategies for database initialization. +- **Documentation**: + - Drafting basic Markdown files in the `docs/` folder. + +## Verification + +All code generated by AI was reviewed for: +- Syntax errors (Terraform validate, YAML linting). +- Security best practices (avoiding hardcoded secrets, using Workload Identity). +- Compatibility with the specific GCP project constraints (Autopilot, regions). +- the best way to verify is to deploy diff --git a/SUBMISSION.md b/SUBMISSION.md new file mode 100644 index 0000000..bdc0a5e --- /dev/null +++ b/SUBMISSION.md @@ -0,0 +1,42 @@ +# Project Submission: BookShelf Cloud + +## Repository Link +[https://github.com/nizamra/python-fastapi-demo-docker](https://github.com/nizamra/python-fastapi-demo-docker) + +## CI/CD Evidence + +### CI Pipeline (Docker Build) +- **Link to Run**: [GitHub Actions - CI Build](https://github.com/nizamra/python-fastapi-demo-docker/blob/main/.github/workflows/docker-build-gcp.yml) +- **Status**: ✅ [Success](https://github.com/nizamra/python-fastapi-demo-docker/actions/runs/22283333449) - Build and Push to Artifact Registry. + +### CD Pipeline (GKE Deploy) +- **Link to Run**: [GitHub Actions - CD Deploy](https://github.com/nizamra/python-fastapi-demo-docker/blob/main/.github/workflows/deploy-gke.yml) +- **Status**: ✅ [Success](https://github.com/nizamra/python-fastapi-demo-docker/actions/runs/22284723811) - Deployed to GKE Dev/Prod. 
+ +### Infrastructure Pipeline +- **Workflow Name**: Infrastructure (GKE & Registry) +- **Link to Run**: [GitHub Actions - Infra](https://github.com/nizamra/python-fastapi-demo-docker/actions/workflows/infra-pipeline.yml) +- **Status**: ✅ [Success](https://github.com/nizamra/python-fastapi-demo-docker/actions/runs/22283115694/job/64456861834) - Provisioned the GKE Autopilot cluster and Artifact Registry. + +### Release Evidence +- **Release v1.0.0**: [https://github.com/nizamra/python-fastapi-demo-docker/releases/tag/v1.0.0](https://github.com/nizamra/python-fastapi-demo-docker/releases/tag/v1.0.0) +- **Promoted Image**: `us-central1-docker.pkg.dev/gcp-capstone-481414/bookshelf-docker-repo/fastapi-app:v1.0.0` + +## Artifact Evidence + +The Docker artifacts are stored in Google Cloud Artifact Registry. + +- **Registry Name**: `bookshelf-docker-repo` +- **Location**: `us-central1` +- **View Artifacts**: [Link to GCP Console Artifact Registry](https://console.cloud.google.com/artifacts/docker/gcp-capstone-481414/us-central1/bookshelf-docker-repo?hl=en&project=gcp-capstone-481414) + +## Service URL + +The application is exposed via a GKE LoadBalancer Service. + +- **Application Endpoint**: [http://34.173.120.218/](http://34.173.120.218/) + +## Summary +The project implements a fully automated DevOps pipeline for a Python FastAPI application on GCP. We utilized Terraform for Infrastructure as Code (IaC), managing GKE Autopilot clusters and Artifact Registry securely via remote state. A 3-tier pipeline architecture was adopted: a manual Infrastructure pipeline, a CI pipeline for Docker image building, and a CD pipeline for GKE deployment. We implemented image promotion strategies using semantic versioning and ensured security via Workload Identity Federation, avoiding hardcoded credentials. 
+
+**Next Steps**: With more time, I would implement a dedicated Terraform module for Google Cloud SQL to replace the StatefulSet Postgres instance, ensuring higher availability and automated backups. I would also add Integration Tests to the CI pipeline and set up a Monitoring Dashboard (GCP Cloud Monitoring) for better observability and alerting. 
\ No newline at end of file
diff --git a/bootstrap/apis.tf b/bootstrap/apis.tf
new file mode 100644
index 0000000..a536a80
--- /dev/null
+++ b/bootstrap/apis.tf
@@ -0,0 +1,13 @@
+resource "google_project_service" "required_apis" {
+  for_each = toset([
+    "cloudresourcemanager.googleapis.com", # Project metadata
+    "storage-component.googleapis.com",    # GCS
+    "compute.googleapis.com",              # VPC/Networking
+    "container.googleapis.com",            # GKE
+    "artifactregistry.googleapis.com"      # Artifact Registry (Docker image repo used by CI/CD)
+  ])
+
+  project            = var.project_id
+  service            = each.value
+  disable_on_destroy = false
+}
diff --git a/bootstrap/bucket.tf b/bootstrap/bucket.tf
new file mode 100644
index 0000000..e76fec3
--- /dev/null
+++ b/bootstrap/bucket.tf
@@ -0,0 +1,30 @@
+resource "google_storage_bucket" "terraform_state" {
+  name                        = var.state_bucket_name
+  location                    = var.region
+  storage_class               = "STANDARD"
+  uniform_bucket_level_access = true
+  public_access_prevention    = "enforced"
+
+  versioning {
+    enabled = true
+  }
+
+  # Keep only the 6 most recent noncurrent state versions; older ones are deleted
+  lifecycle_rule {
+    condition {
+      num_newer_versions = 6
+    }
+    action {
+      type = "Delete"
+    }
+  }
+
+  # Labels for cost attribution and filtering
+  labels = {
+    environment = "bootstrap"
+    managed_by  = "terraform"
+    project     = "gcp-passline"
+  }
+
+  depends_on = [google_project_service.required_apis]
+}
diff --git a/bootstrap/main.tf b/bootstrap/main.tf
new file mode 100644
index 0000000..a5ba3b6
--- /dev/null
+++ b/bootstrap/main.tf
@@ -0,0 +1,14 @@
+terraform {
+  required_version = "1.14.2"
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "7.13.0"
+    }
+  }
+}
+
+provider "google" {
+  project = var.project_id
+  region = 
var.region +} diff --git a/bootstrap/outputs.tf b/bootstrap/outputs.tf new file mode 100644 index 0000000..06519b3 --- /dev/null +++ b/bootstrap/outputs.tf @@ -0,0 +1,5 @@ +# Output the bucket name for reference +output "terraform_state_bucket_name" { + value = google_storage_bucket.terraform_state.name + description = "Name of the GCS bucket for Terraform state. Save this!" +} diff --git a/bootstrap/values.dev.tfvars b/bootstrap/values.dev.tfvars new file mode 100644 index 0000000..100214b --- /dev/null +++ b/bootstrap/values.dev.tfvars @@ -0,0 +1,3 @@ +project_id = "gcp-capstone-481414" +region = "us-central1" +state_bucket_name = "gcp-passline-481414-tf-state" diff --git a/bootstrap/values.tfvars.example b/bootstrap/values.tfvars.example new file mode 100644 index 0000000..c58148f --- /dev/null +++ b/bootstrap/values.tfvars.example @@ -0,0 +1,3 @@ +project_id = "gcp-passline" +region = "us-central1" +state_bucket_name = "gcp-tf-state" diff --git a/bootstrap/variables.tf b/bootstrap/variables.tf new file mode 100644 index 0000000..bd8ec38 --- /dev/null +++ b/bootstrap/variables.tf @@ -0,0 +1,14 @@ +variable "project_id" { + description = "GCP Project ID where bootstrap resources will live." + type = string +} + +variable "region" { + description = "Primary region for resources (e.g., us-central1)." + type = string +} + +variable "state_bucket_name" { + description = "Name of the GCS bucket to create for Terraform state. Must be globally unique." + type = string +} diff --git a/docs/ARCHITECTURE_GCP.md b/docs/ARCHITECTURE_GCP.md new file mode 100644 index 0000000..ac65408 --- /dev/null +++ b/docs/ARCHITECTURE_GCP.md @@ -0,0 +1,63 @@ +# GCP Architecture + +The BookShelf application is deployed on Google Kubernetes Engine (GKE) using a CI/CD pipeline. The architecture follows a microservices pattern, separating the application logic from the data layer. 
+ +## Diagram + +```mermaid +graph TD + Dev[Developer] -->|Push Code| GH[GitHub Repo] + + subgraph "GitHub Actions CI/CD" + GH --> CI[CI: Build & Push] + CI --> GAR[Artifact Registry] + GH --> CD[CD: Deploy GKE] + CD -->|gcloud/kubectl| GKE[GKE Cluster] + end + + subgraph "GCP Project: gcp-capstone-481414" + GAR -- "Docker Image: fastapi-app" --> GKE + + subgraph "GKE Cluster (Autopilot)" + GKE --> NS[Namespace: my-cool-app] + NS --> POD[FastAPI Pod] + NS --> DB[Postgres StatefulSet] + POD --> SVC[LoadBalancer Service] + end + + DB --> PV[Persistent Volume] + end + + User[User] -->|HTTPS| SVC +``` +![alt text](image.png) + +## Components + +### 1. Source Control & CI/CD +* **GitHub**: Hosts the application code and infrastructure definitions. +* **GitHub Actions**: + * **CI Pipeline**: Builds the Docker image from the source code and pushes it to Google Artifact Registry. + * **CD Pipeline**: Handles promotion of images (tagging) and deployment to GKE using `kubectl`. + +### 2. Artifact Registry +* **Purpose**: Stores the Docker container images. +* **Strategy**: Images are tagged with Git SHA for traceability and Semantic Versions (e.g., `v1.0.0`) for production releases. + +### 3. Google Kubernetes Engine (GKE) +* **Mode**: Autopilot. This removes the need to manage node VMs; Google automatically scales the infrastructure based on pod requirements. +* **Namespace**: `my-cool-app`. Logical isolation within the cluster. + +#### Workloads +1. **FastAPI Deployment**: + * **Image**: `us-central1-docker.pkg.dev/gcp-capstone-481414/bookshelf-docker-repo/fastapi-app` + * **Replicas**: 1 (Configurable via HPA in the future). + * **Exposure**: External LoadBalancer. +2. **PostgreSQL StatefulSet**: + * **Image**: `postgres:13`. + * **Storage**: Persistent Volume Claim (PVC) utilizing `standard-rwo` storage class. + * **Init**: A `ConfigMap` mounts an `init.sh` script to automatically create the database schema and table on first startup. + +### 4. 
Networking +* **LoadBalancer**: Exposes the FastAPI service to the public internet on port 80/443. +* **ClusterIP**: Internal service for the database, allowing the FastAPI app to communicate with Postgres via the DNS name `db`. diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md new file mode 100644 index 0000000..da6f9c4 --- /dev/null +++ b/docs/DEPLOYMENT.md @@ -0,0 +1,27 @@ +# Deployment Strategy + +## Promotion Path + +The deployment strategy follows a strict promotion model from Development to Production. + +### 1. Development Environment (Automatic) +- **Trigger**: Push to `main` branch (changes in `server/` or `Dockerfile`). +- **Process**: + 1. CI Pipeline builds the Docker image. + 2. Image is tagged with the Git SHA and `:dev`. + 3. Image is pushed to Artifact Registry. + 4. CD Pipeline is NOT triggered automatically for Dev in this workflow (Manual dispatch is used for consistency), but conceptually it deploys the `:dev` tag. + +### 2. Production Environment (Manual Promotion) +- **Trigger**: Manual Workflow Dispatch. +- **Process**: + 1. User selects a specific source Git SHA (verified in Dev). + 2. User defines a Semantic Version tag (e.g., `v1.0.0`). + 3. Pipeline requires **Manual Approval**. + 4. Upon approval: + - The Docker image is re-tagged in Artifact Registry (SHA -> v1.0.0). + - `kubectl apply` updates the GKE Production Deployment. + - GKE performs a Rolling Update. + +## Why Tagging? +We use immutable tags (`v1.0.0`) for Production instead of mutable tags (`latest`). This ensures we can identify exactly what code is running and allows for instant rollbacks by redeploying a previous tag. diff --git a/docs/GKE_RUNBOOK.md b/docs/GKE_RUNBOOK.md new file mode 100644 index 0000000..19a042a --- /dev/null +++ b/docs/GKE_RUNBOOK.md @@ -0,0 +1,49 @@ +# GKE Runbook + +## How to Deploy to GKE manually + +If the CI/CD pipeline is down, or for emergency fixes: + +1. 
**Authenticate**: + ```bash + gcloud container clusters get-credentials bookshelf-dev-cluster --region us-central1 + ``` + +2. **Build and Push Image** (Locally): + ```bash + docker build -t us-central1-docker.pkg.dev/gcp-capstone-481414/bookshelf-docker-repo/fastapi-app:manual-fix . + docker push us-central1-docker.pkg.dev/gcp-capstone-481414/bookshelf-docker-repo/fastapi-app:manual-fix + ``` + +3. **Update Deployment**: + Edit the image in the deployment: + ```bash + kubectl set image deployment/fastapi-deployment \ + web=us-central1-docker.pkg.dev/gcp-capstone-481414/bookshelf-docker-repo/fastapi-app:manual-fix \ + -n my-cool-app + ``` + +## Rolling Updates + +By default, GKE Deployments use a `RollingUpdate` strategy. +- **What happens**: GKE gradually replaces old Pods with new ones. +- **Configuration**: Defined in `kubernetes/fastapi-app.yaml` under `strategy`. +- **Monitoring**: Watch the status with `kubectl rollout status deployment/fastapi-deployment -n my-cool-app`. + +## Rollbacks + +See `docs/ROLLBACK.md`. The primary command is: +```bash +kubectl rollout undo deployment/fastapi-deployment -n my-cool-app +``` + +Checking Logs +Application Logs: +```bash +kubectl logs -f deployment/fastapi-deployment -n my-cool-app +``` + +Database Logs: +```bash +kubectl logs -f statefulset/fastapi-postgres -n my-cool-app +``` diff --git a/docs/RELEASE_PROCESS.md b/docs/RELEASE_PROCESS.md new file mode 100644 index 0000000..c97b1d7 --- /dev/null +++ b/docs/RELEASE_PROCESS.md @@ -0,0 +1,39 @@ +# Release Process +This document outlines the steps to release a new version of the BookShelf application to Production. + +## Prerequisites +- All code changes merged to the main branch. +- CI Pipeline has successfully built and tested the Docker image. +- You have the Git SHA of the commit you wish to release. + +## Steps +1. 
Trigger Infrastructure (first time/on change) +- If you have modified Terraform code (infra/), ensure the Infrastructure Pipeline has run successfully and both Dev and Prod clusters are healthy. + +2. Verify CI Build +- Navigate to the "CI - Build & Push Docker Image" workflow in GitHub Actions. +- Identify the Git SHA of the commit you want to release (e.g., a1b2c3d). + +3. Promote to Production +- Go to the "CD - Deploy to GKE" workflow in GitHub Actions. +- Click Run workflow. +- Fill in the inputs: + - source_tag: Enter the Git SHA (e.g., sha-a1b2c3d) or dev. + - target_tag: Enter the semantic version (e.g., v1.0.0). + - environment: Select prod. +- Click Run workflow. + +4. Manual Approval +- The workflow will pause at the Manual Approval step. +- An authorized maintainer must review the changes and click Approve in the GitHub Actions UI. +- This is the final gate before production is modified. + +5. Deployment Execution +- Upon approval, the workflow will: +- Tag the image in Artifact Registry as v1.0.0. +- Update the GKE Production deployment. +- Perform a rolling update of pods. + +6. Verification +- Check the Production URL (found in SUBMISSION.md) to ensure the new version is live. +- Monitor the GKE Dashboard for pod health. diff --git a/docs/ROLLBACK.md b/docs/ROLLBACK.md new file mode 100644 index 0000000..ccd7394 --- /dev/null +++ b/docs/ROLLBACK.md @@ -0,0 +1,67 @@ +# Rollback Procedure + +This document describes how to rollback the application if a deployment introduces a critical bug or failure. + +## Important Note on Database Rollbacks + +Rolling back the application code does **not** automatically rollback the database schema. +* **Risk**: If the failed deployment included a database migration (e.g., via `init.sh`), simply reverting the code might cause errors if the old application code expects the previous database schema structure but the database has already been updated. 
+* **Action**: In such events, manual SQL intervention may be required to revert schema changes before rolling back the application. + +--- + +## Scenario 1: Immediate Rollback via CLI (Fastest) + +If you have `kubectl` access to the cluster and need to revert to the previous version immediately: + +1. **Verify current status**: + Check the history of the deployment to see available revisions. + ```bash + kubectl rollout history deployment/fastapi-deployment -n my-cool-app + ``` + +2. **Rollback to the previous revision**: + This command reverts the deployment to the state immediately preceding the current one. + ```bash + kubectl rollout undo deployment/fastapi-deployment -n my-cool-app + ``` + +3. **Verify the rollback**: + Watch the status to ensure the pods are restarting with the old image and becoming healthy. + ```bash + kubectl rollout status deployment/fastapi-deployment -n my-cool-app + ``` + +--- + +## Scenario 2: Rollback via GitHub Actions (Specific Version) + +If the previous revision is also corrupted, or if you need to jump back to a specific known-good version (e.g., `v1.0.0`): + +1. Navigate to the **Repository Actions** tab. +2. Select the **CD - Deploy to GKE** workflow. +3. Click **Run workflow**. +4. Configure the inputs: + * `source_tag`: The tag of the specific version you want to restore (e.g., `v1.0.0`). + * `target_tag`: The new version name (optional, you can reuse the old tag or name it `v1.0.1-rollback`). + * `environment`: `prod` (or `dev`). +5. Run the workflow and complete the **Manual Approval**. + +**Result**: This will force the GKE Deployment to update the Pods to run the Docker image associated with the specific tag you provided. + +--- + +## Scenario 3: Rollback via CLI (Specific Image) + +If you cannot use the GitHub Actions pipeline but know the exact image URL: + +1. 
Update the deployment image directly: + ```bash + kubectl set image deployment/fastapi-deployment \ + web=us-central1-docker.pkg.dev/gcp-capstone-481414/bookshelf-docker-repo/fastapi-app:v1.0.0 \ + -n my-cool-app + ``` +2. Watch the rollout: + ```bash + kubectl rollout status deployment/fastapi-deployment -n my-cool-app + ``` diff --git a/docs/image.png b/docs/image.png new file mode 100644 index 0000000..289e7e9 Binary files /dev/null and b/docs/image.png differ diff --git a/infra/README.md b/infra/README.md new file mode 100644 index 0000000..64c8048 --- /dev/null +++ b/infra/README.md @@ -0,0 +1,80 @@ +# Infrastructure as Code (Terraform) + +This directory contains the Terraform configuration to provision Google Cloud Platform (GCP) resources for the BookShelf application. + +## Prerequisites + +1. [Terraform](https://www.terraform.io/downloads.html) >= 1.14.2 installed. +2. [Google Cloud SDK](https://cloud.google.com/sdk/docs/install) installed and authenticated (`gcloud auth application-default login`). +3. A GCP Project with APIs enabled (Container, Artifact Registry, Cloud Storage). + +## Directory Structure + +1. `bootstrap/`: One-time setup to create the remote state bucket and enable required APIs. +2. `modules/`: Reusable Terraform modules (GKE Cluster, Artifact Registry). +3. `envs/`: Environment-specific configurations (`dev`, `prod`). + +## How to Run + +### 1. Bootstrap (First time only) + +This step creates the Google Cloud Storage bucket required to store the Terraform state file. + +1. Navigate to the bootstrap directory: + ```bash + cd bootstrap + ``` +2. Initialize Terraform: + ```bash + terraform init + ``` +3. Apply the configuration: + ```bash + terraform apply -var-file="values.tfvars" + ``` +4. **Important**: Note the output `terraform_state_bucket_name`. You will need this value if you configure the backend manually later, though our scripts automate this. + +### 2. 
Deploy Environment (Dev or Prod) + +**Note**: This step is fully automated via GitHub Actions. See `.github/workflows/infra-pipeline.yml`. +Running these commands locally is optional and typically reserved for debugging or initial setup without triggering a pipeline run. + +1. Navigate to the environment directory: + ```bash + cd envs/dev # or envs/prod + ``` + +2. Initialize Terraform. + This command downloads the Google provider and configures the remote backend to use the bucket created in the bootstrap step. + ```bash + terraform init + ``` + +3. Review the plan. + Always check what Terraform intends to create before applying: + ```bash + terraform plan -var-file="values.tfvars" + ``` + +4. Apply the changes. + This will create the resources in GCP: + ```bash + terraform apply -auto-approve -var-file="values.tfvars" + ``` + +## Outputs + +After a successful `apply`, Terraform will print valuable connection information to your terminal. + +1. **Cluster Endpoint**: The internal IP of the Kubernetes API. +2. **Get Credentials Command**: Terraform will provide a `gcloud` command similar to this: + ```bash + gcloud container clusters get-credentials bookshelf-dev-cluster --region us-central1 --project gcp-capstone-481414 + ``` +3. **Action**: Copy and paste this command into your terminal. It configures your local `kubectl` to communicate with the new GKE cluster. + +## Troubleshooting + +1. **Error: Backend configuration changed**: Ensure you have run the `bootstrap` step first and the bucket exists. +2. **Error: API not enabled**: The bootstrap script should handle this, but if you encounter errors regarding `container.googleapis.com`, enable it manually in the GCP Console. +3. **Module not found**: Ensure you are running commands from inside `envs/dev` or `envs/prod`, not the root `infra` folder. 
diff --git a/infra/envs/dev/backend.tf b/infra/envs/dev/backend.tf new file mode 100644 index 0000000..5c515cc --- /dev/null +++ b/infra/envs/dev/backend.tf @@ -0,0 +1,6 @@ +terraform { + backend "gcs" { + bucket = "gcp-passline-481414-tf-state" + prefix = "passline/dev" + } +} \ No newline at end of file diff --git a/infra/envs/dev/main.tf b/infra/envs/dev/main.tf new file mode 100644 index 0000000..c48282e --- /dev/null +++ b/infra/envs/dev/main.tf @@ -0,0 +1,27 @@ +# Set up the default VPC and Subnet dynamically, no hardcode IDs +data "google_compute_network" "vpc" { + name = var.vpc_network_name +} + +data "google_compute_subnetwork" "default" { + name = "default" + region = var.region +} + +module "artifact_registry" { + source = "../../modules/artifact-registry" + + project_id = var.project_id + region = var.region + repository_name = "bookshelf-docker-repo" +} + +module "gke_cluster" { + source = "../../modules/gke-cluster" + + project_id = var.project_id + region = var.region + cluster_name = "bookshelf-dev-cluster" + network_name = data.google_compute_network.vpc.name + subnetwork_name = data.google_compute_subnetwork.default.name +} diff --git a/infra/envs/dev/outputs.tf b/infra/envs/dev/outputs.tf new file mode 100644 index 0000000..fe6d550 --- /dev/null +++ b/infra/envs/dev/outputs.tf @@ -0,0 +1,7 @@ +output "artifact_registry_url" { + value = module.artifact_registry.repository_url +} + +output "gke_cluster_endpoint" { + value = module.gke_cluster.cluster_endpoint +} diff --git a/infra/envs/dev/providers.tf b/infra/envs/dev/providers.tf new file mode 100644 index 0000000..a9db365 --- /dev/null +++ b/infra/envs/dev/providers.tf @@ -0,0 +1,14 @@ +terraform { + required_version = "1.14.2" + required_providers { + google = { + source = "hashicorp/google" + version = "7.13.0" + } + } +} + +provider "google" { + project = var.project_id + region = var.region +} \ No newline at end of file diff --git a/infra/envs/dev/values.tfvars 
b/infra/envs/dev/values.tfvars new file mode 100644 index 0000000..8597202 --- /dev/null +++ b/infra/envs/dev/values.tfvars @@ -0,0 +1,5 @@ +project_id = "gcp-capstone-481414" +region = "us-central1" + +# Network configuration +vpc_network_name = "default" diff --git a/infra/envs/dev/values.tfvars.example b/infra/envs/dev/values.tfvars.example new file mode 100644 index 0000000..da185f3 --- /dev/null +++ b/infra/envs/dev/values.tfvars.example @@ -0,0 +1,3 @@ +project_id = "gcp-passline" +region = "us-central1" +vpc_network_name = "default" diff --git a/infra/envs/dev/variables.tf b/infra/envs/dev/variables.tf new file mode 100644 index 0000000..d8cce15 --- /dev/null +++ b/infra/envs/dev/variables.tf @@ -0,0 +1,15 @@ +variable "project_id" { + description = "The GCP project ID" + type = string +} + +variable "region" { + description = "The GCP region" + type = string +} + +# Network variables +variable "vpc_network_name" { + description = "The name of the VPC network (usually 'default' for new projects)" + type = string +} diff --git a/infra/envs/prod/backend.tf b/infra/envs/prod/backend.tf new file mode 100644 index 0000000..10b5d15 --- /dev/null +++ b/infra/envs/prod/backend.tf @@ -0,0 +1,6 @@ +terraform { + backend "gcs" { + bucket = "gcp-passline-481414-tf-state" + prefix = "passline/prod" + } +} diff --git a/infra/envs/prod/main.tf b/infra/envs/prod/main.tf new file mode 100644 index 0000000..d3d3c83 --- /dev/null +++ b/infra/envs/prod/main.tf @@ -0,0 +1,27 @@ +# Set up the default VPC and Subnet dynamically, no hardcode IDs +data "google_compute_network" "vpc" { + name = var.vpc_network_name +} + +data "google_compute_subnetwork" "default" { + name = "default" + region = var.region +} + +module "artifact_registry" { + source = "../../modules/artifact-registry" + + project_id = var.project_id + region = var.region + repository_name = "bookshelf-docker-repo" +} + +module "gke_cluster" { + source = "../../modules/gke-cluster" + + project_id = var.project_id + 
region = var.region + cluster_name = "bookshelf-prod-cluster" + network_name = data.google_compute_network.vpc.name + subnetwork_name = data.google_compute_subnetwork.default.name +} diff --git a/infra/envs/prod/outputs.tf b/infra/envs/prod/outputs.tf new file mode 100644 index 0000000..fe6d550 --- /dev/null +++ b/infra/envs/prod/outputs.tf @@ -0,0 +1,7 @@ +output "artifact_registry_url" { + value = module.artifact_registry.repository_url +} + +output "gke_cluster_endpoint" { + value = module.gke_cluster.cluster_endpoint +} diff --git a/infra/envs/prod/providers.tf b/infra/envs/prod/providers.tf new file mode 100644 index 0000000..a5ba3b6 --- /dev/null +++ b/infra/envs/prod/providers.tf @@ -0,0 +1,14 @@ +terraform { + required_version = "1.14.2" + required_providers { + google = { + source = "hashicorp/google" + version = "7.13.0" + } + } +} + +provider "google" { + project = var.project_id + region = var.region +} diff --git a/infra/envs/prod/values.tfvars b/infra/envs/prod/values.tfvars new file mode 100644 index 0000000..8597202 --- /dev/null +++ b/infra/envs/prod/values.tfvars @@ -0,0 +1,5 @@ +project_id = "gcp-capstone-481414" +region = "us-central1" + +# Network configuration +vpc_network_name = "default" diff --git a/infra/envs/prod/values.tfvars.example b/infra/envs/prod/values.tfvars.example new file mode 100644 index 0000000..da185f3 --- /dev/null +++ b/infra/envs/prod/values.tfvars.example @@ -0,0 +1,3 @@ +project_id = "gcp-passline" +region = "us-central1" +vpc_network_name = "default" diff --git a/infra/envs/prod/variables.tf b/infra/envs/prod/variables.tf new file mode 100644 index 0000000..d8cce15 --- /dev/null +++ b/infra/envs/prod/variables.tf @@ -0,0 +1,15 @@ +variable "project_id" { + description = "The GCP project ID" + type = string +} + +variable "region" { + description = "The GCP region" + type = string +} + +# Network variables +variable "vpc_network_name" { + description = "The name of the VPC network (usually 'default' for new projects)" 
+ type = string +} diff --git a/infra/modules/artifact-registry/main.tf b/infra/modules/artifact-registry/main.tf new file mode 100644 index 0000000..6391b69 --- /dev/null +++ b/infra/modules/artifact-registry/main.tf @@ -0,0 +1,16 @@ +resource "google_artifact_registry_repository" "docker_repo" { + location = var.region + repository_id = var.repository_name + description = "Docker repository for BookShelf API" + format = "DOCKER" + + # Cleanup policy to keep costs down in Dev + cleanup_policies { + id = "delete-old-untagged" + action = "DELETE" + condition { + tag_state = "UNTAGGED" + older_than = "7d" + } + } +} \ No newline at end of file diff --git a/infra/modules/artifact-registry/outputs.tf b/infra/modules/artifact-registry/outputs.tf new file mode 100644 index 0000000..6e3ea16 --- /dev/null +++ b/infra/modules/artifact-registry/outputs.tf @@ -0,0 +1,4 @@ +output "repository_url" { + description = "Full URL of the docker repository" + value = google_artifact_registry_repository.docker_repo.registry_uri +} diff --git a/infra/modules/artifact-registry/variables.tf b/infra/modules/artifact-registry/variables.tf new file mode 100644 index 0000000..d469f0e --- /dev/null +++ b/infra/modules/artifact-registry/variables.tf @@ -0,0 +1,14 @@ +variable "project_id" { + description = "The GCP project ID" + type = string +} + +variable "region" { + description = "The GCP region" + type = string +} + +variable "repository_name" { + description = "The ID of the repository" + type = string +} diff --git a/infra/modules/gke-cluster/main.tf b/infra/modules/gke-cluster/main.tf new file mode 100644 index 0000000..cfc68ba --- /dev/null +++ b/infra/modules/gke-cluster/main.tf @@ -0,0 +1,32 @@ +resource "google_container_cluster" "primary" { + name = var.cluster_name + location = var.region + project = var.project_id + + enable_autopilot = true + + # Network configuration + network = var.network_name + subnetwork = var.subnetwork_name + + # Private Cluster Configuration + 
private_cluster_config { + enable_private_endpoint = false + enable_private_nodes = true + master_ipv4_cidr_block = "172.16.0.0/28" + } + + workload_identity_config { + workload_pool = "${var.project_id}.svc.id.goog" + } + + initial_node_count = 1 + + # Authentication + master_authorized_networks_config { + cidr_blocks { + cidr_block = "0.0.0.0/0" + display_name = "Public Access" + } + } +} diff --git a/infra/modules/gke-cluster/outputs.tf b/infra/modules/gke-cluster/outputs.tf new file mode 100644 index 0000000..2cd2356 --- /dev/null +++ b/infra/modules/gke-cluster/outputs.tf @@ -0,0 +1,20 @@ +output "cluster_endpoint" { + description = "The cluster endpoint" + value = google_container_cluster.primary.endpoint +} + +output "cluster_ca_certificate" { + description = "The cluster CA certificate" + value = google_container_cluster.primary.master_auth[0].cluster_ca_certificate + sensitive = true +} + +output "location" { + description = "The location (region) of the cluster" + value = google_container_cluster.primary.location +} + +output "kubernetes_cluster_name" { + description = "The name of the cluster" + value = google_container_cluster.primary.name +} diff --git a/infra/modules/gke-cluster/variables.tf b/infra/modules/gke-cluster/variables.tf new file mode 100644 index 0000000..f04c70f --- /dev/null +++ b/infra/modules/gke-cluster/variables.tf @@ -0,0 +1,23 @@ +variable "project_id" { + description = "The GCP project ID" + type = string +} + +variable "region" { + description = "The GCP region" + type = string +} +variable "cluster_name" { + description = "Name of the GKE cluster" + type = string +} + +variable "network_name" { + description = "The VPC network name" + type = string +} + +variable "subnetwork_name" { + description = "The subnetwork name" + type = string +} diff --git a/kubernetes/fastapi-app.yaml b/kubernetes/fastapi-app.yaml index 0c1bd1e..8f3fb23 100644 --- a/kubernetes/fastapi-app.yaml +++ b/kubernetes/fastapi-app.yaml @@ -29,7 +29,7 @@ spec: 
spec: containers: - name: web - image: 01234567890.dkr.ecr.us-east-1.amazonaws.com/fastapi-microservices:1.0 + image: us-central1-docker.pkg.dev/gcp-capstone-481414/bookshelf-docker-repo/fastapi-app:dev ports: - containerPort: 8000 envFrom: @@ -42,5 +42,3 @@ spec: limits: cpu: "500m" memory: "500Mi" - imagePullSecrets: - - name: regcred \ No newline at end of file diff --git a/kubernetes/postgres-db.yaml b/kubernetes/postgres-db.yaml index 18acdff..3ea6ef6 100644 --- a/kubernetes/postgres-db.yaml +++ b/kubernetes/postgres-db.yaml @@ -69,4 +69,4 @@ spec: - ReadWriteOnce resources: requests: - storage: 1Gi \ No newline at end of file + storage: 1Gi