|
| 1 | +import marimo |
| 2 | + |
| 3 | +__generated_with = "0.9.0" |
| 4 | +app = marimo.App(width="medium") |
| 5 | + |
| 6 | + |
| 7 | +@app.cell |
| 8 | +def __(): |
| 9 | + import marimo as mo |
| 10 | + |
| 11 | + mo.md( |
| 12 | + r""" |
| 13 | + # 🏗️ Data Pipeline Build Runnersss |
| 14 | + |
| 15 | + This notebook provides an interface to execute and monitor data pipeline builds |
| 16 | + for NYC DCP data products. |
| 17 | + """ |
| 18 | + ) |
| 19 | + return (mo,) |
| 20 | + |
| 21 | + |
| 22 | +@app.cell |
| 23 | +def __(mo): |
| 24 | + import subprocess |
| 25 | + import sys |
| 26 | + from pathlib import Path |
| 27 | + import json |
| 28 | + from datetime import datetime |
| 29 | + |
| 30 | + # Add dcpy to path |
| 31 | + repo_root = Path(__file__).parents[3] # Go up to data-engineering root |
| 32 | + products_dir = repo_root / "products" |
| 33 | + |
| 34 | + mo.md(f"📁 **Products Directory**: `{products_dir}`") |
| 35 | + return Path, datetime, json, products_dir, repo_root, subprocess, sys |
| 36 | + |
| 37 | + |
| 38 | +@app.cell |
| 39 | +def __(mo, products_dir): |
| 40 | + # Discover available products |
| 41 | + available_products = [] |
| 42 | + if products_dir.exists(): |
| 43 | + for product_dir in products_dir.iterdir(): |
| 44 | + if product_dir.is_dir() and (product_dir / "dbt_project.yml").exists(): |
| 45 | + available_products.append(product_dir.name) |
| 46 | + |
| 47 | + if available_products: |
| 48 | + product_selector = mo.ui.dropdown( |
| 49 | + options=sorted(available_products), label="Select Product to Build:" |
| 50 | + ) |
| 51 | + |
| 52 | + mo.md(f""" |
| 53 | + ## Available Products ({len(available_products)}) |
| 54 | + |
| 55 | + {product_selector} |
| 56 | + """) |
| 57 | + else: |
| 58 | + product_selector = None |
| 59 | + mo.md("❌ No dbt products found in products directory") |
| 60 | + return available_products, product_selector |
| 61 | + |
| 62 | + |
| 63 | +@app.cell |
| 64 | +def __(mo, product_selector): |
| 65 | + # Build configuration options |
| 66 | + if product_selector and product_selector.value: |
| 67 | + # Build mode selector |
| 68 | + build_mode = mo.ui.dropdown( |
| 69 | + options=[ |
| 70 | + ("Full Build", "run"), |
| 71 | + ("Test Only", "test"), |
| 72 | + ("Compile Only", "compile"), |
| 73 | + ("Parse Only", "parse"), |
| 74 | + ], |
| 75 | + value="run", |
| 76 | + label="Build Mode:", |
| 77 | + ) |
| 78 | + |
| 79 | + # Model selector |
| 80 | + model_selector = mo.ui.text( |
| 81 | + placeholder="e.g., +model_name, tag:staging, --exclude test_type:unit", |
| 82 | + label="Model Selection (optional):", |
| 83 | + full_width=True, |
| 84 | + ) |
| 85 | + |
| 86 | + # Environment selector |
| 87 | + profile_selector = mo.ui.dropdown( |
| 88 | + options=[ |
| 89 | + ("Local Development", "local"), |
| 90 | + ("Development", "development"), |
| 91 | + ("Staging", "staging"), |
| 92 | + ("Production", "production"), |
| 93 | + ], |
| 94 | + value="local", |
| 95 | + label="Target Environment:", |
| 96 | + ) |
| 97 | + |
| 98 | + mo.md(f""" |
| 99 | + ### Build Configuration for {product_selector.value} |
| 100 | + |
| 101 | + {build_mode} |
| 102 | + {model_selector} |
| 103 | + {profile_selector} |
| 104 | + """) |
| 105 | + else: |
| 106 | + build_mode = model_selector = profile_selector = None |
| 107 | + mo.md("") |
| 108 | + return build_mode, model_selector, profile_selector |
| 109 | + |
| 110 | + |
| 111 | +@app.cell |
| 112 | +def __(mo): |
| 113 | + # Build execution status |
| 114 | + build_status = { |
| 115 | + "running": False, |
| 116 | + "last_run": None, |
| 117 | + "output": "", |
| 118 | + "return_code": None, |
| 119 | + } |
| 120 | + |
| 121 | + mo.md("## Build Execution") |
| 122 | + return (build_status,) |
| 123 | + |
| 124 | + |
| 125 | +@app.cell |
| 126 | +def __( |
| 127 | + build_mode, |
| 128 | + build_status, |
| 129 | + datetime, |
| 130 | + mo, |
| 131 | + model_selector, |
| 132 | + product_selector, |
| 133 | + products_dir, |
| 134 | + profile_selector, |
| 135 | + subprocess, |
| 136 | +): |
| 137 | + # Build execution function |
| 138 | + def run_dbt_command(): |
| 139 | + """Execute the dbt command based on selected options.""" |
| 140 | + if not product_selector or not product_selector.value: |
| 141 | + return "❌ No product selected" |
| 142 | + |
| 143 | + product_path = products_dir / product_selector.value |
| 144 | + if not product_path.exists(): |
| 145 | + return f"❌ Product directory not found: {product_path}" |
| 146 | + |
| 147 | + # Build the dbt command |
| 148 | + cmd = ["dbt", build_mode.value if build_mode else "run"] |
| 149 | + |
| 150 | + # Add model selection if specified |
| 151 | + if model_selector and model_selector.value.strip(): |
| 152 | + cmd.extend(["--select", model_selector.value.strip()]) |
| 153 | + |
| 154 | + # Add profile directory (assumes profiles.yml in product dir) |
| 155 | + cmd.extend(["--profiles-dir", str(product_path)]) |
| 156 | + |
| 157 | + # Add target environment |
| 158 | + if profile_selector and profile_selector.value != "local": |
| 159 | + cmd.extend(["--target", profile_selector.value]) |
| 160 | + |
| 161 | + build_status["running"] = True |
| 162 | + build_status["last_run"] = datetime.now() |
| 163 | + |
| 164 | + try: |
| 165 | + # Execute the command |
| 166 | + result = subprocess.run( |
| 167 | + cmd, |
| 168 | + cwd=product_path, |
| 169 | + capture_output=True, |
| 170 | + text=True, |
| 171 | + timeout=300, # 5 minute timeout |
| 172 | + ) |
| 173 | + |
| 174 | + build_status["output"] = ( |
| 175 | + f"STDOUT:\n{result.stdout}\n\nSTDERR:\n{result.stderr}" |
| 176 | + ) |
| 177 | + build_status["return_code"] = result.returncode |
| 178 | + |
| 179 | + if result.returncode == 0: |
| 180 | + status_msg = f"✅ Build completed successfully!" |
| 181 | + else: |
| 182 | + status_msg = f"❌ Build failed with code {result.returncode}" |
| 183 | + |
| 184 | + except subprocess.TimeoutExpired: |
| 185 | + build_status["output"] = "❌ Build timed out after 5 minutes" |
| 186 | + build_status["return_code"] = -1 |
| 187 | + status_msg = "⏰ Build timed out" |
| 188 | + |
| 189 | + except Exception as e: |
| 190 | + build_status["output"] = f"❌ Error executing build: {str(e)}" |
| 191 | + build_status["return_code"] = -1 |
| 192 | + status_msg = f"💥 Build error: {str(e)}" |
| 193 | + |
| 194 | + finally: |
| 195 | + build_status["running"] = False |
| 196 | + |
| 197 | + return status_msg |
| 198 | + |
| 199 | + # Build button and status display |
| 200 | + if product_selector and product_selector.value and build_mode: |
| 201 | + # Show current configuration |
| 202 | + config_info = f""" |
| 203 | + **Product**: {product_selector.value} |
| 204 | + **Mode**: {build_mode.value} |
| 205 | + **Target**: {profile_selector.value if profile_selector else "local"} |
| 206 | + """ |
| 207 | + |
| 208 | + if model_selector and model_selector.value.strip(): |
| 209 | + config_info += f" \n**Selection**: `{model_selector.value.strip()}`" |
| 210 | + |
| 211 | + # Run button |
| 212 | + run_button = mo.ui.run_button(label="🚀 Execute Build") |
| 213 | + |
| 214 | + if run_button.value: |
| 215 | + if not build_status["running"]: |
| 216 | + result_msg = run_dbt_command() |
| 217 | + mo.md(f""" |
| 218 | + ### Build Configuration |
| 219 | + {config_info} |
| 220 | + |
| 221 | + {run_button} |
| 222 | + |
| 223 | + ### Build Result |
| 224 | + {result_msg} |
| 225 | + |
| 226 | + **Started**: {build_status["last_run"].strftime("%Y-%m-%d %H:%M:%S") if build_status["last_run"] else "Never"} |
| 227 | + **Status**: {"🔄 Running" if build_status["running"] else "✅ Complete" if build_status["return_code"] == 0 else "❌ Failed"} |
| 228 | + """) |
| 229 | + else: |
| 230 | + mo.md(f""" |
| 231 | + ### Build Configuration |
| 232 | + {config_info} |
| 233 | + |
| 234 | + {run_button} |
| 235 | + |
| 236 | + ### Build Status |
| 237 | + 🔄 **Build is currently running...** |
| 238 | + |
| 239 | + Started: {build_status["last_run"].strftime("%Y-%m-%d %H:%M:%S")} |
| 240 | + """) |
| 241 | + else: |
| 242 | + mo.md(f""" |
| 243 | + ### Build Configuration |
| 244 | + {config_info} |
| 245 | + |
| 246 | + {run_button} |
| 247 | + """) |
| 248 | + else: |
| 249 | + run_button = None |
| 250 | + mo.md("Complete the configuration above to execute a build.") |
| 251 | + return config_info, result_msg, run_button, run_dbt_command |
| 252 | + |
| 253 | + |
| 254 | +@app.cell |
| 255 | +def __(build_status, mo): |
| 256 | + # Build output display |
| 257 | + if build_status["output"]: |
| 258 | + mo.md(f""" |
| 259 | + ### Build Output |
| 260 | + |
| 261 | + **Return Code**: {build_status["return_code"]} |
| 262 | + |
| 263 | + ``` |
| 264 | + {build_status["output"][:2000]}{"..." if len(build_status["output"]) > 2000 else ""} |
| 265 | + ``` |
| 266 | + """) |
| 267 | + else: |
| 268 | + mo.md("") |
| 269 | + return |
| 270 | + |
| 271 | + |
| 272 | +@app.cell |
| 273 | +def __(mo, products_dir): |
| 274 | + # Quick product info |
| 275 | + def get_product_info(product_name): |
| 276 | + """Get basic info about a product.""" |
| 277 | + product_path = products_dir / product_name |
| 278 | + |
| 279 | + info = { |
| 280 | + "path": str(product_path), |
| 281 | + "has_dbt_project": (product_path / "dbt_project.yml").exists(), |
| 282 | + "has_profiles": (product_path / "profiles.yml").exists(), |
| 283 | + "has_readme": (product_path / "README.md").exists(), |
| 284 | + } |
| 285 | + |
| 286 | + # Try to read dbt_project.yml for more info |
| 287 | + try: |
| 288 | + import yaml |
| 289 | + |
| 290 | + with open(product_path / "dbt_project.yml") as f: |
| 291 | + dbt_config = yaml.safe_load(f) |
| 292 | + info["dbt_version"] = dbt_config.get("require-dbt-version", "unknown") |
| 293 | + info["models"] = list(dbt_config.get("models", {}).keys()) |
| 294 | + except: |
| 295 | + info["dbt_version"] = "unknown" |
| 296 | + info["models"] = [] |
| 297 | + |
| 298 | + return info |
| 299 | + |
| 300 | + if product_selector and product_selector.value: |
| 301 | + product_info = get_product_info(product_selector.value) |
| 302 | + |
| 303 | + mo.md(f""" |
| 304 | + --- |
| 305 | + |
| 306 | + ### Product Information: {product_selector.value} |
| 307 | + |
| 308 | + - **Path**: `{product_info["path"]}` |
| 309 | + - **dbt Project**: {"✅" if product_info["has_dbt_project"] else "❌"} |
| 310 | + - **Local Profiles**: {"✅" if product_info["has_profiles"] else "❌"} |
| 311 | + - **README**: {"✅" if product_info["has_readme"] else "❌"} |
| 312 | + - **dbt Version**: {product_info["dbt_version"]} |
| 313 | + """) |
| 314 | + else: |
| 315 | + mo.md("") |
| 316 | + return get_product_info, product_info |
| 317 | + |
| 318 | + |
| 319 | +@app.cell |
| 320 | +def __(mo): |
| 321 | + mo.md( |
| 322 | + r""" |
| 323 | + --- |
| 324 | + |
| 325 | + ## 💡 Usage Guide |
| 326 | + |
| 327 | + ### Build Modes |
| 328 | + - **Full Build**: Runs `dbt run` - executes models and tests |
| 329 | + - **Test Only**: Runs `dbt test` - executes data quality tests |
| 330 | + - **Compile Only**: Runs `dbt compile` - validates SQL without execution |
| 331 | + - **Parse Only**: Runs `dbt parse` - validates project structure |
| 332 | + |
| 333 | + ### Model Selection |
| 334 | + Use dbt selection syntax: |
| 335 | + - `+model_name` - model and all upstream dependencies |
| 336 | + - `model_name+` - model and all downstream dependents |
| 337 | + - `tag:staging` - all models with "staging" tag |
| 338 | + - `--exclude test_type:unit` - exclude unit tests |
| 339 | + |
| 340 | + ### Environment Targets |
| 341 | + - **Local**: Uses local development settings |
| 342 | + - **Development**: Remote development environment |
| 343 | + - **Staging**: Pre-production testing environment |
| 344 | + - **Production**: Live production environment |
| 345 | + |
| 346 | + ### Prerequisites |
| 347 | + - dbt CLI must be installed and configured |
| 348 | + - Product must have `dbt_project.yml` |
| 349 | + - Database connections must be properly configured |
| 350 | + - Appropriate permissions for selected target environment |
| 351 | + |
| 352 | + ### Troubleshooting |
| 353 | + - Check build output for detailed error messages |
| 354 | + - Verify database connectivity and permissions |
| 355 | + - Ensure all required environment variables are set |
| 356 | + - Check that profiles.yml exists and is properly configured |
| 357 | + """ |
| 358 | + ) |
| 359 | + return |
| 360 | + |
| 361 | + |
| 362 | +if __name__ == "__main__": |
| 363 | + app.run() |
0 commit comments