Skip to content

Commit b5e6f7e

Browse files
committed
Proto-marimo notebook
1 parent dacb0ed commit b5e6f7e

File tree

9 files changed

+1694
-0
lines changed

9 files changed

+1694
-0
lines changed
Lines changed: 363 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,363 @@
1+
import marimo
2+
3+
__generated_with = "0.9.0"
4+
app = marimo.App(width="medium")
5+
6+
7+
@app.cell
8+
def __():
9+
import marimo as mo
10+
11+
mo.md(
12+
r"""
13+
# 🏗️ Data Pipeline Build Runnersss
14+
15+
This notebook provides an interface to execute and monitor data pipeline builds
16+
for NYC DCP data products.
17+
"""
18+
)
19+
return (mo,)
20+
21+
22+
@app.cell
23+
def __(mo):
24+
import subprocess
25+
import sys
26+
from pathlib import Path
27+
import json
28+
from datetime import datetime
29+
30+
# Add dcpy to path
31+
repo_root = Path(__file__).parents[3] # Go up to data-engineering root
32+
products_dir = repo_root / "products"
33+
34+
mo.md(f"📁 **Products Directory**: `{products_dir}`")
35+
return Path, datetime, json, products_dir, repo_root, subprocess, sys
36+
37+
38+
@app.cell
39+
def __(mo, products_dir):
40+
# Discover available products
41+
available_products = []
42+
if products_dir.exists():
43+
for product_dir in products_dir.iterdir():
44+
if product_dir.is_dir() and (product_dir / "dbt_project.yml").exists():
45+
available_products.append(product_dir.name)
46+
47+
if available_products:
48+
product_selector = mo.ui.dropdown(
49+
options=sorted(available_products), label="Select Product to Build:"
50+
)
51+
52+
mo.md(f"""
53+
## Available Products ({len(available_products)})
54+
55+
{product_selector}
56+
""")
57+
else:
58+
product_selector = None
59+
mo.md("❌ No dbt products found in products directory")
60+
return available_products, product_selector
61+
62+
63+
@app.cell
64+
def __(mo, product_selector):
65+
# Build configuration options
66+
if product_selector and product_selector.value:
67+
# Build mode selector
68+
build_mode = mo.ui.dropdown(
69+
options=[
70+
("Full Build", "run"),
71+
("Test Only", "test"),
72+
("Compile Only", "compile"),
73+
("Parse Only", "parse"),
74+
],
75+
value="run",
76+
label="Build Mode:",
77+
)
78+
79+
# Model selector
80+
model_selector = mo.ui.text(
81+
placeholder="e.g., +model_name, tag:staging, --exclude test_type:unit",
82+
label="Model Selection (optional):",
83+
full_width=True,
84+
)
85+
86+
# Environment selector
87+
profile_selector = mo.ui.dropdown(
88+
options=[
89+
("Local Development", "local"),
90+
("Development", "development"),
91+
("Staging", "staging"),
92+
("Production", "production"),
93+
],
94+
value="local",
95+
label="Target Environment:",
96+
)
97+
98+
mo.md(f"""
99+
### Build Configuration for {product_selector.value}
100+
101+
{build_mode}
102+
{model_selector}
103+
{profile_selector}
104+
""")
105+
else:
106+
build_mode = model_selector = profile_selector = None
107+
mo.md("")
108+
return build_mode, model_selector, profile_selector
109+
110+
111+
@app.cell
112+
def __(mo):
113+
# Build execution status
114+
build_status = {
115+
"running": False,
116+
"last_run": None,
117+
"output": "",
118+
"return_code": None,
119+
}
120+
121+
mo.md("## Build Execution")
122+
return (build_status,)
123+
124+
125+
@app.cell
126+
def __(
127+
build_mode,
128+
build_status,
129+
datetime,
130+
mo,
131+
model_selector,
132+
product_selector,
133+
products_dir,
134+
profile_selector,
135+
subprocess,
136+
):
137+
# Build execution function
138+
def run_dbt_command():
139+
"""Execute the dbt command based on selected options."""
140+
if not product_selector or not product_selector.value:
141+
return "❌ No product selected"
142+
143+
product_path = products_dir / product_selector.value
144+
if not product_path.exists():
145+
return f"❌ Product directory not found: {product_path}"
146+
147+
# Build the dbt command
148+
cmd = ["dbt", build_mode.value if build_mode else "run"]
149+
150+
# Add model selection if specified
151+
if model_selector and model_selector.value.strip():
152+
cmd.extend(["--select", model_selector.value.strip()])
153+
154+
# Add profile directory (assumes profiles.yml in product dir)
155+
cmd.extend(["--profiles-dir", str(product_path)])
156+
157+
# Add target environment
158+
if profile_selector and profile_selector.value != "local":
159+
cmd.extend(["--target", profile_selector.value])
160+
161+
build_status["running"] = True
162+
build_status["last_run"] = datetime.now()
163+
164+
try:
165+
# Execute the command
166+
result = subprocess.run(
167+
cmd,
168+
cwd=product_path,
169+
capture_output=True,
170+
text=True,
171+
timeout=300, # 5 minute timeout
172+
)
173+
174+
build_status["output"] = (
175+
f"STDOUT:\n{result.stdout}\n\nSTDERR:\n{result.stderr}"
176+
)
177+
build_status["return_code"] = result.returncode
178+
179+
if result.returncode == 0:
180+
status_msg = f"✅ Build completed successfully!"
181+
else:
182+
status_msg = f"❌ Build failed with code {result.returncode}"
183+
184+
except subprocess.TimeoutExpired:
185+
build_status["output"] = "❌ Build timed out after 5 minutes"
186+
build_status["return_code"] = -1
187+
status_msg = "⏰ Build timed out"
188+
189+
except Exception as e:
190+
build_status["output"] = f"❌ Error executing build: {str(e)}"
191+
build_status["return_code"] = -1
192+
status_msg = f"💥 Build error: {str(e)}"
193+
194+
finally:
195+
build_status["running"] = False
196+
197+
return status_msg
198+
199+
# Build button and status display
200+
if product_selector and product_selector.value and build_mode:
201+
# Show current configuration
202+
config_info = f"""
203+
**Product**: {product_selector.value}
204+
**Mode**: {build_mode.value}
205+
**Target**: {profile_selector.value if profile_selector else "local"}
206+
"""
207+
208+
if model_selector and model_selector.value.strip():
209+
config_info += f" \n**Selection**: `{model_selector.value.strip()}`"
210+
211+
# Run button
212+
run_button = mo.ui.run_button(label="🚀 Execute Build")
213+
214+
if run_button.value:
215+
if not build_status["running"]:
216+
result_msg = run_dbt_command()
217+
mo.md(f"""
218+
### Build Configuration
219+
{config_info}
220+
221+
{run_button}
222+
223+
### Build Result
224+
{result_msg}
225+
226+
**Started**: {build_status["last_run"].strftime("%Y-%m-%d %H:%M:%S") if build_status["last_run"] else "Never"}
227+
**Status**: {"🔄 Running" if build_status["running"] else "✅ Complete" if build_status["return_code"] == 0 else "❌ Failed"}
228+
""")
229+
else:
230+
mo.md(f"""
231+
### Build Configuration
232+
{config_info}
233+
234+
{run_button}
235+
236+
### Build Status
237+
🔄 **Build is currently running...**
238+
239+
Started: {build_status["last_run"].strftime("%Y-%m-%d %H:%M:%S")}
240+
""")
241+
else:
242+
mo.md(f"""
243+
### Build Configuration
244+
{config_info}
245+
246+
{run_button}
247+
""")
248+
else:
249+
run_button = None
250+
mo.md("Complete the configuration above to execute a build.")
251+
return config_info, result_msg, run_button, run_dbt_command
252+
253+
254+
@app.cell
255+
def __(build_status, mo):
256+
# Build output display
257+
if build_status["output"]:
258+
mo.md(f"""
259+
### Build Output
260+
261+
**Return Code**: {build_status["return_code"]}
262+
263+
```
264+
{build_status["output"][:2000]}{"..." if len(build_status["output"]) > 2000 else ""}
265+
```
266+
""")
267+
else:
268+
mo.md("")
269+
return
270+
271+
272+
@app.cell
273+
def __(mo, products_dir):
274+
# Quick product info
275+
def get_product_info(product_name):
276+
"""Get basic info about a product."""
277+
product_path = products_dir / product_name
278+
279+
info = {
280+
"path": str(product_path),
281+
"has_dbt_project": (product_path / "dbt_project.yml").exists(),
282+
"has_profiles": (product_path / "profiles.yml").exists(),
283+
"has_readme": (product_path / "README.md").exists(),
284+
}
285+
286+
# Try to read dbt_project.yml for more info
287+
try:
288+
import yaml
289+
290+
with open(product_path / "dbt_project.yml") as f:
291+
dbt_config = yaml.safe_load(f)
292+
info["dbt_version"] = dbt_config.get("require-dbt-version", "unknown")
293+
info["models"] = list(dbt_config.get("models", {}).keys())
294+
except:
295+
info["dbt_version"] = "unknown"
296+
info["models"] = []
297+
298+
return info
299+
300+
if product_selector and product_selector.value:
301+
product_info = get_product_info(product_selector.value)
302+
303+
mo.md(f"""
304+
---
305+
306+
### Product Information: {product_selector.value}
307+
308+
- **Path**: `{product_info["path"]}`
309+
- **dbt Project**: {"✅" if product_info["has_dbt_project"] else "❌"}
310+
- **Local Profiles**: {"✅" if product_info["has_profiles"] else "❌"}
311+
- **README**: {"✅" if product_info["has_readme"] else "❌"}
312+
- **dbt Version**: {product_info["dbt_version"]}
313+
""")
314+
else:
315+
mo.md("")
316+
return get_product_info, product_info
317+
318+
319+
@app.cell
320+
def __(mo):
321+
mo.md(
322+
r"""
323+
---
324+
325+
## 💡 Usage Guide
326+
327+
### Build Modes
328+
- **Full Build**: Runs `dbt run` - executes models and tests
329+
- **Test Only**: Runs `dbt test` - executes data quality tests
330+
- **Compile Only**: Runs `dbt compile` - validates SQL without execution
331+
- **Parse Only**: Runs `dbt parse` - validates project structure
332+
333+
### Model Selection
334+
Use dbt selection syntax:
335+
- `+model_name` - model and all upstream dependencies
336+
- `model_name+` - model and all downstream dependents
337+
- `tag:staging` - all models with "staging" tag
338+
- `--exclude test_type:unit` - exclude unit tests
339+
340+
### Environment Targets
341+
- **Local**: Uses local development settings
342+
- **Development**: Remote development environment
343+
- **Staging**: Pre-production testing environment
344+
- **Production**: Live production environment
345+
346+
### Prerequisites
347+
- dbt CLI must be installed and configured
348+
- Product must have `dbt_project.yml`
349+
- Database connections must be properly configured
350+
- Appropriate permissions for selected target environment
351+
352+
### Troubleshooting
353+
- Check build output for detailed error messages
354+
- Verify database connectivity and permissions
355+
- Ensure all required environment variables are set
356+
- Check that profiles.yml exists and is properly configured
357+
"""
358+
)
359+
return
360+
361+
362+
if __name__ == "__main__":
363+
app.run()

0 commit comments

Comments
 (0)