Skip to content

Commit c026001

Browse files
authored
Add support for opentelemetry metrics (#1245)
* feat(telemetry): initialize OpenTelemetry metrics provider
* feat(metrics): implement and integrate HTTP request duration metrics
* feat(metrics): instrument database query durations
* refactor(database): add OTel name mapping helper
* feat(metrics): instrument database connection pool
* feat(telemetry): update OTel collector to receive OTLP metrics
* feat(telemetry): add metrics panels to Grafana dashboard
* refactor(metrics): clean up database instrumentation with helper functions
* fix(metrics): ensure correct db.system.name for ODBC connections by reusing discovery logic
* fix(metrics): remove global OnceLock and pass database type explicitly to pool callbacks
* refactor(metrics): use OpenTelemetry semantic convention constants
* refactor(metrics): eliminate all OTel convention string literals in favor of constants
* fix(telemetry): use standard YAML list notation in OTel collector config to fix parsing error
* fix(telemetry): fix clippy warnings and use latest OTel attribute names
* fix(telemetry): add Default impl for TelemetryMetrics
* Add app-scoped telemetry metrics for HTTP
* Refactor db query metrics recording
* Use semantic convention attribute names
* Track db pool metrics during lifecycle
* Adjust telemetry docker compose
* fix(metrics): configure explicit histogram boundaries and fix database latency measurement
* Enable metrics views for explicit histograms
* Use observable gauge for pool connection count
* Read pool gauge directly from sqlx pool
* telemetry: export OTEL metrics every second in example stack
* Improve Grafana trace list links and readability
* Restore removed DB connection debug logs
1 parent 27765b9 commit c026001

File tree

16 files changed

+720
-202
lines changed

16 files changed

+720
-202
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,8 +88,9 @@ tracing-opentelemetry = "0.32"
8888
tracing-actix-web = { version = "0.7", default-features = false, features = ["opentelemetry_0_31"] }
8989
tracing-log = "0.2"
9090
opentelemetry = "0.31"
91-
opentelemetry_sdk = { version = "0.31", features = ["rt-tokio-current-thread"] }
92-
opentelemetry-otlp = { version = "0.31", features = ["http-proto", "grpc-tonic"] }
91+
opentelemetry_sdk = { version = "0.31", features = ["metrics", "rt-tokio-current-thread", "spec_unstable_metrics_views"] }
92+
opentelemetry-otlp = { version = "0.31", features = ["http-proto", "grpc-tonic", "metrics"] }
93+
opentelemetry-semantic-conventions = { version = "0.31", features = ["semconv_experimental"] }
9394

9495

9596
[features]

examples/telemetry/docker-compose.yml

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ services:
2323
environment:
2424
- DATABASE_URL=postgres://sqlpage:sqlpage@postgres:5432/sqlpage
2525
- OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318
26+
- OTEL_METRIC_EXPORT_INTERVAL=1000
2627
- OTEL_SERVICE_NAME=sqlpage
2728
volumes:
2829
- ./website:/var/www
@@ -113,8 +114,10 @@ services:
113114
- "1514:1514/udp"
114115
- "1516:1516/udp"
115116
depends_on:
116-
- tempo
117-
- postgres
117+
tempo:
118+
condition: service_started
119+
postgres:
120+
condition: service_started
118121
loki:
119122
condition: service_healthy
120123

examples/telemetry/grafana/sqlpage-home.json

Lines changed: 153 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@
3939
"showLineNumbers": false,
4040
"showMiniMap": false
4141
},
42-
"content": "<div style=\"padding: 4px 2px 0; font-size: 15px; line-height: 1.55;\"><h1 style=\"margin: 0 0 10px; font-size: 28px;\">Recent SQLPage traces, logs, and PostgreSQL metrics</h1><p style=\"margin: 0 0 8px;\">Open <a href=\"http://localhost\" target=\"_blank\" rel=\"noopener noreferrer\">http://localhost</a> and interact with the app. New requests will appear here automatically.</p><p style=\"margin: 0; color: #666;\">The trace table shows recent requests. Click any trace ID to open the full span waterfall in Grafana. PostgreSQL slow-query explain plans appear in the PostgreSQL Logs panel and link back to the same trace via the extracted trace ID. The metrics panels come from the OpenTelemetry PostgreSQL receiver via Prometheus.</p></div>",
42+
"content": "<div style=\"padding: 4px 2px 0; font-size: 15px; line-height: 1.55;\"><h1 style=\"margin: 0 0 10px; font-size: 28px;\">SQLPage Observability</h1><p style=\"margin: 0 0 8px;\">Open <a href=\"http://localhost\" target=\"_blank\" rel=\"noopener noreferrer\">http://localhost</a> and interact with the app. New requests will appear here automatically.</p><p style=\"margin: 0; color: #666;\">This dashboard shows traces, logs, and application metrics exported by SQLPage. Trace waterfalls link to PostgreSQL logs via trace IDs. Metrics include HTTP durations, DB query latencies, and connection pool states.</p></div>",
4343
"mode": "html"
4444
},
4545
"pluginVersion": "12.4.0",
@@ -54,7 +54,39 @@
5454
"fieldConfig": {
5555
"defaults": {
5656
"color": {
57-
"mode": "thresholds"
57+
"mode": "palette-classic"
58+
},
59+
"custom": {
60+
"axisBorderShow": false,
61+
"axisCenteredZero": false,
62+
"axisColorMode": "text",
63+
"axisLabel": "",
64+
"axisPlacement": "auto",
65+
"barAlignment": 0,
66+
"drawStyle": "line",
67+
"fillOpacity": 10,
68+
"gradientMode": "none",
69+
"hideFrom": {
70+
"legend": false,
71+
"tooltip": false,
72+
"viz": false
73+
},
74+
"insertNulls": false,
75+
"lineInterpolation": "linear",
76+
"lineWidth": 2,
77+
"pointSize": 4,
78+
"scaleDistribution": {
79+
"type": "linear"
80+
},
81+
"showPoints": "never",
82+
"spanNulls": false,
83+
"stacking": {
84+
"group": "A",
85+
"mode": "none"
86+
},
87+
"thresholdsStyle": {
88+
"mode": "off"
89+
}
5890
},
5991
"mappings": [],
6092
"thresholds": {
@@ -63,38 +95,31 @@
6395
{
6496
"color": "green",
6597
"value": null
66-
},
67-
{
68-
"color": "orange",
69-
"value": 10
7098
}
7199
]
72100
},
73-
"unit": "none"
101+
"unit": "s"
74102
},
75103
"overrides": []
76104
},
77105
"gridPos": {
78-
"h": 4,
106+
"h": 8,
79107
"w": 12,
80108
"x": 0,
81109
"y": 4
82110
},
83-
"id": 4,
111+
"id": 10,
84112
"options": {
85-
"colorMode": "value",
86-
"graphMode": "none",
87-
"justifyMode": "auto",
88-
"orientation": "auto",
89-
"percentChangeColorMode": "standard",
90-
"reduceOptions": {
91-
"calcs": ["lastNotNull"],
92-
"fields": "",
93-
"values": false
113+
"legend": {
114+
"calcs": [],
115+
"displayMode": "list",
116+
"placement": "bottom",
117+
"showLegend": true
94118
},
95-
"showPercentChange": false,
96-
"textMode": "auto",
97-
"wideLayout": true
119+
"tooltip": {
120+
"mode": "single",
121+
"sort": "none"
122+
}
98123
},
99124
"pluginVersion": "12.4.0",
100125
"targets": [
@@ -103,12 +128,22 @@
103128
"type": "prometheus",
104129
"uid": "prometheus"
105130
},
106-
"expr": "sum(postgresql_backends)",
131+
"expr": "histogram_quantile(0.95, sum(rate(http_server_request_duration_seconds_bucket[5m])) by (le, http_route))",
132+
"legendFormat": "HTTP P95 {{http_route}}",
107133
"refId": "A"
134+
},
135+
{
136+
"datasource": {
137+
"type": "prometheus",
138+
"uid": "prometheus"
139+
},
140+
"expr": "histogram_quantile(0.95, sum(rate(db_client_operation_duration_seconds_bucket[5m])) by (le, db_operation_name))",
141+
"legendFormat": "DB P95 {{db_operation_name}}",
142+
"refId": "B"
108143
}
109144
],
110-
"title": "PostgreSQL Backends",
111-
"type": "stat"
145+
"title": "Request & Query Latency (P95)",
146+
"type": "timeseries"
112147
},
113148
{
114149
"datasource": {
@@ -162,17 +197,17 @@
162197
}
163198
]
164199
},
165-
"unit": "bytes"
200+
"unit": "none"
166201
},
167202
"overrides": []
168203
},
169204
"gridPos": {
170-
"h": 4,
205+
"h": 8,
171206
"w": 12,
172207
"x": 12,
173208
"y": 4
174209
},
175-
"id": 5,
210+
"id": 11,
176211
"options": {
177212
"legend": {
178213
"calcs": [],
@@ -192,12 +227,12 @@
192227
"type": "prometheus",
193228
"uid": "prometheus"
194229
},
195-
"expr": "sum(postgresql_db_size) by (postgresql_database_name)",
196-
"legendFormat": "{{postgresql_database_name}}",
230+
"expr": "sum(db_client_connection_count) by (db_client_connection_state)",
231+
"legendFormat": "{{db_client_connection_state}}",
197232
"refId": "A"
198233
}
199234
],
200-
"title": "PostgreSQL Database Size",
235+
"title": "SQLPage DB Connection Pool",
201236
"type": "timeseries"
202237
},
203238
{
@@ -223,8 +258,8 @@
223258
},
224259
"properties": [
225260
{
226-
"id": "custom.width",
227-
"value": 300
261+
"id": "custom.hidden",
262+
"value": true
228263
}
229264
]
230265
},
@@ -248,7 +283,35 @@
248283
"properties": [
249284
{
250285
"id": "custom.width",
251-
"value": 140
286+
"value": 120
287+
}
288+
]
289+
},
290+
{
291+
"matcher": {
292+
"id": "byName",
293+
"options": "traceName"
294+
},
295+
"properties": [
296+
{
297+
"id": "custom.width",
298+
"value": 520
299+
},
300+
{
301+
"id": "custom.cellOptions",
302+
"value": {
303+
"type": "data-links"
304+
}
305+
},
306+
{
307+
"id": "links",
308+
"value": [
309+
{
310+
"targetBlank": false,
311+
"title": "${__value.text}",
312+
"url": "/a/grafana-exploretraces-app/explore?traceId=${__data.fields.traceID}"
313+
}
314+
]
252315
}
253316
]
254317
},
@@ -279,10 +342,10 @@
279342
]
280343
},
281344
"gridPos": {
282-
"h": 12,
345+
"h": 8,
283346
"w": 24,
284347
"x": 0,
285-
"y": 8
348+
"y": 12
286349
},
287350
"id": 2,
288351
"options": {
@@ -335,7 +398,7 @@
335398
"renameByName": {
336399
"startTime": "Start time",
337400
"traceDuration": "Duration",
338-
"traceID": "Trace ID",
401+
"traceID": "Trace",
339402
"traceName": "Route",
340403
"traceService": "Service"
341404
}
@@ -344,6 +407,36 @@
344407
],
345408
"type": "table"
346409
},
410+
{
411+
"datasource": {
412+
"type": "tempo",
413+
"uid": "tempo"
414+
},
415+
"gridPos": {
416+
"h": 10,
417+
"w": 24,
418+
"x": 0,
419+
"y": 30
420+
},
421+
"id": 12,
422+
"pluginVersion": "12.4.0",
423+
"targets": [
424+
{
425+
"datasource": {
426+
"type": "tempo",
427+
"uid": "tempo"
428+
},
429+
"limit": 20,
430+
"query": "$traceId",
431+
"queryType": "traceql",
432+
"refId": "A",
433+
"tableType": "traces"
434+
}
435+
],
436+
"timeFrom": "1h",
437+
"title": "Selected Trace",
438+
"type": "traces"
439+
},
347440
{
348441
"datasource": {
349442
"type": "loki",
@@ -428,9 +521,30 @@
428521
"refresh": "5s",
429522
"schemaVersion": 41,
430523
"style": "dark",
431-
"tags": ["sqlpage", "tracing", "logs"],
524+
"tags": ["sqlpage", "tracing", "logs", "metrics"],
432525
"templating": {
433-
"list": []
526+
"list": [
527+
{
528+
"current": {
529+
"selected": true,
530+
"text": "",
531+
"value": ""
532+
},
533+
"hide": 2,
534+
"label": "Trace ID",
535+
"name": "traceId",
536+
"options": [
537+
{
538+
"selected": true,
539+
"text": "",
540+
"value": ""
541+
}
542+
],
543+
"query": "",
544+
"skipUrlSync": false,
545+
"type": "textbox"
546+
}
547+
]
434548
},
435549
"time": {
436550
"from": "now-1h",
@@ -440,5 +554,5 @@
440554
"timezone": "browser",
441555
"title": "SQLPage Observability Home",
442556
"uid": "sqlpage-tracing-home",
443-
"version": 5
557+
"version": 6
444558
}

0 commit comments

Comments
 (0)