From f0e00dd40def32a9c388a9b3dab1f325ef2df6ad Mon Sep 17 00:00:00 2001 From: tbuchaillot Date: Thu, 13 Nov 2025 18:20:03 +0100 Subject: [PATCH] POC: Prometheus metrics --- IMPLEMENTATION_PLAN.md | 785 +++++++++++++++++++++ PROMETHEUS_METRICS.md | 466 ++++++++++++ config/config.go | 30 + gateway/handler_success.go | 14 + gateway/instrumentation_handlers.go | 68 ++ gateway/instrumentation_prometheus.go | 462 ++++++++++++ gateway/instrumentation_prometheus_sink.go | 56 ++ gateway/server.go | 27 + generate-traffic.sh | 257 +++++++ internal/redis/redis.go | 2 + storage/connection_handler.go | 32 + storage/redis_cluster.go | 12 + tyk.conf.example | 9 + 13 files changed, 2220 insertions(+) create mode 100644 IMPLEMENTATION_PLAN.md create mode 100644 PROMETHEUS_METRICS.md create mode 100644 gateway/instrumentation_prometheus.go create mode 100644 gateway/instrumentation_prometheus_sink.go create mode 100755 generate-traffic.sh diff --git a/IMPLEMENTATION_PLAN.md b/IMPLEMENTATION_PLAN.md new file mode 100644 index 00000000000..4d7083674b8 --- /dev/null +++ b/IMPLEMENTATION_PLAN.md @@ -0,0 +1,785 @@ +# Prometheus Metrics Integration Implementation Plan + +## Overview +This document outlines the implementation plan for adding Prometheus metrics support to Tyk Gateway, following the existing patterns from StatsD and NewRelic integrations. + +## Requirements (from jira.md) + +### Metrics Categories +1. **System Metrics**: CPU, memory, goroutines, connections +2. **Gateway Metrics**: # APIs loaded, # Policies loaded, Requests RED metrics (Rate, Error, Duration) +3. **Redis Metrics**: pool size, active connections, latency +4. **Request Processing Metrics**: queue depth, throughput, latency, middleware execution and latency + +## Architecture Analysis + +### Existing Instrumentation Patterns + +#### 1. StatsD Integration (`gateway/instrumentation_statsd_sink.go`) +- **Location**: `gateway/instrumentation_statsd_sink.go` +- **Sink Pattern**: Implements `health.Sink` interface +- **UDP Transport**: Uses UDP for sending metrics +- **Metrics Types**: Events, Timings, Gauges, Complete +- **Prefix Support**: Configurable prefix for all metrics +- **Buffer Management**: Batches metrics before sending + +#### 2. NewRelic Integration (`internal/service/newrelic/`) +- **Location**: `internal/service/newrelic/newrelic.go`, `internal/service/newrelic/sink.go` +- **Sink Pattern**: Also implements `health.Sink` interface +- **Integration**: Uses NewRelic SDK +- **Middleware**: Router-level middleware (`nrgorilla.Middleware`) + +#### 3. 
Current Instrumentation Hook (`gateway/instrumentation_handlers.go:21-88`) +- **Entry Point**: `setupInstrumentation()` function +- **Health Stream**: Uses `health.NewStream()` from gocraft/health +- **Monitoring**: `MonitorApplicationInstrumentation()` for GC stats and RPS +- **Activation**: Via `TYK_INSTRUMENTATION` env var or `--log-instrumentation` flag + +### Key Data Points + +#### Redis Pool Stats +- Connection handler: `storage/connection_handler.go` +- Redis connector interface in `storage` package +- Need to expose pool statistics from underlying Redis connection + +#### System Metrics +- GC Stats: Already monitored in `MonitorApplicationInstrumentation()` (line 68-88) +- CPU/Memory: Available via `runtime` package +- Goroutines: `runtime.NumGoroutine()` +- Connections: Tracked in `ConnectionWatcher` (`internal/httputil/connection_watcher.go`) + +#### Gateway Metrics +- APIs Count: `len(gw.apisByID)` in Gateway struct +- Policies Count: `len(gw.policiesByID)` in Gateway struct +- Request metrics: Already tracked via `instrument.NewJob()` calls + +## Implementation Plan + +### Phase 1: Configuration Structure + +#### 1.1 Add Prometheus Config to `config/config.go` + +**Location**: `config/config.go` (after line 1187, near StatsdPrefix) + +```go +// PrometheusConfig holds configuration for Prometheus metrics exposure +type PrometheusConfig struct { + // Enabled activates Prometheus metrics endpoint + Enabled bool `json:"enabled"` + // ListenAddress is the address to expose metrics (e.g., ":9090") + ListenAddress string `json:"listen_address"` + // Path is the HTTP path for metrics endpoint (default: "/metrics") + Path string `json:"path"` + // MetricPrefix is the prefix for all Tyk metrics (default: "tyk_gateway") + MetricPrefix string `json:"metric_prefix"` + // EnableGoCollector enables Go runtime metrics + EnableGoCollector bool `json:"enable_go_collector"` + // EnableProcessCollector enables process metrics + EnableProcessCollector bool `json:"enable_process_collector"` +} + +// Add to Config struct (after line 1187) +Prometheus PrometheusConfig `json:"prometheus"` +``` + +#### 1.2 Configuration Defaults + +```go +// In config defaults +Prometheus: PrometheusConfig{ + Enabled: false, + ListenAddress: ":9090", + Path: "/metrics", + MetricPrefix: "tyk_gateway", + EnableGoCollector: true, + EnableProcessCollector: true, +} +``` + +### Phase 2: Prometheus Metrics Registry + +#### 2.1 Create Prometheus Handler (`gateway/instrumentation_prometheus.go`) + +```go +package gateway + +import ( + "context" + "net/http" + "runtime" + "strconv" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/sirupsen/logrus" +) + +type PrometheusMetrics struct { + // System Metrics + goroutines prometheus.Gauge + memoryAlloc prometheus.Gauge + memoryTotal prometheus.Gauge + cpuUsage prometheus.Gauge + connections prometheus.Gauge + + // Gateway Metrics + apisLoaded prometheus.Gauge + policiesLoaded prometheus.Gauge + requestsTotal *prometheus.CounterVec + requestDuration *prometheus.HistogramVec + requestErrors *prometheus.CounterVec + + // Redis Metrics + redisPoolSize *prometheus.GaugeVec + redisActiveConns *prometheus.GaugeVec + redisLatency prometheus.Histogram + + // Request Processing Metrics + queueDepth prometheus.Gauge + throughput prometheus.Gauge + middlewareExecTime *prometheus.HistogramVec + + registry *prometheus.Registry + gw *Gateway +} + +func NewPrometheusMetrics(gw *Gateway, prefix string) 
*PrometheusMetrics { + registry := prometheus.NewRegistry() + + pm := &PrometheusMetrics{ + gw: gw, + registry: registry, + } + + // System Metrics + pm.goroutines = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "system", + Name: "goroutines", + Help: "Number of active goroutines", + }) + + pm.memoryAlloc = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "system", + Name: "memory_alloc_bytes", + Help: "Bytes of allocated heap objects", + }) + + pm.memoryTotal = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "system", + Name: "memory_total_bytes", + Help: "Total bytes obtained from OS", + }) + + pm.cpuUsage = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "system", + Name: "cpu_usage_percent", + Help: "CPU usage percentage", + }) + + pm.connections = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "system", + Name: "open_connections", + Help: "Number of open connections", + }) + + // Gateway Metrics + pm.apisLoaded = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "apis_loaded", + Help: "Number of APIs currently loaded", + }) + + pm.policiesLoaded = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "policies_loaded", + Help: "Number of policies currently loaded", + }) + + pm.requestsTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "requests_total", + Help: "Total number of requests processed (RED: Rate)", + }, + []string{"api_id", "api_name", "method", "status_code"}, + ) + + pm.requestDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "request_duration_seconds", + Help: "Request duration in seconds (RED: Duration)", + Buckets: prometheus.DefBuckets, + }, + []string{"api_id", "api_name", "method"}, + ) + + pm.requestErrors = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "request_errors_total", + Help: "Total number of request errors (RED: Errors)", + }, + []string{"api_id", "api_name", "method", "error_type"}, + ) + + // Redis Metrics + pm.redisPoolSize = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "redis", + Name: "pool_size", + Help: "Redis connection pool size", + }, + []string{"type"}, + ) + + pm.redisActiveConns = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "redis", + Name: "active_connections", + Help: "Number of active Redis connections", + }, + []string{"type"}, + ) + + pm.redisLatency = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: prefix, + Subsystem: "redis", + Name: "operation_duration_seconds", + Help: "Redis operation latency in seconds", + Buckets: []float64{.001, .005, .01, .025, .05, .1, .25, .5, 1}, + }) + + // Request Processing Metrics + pm.queueDepth = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "processing", + Name: "queue_depth", + Help: "Current request queue depth", + }) + + pm.throughput = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "processing", + Name: "throughput_rps", + Help: "Current throughput in requests per second", + }) + + pm.middlewareExecTime = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: prefix, + Subsystem: "processing", + Name: 
"middleware_execution_seconds", + Help: "Middleware execution time in seconds", + Buckets: []float64{.001, .005, .01, .025, .05, .1, .25, .5, 1}, + }, + []string{"middleware_name", "api_id"}, + ) + + // Register all metrics + registry.MustRegister( + pm.goroutines, + pm.memoryAlloc, + pm.memoryTotal, + pm.cpuUsage, + pm.connections, + pm.apisLoaded, + pm.policiesLoaded, + pm.requestsTotal, + pm.requestDuration, + pm.requestErrors, + pm.redisPoolSize, + pm.redisActiveConns, + pm.redisLatency, + pm.queueDepth, + pm.throughput, + pm.middlewareExecTime, + ) + + return pm +} + +// UpdateSystemMetrics updates system-level metrics +func (pm *PrometheusMetrics) UpdateSystemMetrics() { + var m runtime.MemStats + runtime.ReadMemStats(&m) + + pm.goroutines.Set(float64(runtime.NumGoroutine())) + pm.memoryAlloc.Set(float64(m.Alloc)) + pm.memoryTotal.Set(float64(m.TotalAlloc)) + + if pm.gw.ConnectionWatcher != nil { + pm.connections.Set(float64(pm.gw.ConnectionWatcher.Count())) + } +} + +// UpdateGatewayMetrics updates gateway-specific metrics +func (pm *PrometheusMetrics) UpdateGatewayMetrics() { + pm.gw.apisMu.RLock() + pm.apisLoaded.Set(float64(len(pm.gw.apisByID))) + pm.gw.apisMu.RUnlock() + + pm.gw.policiesMu.RLock() + pm.policiesLoaded.Set(float64(len(pm.gw.policiesByID))) + pm.gw.policiesMu.RUnlock() + + // Throughput from GlobalRate + pm.throughput.Set(float64(GlobalRate.Rate())) +} + +// UpdateRedisMetrics updates Redis connection metrics +func (pm *PrometheusMetrics) UpdateRedisMetrics() { + if pm.gw.StorageConnectionHandler != nil { + for _, connType := range []string{storage.DefaultConn, storage.CacheConn, storage.AnalyticsConn} { + stats := pm.gw.StorageConnectionHandler.GetRedisStats(connType) + if stats != nil { + labels := prometheus.Labels{"type": connType} + pm.redisPoolSize.With(labels).Set(float64(stats.TotalConns)) + pm.redisActiveConns.With(labels).Set(float64(stats.ActiveConns)) + } + } + } +} + +// RecordRequest records request metrics (called from middleware/handler) +func (pm *PrometheusMetrics) RecordRequest(apiID, apiName, method string, statusCode int, duration float64) { + labels := prometheus.Labels{ + "api_id": apiID, + "api_name": apiName, + "method": method, + "status_code": strconv.Itoa(statusCode), + } + + pm.requestsTotal.With(labels).Inc() + + durLabels := prometheus.Labels{ + "api_id": apiID, + "api_name": apiName, + "method": method, + } + pm.requestDuration.With(durLabels).Observe(duration) + + if statusCode >= 400 { + errorType := "client_error" + if statusCode >= 500 { + errorType = "server_error" + } + + errLabels := prometheus.Labels{ + "api_id": apiID, + "api_name": apiName, + "method": method, + "error_type": errorType, + } + pm.requestErrors.With(errLabels).Inc() + } +} + +// RecordMiddlewareExecution records middleware execution time +func (pm *PrometheusMetrics) RecordMiddlewareExecution(middlewareName, apiID string, duration float64) { + pm.middlewareExecTime.With(prometheus.Labels{ + "middleware_name": middlewareName, + "api_id": apiID, + }).Observe(duration) +} + +// Handler returns the HTTP handler for metrics endpoint +func (pm *PrometheusMetrics) Handler() http.Handler { + return promhttp.HandlerFor(pm.registry, promhttp.HandlerOpts{}) +} + +// StartMetricsCollection starts background metrics collection +func (pm *PrometheusMetrics) StartMetricsCollection(ctx context.Context) { + ticker := time.NewTicker(5 * time.Second) + + go func() { + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + 
pm.UpdateSystemMetrics() + pm.UpdateGatewayMetrics() + pm.UpdateRedisMetrics() + } + } + }() +} +``` + +#### 2.2 Create Prometheus Sink (`gateway/instrumentation_prometheus_sink.go`) + +```go +package gateway + +import ( + "github.com/gocraft/health" +) + +// PrometheusSink implements health.Sink interface for Prometheus +type PrometheusSink struct { + metrics *PrometheusMetrics +} + +func NewPrometheusSink(metrics *PrometheusMetrics) *PrometheusSink { + return &PrometheusSink{ + metrics: metrics, + } +} + +func (s *PrometheusSink) EmitEvent(job, event string, kvs map[string]string) { + // Convert events to Prometheus metrics if needed +} + +func (s *PrometheusSink) EmitEventErr(job, event string, err error, kvs map[string]string) { + // Track errors in Prometheus +} + +func (s *PrometheusSink) EmitTiming(job, event string, nanos int64, kvs map[string]string) { + // Convert timing events to histograms + seconds := float64(nanos) / 1e9 + + if job == "MiddlewareCall" { + if mwName, ok := kvs["mw_name"]; ok { + apiID := kvs["api_id"] + s.metrics.RecordMiddlewareExecution(mwName, apiID, seconds) + } + } +} + +func (s *PrometheusSink) EmitGauge(job, event string, value float64, kvs map[string]string) { + // Handle gauge metrics +} + +func (s *PrometheusSink) EmitComplete(job string, status health.CompletionStatus, nanos int64, kvs map[string]string) { + // Complete status tracking +} +``` + +### Phase 3: Gateway Integration + +#### 3.1 Update `gateway/instrumentation_handlers.go` + +Add Prometheus setup alongside StatsD: + +```go +// Add to Gateway struct in gateway/server.go +PrometheusMetrics *PrometheusMetrics +prometheusServer *http.Server + +// Update setupInstrumentation() function +func (gw *Gateway) setupInstrumentation() { + gwConfig := gw.GetConfig() + + // Existing StatsD setup... + + // Prometheus Setup + if gwConfig.Prometheus.Enabled { + log.Info("Initializing Prometheus metrics...") + + gw.PrometheusMetrics = NewPrometheusMetrics(gw, gwConfig.Prometheus.MetricPrefix) + + // Add Prometheus sink to instrument stream + prometheusSink := NewPrometheusSink(gw.PrometheusMetrics) + instrument.AddSink(prometheusSink) + + // Start metrics collection + gw.PrometheusMetrics.StartMetricsCollection(gw.ctx) + + // Start Prometheus HTTP server + gw.startPrometheusServer() + + log.WithFields(logrus.Fields{ + "listen_address": gwConfig.Prometheus.ListenAddress, + "path": gwConfig.Prometheus.Path, + }).Info("Prometheus metrics endpoint started") + } + + // Existing monitoring... 
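+	// Note: MonitorApplicationInstrumentation below keeps emitting its GC/RPS
+	// jobs on the same instrument stream, so any sinks registered above
+	// (StatsD, Prometheus when enabled) receive those events as well.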
+ gw.MonitorApplicationInstrumentation() +} + +// Add Prometheus HTTP server +func (gw *Gateway) startPrometheusServer() { + gwConfig := gw.GetConfig() + + mux := http.NewServeMux() + mux.Handle(gwConfig.Prometheus.Path, gw.PrometheusMetrics.Handler()) + + gw.prometheusServer = &http.Server{ + Addr: gwConfig.Prometheus.ListenAddress, + Handler: mux, + } + + go func() { + if err := gw.prometheusServer.ListenAndServe(); err != nil && err != http.ErrServerClosed { + log.WithError(err).Error("Prometheus metrics server error") + } + }() +} + +// Add shutdown logic in Gateway.gracefulShutdown() +if gw.prometheusServer != nil { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := gw.prometheusServer.Shutdown(ctx); err != nil { + log.WithError(err).Error("Error shutting down Prometheus server") + } +} +``` + +#### 3.2 Integrate Request Metrics in Success Handler + +Update `gateway/handler_success.go` RecordHit function (around line 300): + +```go +// Add after existing analytics recording +if gw.PrometheusMetrics != nil { + duration := float64(latency.Total) / 1e9 // Convert nanoseconds to seconds + gw.PrometheusMetrics.RecordRequest( + s.Spec.APIID, + s.Spec.Name, + r.Method, + code, + duration, + ) +} +``` + +#### 3.3 Integrate Middleware Metrics + +Update `gateway/middleware.go` createMiddleware function (around line 150-180): + +```go +// Add after existing instrumentation +if gw.PrometheusMetrics != nil { + finishTime := time.Since(startTime) + gw.PrometheusMetrics.RecordMiddlewareExecution( + mw.Name(), + spec.APIID, + float64(finishTime.Nanoseconds())/1e9, + ) +} +``` + +### Phase 4: Redis Metrics Enhancement + +#### 4.1 Expose Redis Pool Stats + +Update `storage/redis.go` to expose pool statistics: + +```go +// Add method to RedisCluster +func (r *RedisCluster) PoolStats() *redis.PoolStats { + if r.singleton != nil { + return r.singleton.PoolStats() + } + return nil +} + +// Add method to ConnectionHandler in storage/connection_handler.go +func (rc *ConnectionHandler) GetRedisStats(connType string) *redis.PoolStats { + rc.connectionsMu.RLock() + defer rc.connectionsMu.RUnlock() + + if conn, ok := rc.connections[connType]; ok { + if redisConn, ok := conn.(*RedisCluster); ok { + return redisConn.PoolStats() + } + } + return nil +} +``` + +#### 4.2 Update Prometheus Metrics Collection + +```go +// In PrometheusMetrics.UpdateRedisMetrics() +func (pm *PrometheusMetrics) UpdateRedisMetrics() { + if pm.gw.StorageConnectionHandler != nil { + for _, connType := range []string{storage.DefaultConn, storage.CacheConn, storage.AnalyticsConn} { + stats := pm.gw.StorageConnectionHandler.GetRedisStats(connType) + if stats != nil { + pm.redisPoolSize.WithLabelValues(connType).Set(float64(stats.TotalConns)) + pm.redisActiveConns.WithLabelValues(connType).Set(float64(stats.ActiveConns)) + } + } + } +} +``` + +### Phase 5: Documentation and Configuration + +#### 5.1 Configuration Example + +Add to `tyk.conf.example`: + +```json +{ + "prometheus": { + "enabled": true, + "listen_address": ":9090", + "path": "/metrics", + "metric_prefix": "tyk_gateway", + "enable_go_collector": true, + "enable_process_collector": true + } +} +``` + +#### 5.2 Environment Variables + +Support environment variable configuration: +- `TYK_GW_PROMETHEUS_ENABLED` +- `TYK_GW_PROMETHEUS_LISTENADDRESS` +- `TYK_GW_PROMETHEUS_PATH` +- `TYK_GW_PROMETHEUS_METRICPREFIX` + +### Phase 6: Testing + +#### 6.1 Unit Tests + +Create `gateway/instrumentation_prometheus_test.go`: + +```go +package 
gateway + +import ( + "testing" + "github.com/prometheus/client_golang/prometheus/testutil" +) + +func TestPrometheusMetrics(t *testing.T) { + // Test metric registration + // Test metric updates + // Test HTTP handler +} + +func TestPrometheusSink(t *testing.T) { + // Test sink integration with health.Stream +} +``` + +#### 6.2 Integration Tests + +- Test metrics endpoint accessibility +- Verify metric values match expected gateway state +- Test with high request load +- Verify graceful shutdown + +### Phase 7: Additional Enhancements + +#### 7.1 Optional Go Runtime Collectors + +```go +// In NewPrometheusMetrics, optionally register Go collectors +if gwConfig.Prometheus.EnableGoCollector { + registry.MustRegister(prometheus.NewGoCollector()) +} + +if gwConfig.Prometheus.EnableProcessCollector { + registry.MustRegister(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{})) +} +``` + +#### 7.2 Custom Labels Support + +Add configuration for custom labels: + +```go +type PrometheusConfig struct { + // ... existing fields ... + CustomLabels map[string]string `json:"custom_labels"` +} +``` + +## Implementation Timeline + +### Week 1: Foundation +- [ ] Day 1-2: Configuration structure (Phase 1) +- [ ] Day 3-5: Prometheus metrics registry and handler (Phase 2.1, 2.2) + +### Week 2: Integration +- [ ] Day 1-2: Gateway integration (Phase 3.1, 3.2) +- [ ] Day 3-4: Middleware integration (Phase 3.3) +- [ ] Day 5: Redis metrics enhancement (Phase 4) + +### Week 3: Testing and Documentation +- [ ] Day 1-2: Unit tests (Phase 6.1) +- [ ] Day 3-4: Integration tests (Phase 6.2) +- [ ] Day 5: Documentation and examples (Phase 5) + +### Week 4: Enhancements and Review +- [ ] Day 1-2: Additional enhancements (Phase 7) +- [ ] Day 3-4: Code review and refinements +- [ ] Day 5: Final testing and deployment preparation + +## Metrics Exposed (Summary) + +### System Metrics +- `tyk_gateway_system_goroutines` - Number of active goroutines +- `tyk_gateway_system_memory_alloc_bytes` - Allocated heap memory +- `tyk_gateway_system_memory_total_bytes` - Total memory from OS +- `tyk_gateway_system_cpu_usage_percent` - CPU usage +- `tyk_gateway_system_open_connections` - Open connections + +### Gateway Metrics +- `tyk_gateway_gateway_apis_loaded` - Number of loaded APIs +- `tyk_gateway_gateway_policies_loaded` - Number of loaded policies +- `tyk_gateway_gateway_requests_total{api_id, api_name, method, status_code}` - Total requests (RED: Rate) +- `tyk_gateway_gateway_request_duration_seconds{api_id, api_name, method}` - Request duration histogram (RED: Duration) +- `tyk_gateway_gateway_request_errors_total{api_id, api_name, method, error_type}` - Request errors (RED: Errors) + +### Redis Metrics +- `tyk_gateway_redis_pool_size{type}` - Connection pool size (types: default, cache, analytics) +- `tyk_gateway_redis_active_connections{type}` - Active connections (types: default, cache, analytics) +- `tyk_gateway_redis_operation_duration_seconds` - Operation latency histogram + +### Request Processing Metrics +- `tyk_gateway_processing_queue_depth` - Request queue depth +- `tyk_gateway_processing_throughput_rps` - Throughput in RPS +- `tyk_gateway_processing_middleware_execution_seconds{middleware_name, api_id}` - Middleware execution time + +## Migration and Compatibility + +- Prometheus metrics run independently alongside existing StatsD/NewRelic +- No breaking changes to existing instrumentation +- Can be enabled/disabled via configuration +- Uses separate HTTP server to avoid conflicts with main gateway ports + 
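+The "runs independently alongside" point follows from the gocraft/health fan-out: every sink attached to the instrument stream receives the same events, so the Prometheus sink can simply be appended without touching the StatsD or NewRelic paths. A minimal wiring sketch, reusing the constructors proposed earlier in this plan (illustrative only; the existing sinks are assumed to be registered exactly as they are today):
+
+```go
+stream := health.NewStream()
+
+// ...existing StatsD / NewRelic sinks are registered here, unchanged...
+
+// Append the Prometheus sink: every EmitTiming/EmitComplete call on the
+// stream now reaches both the existing sinks and the new one.
+promMetrics := NewPrometheusMetrics(gw, "tyk_gateway")
+stream.AddSink(NewPrometheusSink(promMetrics))
+```
+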
+## Security Considerations + +1. **Separate Metrics Port**: Run Prometheus endpoint on dedicated port +2. **Access Control**: Document firewall rules for metrics port +3. **Sensitive Data**: Ensure no API keys or sensitive data in labels +4. **Rate Limiting**: Consider adding rate limiting to metrics endpoint for production + +## Deployment Recommendations + +1. **Production**: Enable with dedicated metrics port (e.g., 9090) +2. **Kubernetes**: Use ServiceMonitor for automatic discovery +3. **Docker**: Expose metrics port in container configuration +4. **Monitoring**: Set up Prometheus scraping with 15-30s interval + +## References + +- Existing implementations: + - `gateway/instrumentation_statsd_sink.go` + - `internal/service/newrelic/` + - `gateway/instrumentation_handlers.go` +- Prometheus client library: `github.com/prometheus/client_golang` +- Health stream: `github.com/gocraft/health` diff --git a/PROMETHEUS_METRICS.md b/PROMETHEUS_METRICS.md new file mode 100644 index 00000000000..68158e09a52 --- /dev/null +++ b/PROMETHEUS_METRICS.md @@ -0,0 +1,466 @@ +# Prometheus Metrics Integration for Tyk Gateway + +This document describes the Prometheus metrics integration for Tyk Gateway, providing comprehensive monitoring and observability capabilities. + +## Table of Contents + +- [Overview](#overview) +- [Configuration](#configuration) +- [Environment Variables](#environment-variables) +- [Available Metrics](#available-metrics) +- [Usage Examples](#usage-examples) +- [Integration with Monitoring Systems](#integration-with-monitoring-systems) +- [Best Practices](#best-practices) + +## Overview + +Tyk Gateway now supports native Prometheus metrics export through a dedicated HTTP endpoint. This integration provides detailed metrics across four key categories: + +1. **System Metrics**: Runtime performance and resource utilization +2. **Gateway Metrics**: API and policy management, request processing (RED metrics) +3. **Redis Metrics**: Connection pool statistics and performance +4. 
**Request Processing Metrics**: Queue depth, throughput, and middleware performance + +## Configuration + +### Configuration File + +Add the following section to your `tyk.conf`: + +```json +{ + "prometheus": { + "enabled": true, + "listen_address": ":9090", + "path": "/metrics", + "metric_prefix": "tyk_gateway", + "enable_go_collector": true, + "enable_process_collector": true, + "enable_per_api_metrics": false + } +} +``` + +### Configuration Options + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `enabled` | boolean | `false` | Enable/disable Prometheus metrics endpoint | +| `listen_address` | string | `":9090"` | Address and port for metrics endpoint | +| `path` | string | `"/metrics"` | HTTP path for metrics endpoint | +| `metric_prefix` | string | `"tyk_gateway"` | Prefix for all Tyk metrics | +| `enable_go_collector` | boolean | `true` | Enable Go runtime metrics collection | +| `enable_process_collector` | boolean | `true` | Enable process-level metrics collection | +| `enable_per_api_metrics` | boolean | `false` | Enable per-API metrics with `api_id` label (increases cardinality) | + +## Environment Variables + +All configuration options can be set via environment variables using the `TYK_GW_PROMETHEUS_` prefix: + +```bash +export TYK_GW_PROMETHEUS_ENABLED=true +export TYK_GW_PROMETHEUS_LISTENADDRESS=":9090" +export TYK_GW_PROMETHEUS_PATH="/metrics" +export TYK_GW_PROMETHEUS_METRICPREFIX="tyk_gateway" +export TYK_GW_PROMETHEUS_ENABLEGOCOLLECTOR=true +export TYK_GW_PROMETHEUS_ENABLEPROCESSCOLLECTOR=true +export TYK_GW_PROMETHEUS_ENABLEPERAPIMETRICS=false +``` + +## Available Metrics + +### System Metrics + +| Metric | Type | Description | Labels | +|--------|------|-------------|--------| +| `tyk_gateway_system_goroutines` | Gauge | Number of active goroutines | - | +| `tyk_gateway_system_memory_alloc_bytes` | Gauge | Bytes of allocated heap objects | - | +| `tyk_gateway_system_memory_total_bytes` | Gauge | Total bytes obtained from OS | - | +| `tyk_gateway_system_cpu_usage_percent` | Gauge | CPU usage percentage | - | +| `tyk_gateway_system_open_connections` | Gauge | Number of open connections | - | + +### Gateway Metrics (RED) + +#### Overall Gateway Metrics (Always Available) + +| Metric | Type | Description | Labels | +|--------|------|-------------|--------| +| `tyk_gateway_gateway_apis_loaded` | Gauge | Number of APIs currently loaded | - | +| `tyk_gateway_gateway_policies_loaded` | Gauge | Number of policies currently loaded | - | +| `tyk_gateway_gateway_http_requests_total` | Counter | Total HTTP requests across all APIs (RED: Rate) | `method`, `status_class` | +| `tyk_gateway_gateway_http_request_duration_seconds` | Histogram | Total HTTP request duration across all APIs (RED: Duration) | `method`, `status_class` | +| `tyk_gateway_gateway_http_request_upstream_latency_seconds` | Histogram | Upstream service latency (time waiting for upstream response) | `method`, `status_class` | +| `tyk_gateway_gateway_http_request_gateway_latency_seconds` | Histogram | Gateway processing latency (time spent in gateway middleware) | `method`, `status_class` | + +**Note**: `status_class` groups status codes as `2xx`, `3xx`, `4xx`, `5xx` to reduce cardinality. + +**Latency Breakdown**: +- **Total Duration** = Upstream Latency + Gateway Latency +- **Upstream Latency**: Time waiting for the upstream service to respond +- **Gateway Latency**: Time spent in Tyk gateway processing (authentication, rate limiting, transformations, etc.) 
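+
+How this breakdown is produced, as a minimal sketch (it mirrors the `RecordRequest` helper added in this patch, which receives total and upstream latency in nanoseconds from the analytics latency record):
+
+```go
+// latencyBreakdown converts per-request nanosecond timings into the three
+// values observed by the duration/upstream/gateway histograms above.
+func latencyBreakdown(totalNs, upstreamNs int64) (total, upstream, gateway float64) {
+	total = float64(totalNs) / 1e9
+	upstream = float64(upstreamNs) / 1e9
+	gateway = float64(totalNs-upstreamNs) / 1e9 // gateway latency = total - upstream
+	return total, upstream, gateway
+}
+```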
+ +**Error Metrics**: Calculate errors from `status_class=~"4xx|5xx"` in the requests metrics above. + +#### Per-API Metrics (Optional - `enable_per_api_metrics: true`) + +| Metric | Type | Description | Labels | +|--------|------|-------------|--------| +| `tyk_gateway_gateway_requests_total` | Counter | Total requests per API (RED: Rate) | `api_id`, `method`, `status_class` | +| `tyk_gateway_gateway_request_duration_seconds` | Histogram | Request duration per API (RED: Duration) | `api_id`, `method` | +| `tyk_gateway_gateway_request_errors_total` | Counter | Request errors per API (RED: Errors) | `api_id`, `method`, `status_class` | + +**Warning**: Per-API metrics can create high cardinality with many APIs. Enable only when needed for detailed per-API monitoring. + +### Redis Metrics + +| Metric | Type | Description | Labels | +|--------|------|-------------|--------| +| `tyk_gateway_redis_pool_size` | Gauge | Redis connection pool size | `type` (default, cache, analytics) | +| `tyk_gateway_redis_active_connections` | Gauge | Active Redis connections | `type` (default, cache, analytics) | +| `tyk_gateway_redis_operation_duration_seconds` | Histogram | Redis operation latency | - | + +### Request Processing Metrics + +| Metric | Type | Description | Labels | +|--------|------|-------------|--------| +| `tyk_gateway_processing_queue_depth` | Gauge | Current request queue depth | - | +| `tyk_gateway_processing_throughput_rps` | Gauge | Throughput in requests per second | - | +| `tyk_gateway_processing_middleware_execution_seconds` | Histogram | Middleware execution time | `middleware_name`, `api_id` | + +## Usage Examples + +### Basic Curl Test + +```bash +curl http://localhost:9090/metrics +``` + +### Sample Prometheus Configuration + +Add this to your `prometheus.yml`: + +```yaml +scrape_configs: + - job_name: 'tyk-gateway' + static_configs: + - targets: ['localhost:9090'] + scrape_interval: 15s + scrape_timeout: 10s +``` + +### Example Metrics Output + +```promql +# System Metrics +tyk_gateway_system_goroutines 245 +tyk_gateway_system_memory_alloc_bytes 45678912 +tyk_gateway_system_open_connections 12 + +# Gateway Metrics (Overall - Always Available) +tyk_gateway_gateway_apis_loaded 5 +tyk_gateway_gateway_policies_loaded 3 +tyk_gateway_gateway_http_requests_total{method="GET",status_class="2xx"} 1523 +tyk_gateway_gateway_http_requests_total{method="POST",status_class="4xx"} 12 +tyk_gateway_gateway_http_request_duration_seconds_bucket{method="GET",status_class="2xx",le="0.1"} 1450 +tyk_gateway_gateway_http_request_upstream_latency_seconds_bucket{method="GET",status_class="2xx",le="0.05"} 1400 +tyk_gateway_gateway_http_request_gateway_latency_seconds_bucket{method="GET",status_class="2xx",le="0.005"} 1500 + +# Gateway Metrics (Per-API - Only if enable_per_api_metrics=true) +tyk_gateway_gateway_requests_total{api_id="api1",method="GET",status_class="2xx"} 1000 +tyk_gateway_gateway_request_errors_total{api_id="api1",method="GET",status_class="4xx"} 5 + +# Redis Metrics +tyk_gateway_redis_pool_size{type="default"} 100 +tyk_gateway_redis_active_connections{type="default"} 45 +``` + +## Integration with Monitoring Systems + +### Grafana Dashboard + +Create a Grafana dashboard using these example queries: + +**Overall Request Rate (RED: Rate)** +```promql +sum(rate(tyk_gateway_gateway_http_requests_total[5m])) by (method, status_class) +``` + +**Overall Request Duration P95 (RED: Duration)** +```promql +histogram_quantile(0.95, 
sum(rate(tyk_gateway_gateway_http_request_duration_seconds_bucket[5m])) by (le, method)) +``` + +**Overall Error Rate (RED: Errors)** +```promql +sum(rate(tyk_gateway_gateway_http_requests_total{status_class=~"4xx|5xx"}[5m])) by (status_class) +``` + +**Upstream Latency P95** +```promql +histogram_quantile(0.95, sum(rate(tyk_gateway_gateway_http_request_upstream_latency_seconds_bucket[5m])) by (le, method)) +``` + +**Gateway Processing Latency P95** +```promql +histogram_quantile(0.95, sum(rate(tyk_gateway_gateway_http_request_gateway_latency_seconds_bucket[5m])) by (le, method)) +``` + +**Latency Breakdown Comparison** +```promql +# Compare upstream vs gateway latency +sum(rate(tyk_gateway_gateway_http_request_upstream_latency_seconds_sum[5m])) / sum(rate(tyk_gateway_gateway_http_request_upstream_latency_seconds_count[5m])) +/ +sum(rate(tyk_gateway_gateway_http_request_gateway_latency_seconds_sum[5m])) / sum(rate(tyk_gateway_gateway_http_request_gateway_latency_seconds_count[5m])) +``` + +**Per-API Request Rate (if `enable_per_api_metrics=true`)** +```promql +sum(rate(tyk_gateway_gateway_requests_total[5m])) by (api_id, method, status_class) +``` + +**Per-API Error Rate (if `enable_per_api_metrics=true`)** +```promql +sum(rate(tyk_gateway_gateway_request_errors_total[5m])) by (api_id, status_class) +``` + +**Redis Pool Utilization** +```promql +(tyk_gateway_redis_active_connections / tyk_gateway_redis_pool_size) * 100 +``` + +### Kubernetes ServiceMonitor + +For automatic Prometheus discovery in Kubernetes: + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: tyk-gateway +spec: + selector: + matchLabels: + app: tyk-gateway + endpoints: + - port: prometheus + interval: 15s + path: /metrics +``` + +### Docker Compose + +```yaml +version: '3.8' +services: + tyk-gateway: + image: tykio/tyk-gateway:latest + ports: + - "8080:8080" + - "9090:9090" + environment: + - TYK_GW_PROMETHEUS_ENABLED=true + - TYK_GW_PROMETHEUS_LISTENADDRESS=:9090 + + prometheus: + image: prom/prometheus:latest + ports: + - "9091:9090" + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml +``` + +## Latency Analysis + +### Using Latency Breakdown + +The gateway provides detailed latency breakdown to help identify performance bottlenecks: + +**Identify Bottlenecks**: +```promql +# If upstream latency is high, the problem is in your backend services +histogram_quantile(0.95, rate(tyk_gateway_gateway_http_request_upstream_latency_seconds_bucket[5m])) + +# If gateway latency is high, the problem is in gateway middleware/processing +histogram_quantile(0.95, rate(tyk_gateway_gateway_http_request_gateway_latency_seconds_bucket[5m])) +``` + +**Calculate Overhead Percentage**: +```promql +# What percentage of total time is spent in the gateway? +( + sum(rate(tyk_gateway_gateway_http_request_gateway_latency_seconds_sum[5m])) + / + sum(rate(tyk_gateway_gateway_http_request_duration_seconds_sum[5m])) +) * 100 +``` + +**Common Patterns**: +- **High Upstream, Low Gateway**: Backend services are slow - optimize your APIs +- **Low Upstream, High Gateway**: Gateway processing is slow - review middleware, rate limits, plugins +- **Both High**: Multiple bottlenecks - prioritize based on which contributes more to total latency + +### Troubleshooting Scenarios + +**Scenario 1: Slow Response Times** +1. Check total P95: `histogram_quantile(0.95, rate(tyk_gateway_gateway_http_request_duration_seconds_bucket[5m]))` +2. Compare upstream vs gateway latency to identify where time is spent +3. 
If upstream is slow: Review backend service performance +4. If gateway is slow: Review middleware configuration, plugins, rate limiting + +**Scenario 2: Increased Latency After Deployment** +```promql +# Compare gateway latency before and after deployment +histogram_quantile(0.95, + sum(rate(tyk_gateway_gateway_http_request_gateway_latency_seconds_bucket[5m] offset 1h)) by (le) +) +``` + +**Scenario 3: Inconsistent Performance** +```promql +# High variance in latency +stddev_over_time( + histogram_quantile(0.95, + rate(tyk_gateway_gateway_http_request_duration_seconds_bucket[5m]) + )[10m:1m] +) +``` + +## Best Practices + +### Cardinality Management + +**Understanding Per-API Metrics**: +- Per-API metrics add the `api_id` label to metrics +- With 100 APIs × 5 methods × 5 status classes = 2,500 time series per metric +- High cardinality can impact Prometheus performance and storage + +**When to Enable Per-API Metrics**: +- ✅ **Development/Testing**: Detailed debugging and analysis +- ✅ **Small Deployments**: <50 APIs with low request volume +- ✅ **Specific Monitoring**: Temporary troubleshooting of specific APIs +- ❌ **Large Deployments**: >100 APIs or high request volume +- ❌ **Production (default)**: Use overall gateway metrics instead + +**Recommended Configuration**: +```json +{ + "prometheus": { + "enabled": true, + "enable_per_api_metrics": false, // Default: use overall metrics + "enable_go_collector": true, + "enable_process_collector": true + } +} +``` + +**Alternative Approaches**: +- Use overall gateway metrics (`gateway_http_requests_total`) for alerting +- Enable per-API metrics temporarily for debugging specific issues +- Use API analytics/logging for detailed per-API investigation + +### Performance Considerations + +1. **Scrape Interval**: Use 15-30 second intervals for production +2. **Metrics Port**: Run on a separate port (9090) from the gateway (8080) +3. **Network Security**: Restrict metrics endpoint access via firewall rules +4. **Cardinality**: Keep per-API metrics disabled unless specifically needed + +### Security Recommendations + +1. **Separate Network**: Expose metrics port only on internal network +2. **Authentication**: Use network-level authentication (VPN, IP whitelisting) +3. 
**Sensitive Data**: Metrics do not include API keys or sensitive request data + +### Troubleshooting + +**Issue**: Metrics endpoint not accessible +```bash +# Check if Prometheus is enabled in configuration +grep -A 5 "prometheus" /opt/tyk-gateway/tyk.conf + +# Check if metrics port is listening +netstat -tln | grep 9090 +``` + +**Issue**: No data in metrics +- Ensure gateway is receiving traffic +- Check gateway logs for Prometheus initialization messages +- Verify metrics collection goroutine is running + +**Issue**: High memory usage in Prometheus +- Check cardinality: `curl -s http://localhost:9090/api/v1/status/tsdb | jq` +- **If per-API metrics are enabled**: Disable `enable_per_api_metrics` to reduce cardinality +- Reduce scrape frequency from 15s to 30s or 60s +- Consider disabling `enable_go_collector` and `enable_process_collector` if not needed +- Use recording rules to pre-aggregate high-cardinality metrics + +**Issue**: Too many time series +- Verify per-API metrics setting: `grep enable_per_api_metrics /opt/tyk-gateway/tyk.conf` +- Calculate expected cardinality: `(number of APIs) × (methods) × (status classes)` +- Consider using overall gateway metrics instead of per-API metrics +- Use Prometheus relabeling to drop unnecessary labels + +### Alerting Rules + +Example Prometheus alerting rules: + +```yaml +groups: + - name: tyk_gateway + rules: + - alert: TykHighErrorRate + expr: | + sum(rate(tyk_gateway_gateway_http_requests_total{status_class=~"4xx|5xx"}[5m])) + / + sum(rate(tyk_gateway_gateway_http_requests_total[5m])) + > 0.05 + for: 5m + annotations: + summary: "High error rate detected (>5%)" + description: "Gateway error rate is {{ $value | humanizePercentage }}" + + - alert: TykHighP95Latency + expr: | + histogram_quantile(0.95, + sum(rate(tyk_gateway_gateway_http_request_duration_seconds_bucket[5m])) by (le) + ) > 1 + for: 5m + annotations: + summary: "High P95 latency detected" + description: "P95 latency is {{ $value }}s (threshold: 1s)" + + - alert: TykRedisPoolExhaustion + expr: | + (tyk_gateway_redis_active_connections / tyk_gateway_redis_pool_size) > 0.9 + for: 5m + labels: + severity: warning + annotations: + summary: "Redis connection pool nearly exhausted" + description: "Redis pool utilization is {{ $value | humanizePercentage }} for {{ $labels.type }}" + + - alert: TykHighMemoryUsage + expr: tyk_gateway_system_memory_alloc_bytes > 1e9 + for: 10m + labels: + severity: warning + annotations: + summary: "High memory usage detected" + description: "Memory usage is {{ $value | humanize1024 }}" +``` + +## Migration from StatsD + +Prometheus can run alongside existing StatsD instrumentation without conflicts. Both systems collect metrics independently and can be gradually migrated: + +1. Enable Prometheus alongside StatsD +2. Validate metrics accuracy in Prometheus +3. Update monitoring dashboards to use Prometheus +4. 
Disable StatsD once migration is complete + +## Support + +For issues, feature requests, or questions: +- GitHub Issues: https://github.com/TykTechnologies/tyk/issues +- Documentation: https://tyk.io/docs +- Community Forum: https://community.tyk.io diff --git a/config/config.go b/config/config.go index a9c5d2db75f..5a107959d41 100644 --- a/config/config.go +++ b/config/config.go @@ -59,6 +59,15 @@ var ( Enabled: false, AllowUnsafe: []string{}, }, + Prometheus: PrometheusConfig{ + Enabled: false, + ListenAddress: ":9090", + Path: "/metrics", + MetricPrefix: "tyk_gateway", + EnableGoCollector: true, + EnableProcessCollector: true, + EnablePerAPIMetrics: false, + }, PIDFileLocation: "/var/run/tyk/tyk-gateway.pid", Security: SecurityConfig{ CertificateExpiryMonitor: CertificateExpiryMonitorConfig{ @@ -772,6 +781,24 @@ type StreamingConfig struct { AllowUnsafe []string `json:"allow_unsafe"` } +// PrometheusConfig holds configuration for Prometheus metrics exposure +type PrometheusConfig struct { + // Enabled activates Prometheus metrics endpoint + Enabled bool `json:"enabled"` + // ListenAddress is the address to expose metrics (e.g., ":9090") + ListenAddress string `json:"listen_address"` + // Path is the HTTP path for metrics endpoint (default: "/metrics") + Path string `json:"path"` + // MetricPrefix is the prefix for all Tyk metrics (default: "tyk_gateway") + MetricPrefix string `json:"metric_prefix"` + // EnableGoCollector enables Go runtime metrics + EnableGoCollector bool `json:"enable_go_collector"` + // EnableProcessCollector enables process metrics + EnableProcessCollector bool `json:"enable_process_collector"` + // EnablePerAPIMetrics enables per-API metrics with api_id label (can increase cardinality) + EnablePerAPIMetrics bool `json:"enable_per_api_metrics"` +} + // Config is the configuration object used by Tyk to set up various parameters. type Config struct { // Force your Gateway to work only on a specific domain name. Can be overridden by API custom domain. @@ -1186,6 +1213,9 @@ type Config struct { // StatsD prefix StatsdPrefix string `json:"statsd_prefix"` + // Prometheus metrics configuration + Prometheus PrometheusConfig `json:"prometheus"` + // Event System EventHandlers apidef.EventHandlerMetaConfig `json:"event_handlers"` EventTriggers map[apidef.TykEvent][]TykEventHandler `json:"event_trigers_defunct"` // Deprecated: Config.GetEventTriggers instead. 
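The `TYK_GW_PROMETHEUS_*` environment variables listed in PROMETHEUS_METRICS.md follow the usual envconfig naming: the `TYK_GW` prefix plus the upper-cased nested field names joined by underscores (hence `TYK_GW_PROMETHEUS_LISTENADDRESS`). A standalone sketch of that mapping, assuming an envconfig-based loader and using a trimmed copy of the new struct rather than Tyk's real config package:

```go
package main

import (
	"fmt"

	"github.com/kelseyhightower/envconfig"
)

// Trimmed copy of the new PrometheusConfig, for illustration only.
type PrometheusConfig struct {
	Enabled       bool   `json:"enabled"`
	ListenAddress string `json:"listen_address"`
	Path          string `json:"path"`
	MetricPrefix  string `json:"metric_prefix"`
}

type Config struct {
	Prometheus PrometheusConfig `json:"prometheus"`
}

func main() {
	// e.g. TYK_GW_PROMETHEUS_ENABLED=true TYK_GW_PROMETHEUS_LISTENADDRESS=":9090"
	var cfg Config
	if err := envconfig.Process("TYK_GW", &cfg); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", cfg.Prometheus)
}
```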
diff --git a/gateway/handler_success.go b/gateway/handler_success.go index 5ad74aadfb5..96092c51d0e 100644 --- a/gateway/handler_success.go +++ b/gateway/handler_success.go @@ -182,8 +182,22 @@ func (s *SuccessHandler) addTraceIDTag(reqCtx context.Context, tags []string) [] } func (s *SuccessHandler) RecordHit(r *http.Request, timing analytics.Latency, code int, responseCopy *http.Response, cached bool) { + // Record Prometheus metrics (independent of analytics) + if s.Gw.PrometheusMetrics != nil { + s.Gw.PrometheusMetrics.RecordRequest( + s.Spec.APIID, + s.Spec.Name, + r.Method, + code, + timing.Total, + timing.Upstream, + ) + } else { + log.Debug("PrometheusMetrics is nil, skipping metrics recording") + } if s.Spec.DoNotTrack || ctxGetDoNotTrack(r) { + log.Debug("Skipping RecordHit: DoNotTrack enabled") return } diff --git a/gateway/instrumentation_handlers.go b/gateway/instrumentation_handlers.go index 2d96f5fcb58..79d65167ab9 100644 --- a/gateway/instrumentation_handlers.go +++ b/gateway/instrumentation_handlers.go @@ -8,6 +8,7 @@ import ( "time" "github.com/gocraft/health" + "github.com/sirupsen/logrus" "github.com/TykTechnologies/tyk/cli" "github.com/TykTechnologies/tyk/request" @@ -48,6 +49,73 @@ func (gw *Gateway) setupInstrumentation() { gw.MonitorApplicationInstrumentation() } +// setupPrometheusInstrumentation initializes Prometheus metrics collection and HTTP endpoint +func (gw *Gateway) setupPrometheusInstrumentation() { + gwConfig := gw.GetConfig() + + if !gwConfig.Prometheus.Enabled { + return + } + + log.WithFields(logrus.Fields{ + "per_api_metrics": gwConfig.Prometheus.EnablePerAPIMetrics, + }).Info("Initializing Prometheus metrics...") + + gw.PrometheusMetrics = NewPrometheusMetrics(gw, gwConfig.Prometheus.MetricPrefix, gwConfig.Prometheus.EnablePerAPIMetrics) + + // Register optional Go and process collectors + gw.PrometheusMetrics.RegisterGoCollectors( + gwConfig.Prometheus.EnableGoCollector, + gwConfig.Prometheus.EnableProcessCollector, + ) + + // Add Prometheus sink to instrument stream + prometheusSink := NewPrometheusSink(gw.PrometheusMetrics) + instrument.AddSink(prometheusSink) + + // Start metrics collection + gw.PrometheusMetrics.StartMetricsCollection(gw.ctx) + + // Start Prometheus HTTP server + gw.startPrometheusServer() + + log.WithFields(logrus.Fields{ + "listen_address": gwConfig.Prometheus.ListenAddress, + "path": gwConfig.Prometheus.Path, + "prefix": gwConfig.Prometheus.MetricPrefix, + }).Info("Prometheus metrics endpoint started") +} + +// startPrometheusServer starts the HTTP server for Prometheus metrics endpoint +func (gw *Gateway) startPrometheusServer() { + gwConfig := gw.GetConfig() + + mux := http.NewServeMux() + mux.Handle(gwConfig.Prometheus.Path, gw.PrometheusMetrics.Handler()) + + server := &http.Server{ + Addr: gwConfig.Prometheus.ListenAddress, + Handler: mux, + ReadTimeout: 10 * time.Second, + WriteTimeout: 10 * time.Second, + IdleTimeout: 120 * time.Second, + } + + gw.prometheusServerMu.Lock() + gw.prometheusServer = server + gw.prometheusServerMu.Unlock() + + go func() { + log.WithFields(logrus.Fields{ + "address": gwConfig.Prometheus.ListenAddress, + }).Info("Starting Prometheus metrics server...") + + if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { + log.WithError(err).Fatal("Prometheus metrics server failed to start") + } + }() +} + // InstrumentationMW will set basic instrumentation events, variables and timers on API jobs func InstrumentationMW(next http.Handler) http.Handler { return 
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { diff --git a/gateway/instrumentation_prometheus.go b/gateway/instrumentation_prometheus.go new file mode 100644 index 00000000000..81e9b8f3f50 --- /dev/null +++ b/gateway/instrumentation_prometheus.go @@ -0,0 +1,462 @@ +package gateway + +import ( + "context" + "fmt" + "net/http" + "runtime" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" + "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/sirupsen/logrus" + + "github.com/TykTechnologies/tyk/storage" +) + +// PrometheusMetrics holds all Prometheus metric collectors for the Gateway +type PrometheusMetrics struct { + // System Metrics + goroutines prometheus.Gauge + memoryAlloc prometheus.Gauge + memoryTotal prometheus.Gauge + cpuUsage prometheus.Gauge + connections prometheus.Gauge + + // Gateway Metrics + apisLoaded prometheus.Gauge + policiesLoaded prometheus.Gauge + requestsTotal *prometheus.CounterVec + requestDuration *prometheus.HistogramVec + requestErrors *prometheus.CounterVec + + // Overall Gateway RED metrics (no api_id dimension) + gatewayRequestsTotal *prometheus.CounterVec + gatewayRequestDuration *prometheus.HistogramVec + gatewayRequestUpstreamLatency *prometheus.HistogramVec + gatewayRequestGatewayLatency *prometheus.HistogramVec + + // Redis Metrics + redisPoolSize *prometheus.GaugeVec + redisActiveConns *prometheus.GaugeVec + redisLatency prometheus.Histogram + + // Request Processing Metrics + queueDepth prometheus.Gauge + throughput prometheus.Gauge + middlewareExecTime *prometheus.HistogramVec + + registry *prometheus.Registry + gw *Gateway + collectionDone chan struct{} + + // Configuration + enablePerAPIMetrics bool + + // CPU tracking + lastCPUTime time.Duration + lastCheckTime time.Time +} + +// NewPrometheusMetrics creates and registers all Prometheus metrics +func NewPrometheusMetrics(gw *Gateway, prefix string, enablePerAPIMetrics bool) *PrometheusMetrics { + registry := prometheus.NewRegistry() + + pm := &PrometheusMetrics{ + gw: gw, + registry: registry, + collectionDone: make(chan struct{}), + lastCheckTime: time.Now(), + enablePerAPIMetrics: enablePerAPIMetrics, + } + + // System Metrics + pm.goroutines = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "system", + Name: "goroutines", + Help: "Number of active goroutines", + }) + + pm.memoryAlloc = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "system", + Name: "memory_alloc_bytes", + Help: "Bytes of allocated heap objects", + }) + + pm.memoryTotal = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "system", + Name: "memory_total_bytes", + Help: "Total bytes obtained from OS", + }) + + pm.cpuUsage = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "system", + Name: "cpu_usage_percent", + Help: "CPU usage percentage", + }) + + pm.connections = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "system", + Name: "open_connections", + Help: "Number of open connections", + }) + + // Gateway Metrics + pm.apisLoaded = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "apis_loaded", + Help: "Number of APIs currently loaded", + }) + + pm.policiesLoaded = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "policies_loaded", + Help: "Number of policies currently loaded", + }) + + // 
Per-API metrics (optional, controlled by config) + if enablePerAPIMetrics { + pm.requestsTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "requests_total", + Help: "Total number of requests processed per API (RED: Rate)", + }, + []string{"api_id", "method", "status_class"}, + ) + + pm.requestDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "request_duration_seconds", + Help: "Request duration in seconds per API (RED: Duration)", + Buckets: prometheus.DefBuckets, + }, + []string{"api_id", "method"}, + ) + + pm.requestErrors = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "request_errors_total", + Help: "Total number of request errors per API (RED: Errors)", + }, + []string{"api_id", "method", "status_class"}, + ) + } + + // Overall Gateway RED metrics (no api_id dimension) + pm.gatewayRequestsTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "http_requests_total", + Help: "Total number of HTTP requests processed by the gateway (RED: Rate)", + }, + []string{"method", "status_class"}, + ) + + pm.gatewayRequestDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "http_request_duration_seconds", + Help: "Total HTTP request duration in seconds (RED: Duration)", + Buckets: prometheus.DefBuckets, + }, + []string{"method", "status_class"}, + ) + + pm.gatewayRequestUpstreamLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "http_request_upstream_latency_seconds", + Help: "Upstream response latency in seconds (time waiting for upstream service)", + Buckets: prometheus.DefBuckets, + }, + []string{"method", "status_class"}, + ) + + pm.gatewayRequestGatewayLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: prefix, + Subsystem: "gateway", + Name: "http_request_gateway_latency_seconds", + Help: "Gateway processing latency in seconds (time spent in gateway middleware)", + Buckets: prometheus.DefBuckets, + }, + []string{"method", "status_class"}, + ) + + // Redis Metrics + pm.redisPoolSize = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "redis", + Name: "pool_size", + Help: "Redis connection pool size", + }, + []string{"type"}, + ) + + pm.redisActiveConns = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "redis", + Name: "active_connections", + Help: "Number of active Redis connections", + }, + []string{"type"}, + ) + + pm.redisLatency = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: prefix, + Subsystem: "redis", + Name: "operation_duration_seconds", + Help: "Redis operation latency in seconds", + Buckets: []float64{.001, .005, .01, .025, .05, .1, .25, .5, 1}, + }) + + // Request Processing Metrics + pm.queueDepth = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "processing", + Name: "queue_depth", + Help: "Current request queue depth", + }) + + pm.throughput = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: prefix, + Subsystem: "processing", + Name: "throughput_rps", + Help: "Current throughput in requests per second", + }) + + pm.middlewareExecTime = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: prefix, + Subsystem: "processing", + Name: 
"middleware_execution_seconds", + Help: "Middleware execution time in seconds", + Buckets: []float64{.001, .005, .01, .025, .05, .1, .25, .5, 1}, + }, + []string{"middleware_name", "api_id"}, + ) + + // Register metrics + collectors := []prometheus.Collector{ + pm.goroutines, + pm.memoryAlloc, + pm.memoryTotal, + pm.cpuUsage, + pm.connections, + pm.apisLoaded, + pm.policiesLoaded, + pm.gatewayRequestsTotal, + pm.gatewayRequestDuration, + pm.gatewayRequestUpstreamLatency, + pm.gatewayRequestGatewayLatency, + pm.redisPoolSize, + pm.redisActiveConns, + pm.redisLatency, + pm.queueDepth, + pm.throughput, + pm.middlewareExecTime, + } + + // Conditionally add per-API metrics + if enablePerAPIMetrics { + collectors = append(collectors, + pm.requestsTotal, + pm.requestDuration, + pm.requestErrors, + ) + } + + registry.MustRegister(collectors...) + + return pm +} + +// RegisterGoCollectors registers optional Go runtime and process collectors +func (pm *PrometheusMetrics) RegisterGoCollectors(enableGoCollector, enableProcessCollector bool) { + if enableGoCollector { + pm.registry.MustRegister(collectors.NewGoCollector()) + log.Debug("Registered Prometheus Go runtime collector") + } + + if enableProcessCollector { + pm.registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) + log.Debug("Registered Prometheus process collector") + } +} + +// UpdateSystemMetrics updates system-level metrics +func (pm *PrometheusMetrics) UpdateSystemMetrics() { + var m runtime.MemStats + runtime.ReadMemStats(&m) + + pm.goroutines.Set(float64(runtime.NumGoroutine())) + pm.memoryAlloc.Set(float64(m.Alloc)) + pm.memoryTotal.Set(float64(m.TotalAlloc)) + + // Calculate CPU usage based on GC pause times + now := time.Now() + elapsed := now.Sub(pm.lastCheckTime).Seconds() + + if elapsed > 0 { + // Calculate CPU time from GC pauses + currentCPUTime := time.Duration(m.PauseTotalNs) + cpuDelta := currentCPUTime - pm.lastCPUTime + + // Calculate CPU usage as percentage (GC pause time / elapsed time * 100) + // Note: This is a simplified metric showing GC CPU impact + // For production, consider using more comprehensive CPU tracking + cpuUsagePercent := (cpuDelta.Seconds() / elapsed) * 100.0 / float64(runtime.NumCPU()) + + // Cap at 100% to handle edge cases + if cpuUsagePercent > 100.0 { + cpuUsagePercent = 100.0 + } + + pm.cpuUsage.Set(cpuUsagePercent) + pm.lastCPUTime = currentCPUTime + pm.lastCheckTime = now + } + + if pm.gw.ConnectionWatcher != nil { + pm.connections.Set(float64(pm.gw.ConnectionWatcher.Count())) + } +} + +// UpdateGatewayMetrics updates gateway-specific metrics +func (pm *PrometheusMetrics) UpdateGatewayMetrics() { + pm.gw.apisMu.RLock() + pm.apisLoaded.Set(float64(len(pm.gw.apisByID))) + pm.gw.apisMu.RUnlock() + + pm.gw.policiesMu.RLock() + pm.policiesLoaded.Set(float64(len(pm.gw.policiesByID))) + pm.gw.policiesMu.RUnlock() + + // Throughput from GlobalRate + pm.throughput.Set(float64(GlobalRate.Rate())) +} + +// UpdateRedisMetrics updates Redis connection metrics +func (pm *PrometheusMetrics) UpdateRedisMetrics() { + if pm.gw.StorageConnectionHandler != nil { + for _, connType := range []string{storage.DefaultConn, storage.CacheConn, storage.AnalyticsConn} { + stats := pm.gw.StorageConnectionHandler.GetRedisStats(connType) + if stats != nil { + labels := prometheus.Labels{"type": connType} + pm.redisPoolSize.With(labels).Set(float64(stats.TotalConns)) + // Active connections = Total - Idle + activeConns := stats.TotalConns - stats.IdleConns + 
pm.redisActiveConns.With(labels).Set(float64(activeConns)) + } + } + } +} + +// RecordRequest records request metrics (called from middleware/handler) +func (pm *PrometheusMetrics) RecordRequest(apiID, apiName, method string, statusCode int, totalNs, upstreamNs int64) { + // Group status codes to reduce cardinality (2xx, 3xx, 4xx, 5xx) + statusClass := fmt.Sprintf("%dxx", statusCode/100) + + // Convert nanoseconds to seconds for Prometheus + totalSeconds := float64(totalNs) / 1e9 + upstreamSeconds := float64(upstreamNs) / 1e9 + gatewaySeconds := float64(totalNs-upstreamNs) / 1e9 + + // Per-API metrics (only if enabled) + if pm.enablePerAPIMetrics { + labels := prometheus.Labels{ + "api_id": apiID, + "method": method, + "status_class": statusClass, + } + pm.requestsTotal.With(labels).Inc() + + durLabels := prometheus.Labels{ + "api_id": apiID, + "method": method, + } + pm.requestDuration.With(durLabels).Observe(totalSeconds) + + // Record errors with status_class instead of error_type + if statusCode >= 400 { + errLabels := prometheus.Labels{ + "api_id": apiID, + "method": method, + "status_class": statusClass, + } + pm.requestErrors.With(errLabels).Inc() + } + } + + // Overall Gateway RED metrics (always recorded) + gatewayLabels := prometheus.Labels{ + "method": method, + "status_class": statusClass, + } + pm.gatewayRequestsTotal.With(gatewayLabels).Inc() + pm.gatewayRequestDuration.With(gatewayLabels).Observe(totalSeconds) + pm.gatewayRequestUpstreamLatency.With(gatewayLabels).Observe(upstreamSeconds) + pm.gatewayRequestGatewayLatency.With(gatewayLabels).Observe(gatewaySeconds) +} + +// RecordMiddlewareExecution records middleware execution time +func (pm *PrometheusMetrics) RecordMiddlewareExecution(middlewareName, apiID string, duration float64) { + pm.middlewareExecTime.With(prometheus.Labels{ + "middleware_name": middlewareName, + "api_id": apiID, + }).Observe(duration) +} + +// Handler returns the HTTP handler for metrics endpoint +func (pm *PrometheusMetrics) Handler() http.Handler { + return promhttp.HandlerFor(pm.registry, promhttp.HandlerOpts{}) +} + +// StartMetricsCollection starts background metrics collection +func (pm *PrometheusMetrics) StartMetricsCollection(ctx context.Context) { + ticker := time.NewTicker(5 * time.Second) + + go func() { + defer ticker.Stop() + defer close(pm.collectionDone) + for { + select { + case <-ctx.Done(): + log.WithFields(logrus.Fields{ + "component": "prometheus", + }).Info("Stopping Prometheus metrics collection") + return + case <-ticker.C: + pm.UpdateSystemMetrics() + pm.UpdateGatewayMetrics() + pm.UpdateRedisMetrics() + } + } + }() +} + +// WaitForShutdown waits for the metrics collection goroutine to finish +func (pm *PrometheusMetrics) WaitForShutdown() { + if pm.collectionDone != nil { + <-pm.collectionDone + } +} diff --git a/gateway/instrumentation_prometheus_sink.go b/gateway/instrumentation_prometheus_sink.go new file mode 100644 index 00000000000..0c8532d98b6 --- /dev/null +++ b/gateway/instrumentation_prometheus_sink.go @@ -0,0 +1,56 @@ +package gateway + +import ( + "github.com/gocraft/health" +) + +// PrometheusSink implements health.Sink interface for Prometheus +type PrometheusSink struct { + metrics *PrometheusMetrics +} + +// NewPrometheusSink creates a new Prometheus sink +func NewPrometheusSink(metrics *PrometheusMetrics) *PrometheusSink { + return &PrometheusSink{ + metrics: metrics, + } +} + +// EmitEvent converts events to Prometheus metrics +func (s *PrometheusSink) EmitEvent(job, event string, kvs map[string]string) { 
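+	// NOTE: of the five health.Sink callbacks on this type, only EmitTiming currently
+	// feeds a Prometheus series (middleware execution time); EmitEvent and the other
+	// callbacks are deliberate no-ops so HTTP traffic is measured once, via RecordRequest.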
+ // Events are tracked through specific metric collectors + // Can be extended if specific event tracking is needed +} + +// EmitEventErr tracks errors in Prometheus +func (s *PrometheusSink) EmitEventErr(job, event string, err error, kvs map[string]string) { + // Error tracking is handled by RecordRequest for HTTP errors + // Can be extended for non-HTTP errors if needed +} + +// EmitTiming converts timing events to histograms +func (s *PrometheusSink) EmitTiming(job, event string, nanos int64, kvs map[string]string) { + seconds := float64(nanos) / 1e9 + + // Track middleware execution timing + if job == "MiddlewareCall" { + if mwName, ok := kvs["mw_name"]; ok { + apiID := kvs["api_id"] + s.metrics.RecordMiddlewareExecution(mwName, apiID, seconds) + } + } + + // Can be extended for other timing events +} + +// EmitGauge handles gauge metrics +func (s *PrometheusSink) EmitGauge(job, event string, value float64, kvs map[string]string) { + // Gauge metrics are updated periodically by UpdateSystemMetrics/UpdateGatewayMetrics + // Can be extended if specific gauge tracking is needed beyond the existing collectors +} + +// EmitComplete tracks completion status +func (s *PrometheusSink) EmitComplete(job string, status health.CompletionStatus, nanos int64, kvs map[string]string) { + // Completion status is tracked through RecordRequest + // Can be extended if additional completion tracking is needed +} diff --git a/gateway/server.go b/gateway/server.go index fd7551f3c4a..00821d566ec 100644 --- a/gateway/server.go +++ b/gateway/server.go @@ -132,6 +132,9 @@ type Gateway struct { HostCheckerClient *http.Client TracerProvider otel.TracerProvider NewRelicApplication *newrelic.Application + PrometheusMetrics *PrometheusMetrics + prometheusServer *http.Server + prometheusServerMu sync.RWMutex keyGen DefaultKeyGenerator @@ -1502,6 +1505,7 @@ func (gw *Gateway) initSystem() error { config.Global = gw.GetConfig gw.getHostDetails() gw.setupInstrumentation() + gw.setupPrometheusInstrumentation() // cleanIdleMemConnProviders checks memconn.Provider (a part of internal API handling) // instances periodically and deletes idle items, closes net.Listener instances to @@ -2241,6 +2245,29 @@ func (gw *Gateway) gracefulShutdown(ctx context.Context) error { <-serverShutdownDone } + // Shutdown Prometheus metrics collection and server if enabled + if gw.PrometheusMetrics != nil { + mainLog.Info("Waiting for Prometheus metrics collection to stop...") + gw.PrometheusMetrics.WaitForShutdown() + mainLog.Info("Prometheus metrics collection stopped") + } + + gw.prometheusServerMu.RLock() + server := gw.prometheusServer + gw.prometheusServerMu.RUnlock() + + if server != nil { + mainLog.Info("Shutting down Prometheus metrics server...") + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := server.Shutdown(shutdownCtx); err != nil { + mainLog.WithError(err).Error("Error shutting down Prometheus server") + errChan <- err + } else { + mainLog.Info("Prometheus metrics server shut down gracefully") + } + } + // Close all cache stores and other resources mainLog.Info("Closing cache stores and other resources...") gw.cacheClose() diff --git a/generate-traffic.sh b/generate-traffic.sh new file mode 100755 index 00000000000..d4bededa40c --- /dev/null +++ b/generate-traffic.sh @@ -0,0 +1,257 @@ +#!/bin/bash + +# Traffic Generator for Tyk Quickstart Endpoint +# Usage: ./generate-traffic.sh [OPTIONS] + +set -e + +# Default configuration 
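+# Each default below can be overridden via the matching environment variable or
+# command-line flag (see usage below). Note that DELAY is only honoured in
+# sequential mode (CONCURRENT=1); with multiple workers, pacing is governed by
+# the concurrency limit instead.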
+ENDPOINT="${ENDPOINT:-http://localhost:9009/quickstart/}" +REQUESTS="${REQUESTS:-100}" +CONCURRENT="${CONCURRENT:-1}" +DELAY="${DELAY:-0.1}" +VERBOSE="${VERBOSE:-false}" + +# Color output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Display usage +usage() { + cat << EOF +Traffic Generator for Tyk Gateway + +Usage: $0 [OPTIONS] + +Options: + -e, --endpoint URL Target endpoint (default: http://localhost:9009/quickstart/) + -n, --requests NUM Number of requests to make (default: 100) + -c, --concurrent NUM Number of concurrent requests (default: 1) + -d, --delay SECONDS Delay between requests in seconds (default: 0.1) + -v, --verbose Enable verbose output + -h, --help Show this help message + +Environment Variables: + ENDPOINT Same as --endpoint + REQUESTS Same as --requests + CONCURRENT Same as --concurrent + DELAY Same as --delay + VERBOSE Set to 'true' for verbose output + +Examples: + # Generate 100 requests with default settings + $0 + + # Generate 1000 requests with 10 concurrent connections + $0 -n 1000 -c 10 + + # Custom endpoint with verbose output + $0 -e http://localhost:8080/api/test -v + + # High traffic simulation (1000 requests, 50 concurrent, no delay) + $0 -n 1000 -c 50 -d 0 + +EOF + exit 0 +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -e|--endpoint) + ENDPOINT="$2" + shift 2 + ;; + -n|--requests) + REQUESTS="$2" + shift 2 + ;; + -c|--concurrent) + CONCURRENT="$2" + shift 2 + ;; + -d|--delay) + DELAY="$2" + shift 2 + ;; + -v|--verbose) + VERBOSE=true + shift + ;; + -h|--help) + usage + ;; + *) + echo -e "${RED}Error: Unknown option $1${NC}" + usage + ;; + esac +done + +# Validate inputs +if ! [[ "$REQUESTS" =~ ^[0-9]+$ ]] || [ "$REQUESTS" -lt 1 ]; then + echo -e "${RED}Error: REQUESTS must be a positive integer${NC}" + exit 1 +fi + +if ! 
[[ "$CONCURRENT" =~ ^[0-9]+$ ]] || [ "$CONCURRENT" -lt 1 ]; then + echo -e "${RED}Error: CONCURRENT must be a positive integer${NC}" + exit 1 +fi + +# Statistics +SUCCESS_COUNT=0 +FAIL_COUNT=0 +TOTAL_TIME=0 +START_TIME=$(date +%s) + +# Temporary file for parallel processing +TEMP_DIR=$(mktemp -d) +trap "rm -rf $TEMP_DIR" EXIT + +# Function to make a single request +make_request() { + local request_id=$1 + local start=$(date +%s.%N) + + if [ "$VERBOSE" = true ]; then + response=$(curl -s -w "\n%{http_code}" -X GET "$ENDPOINT" 2>&1) + status_code=$(echo "$response" | tail -n1) + body=$(echo "$response" | sed '$d') + + end=$(date +%s.%N) + duration=$(echo "$end - $start" | bc) + + echo -e "${BLUE}[Request $request_id]${NC} Status: $status_code | Duration: ${duration}s" + if [ ${#body} -gt 0 ] && [ ${#body} -lt 200 ]; then + echo -e "${YELLOW}Response: $body${NC}" + fi + else + status_code=$(curl -s -o /dev/null -w "%{http_code}" -X GET "$ENDPOINT" 2>&1) + end=$(date +%s.%N) + duration=$(echo "$end - $start" | bc) + fi + + # Write result to temp file + echo "$status_code|$duration" > "$TEMP_DIR/$request_id" +} + +# Print configuration +echo -e "${GREEN}=== Tyk Traffic Generator ===${NC}" +echo -e "Endpoint: ${BLUE}$ENDPOINT${NC}" +echo -e "Requests: ${BLUE}$REQUESTS${NC}" +echo -e "Concurrent: ${BLUE}$CONCURRENT${NC}" +echo -e "Delay: ${BLUE}${DELAY}s${NC}" +echo -e "Verbose: ${BLUE}$VERBOSE${NC}" +echo "" + +# Generate traffic +echo -e "${YELLOW}Generating traffic...${NC}" +echo "" + +for ((i=1; i<=REQUESTS; i++)); do + # Launch request in background if concurrent + if [ "$CONCURRENT" -gt 1 ]; then + # Wait if we've reached concurrent limit + while [ $(jobs -r | wc -l) -ge "$CONCURRENT" ]; do + sleep 0.01 + done + make_request $i & + else + make_request $i + fi + + # Progress indicator (every 10%) + if [ $((i % (REQUESTS / 10 + 1))) -eq 0 ] && [ "$VERBOSE" = false ]; then + progress=$((i * 100 / REQUESTS)) + echo -ne "\rProgress: ${progress}% ($i/$REQUESTS)" + fi + + # Delay between requests + if [ "$DELAY" != "0" ] && [ "$CONCURRENT" -eq 1 ]; then + sleep "$DELAY" + fi +done + +# Wait for all background jobs to complete +wait + +if [ "$VERBOSE" = false ]; then + echo -ne "\rProgress: 100% ($REQUESTS/$REQUESTS)\n" +fi + +echo "" +echo -e "${YELLOW}Processing results...${NC}" + +# Collect statistics from temp files +TOTAL_DURATION=0 +MIN_DURATION=999999 +MAX_DURATION=0 + +for file in "$TEMP_DIR"/*; do + if [ -f "$file" ]; then + IFS='|' read -r status duration < "$file" + + if [ "$status" = "200" ]; then + ((SUCCESS_COUNT++)) + else + ((FAIL_COUNT++)) + fi + + TOTAL_DURATION=$(echo "$TOTAL_DURATION + $duration" | bc) + + if [ $(echo "$duration < $MIN_DURATION" | bc) -eq 1 ]; then + MIN_DURATION=$duration + fi + + if [ $(echo "$duration > $MAX_DURATION" | bc) -eq 1 ]; then + MAX_DURATION=$duration + fi + fi +done + +END_TIME=$(date +%s) +ELAPSED=$((END_TIME - START_TIME)) + +# Calculate average +if [ "$SUCCESS_COUNT" -gt 0 ]; then + AVG_DURATION=$(echo "scale=4; $TOTAL_DURATION / $SUCCESS_COUNT" | bc) +else + AVG_DURATION=0 +fi + +# Calculate requests per second +if [ "$ELAPSED" -gt 0 ]; then + RPS=$(echo "scale=2; $REQUESTS / $ELAPSED" | bc) +else + RPS=0 +fi + +# Display results +echo "" +echo -e "${GREEN}=== Results ===${NC}" +echo -e "Total Requests: ${BLUE}$REQUESTS${NC}" +echo -e "Successful: ${GREEN}$SUCCESS_COUNT${NC}" +echo -e "Failed: ${RED}$FAIL_COUNT${NC}" +echo -e "Success Rate: ${BLUE}$(echo "scale=2; $SUCCESS_COUNT * 100 / $REQUESTS" | bc)%${NC}" +echo "" +echo -e "${GREEN}=== 
Performance ===${NC}" +echo -e "Total Time: ${BLUE}${ELAPSED}s${NC}" +echo -e "Requests/sec: ${BLUE}$RPS${NC}" +if [ "$SUCCESS_COUNT" -gt 0 ]; then + echo -e "Avg Duration: ${BLUE}${AVG_DURATION}s${NC}" + echo -e "Min Duration: ${BLUE}${MIN_DURATION}s${NC}" + echo -e "Max Duration: ${BLUE}${MAX_DURATION}s${NC}" +fi +echo "" + +# Exit with error if any requests failed +if [ "$FAIL_COUNT" -gt 0 ]; then + echo -e "${RED}Warning: Some requests failed!${NC}" + exit 1 +fi + +echo -e "${GREEN}Traffic generation completed successfully!${NC}" diff --git a/internal/redis/redis.go b/internal/redis/redis.go index 2a3feb0afd5..37f3e1ac292 100644 --- a/internal/redis/redis.go +++ b/internal/redis/redis.go @@ -38,4 +38,6 @@ type ( IntCmd = redis.IntCmd StringCmd = redis.StringCmd StringSliceCmd = redis.StringSliceCmd + + PoolStats = redis.PoolStats ) diff --git a/storage/connection_handler.go b/storage/connection_handler.go index de22cd7d05b..ca11addd1f4 100644 --- a/storage/connection_handler.go +++ b/storage/connection_handler.go @@ -294,3 +294,35 @@ func getExponentialBackoff() *backoff.ExponentialBackOff { return exponentialBackoff } + +// GetRedisStats returns Redis connection pool statistics for the specified connection type +func (rc *ConnectionHandler) GetRedisStats(connType string) *RedisPoolStats { + rc.connectionsMu.RLock() + defer rc.connectionsMu.RUnlock() + + conn, ok := rc.connections[connType] + if !ok || conn == nil { + return nil + } + + // Try to get the connector as a RedisCluster + var redisCluster *RedisCluster + if ok := conn.As(&redisCluster); ok && redisCluster != nil { + if stats := redisCluster.PoolStats(); stats != nil { + return &RedisPoolStats{ + TotalConns: stats.TotalConns, + IdleConns: stats.IdleConns, + StaleConns: stats.StaleConns, + } + } + } + + return nil +} + +// RedisPoolStats holds simplified Redis pool statistics for Prometheus metrics +type RedisPoolStats struct { + TotalConns uint32 + IdleConns uint32 + StaleConns uint32 +} diff --git a/storage/redis_cluster.go b/storage/redis_cluster.go index eaaf3f22a4c..5fcd75c42d4 100644 --- a/storage/redis_cluster.go +++ b/storage/redis_cluster.go @@ -1086,3 +1086,15 @@ func (r *RedisCluster) ScanKeys(pattern string) ([]string, error) { return storage.Keys(context.Background(), pattern) } + +// PoolStats returns the connection pool statistics for this Redis cluster +func (r *RedisCluster) PoolStats() *redis.PoolStats { + client, err := r.Client() + if err != nil { + log.WithError(err).Debug("Failed to get Redis client for pool stats") + return nil + } + + stats := client.PoolStats() + return stats +} diff --git a/tyk.conf.example b/tyk.conf.example index 2ee6c6647f7..2df9dc236ff 100644 --- a/tyk.conf.example +++ b/tyk.conf.example @@ -21,6 +21,15 @@ "type": "", "ignored_ips": [] }, + "prometheus": { + "enabled": false, + "listen_address": ":9090", + "path": "/metrics", + "metric_prefix": "tyk_gateway", + "enable_go_collector": true, + "enable_process_collector": true, + "enable_per_api_metrics": false + }, "dns_cache": { "enabled": false, "ttl": 3600,
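
The `gateway/instrumentation_handlers.go` hunk that defines `setupPrometheusInstrumentation()` (called from `initSystem()` above) is not shown here, so the following is only a minimal sketch of how the pieces added by this patch could be wired together. The `PrometheusConfig` field names, the `NewPrometheusMetrics` signature, the `instrument` health-stream variable and `gw.ctx` are assumptions; `Handler`, `StartMetricsCollection`, `RegisterGoCollectors`, `NewPrometheusSink` and the `prometheusServer`/`prometheusServerMu` fields all appear in the diff itself.

```go
// Illustrative sketch only, not the actual hunk from instrumentation_handlers.go.
// Assumes net/http and promhttp are already imported by package gateway.
func (gw *Gateway) setupPrometheusInstrumentation() {
	conf := gw.GetConfig().Prometheus // assumed field name on config.Config
	if !conf.Enabled {
		return
	}

	pm := NewPrometheusMetrics(gw, conf.MetricPrefix, conf.EnablePerAPIMetrics) // assumed signature
	pm.RegisterGoCollectors(conf.EnableGoCollector, conf.EnableProcessCollector)
	gw.PrometheusMetrics = pm

	// Bridge gocraft/health events (e.g. MiddlewareCall timings) into Prometheus.
	instrument.AddSink(NewPrometheusSink(pm)) // assumes instrument is the shared health stream

	// Periodically refresh system, gateway and Redis gauges until the context is cancelled.
	pm.StartMetricsCollection(gw.ctx) // assumed context source

	mux := http.NewServeMux()
	mux.Handle(conf.Path, pm.Handler())

	srv := &http.Server{Addr: conf.ListenAddress, Handler: mux}
	gw.prometheusServerMu.Lock()
	gw.prometheusServer = srv
	gw.prometheusServerMu.Unlock()

	// Serve /metrics on its own listener; gracefulShutdown stops it via srv.Shutdown.
	go func() {
		if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			mainLog.WithError(err).Error("Prometheus metrics server exited")
		}
	}()
}
```

With the default `metric_prefix` of `tyk_gateway`, the gateway-level RED series defined in this patch are exported as `tyk_gateway_gateway_http_requests_total`, `tyk_gateway_gateway_http_request_duration_seconds`, `tyk_gateway_gateway_http_request_upstream_latency_seconds` and `tyk_gateway_gateway_http_request_gateway_latency_seconds` (client_golang joins namespace, subsystem and name with underscores).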