feat: add tracing using otel

This commit is contained in:
Alexis Couvreur
2025-11-22 10:43:59 -05:00
parent dc5fb22b40
commit 1de18d9a7d
12 changed files with 547 additions and 33 deletions

View File

@@ -4,6 +4,7 @@
- [Configuration](/configuration)
- [Strategies](/strategies)
- [Themes](/themes)
- [Tracing](/tracing)
- [Versioning](/versioning)
- **Providers**
- [Overview](/providers/overview)

212
docs/tracing.md Normal file
View File

@@ -0,0 +1,212 @@
# Observability with OpenTelemetry
Sablier includes built-in support for OpenTelemetry, providing comprehensive observability through distributed tracing and metrics.
## Configuration
Enable OpenTelemetry by setting the following configuration:
### YAML Configuration
```yaml
tracing:
enabled: true
endpoint: localhost:4317 # OTLP gRPC endpoint
```
### Environment Variables
```bash
export TRACING_ENABLED=true
export TRACING_ENDPOINT=localhost:4317
```
### Command-Line Flags
```bash
sablier start --tracing.enabled=true --tracing.endpoint=localhost:4317
```
## Traces
Sablier automatically instruments:
- **HTTP requests** - All incoming requests to the Sablier server
- **Provider operations** - Instance start, stop, inspect, list, and group operations
- **Session management** - Session creation and lifecycle
### Trace Attributes
Each trace includes relevant attributes such as:
- `instance` - Instance name
- `provider` - Provider type (docker, kubernetes, etc.)
- `strategy` - Scaling strategy (dynamic, blocking)
- `http.method` - HTTP method
- `http.route` - HTTP route
- `http.status_code` - HTTP response status code
## Metrics
Sablier exposes the following metrics:
### Counters
- `sablier.sessions.total` - Total number of sessions created
- Labels: `strategy`
- `sablier.instances.started` - Total number of instances started
- Labels: `provider`
- `sablier.instances.stopped` - Total number of instances stopped
- Labels: `provider`
### Gauges
- `sablier.sessions.active` - Number of currently active sessions
- Labels: `strategy`
### Histograms
- `sablier.requests.duration` - Request duration in milliseconds
- Labels: `strategy`, `status`
## Using with Jaeger
Example using Jaeger all-in-one:
```bash
# Start Jaeger
docker run -d --name jaeger \
-p 16686:16686 \
-p 4317:4317 \
jaegertracing/all-in-one:latest
# Start Sablier with tracing
sablier start --tracing.enabled=true --tracing.endpoint=localhost:4317
# View traces at http://localhost:16686
```
## Using with Prometheus + Grafana
Example docker-compose setup:
```yaml
version: '3'
services:
otel-collector:
image: otel/opentelemetry-collector:latest
command: ["--config=/etc/otel-config.yaml"]
volumes:
- ./otel-config.yaml:/etc/otel-config.yaml
ports:
- "4317:4317"
- "8889:8889"
prometheus:
image: prom/prometheus:latest
volumes:
- ./prometheus.yaml:/etc/prometheus/prometheus.yml
ports:
- "9090:9090"
grafana:
image: grafana/grafana:latest
ports:
- "3000:3000"
sablier:
image: sablierapp/sablier:latest
environment:
- TRACING_ENABLED=true
- TRACING_ENDPOINT=otel-collector:4317
volumes:
- /var/run/docker.sock:/var/run/docker.sock
```
Example OpenTelemetry Collector configuration (`otel-config.yaml`):
```yaml
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318
processors:
batch:
exporters:
prometheus:
endpoint: "0.0.0.0:8889"
otlp:
endpoint: jaeger:4317
tls:
insecure: true
service:
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [otlp]
metrics:
receivers: [otlp]
processors: [batch]
exporters: [prometheus]
```
Example Prometheus configuration (`prometheus.yaml`):
```yaml
global:
scrape_interval: 15s
scrape_configs:
- job_name: 'otel-collector'
static_configs:
- targets: ['otel-collector:8889']
```
## Custom Instrumentation
If you're building custom integrations, you can use the global tracer:
```go
import (
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
)
tracer := otel.Tracer("my-component")
ctx, span := tracer.Start(ctx, "operation-name")
defer span.End()
span.SetAttributes(
attribute.String("key", "value"),
)
// Your code here
```
## Troubleshooting
### Tracing not working
1. Verify the OpenTelemetry collector is running and accessible
2. Check the endpoint configuration matches your collector
3. Ensure firewall rules allow connections to port 4317
4. Check Sablier logs for tracing initialization errors
### High memory usage
If you experience high memory usage:
- Reduce the batch size in the collector
- Increase the export interval
- Filter traces and metrics at the collector level
### Missing traces
- Ensure `tracing.enabled=true` is set
- Verify the collector is configured to receive OTLP data
- Check that the correct port (4317 for gRPC) is being used

29
go.mod
View File

@@ -29,7 +29,17 @@ require (
k8s.io/client-go v0.34.2
)
require github.com/containers/image/v5 v5.36.2
require (
github.com/containers/image/v5 v5.36.2
go.opentelemetry.io/otel v1.38.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0
go.opentelemetry.io/otel/metric v1.38.0
go.opentelemetry.io/otel/sdk v1.38.0
go.opentelemetry.io/otel/sdk/metric v1.38.0
go.opentelemetry.io/otel/trace v1.38.0
)
require (
dario.cat/mergo v1.0.2 // indirect
@@ -51,6 +61,7 @@ require (
github.com/bytedance/sonic v1.14.0 // indirect
github.com/bytedance/sonic/loader v0.3.0 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
github.com/charmbracelet/bubbles v0.20.0 // indirect
github.com/charmbracelet/bubbletea v1.3.3 // indirect
github.com/charmbracelet/lipgloss v1.0.0 // indirect
@@ -89,10 +100,10 @@ require (
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect
github.com/fxamacker/cbor/v2 v2.9.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.8 // indirect
github.com/gabriel-vasile/mimetype v1.4.10 // indirect
github.com/gin-contrib/sse v1.1.0 // indirect
github.com/go-delve/delve v1.24.0 // indirect
github.com/go-jose/go-jose/v4 v4.0.5 // indirect
github.com/go-jose/go-jose/v4 v4.1.1 // indirect
github.com/go-json-experiment/json v0.0.0-20250213060926-925ba3f173fa // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
@@ -116,6 +127,7 @@ require (
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/mux v1.8.1 // indirect
github.com/gorilla/schema v1.4.1 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hpcloud/tail v1.0.0 // indirect
@@ -211,10 +223,9 @@ require (
github.com/yusufpapurcu/wmi v1.2.4 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin v0.63.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect
go.opentelemetry.io/otel v1.35.0 // indirect
go.opentelemetry.io/otel/metric v1.35.0 // indirect
go.opentelemetry.io/otel/trace v1.35.0 // indirect
go.opentelemetry.io/proto/otlp v1.7.1 // indirect
go.yaml.in/yaml/v2 v2.4.2 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/arch v0.20.0 // indirect
@@ -228,9 +239,9 @@ require (
golang.org/x/text v0.30.0 // indirect
golang.org/x/time v0.11.0 // indirect
golang.org/x/tools v0.37.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250313205543-e70fdf4c4cb4 // indirect
google.golang.org/grpc v1.72.2 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect
google.golang.org/grpc v1.75.0 // indirect
google.golang.org/protobuf v1.36.9 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/fsnotify.v1 v1.4.7 // indirect

60
go.sum
View File

@@ -48,6 +48,8 @@ github.com/bytedance/sonic/loader v0.3.0 h1:dskwH8edlzNMctoruo8FPTJDF3vLtDT0sXZw
github.com/bytedance/sonic/loader v0.3.0/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
@@ -164,14 +166,16 @@ github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sa
github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM=
github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8=
github.com/gabriel-vasile/mimetype v1.4.10 h1:zyueNbySn/z8mJZHLt6IPw0KoZsiQNszIpU+bX4+ZK0=
github.com/gabriel-vasile/mimetype v1.4.10/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w=
github.com/gin-contrib/sse v1.1.0/go.mod h1:hxRZ5gVpWMT7Z0B0gSNYqqsSCNIJMjzvm6fqCz9vjwM=
github.com/gin-gonic/gin v1.11.0 h1:OW/6PLjyusp2PPXtyxKHU0RbX6I/l28FTdDlae5ueWk=
github.com/gin-gonic/gin v1.11.0/go.mod h1:+iq/FyxlGzII0KHiBGjuNn4UNENUlKbGlNmc+W50Dls=
github.com/go-delve/delve v1.24.0 h1:M1auuI7kyfXZm5LMDQEqhqr4koKWOzGKhCgwMxsLQfo=
github.com/go-delve/delve v1.24.0/go.mod h1:yNWXOuo4yslMOOj7O8gIRrf/trDBrFy5ZXwJL4ZzOos=
github.com/go-jose/go-jose/v4 v4.0.5 h1:M6T8+mKZl/+fNNuFHvGIzDz7BTLQPIounk/b9dw3AaE=
github.com/go-jose/go-jose/v4 v4.0.5/go.mod h1:s3P1lRrkT8igV8D9OjyL4WRyHvjB6a4JSllnOrmmBOA=
github.com/go-jose/go-jose/v4 v4.1.1 h1:JYhSgy4mXXzAdF3nUx3ygx347LRXJRrpgyU3adRmkAI=
github.com/go-jose/go-jose/v4 v4.1.1/go.mod h1:BdsZGqgdO3b6tTc6LSE56wcDbMMLuPsw5d4ZD5f94kA=
github.com/go-json-experiment/json v0.0.0-20250213060926-925ba3f173fa h1:Rpu6sKAzIeSWBkrFHD52g8yipagcPbY2Lmm70NL1Gzc=
github.com/go-json-experiment/json v0.0.0-20250213060926-925ba3f173fa/go.mod h1:TiCD2a1pcmjd7YnhGH0f/zKNcCD06B029pHhzV23c2M=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
@@ -254,8 +258,8 @@ github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/gorilla/schema v1.4.1 h1:jUg5hUjCSDZpNGLuXQOgIWGdlgrIdYvgQ0wZtdK1M3E=
github.com/gorilla/schema v1.4.1/go.mod h1:Dg5SSm5PV60mhF2NFaTV1xuYYj8tV8NOPRo4FggUMnM=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 h1:VNqngBF40hVlDloBruUehVYC3ArSgIyScOAyMRqBxRg=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1/go.mod h1:RBRO7fro65R6tjKzYgLAFo0t1QEXY1Dp+i/bvpRiqiQ=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
@@ -535,24 +539,30 @@ go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin v0.63.0 h1:5kSIJ0y8ckZZKoDhZHdVtcyjVi6rXyAwyaR8mp4zLbg=
go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin v0.63.0/go.mod h1:i+fIMHvcSQtsIY82/xgiVWRklrNt/O6QriHLjzGeY+s=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ=
go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ=
go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.32.0 h1:IJFEoHiytixx8cMiVAO+GmHR6Frwu+u5Ur8njpFO6Ac=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.32.0/go.mod h1:3rHrKNtLIoS0oZwkY2vxi+oJcwFRWdtUyRII+so45p8=
go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0 h1:vl9obrcoWVKp/lwl8tRE33853I8Xru9HFbw/skNeLs8=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0/go.mod h1:GAXRxmLJcVM3u22IjTg74zWBrRCKq8BnOqUVLodpcpw=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.35.0 h1:xJ2qHD0C1BeYVTLLR9sX12+Qb95kfeD/byKj6Ky1pXg=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.35.0/go.mod h1:u5BF1xyjstDowA1R5QAO9JHzqK+ublenEW/dyqTjBVk=
go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M=
go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE=
go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY=
go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg=
go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o=
go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w=
go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs=
go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc=
go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E=
go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg=
go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM=
go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA=
go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4=
go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE=
go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
@@ -693,22 +703,24 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb h1:p31xT4yrYrSM/G4Sn2+TNUkVhFCbG9y8itM2S6Th950=
google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:jbe3Bkdp+Dh2IrslsFCklNhweNTBgSYanP1UXhJDhKg=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250313205543-e70fdf4c4cb4 h1:iK2jbkWL86DXjEx0qiHcRE9dE4/Ahua5k6V8OWFb//c=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250313205543-e70fdf4c4cb4/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 h1:BIRfGDEjiHRrk0QKZe3Xv2ieMhtgRGeLcZQ0mIVn4EY=
google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5/go.mod h1:j3QtIyytwqGr1JUDtYXwtMXWPKsEa5LtzIFN1Wn5WvE=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 h1:eaY8u2EuxbRv7c3NiGK0/NedzVsCcV6hDuU5qPX5EGE=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5/go.mod h1:M4/wBTSeyLxupu3W3tJtOgB14jILAS/XWPSSa3TAlJc=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
google.golang.org/grpc v1.72.2 h1:TdbGzwb82ty4OusHWepvFWGLgIbNo1/SUynEN0ssqv8=
google.golang.org/grpc v1.72.2/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM=
google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4=
google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=

View File

@@ -11,6 +11,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/sablierapp/sablier/internal/api"
"github.com/sablierapp/sablier/pkg/config"
"go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin"
)
func setupRouter(ctx context.Context, logger *slog.Logger, serverConf config.Server, s *api.ServeStrategy) *gin.Engine {
@@ -18,6 +19,7 @@ func setupRouter(ctx context.Context, logger *slog.Logger, serverConf config.Ser
r.Use(StructuredLogger(logger))
r.Use(gin.Recovery())
r.Use(otelgin.Middleware("sablier"))
registerRoutes(ctx, r, serverConf, s)

View File

@@ -7,6 +7,7 @@ type Config struct {
Sessions Sessions
Logging Logging
Strategy Strategy
Tracing Tracing
}
func NewConfig() Config {
@@ -17,5 +18,6 @@ func NewConfig() Config {
Sessions: NewSessionsConfig(),
Logging: NewLoggingConfig(),
Strategy: NewStrategyConfig(),
Tracing: NewTracingConfig(),
}
}

13
pkg/config/tracing.go Normal file
View File

@@ -0,0 +1,13 @@
package config
type Tracing struct {
Enabled bool `mapstructure:"ENABLED" yaml:"enabled,omitempty" default:"false"`
Endpoint string `mapstructure:"ENDPOINT" yaml:"endpoint,omitempty" default:"localhost:4317"`
}
func NewTracingConfig() Tracing {
return Tracing{
Enabled: false,
Endpoint: "localhost:4317",
}
}

View File

@@ -83,6 +83,12 @@ It provides integrations with multiple reverse proxies and different loading str
startCmd.Flags().DurationVar(&conf.Strategy.Blocking.DefaultRefreshFrequency, "strategy.blocking.default-refresh-frequency", 5*time.Second, "Default refresh frequency at which the instances status are checked for blocking strategy")
_ = viper.BindPFlag("strategy.blocking.default-refresh-frequency", startCmd.Flags().Lookup("strategy.blocking.default-refresh-frequency"))
// Tracing flags
startCmd.Flags().BoolVar(&conf.Tracing.Enabled, "tracing.enabled", false, "Enable OpenTelemetry tracing and metrics")
_ = viper.BindPFlag("tracing.enabled", startCmd.Flags().Lookup("tracing.enabled"))
startCmd.Flags().StringVar(&conf.Tracing.Endpoint, "tracing.endpoint", "localhost:4317", "OpenTelemetry collector endpoint")
_ = viper.BindPFlag("tracing.endpoint", startCmd.Flags().Lookup("tracing.endpoint"))
rootCmd.AddCommand(startCmd)
rootCmd.AddCommand(NewVersionCmd())

View File

@@ -13,6 +13,7 @@ import (
"github.com/sablierapp/sablier/pkg/config"
"github.com/sablierapp/sablier/pkg/sablier"
"github.com/sablierapp/sablier/pkg/store/inmemory"
"github.com/sablierapp/sablier/pkg/tracing"
"github.com/sablierapp/sablier/pkg/version"
"github.com/spf13/cobra"
"github.com/spf13/viper"
@@ -45,6 +46,26 @@ func Start(ctx context.Context, conf config.Config) error {
logger.Info("running Sablier version " + version.Info())
// Initialize tracing
tracingConfig := tracing.Config{
ServiceName: "sablier",
ServiceVersion: version.Version,
Endpoint: conf.Tracing.Endpoint,
Enabled: conf.Tracing.Enabled,
}
tracer, err := tracing.New(ctx, tracingConfig, logger)
if err != nil {
return fmt.Errorf("failed to initialize tracing: %w", err)
}
defer func() {
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := tracer.Shutdown(shutdownCtx); err != nil {
logger.Error("failed to shutdown tracing", "error", err)
}
}()
provider, err := setupProvider(ctx, logger, conf.Provider)
if err != nil {
return fmt.Errorf("cannot setup provider: %w", err)

109
pkg/tracing/metrics.go Normal file
View File

@@ -0,0 +1,109 @@
package tracing
import (
"context"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)
type Metrics struct {
sessionsActive metric.Int64UpDownCounter
sessionsTotal metric.Int64Counter
instancesStarted metric.Int64Counter
instancesStopped metric.Int64Counter
requestsDuration metric.Float64Histogram
}
func InitMetrics() (*Metrics, error) {
meter := otel.Meter("sablier")
sessionsActive, err := meter.Int64UpDownCounter("sablier.sessions.active",
metric.WithDescription("Number of currently active sessions"))
if err != nil {
return nil, err
}
sessionsTotal, err := meter.Int64Counter("sablier.sessions.total",
metric.WithDescription("Total number of sessions created"))
if err != nil {
return nil, err
}
instancesStarted, err := meter.Int64Counter("sablier.instances.started",
metric.WithDescription("Total number of instances started"))
if err != nil {
return nil, err
}
instancesStopped, err := meter.Int64Counter("sablier.instances.stopped",
metric.WithDescription("Total number of instances stopped"))
if err != nil {
return nil, err
}
requestsDuration, err := meter.Float64Histogram("sablier.requests.duration",
metric.WithDescription("Duration of requests in milliseconds"),
metric.WithUnit("ms"))
if err != nil {
return nil, err
}
return &Metrics{
sessionsActive: sessionsActive,
sessionsTotal: sessionsTotal,
instancesStarted: instancesStarted,
instancesStopped: instancesStopped,
requestsDuration: requestsDuration,
}, nil
}
func (m *Metrics) RecordSessionStart(ctx context.Context, strategy string) {
if m == nil {
return
}
m.sessionsActive.Add(ctx, 1, metric.WithAttributes(
attribute.String("strategy", strategy),
))
m.sessionsTotal.Add(ctx, 1, metric.WithAttributes(
attribute.String("strategy", strategy),
))
}
func (m *Metrics) RecordSessionEnd(ctx context.Context, strategy string) {
if m == nil {
return
}
m.sessionsActive.Add(ctx, -1, metric.WithAttributes(
attribute.String("strategy", strategy),
))
}
func (m *Metrics) RecordInstanceStart(ctx context.Context, provider string) {
if m == nil {
return
}
m.instancesStarted.Add(ctx, 1, metric.WithAttributes(
attribute.String("provider", provider),
))
}
func (m *Metrics) RecordInstanceStop(ctx context.Context, provider string) {
if m == nil {
return
}
m.instancesStopped.Add(ctx, 1, metric.WithAttributes(
attribute.String("provider", provider),
))
}
func (m *Metrics) RecordRequestDuration(ctx context.Context, duration float64, strategy string, status string) {
if m == nil {
return
}
m.requestsDuration.Record(ctx, duration, metric.WithAttributes(
attribute.String("strategy", strategy),
attribute.String("status", status),
))
}

121
pkg/tracing/tracing.go Normal file
View File

@@ -0,0 +1,121 @@
package tracing
import (
"context"
"fmt"
"log/slog"
"time"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
"go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
)
type Config struct {
ServiceName string
ServiceVersion string
Endpoint string
Enabled bool
}
type Telemetry struct {
tracerProvider *trace.TracerProvider
meterProvider *metric.MeterProvider
logger *slog.Logger
}
// New initializes OpenTelemetry with tracing and metrics
func New(ctx context.Context, cfg Config, logger *slog.Logger) (*Telemetry, error) {
if !cfg.Enabled {
logger.Info("OpenTelemetry disabled")
return &Telemetry{logger: logger}, nil
}
res, err := resource.New(ctx,
resource.WithAttributes(
semconv.ServiceName(cfg.ServiceName),
semconv.ServiceVersion(cfg.ServiceVersion),
),
)
if err != nil {
return nil, fmt.Errorf("failed to create resource: %w", err)
}
// Setup Tracer Provider
traceExporter, err := otlptrace.New(ctx, otlptracegrpc.NewClient(
otlptracegrpc.WithEndpoint(cfg.Endpoint),
otlptracegrpc.WithInsecure(),
))
if err != nil {
return nil, fmt.Errorf("failed to create trace exporter: %w", err)
}
tracerProvider := trace.NewTracerProvider(
trace.WithBatcher(traceExporter),
trace.WithResource(res),
)
otel.SetTracerProvider(tracerProvider)
// Setup Meter Provider
metricExporter, err := otlpmetricgrpc.New(ctx,
otlpmetricgrpc.WithEndpoint(cfg.Endpoint),
otlpmetricgrpc.WithInsecure(),
)
if err != nil {
return nil, fmt.Errorf("failed to create metric exporter: %w", err)
}
meterProvider := metric.NewMeterProvider(
metric.WithReader(metric.NewPeriodicReader(metricExporter,
metric.WithInterval(10*time.Second))),
metric.WithResource(res),
)
otel.SetMeterProvider(meterProvider)
// Setup propagators
otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
propagation.TraceContext{},
propagation.Baggage{},
))
logger.Info("OpenTelemetry initialized",
"service", cfg.ServiceName,
"version", cfg.ServiceVersion,
"endpoint", cfg.Endpoint)
return &Telemetry{
tracerProvider: tracerProvider,
meterProvider: meterProvider,
logger: logger,
}, nil
}
// Shutdown gracefully shuts down the telemetry providers
func (t *Telemetry) Shutdown(ctx context.Context) error {
if t.tracerProvider == nil && t.meterProvider == nil {
return nil
}
var err error
if t.tracerProvider != nil {
if shutdownErr := t.tracerProvider.Shutdown(ctx); shutdownErr != nil {
err = shutdownErr
t.logger.Error("failed to shutdown tracer provider", "error", err)
}
}
if t.meterProvider != nil {
if shutdownErr := t.meterProvider.Shutdown(ctx); shutdownErr != nil {
err = shutdownErr
t.logger.Error("failed to shutdown meter provider", "error", err)
}
}
return err
}

View File

@@ -10,6 +10,10 @@ sessions:
expiration-interval: 20s
logging:
level: info
# OpenTelemetry tracing and metrics configuration
tracing:
enabled: false
endpoint: "localhost:4317" # OpenTelemetry collector endpoint
strategy:
dynamic:
custom-themes-path: