diff --git a/.serena/memories/metrics_patterns.md b/.serena/memories/metrics_patterns.md index 62d9d4d58..e1aef97c8 100644 --- a/.serena/memories/metrics_patterns.md +++ b/.serena/memories/metrics_patterns.md @@ -3,30 +3,34 @@ ## Current Prometheus Metrics Setup ### Structure -- **Central metrics files**: - - `metrics/gateway.go` - Gateway-level metrics (requests, duration, response size) - - `metrics/server.go` - Prometheus metrics server setup - - `metrics/prometheus_reporter.go` - Metrics reporter interface - - `cmd/metrics.go` - Metrics server initialization -### Protocol-specific metrics +**Central metrics files:** +- `metrics/gateway.go` - Gateway-level metrics (requests, duration, response size) +- `metrics/server.go` - Prometheus metrics server setup +- `metrics/prometheus_reporter.go` - Metrics reporter interface +- `cmd/metrics.go` - Metrics server initialization + +**Protocol-specific metrics:** - `metrics/protocol/shannon/metrics.go` - Shannon protocol metrics -### QoS-specific metrics +**QoS-specific metrics:** - `metrics/qos/evm/metrics.go` - EVM QoS metrics - `metrics/qos/solana/metrics.go` - Solana QoS metrics ## Pattern Analysis ### Common Constants + - All metrics files use `pathProcess = "path"` as the subsystem name - Each file defines specific metric name constants (e.g., `requestsTotal`, `relaysTotalMetric`) ### Registration Pattern + - Each metrics file has an `init()` function that calls `prometheus.MustRegister()` for its metrics - Metrics are defined as package-level variables using `prometheus.NewCounterVec`, `prometheus.NewHistogramVec` ### Metric Definition Structure + ```go var myMetric = prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -39,11 +43,13 @@ var myMetric = prometheus.NewCounterVec( ``` ### Publishing Pattern + - Each metrics file has a `PublishMetrics()` function - Uses `prometheus.Labels{}` to set label values - Calls `.With(labels).Inc()` or `.Observe(value)` on metrics ## Version Handling + - Project uses git-based versioning: `git describe --tags --always --dirty` - No existing version metric found in current setup -- Build process uses ldflags in `makefiles/release.mk` \ No newline at end of file +- Build process uses ldflags in `makefiles/release.mk` diff --git a/gateway/http_request_context_handle_request.go b/gateway/http_request_context_handle_request.go index e8818636e..f9712fb19 100644 --- a/gateway/http_request_context_handle_request.go +++ b/gateway/http_request_context_handle_request.go @@ -10,6 +10,7 @@ import ( "github.com/pokt-network/poktroll/pkg/polylog" shannonmetrics "github.com/buildwithgrove/path/metrics/protocol/shannon" + "github.com/buildwithgrove/path/observation" "github.com/buildwithgrove/path/protocol" ) @@ -59,6 +60,8 @@ func (rc *requestContext) HandleRelayRequest() error { // If we have multiple protocol contexts, send parallel requests if isParallel { + // TODO_CONSIDERATION: Should only organic requests ever have the option to be parallelized? + // This will require a reconsideration of this feature, how observation are managed, etc... logger.Debug().Msgf("Handling %d parallel relay requests", len(rc.protocolContexts)) return rc.handleParallelRelayRequests() } @@ -70,9 +73,11 @@ func (rc *requestContext) HandleRelayRequest() error { // handleSingleRelayRequest handles a single relay request (original behavior) func (rc *requestContext) handleSingleRelayRequest() error { - // Send the service request payload, through the protocol context, to the selected endpoint. // In this code path, we are always guaranteed to have exactly one protocol context. - endpointResponse, err := rc.protocolContexts[0].HandleServiceRequest(rc.qosCtx.GetServicePayload()) + protocolRequestContext := rc.protocolContexts[0] + + // Send the service request payload, through the protocol context, to the selected endpoint. + endpointResponse, err := protocolRequestContext.HandleServiceRequest(rc.qosCtx.GetServicePayload()) if err != nil { rc.logger.Warn().Err(err).Msg("Failed to send a single relay request.") return err @@ -84,6 +89,10 @@ func (rc *requestContext) handleSingleRelayRequest() error { // handleParallelRelayRequests orchestrates parallel relay requests and returns the first successful response. func (rc *requestContext) handleParallelRelayRequests() error { + // Only the gateway context can trigger parallel requests. + // We update the request type accordingly here. + rc.gatewayObservations.RequestType = observation.RequestType_REQUEST_TYPE_PARALLEL + metrics := ¶llelRequestMetrics{ numRequestsToAttempt: len(rc.protocolContexts), overallStartTime: time.Now(), @@ -134,13 +143,13 @@ func (rc *requestContext) launchParallelRequests(ctx context.Context, logger pol func (rc *requestContext) executeOneOfParallelRequests( ctx context.Context, logger polylog.Logger, - protocolCtx ProtocolRequestContext, + protocolRequestCtx ProtocolRequestContext, index int, resultChan chan<- parallelRelayResult, qosContextMutex *sync.Mutex, ) { startTime := time.Now() - endpointResponse, err := protocolCtx.HandleServiceRequest(rc.qosCtx.GetServicePayload()) + endpointResponse, err := protocolRequestCtx.HandleServiceRequest(rc.qosCtx.GetServicePayload()) duration := time.Since(startTime) result := parallelRelayResult{ diff --git a/gateway/observation.go b/gateway/observation.go index d3d8d0528..b684ca2af 100644 --- a/gateway/observation.go +++ b/gateway/observation.go @@ -67,7 +67,7 @@ const ( // regionEuropeNorth1 = "europe-north1" // Europe North region ) -// ---------- User Requests ---------- +// ---------- User (i.e. Organic) Requests ---------- // getUserRequestGatewayObservations returns gateway-level observations for an organic, i.e. from a user, request. func getUserRequestGatewayObservations(httpReq *http.Request) *observation.GatewayObservations { diff --git a/observation/gateway.pb.go b/observation/gateway.pb.go index 9b382c52a..65fe0ab4e 100644 --- a/observation/gateway.pb.go +++ b/observation/gateway.pb.go @@ -26,17 +26,21 @@ const ( ) // RequestType captures the origin of the request. -// As of PR #72, it is one of: -// 1. Organic: a real user sent a service request to a PATH instance -// 2. Synthetic: internal infrastructure generated the service request for simulation and data purposes. +// Next free index: 5 type RequestType int32 const ( RequestType_REQUEST_TYPE_UNSPECIFIED RequestType = 0 - // Organic: Service request sent by a user. + // Organic: A real user sent a service request to a PATH instance RequestType_REQUEST_TYPE_ORGANIC RequestType = 1 - // Synthetic: Service request sent by the endpoint hydrator: see gateway/hydrator.go. + // Synthetic: Internal infrastructure generated the service request for simulation and data purposes. RequestType_REQUEST_TYPE_SYNTHETIC RequestType = 2 + // Parallel: PATH sent the service request for higher redundancy + RequestType_REQUEST_TYPE_PARALLEL RequestType = 3 + // Fallback: PATH sent the service request for higher redundancy + // TODO_TECHDEBT(@adshmh): Use this when fallback endpoints are part of the gateway context. + // DUe to the current structure of the code, there is not clean way to apply this type to the RPC observations.. + RequestType_REQUEST_TYPE_FALLBACK RequestType = 4 ) // Enum value maps for RequestType. @@ -45,11 +49,15 @@ var ( 0: "REQUEST_TYPE_UNSPECIFIED", 1: "REQUEST_TYPE_ORGANIC", 2: "REQUEST_TYPE_SYNTHETIC", + 3: "REQUEST_TYPE_PARALLEL", + 4: "REQUEST_TYPE_FALLBACK", } RequestType_value = map[string]int32{ "REQUEST_TYPE_UNSPECIFIED": 0, "REQUEST_TYPE_ORGANIC": 1, "REQUEST_TYPE_SYNTHETIC": 2, + "REQUEST_TYPE_PARALLEL": 3, + "REQUEST_TYPE_FALLBACK": 4, } ) @@ -414,11 +422,13 @@ const file_path_gateway_proto_rawDesc = "" + "\x0enum_successful\x18\x02 \x01(\x05R\rnumSuccessful\x12\x1d\n" + "\n" + "num_failed\x18\x03 \x01(\x05R\tnumFailed\x12!\n" + - "\fnum_canceled\x18\x04 \x01(\x05R\vnumCanceled*a\n" + + "\fnum_canceled\x18\x04 \x01(\x05R\vnumCanceled*\x97\x01\n" + "\vRequestType\x12\x1c\n" + "\x18REQUEST_TYPE_UNSPECIFIED\x10\x00\x12\x18\n" + "\x14REQUEST_TYPE_ORGANIC\x10\x01\x12\x1a\n" + - "\x16REQUEST_TYPE_SYNTHETIC\x10\x02*\x9e\x02\n" + + "\x16REQUEST_TYPE_SYNTHETIC\x10\x02\x12\x19\n" + + "\x15REQUEST_TYPE_PARALLEL\x10\x03\x12\x19\n" + + "\x15REQUEST_TYPE_FALLBACK\x10\x04*\x9e\x02\n" + "\x17GatewayRequestErrorKind\x12*\n" + "&GATEWAY_REQUEST_ERROR_KIND_UNSPECIFIED\x10\x00\x121\n" + "-GATEWAY_REQUEST_ERROR_KIND_MISSING_SERVICE_ID\x10\x01\x12.\n" + diff --git a/observation/protocol/shannon.pb.go b/observation/protocol/shannon.pb.go index 4198c9873..5e4066d3f 100644 --- a/observation/protocol/shannon.pb.go +++ b/observation/protocol/shannon.pb.go @@ -783,6 +783,7 @@ func (x *ShannonWebsocketMessageObservation) GetIsFallbackEndpoint() bool { // ShannonRequestObservations represents observations collected during the processing // of a single Shannon protocol relay request. +// Next free field: 6 type ShannonRequestObservations struct { state protoimpl.MessageState `protogen:"open.v1"` // Service ID (i.e. chain ID) for which the observation was made diff --git a/proto/path/gateway.proto b/proto/path/gateway.proto index 8065a90dd..540e0544e 100644 --- a/proto/path/gateway.proto +++ b/proto/path/gateway.proto @@ -10,17 +10,23 @@ import "google/protobuf/timestamp.proto"; import "path/auth.proto"; // import RequestAuth message. // RequestType captures the origin of the request. -// As of PR #72, it is one of: -// 1. Organic: a real user sent a service request to a PATH instance -// 2. Synthetic: internal infrastructure generated the service request for simulation and data purposes. +// Next free index: 5 enum RequestType { REQUEST_TYPE_UNSPECIFIED = 0; - // Organic: Service request sent by a user. + // Organic: A real user sent a service request to a PATH instance REQUEST_TYPE_ORGANIC = 1; - // Synthetic: Service request sent by the endpoint hydrator: see gateway/hydrator.go. + // Synthetic: Internal infrastructure generated the service request for simulation and data purposes. REQUEST_TYPE_SYNTHETIC = 2; + + // Parallel: PATH sent the service request for higher redundancy + REQUEST_TYPE_PARALLEL = 3; + + // Fallback: PATH sent the service request for higher redundancy + // TODO_TECHDEBT(@adshmh): Use this when fallback endpoints are part of the gateway context. + // DUe to the current structure of the code, there is not clean way to apply this type to the RPC observations.. + REQUEST_TYPE_FALLBACK = 4; } enum GatewayRequestErrorKind { diff --git a/protocol/shannon/context.go b/protocol/shannon/context.go index cfbcbc695..a32addf0c 100644 --- a/protocol/shannon/context.go +++ b/protocol/shannon/context.go @@ -92,6 +92,7 @@ type requestContext struct { httpClient *httpClientWithDebugMetrics // fallbackEndpoints is used to retrieve a fallback endpoint by an endpoint address. + // TODO_TECHDEBT(@adshmh): This should be part of the gateway context and not the protocol specific request context. fallbackEndpoints map[protocol.EndpointAddr]endpoint } @@ -173,6 +174,9 @@ func (rc *requestContext) executeRelayRequest(payload protocol.Payload) (protoco selectedEndpoint := rc.getSelectedEndpoint() rc.hydratedLogger("executeRelayRequest") + // TODO_TECHDEBT(@adshmh): This should be handled by the gateway context. + // Fallback logic is not "shannon specific". It should apply to any protocol we ever support. + // The separation of concerns leaked throughout development and must be updated. switch { // ** Priority 1: Check Endpoint type ** // Direct fallback endpoint @@ -180,6 +184,8 @@ func (rc *requestContext) executeRelayRequest(payload protocol.Payload) (protoco // - Used when endpoint is explicitly configured as a fallback endpoint case selectedEndpoint.IsFallback(): rc.logger.Debug().Msg("Executing fallback relay") + // TODO_TECHDEBT(@adshmh): Find a place to ensure fallback RPC type is set. + return rc.sendFallbackRelay(selectedEndpoint, payload) // ** Priority 2: Check Network conditions **