From 8b7f5747aa321c8561eb2f53c7822b78324fd3a8 Mon Sep 17 00:00:00 2001 From: lea konvalinka Date: Mon, 10 Nov 2025 15:08:51 +0100 Subject: [PATCH 1/8] fix(flagd): no retry for certain error codes, implement test steps Signed-off-by: lea konvalinka --- .../contrib/providers/flagd/FlagdOptions.java | 9 +++++++++ .../connector/sync/SyncStreamQueueSource.java | 18 +++++++++++++++--- .../flagd/e2e/steps/ProviderSteps.java | 14 ++++++++++++++ .../providers/flagd/e2e/steps/Utils.java | 3 +++ .../flagd/e2e/steps/config/ConfigSteps.java | 1 + providers/flagd/test-harness | 2 +- 6 files changed, 43 insertions(+), 4 deletions(-) diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdOptions.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdOptions.java index a2463a946..9b5bb61a5 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdOptions.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdOptions.java @@ -10,6 +10,7 @@ import io.grpc.ClientInterceptor; import io.opentelemetry.api.GlobalOpenTelemetry; import io.opentelemetry.api.OpenTelemetry; +import java.util.ArrayList; import java.util.List; import java.util.function.Function; import lombok.Builder; @@ -122,6 +123,14 @@ public class FlagdOptions { @Builder.Default private int retryGracePeriod = fallBackToEnvOrDefault(Config.STREAM_RETRY_GRACE_PERIOD, Config.DEFAULT_STREAM_RETRY_GRACE_PERIOD); + + /** + * List of grpc response status codes for which failed connections are not retried. + * Defaults to empty list + */ + @Builder.Default + private List nonRetryableStatusCodes = new ArrayList<>(); + /** * Selector to be used with flag sync gRPC contract. **/ diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java index 196ab77a6..0cc787db4 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java @@ -16,10 +16,12 @@ import dev.openfeature.flagd.grpc.sync.Sync.SyncFlagsRequest; import dev.openfeature.flagd.grpc.sync.Sync.SyncFlagsResponse; import dev.openfeature.sdk.Awaitable; +import dev.openfeature.sdk.exceptions.FatalError; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import io.grpc.Status; import io.grpc.StatusRuntimeException; import io.grpc.stub.StreamObserver; +import java.util.List; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; @@ -49,6 +51,7 @@ public class SyncStreamQueueSource implements QueueSource { private final BlockingQueue outgoingQueue = new LinkedBlockingQueue<>(QUEUE_SIZE); private final FlagSyncServiceStub flagSyncStub; private final FlagSyncServiceBlockingStub metadataStub; + private final List nonRetryableStatusCodes; /** * Creates a new SyncStreamQueueSource responsible for observing the event stream. @@ -65,6 +68,7 @@ public SyncStreamQueueSource(final FlagdOptions options, Consumer Date: Fri, 14 Nov 2025 12:14:38 +0100 Subject: [PATCH 2/8] attempt to handle fatal error Signed-off-by: lea konvalinka --- .../contrib/providers/flagd/FlagdOptions.java | 2 +- .../providers/flagd/FlagdProvider.java | 27 +++++++----- .../resolver/process/InProcessResolver.java | 3 ++ .../resolver/process/storage/FlagStore.java | 5 +++ .../storage/connector/QueuePayloadType.java | 3 +- .../connector/sync/SyncStreamQueueSource.java | 42 ++++++++++++++----- .../flagd/e2e/steps/ProviderSteps.java | 2 +- .../flagd/e2e/steps/config/ConfigSteps.java | 1 - 8 files changed, 60 insertions(+), 25 deletions(-) diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdOptions.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdOptions.java index 9b5bb61a5..993f55bdd 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdOptions.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdOptions.java @@ -129,7 +129,7 @@ public class FlagdOptions { * Defaults to empty list */ @Builder.Default - private List nonRetryableStatusCodes = new ArrayList<>(); + private List fatalStatusCodes = new ArrayList<>(); /** * Selector to be used with flag sync gRPC contract. diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdProvider.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdProvider.java index 4ce6e06ee..082f5a59e 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdProvider.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdProvider.java @@ -5,6 +5,7 @@ import dev.openfeature.contrib.providers.flagd.resolver.process.InProcessResolver; import dev.openfeature.contrib.providers.flagd.resolver.rpc.RpcResolver; import dev.openfeature.contrib.providers.flagd.resolver.rpc.cache.Cache; +import dev.openfeature.sdk.ErrorCode; import dev.openfeature.sdk.EvaluationContext; import dev.openfeature.sdk.EventProvider; import dev.openfeature.sdk.Hook; @@ -135,7 +136,7 @@ public void initialize(EvaluationContext evaluationContext) throws Exception { public void shutdown() { synchronized (syncResources) { try { - if (!syncResources.isInitialized() || syncResources.isShutDown()) { + if (syncResources.isShutDown()) { return; } @@ -193,7 +194,7 @@ EvaluationContext getEnrichedContext() { @SuppressWarnings("checkstyle:fallthrough") private void onProviderEvent(FlagdProviderEvent flagdProviderEvent) { - log.debug("FlagdProviderEvent event {} ", flagdProviderEvent.getEvent()); + log.info("FlagdProviderEvent event {} ", flagdProviderEvent.getEvent()); synchronized (syncResources) { /* * We only use Error and Ready as previous states. @@ -222,20 +223,26 @@ private void onProviderEvent(FlagdProviderEvent flagdProviderEvent) { onReady(); syncResources.setPreviousEvent(ProviderEvent.PROVIDER_READY); break; - - case PROVIDER_ERROR: - if (syncResources.getPreviousEvent() != ProviderEvent.PROVIDER_ERROR) { - onError(); - syncResources.setPreviousEvent(ProviderEvent.PROVIDER_ERROR); + case PROVIDER_STALE: + if (syncResources.getPreviousEvent() != ProviderEvent.PROVIDER_STALE) { + onStale(); + syncResources.setPreviousEvent(ProviderEvent.PROVIDER_STALE); } break; - + case PROVIDER_ERROR: + onError(); + break; default: log.warn("Unknown event {}", flagdProviderEvent.getEvent()); } } } + private void onError() { + this.emitProviderError(ProviderEventDetails.builder().errorCode(ErrorCode.PROVIDER_FATAL).build()); + shutdown(); + } + private void onConfigurationChanged(FlagdProviderEvent flagdProviderEvent) { this.emitProviderConfigurationChanged(ProviderEventDetails.builder() .flagsChanged(flagdProviderEvent.getFlagsChanged()) @@ -255,7 +262,7 @@ private void onReady() { ProviderEventDetails.builder().message("connected to flagd").build()); } - private void onError() { + private void onStale() { log.debug( "Stream error. Emitting STALE, scheduling ERROR, and waiting {}s for connection to become available.", gracePeriod); @@ -270,7 +277,7 @@ private void onError() { if (!errorExecutor.isShutdown()) { errorTask = errorExecutor.schedule( () -> { - if (syncResources.getPreviousEvent() == ProviderEvent.PROVIDER_ERROR) { + if (syncResources.getPreviousEvent() == ProviderEvent.PROVIDER_STALE) { log.error( "Provider did not reconnect successfully within {}s. Emitting ERROR event...", gracePeriod); diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/InProcessResolver.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/InProcessResolver.java index e54c938cf..f313d943b 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/InProcessResolver.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/InProcessResolver.java @@ -77,6 +77,9 @@ public void init() throws Exception { storageStateChange.getSyncMetadata())); log.debug("post onConnectionEvent.accept ProviderEvent.PROVIDER_CONFIGURATION_CHANGED"); break; + case STALE: + onConnectionEvent.accept(new FlagdProviderEvent(ProviderEvent.PROVIDER_STALE)); + break; case ERROR: onConnectionEvent.accept(new FlagdProviderEvent(ProviderEvent.PROVIDER_ERROR)); break; diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/FlagStore.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/FlagStore.java index eaa3dfa5f..a01f93c23 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/FlagStore.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/FlagStore.java @@ -138,6 +138,11 @@ private void streamerListener(final QueueSource connector) throws InterruptedExc } break; case ERROR: + if (!stateBlockingQueue.offer(new StorageStateChange(StorageState.STALE))) { + log.warn("Failed to convey STALE status, queue is full"); + } + break; + case FATAL: if (!stateBlockingQueue.offer(new StorageStateChange(StorageState.ERROR))) { log.warn("Failed to convey ERROR status, queue is full"); } diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/QueuePayloadType.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/QueuePayloadType.java index 93675fb60..74e02912e 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/QueuePayloadType.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/QueuePayloadType.java @@ -3,5 +3,6 @@ /** Payload type emitted by {@link QueueSource}. */ public enum QueuePayloadType { DATA, - ERROR + ERROR, + FATAL } diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java index 0cc787db4..915855b27 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java @@ -16,7 +16,6 @@ import dev.openfeature.flagd.grpc.sync.Sync.SyncFlagsRequest; import dev.openfeature.flagd.grpc.sync.Sync.SyncFlagsResponse; import dev.openfeature.sdk.Awaitable; -import dev.openfeature.sdk.exceptions.FatalError; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import io.grpc.Status; import io.grpc.StatusRuntimeException; @@ -51,7 +50,7 @@ public class SyncStreamQueueSource implements QueueSource { private final BlockingQueue outgoingQueue = new LinkedBlockingQueue<>(QUEUE_SIZE); private final FlagSyncServiceStub flagSyncStub; private final FlagSyncServiceBlockingStub metadataStub; - private final List nonRetryableStatusCodes; + private final List fatalStatusCodes; /** * Creates a new SyncStreamQueueSource responsible for observing the event stream. @@ -68,7 +67,7 @@ public SyncStreamQueueSource(final FlagdOptions options, Consumer queue, String message) { if (!queue.offer(new QueuePayload(QueuePayloadType.ERROR, message, null))) { log.error("Failed to convey ERROR status, queue is full"); } } + private static void enqueueFatal(BlockingQueue queue, String message) { + if (!queue.offer(new QueuePayload(QueuePayloadType.FATAL, message, null))) { + log.error("Failed to convey FATAL status, queue is full"); + } + } + private static class SyncStreamObserver implements StreamObserver { private final BlockingQueue outgoingQueue; private final AtomicBoolean shouldThrottle; private final Awaitable done = new Awaitable(); + private final List fatalStatusCodes; private Struct metadata; - public SyncStreamObserver(BlockingQueue outgoingQueue, AtomicBoolean shouldThrottle) { + public SyncStreamObserver(BlockingQueue outgoingQueue, AtomicBoolean shouldThrottle, List fatalStatusCodes) { this.outgoingQueue = outgoingQueue; this.shouldThrottle = shouldThrottle; + this.fatalStatusCodes = fatalStatusCodes; } @Override @@ -260,9 +275,14 @@ public void onNext(SyncFlagsResponse syncFlagsResponse) { @Override public void onError(Throwable throwable) { try { + Status status = Status.fromThrowable(throwable); String message = throwable != null ? throwable.getMessage() : "unknown"; log.debug("Stream error: {}, will restart", message, throwable); - enqueueError(outgoingQueue, String.format("Error from stream: %s", message)); + if (fatalStatusCodes.contains(status.getCode())) { + enqueueFatal(outgoingQueue, String.format("Error from stream: %s", message)); + } else { + enqueueError(outgoingQueue, String.format("Error from stream: %s", message)); + } // Set throttling flag to ensure backoff before retry this.shouldThrottle.set(true); diff --git a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/ProviderSteps.java b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/ProviderSteps.java index e2c7eef1e..230446f88 100644 --- a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/ProviderSteps.java +++ b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/ProviderSteps.java @@ -202,6 +202,6 @@ public void the_flag_was_modded() { @Then("the client is in {} state") public void the_client_is_in_fatal_state(String clientState) { - assertThat(state.client.getProviderState()).isEqualTo(ProviderState.FATAL); + assertThat(state.client.getProviderState()).isEqualTo(ProviderState.valueOf(clientState.toUpperCase())); } } diff --git a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/config/ConfigSteps.java b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/config/ConfigSteps.java index c1dad8dae..25a6cdc7d 100644 --- a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/config/ConfigSteps.java +++ b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/config/ConfigSteps.java @@ -121,7 +121,6 @@ private static String mapOptionNames(String option) { propertyMapper.put("keepAliveTime", "keepAlive"); propertyMapper.put("retryBackoffMaxMs", "keepAlive"); propertyMapper.put("cache", "cacheType"); - propertyMapper.put("fatalStatusCodes", "nonRetryableStatusCodes"); if (propertyMapper.get(option) != null) { option = propertyMapper.get(option); From 654c8dad658e891f61cd8af51ed7385e4a8cabe2 Mon Sep 17 00:00:00 2001 From: lea konvalinka Date: Mon, 24 Nov 2025 10:49:57 +0100 Subject: [PATCH 3/8] fix(flagd): update testbed + step, fix event Signed-off-by: lea konvalinka --- .../providers/flagd/resolver/common/ChannelConnector.java | 2 +- .../contrib/providers/flagd/e2e/steps/ProviderSteps.java | 4 ++-- .../openfeature/contrib/providers/flagd/e2e/steps/Utils.java | 5 ++++- providers/flagd/test-harness | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/common/ChannelConnector.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/common/ChannelConnector.java index 6261affe7..032b1766c 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/common/ChannelConnector.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/common/ChannelConnector.java @@ -86,7 +86,7 @@ private void onStateChange() { log.debug("Channel state changed to: {}", currentState); if (currentState == ConnectivityState.TRANSIENT_FAILURE || currentState == ConnectivityState.SHUTDOWN) { this.onConnectionEvent.accept(new FlagdProviderEvent( - ProviderEvent.PROVIDER_ERROR, Collections.emptyList(), new ImmutableStructure())); + ProviderEvent.PROVIDER_STALE, Collections.emptyList(), new ImmutableStructure())); } if (currentState != ConnectivityState.SHUTDOWN) { log.debug("continuing to monitor the grpc channel"); diff --git a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/ProviderSteps.java b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/ProviderSteps.java index 230446f88..0467c56e5 100644 --- a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/ProviderSteps.java +++ b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/ProviderSteps.java @@ -200,8 +200,8 @@ public void the_flag_was_modded() { .statusCode(200); } - @Then("the client is in {} state") - public void the_client_is_in_fatal_state(String clientState) { + @Then("the client should be in {} state") + public void the_client_should_be_in_fatal_state(String clientState) { assertThat(state.client.getProviderState()).isEqualTo(ProviderState.valueOf(clientState.toUpperCase())); } } diff --git a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/Utils.java b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/Utils.java index c50c08397..a89f8560e 100644 --- a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/Utils.java +++ b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/Utils.java @@ -4,8 +4,10 @@ import dev.openfeature.contrib.providers.flagd.resolver.rpc.cache.CacheType; import dev.openfeature.sdk.Value; import java.io.IOException; +import java.util.Arrays; import java.util.List; import java.util.Objects; +import java.util.stream.Collectors; import org.testcontainers.shaded.com.fasterxml.jackson.databind.ObjectMapper; public final class Utils { @@ -39,7 +41,8 @@ public static Object convert(String value, String type) throws ClassNotFoundExce case "CacheType": return CacheType.valueOf(value.toUpperCase()).getValue(); case "StringList": - return List.of(value); + return value.isEmpty() ? List.of() : Arrays.stream(value.split(",")).map(String::trim).collect( + Collectors.toList()); case "Object": return Value.objectToValue(new ObjectMapper().readValue(value, Object.class)); } diff --git a/providers/flagd/test-harness b/providers/flagd/test-harness index bde8977a4..6948dcbab 160000 --- a/providers/flagd/test-harness +++ b/providers/flagd/test-harness @@ -1 +1 @@ -Subproject commit bde8977a4fa2b59ba4359bcf902e9adf4555d085 +Subproject commit 6948dcbabef284fae4a4c1d03ce5e0bd9ea34c17 From 07195a7180817c06c866210b6f46eddaf9f3ee28 Mon Sep 17 00:00:00 2001 From: lea konvalinka Date: Fri, 12 Dec 2025 12:04:41 +0100 Subject: [PATCH 4/8] adjust rpc resolver Signed-off-by: lea konvalinka --- .../contrib/providers/flagd/FlagdProvider.java | 3 +-- .../connector/sync/SyncStreamQueueSource.java | 1 - .../flagd/resolver/rpc/RpcResolver.java | 18 ++++++++++++++++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdProvider.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdProvider.java index 082f5a59e..ddd87f949 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdProvider.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdProvider.java @@ -194,7 +194,7 @@ EvaluationContext getEnrichedContext() { @SuppressWarnings("checkstyle:fallthrough") private void onProviderEvent(FlagdProviderEvent flagdProviderEvent) { - log.info("FlagdProviderEvent event {} ", flagdProviderEvent.getEvent()); + log.debug("FlagdProviderEvent event {} ", flagdProviderEvent.getEvent()); synchronized (syncResources) { /* * We only use Error and Ready as previous states. @@ -240,7 +240,6 @@ private void onProviderEvent(FlagdProviderEvent flagdProviderEvent) { private void onError() { this.emitProviderError(ProviderEventDetails.builder().errorCode(ErrorCode.PROVIDER_FATAL).build()); - shutdown(); } private void onConfigurationChanged(FlagdProviderEvent flagdProviderEvent) { diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java index 915855b27..a0e66ee97 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java @@ -143,7 +143,6 @@ private void observeSyncStream() { if (fatalStatusCodes.contains(metaEx.getStatus().getCode().name())) { //throw new FatalError("Failed to connect for metadata request, not retrying for error " + metaEx.getStatus()); enqueueFatal("Fatal: Failed to connect for metadata request, not retrying for error " + metaEx.getStatus().getCode()); - return; } // retry for other status codes String message = metaEx.getMessage(); diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/rpc/RpcResolver.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/rpc/RpcResolver.java index 1f3101d00..eebd7f2a0 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/rpc/RpcResolver.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/rpc/RpcResolver.java @@ -68,6 +68,7 @@ public final class RpcResolver implements Resolver { private final Consumer onProviderEvent; private final ServiceStub stub; private final ServiceBlockingStub blockingStub; + private final List fatalStatusCodes; /** * Resolves flag values using @@ -89,6 +90,7 @@ public RpcResolver( this.stub = ServiceGrpc.newStub(this.connector.getChannel()).withWaitForReady(); this.blockingStub = ServiceGrpc.newBlockingStub(this.connector.getChannel()).withWaitForReady(); + this.fatalStatusCodes = options.getFatalStatusCodes(); } // testing only @@ -107,6 +109,7 @@ protected RpcResolver( this.onProviderEvent = onProviderEvent; this.stub = mockStub; this.blockingStub = mockBlockingStub; + this.fatalStatusCodes = options.getFatalStatusCodes(); } /** @@ -353,7 +356,12 @@ private void observeEventStream() throws InterruptedException { log.debug( "Exception in event stream connection, streamException {}, will reconnect", streamException); - this.handleErrorOrComplete(); + if (streamException instanceof StatusRuntimeException && fatalStatusCodes.contains( + ((StatusRuntimeException) streamException).getStatus().getCode().name())) { + this.handleFatalError(); + } else { + this.handleErrorOrComplete(); + } break; } @@ -412,9 +420,15 @@ private void handleProviderReadyEvent() { * Handles provider error events by clearing the cache (if enabled) and notifying listeners of the error. */ private void handleErrorOrComplete() { - log.debug("Emitting provider error event"); + log.debug("Emitting provider stale event"); // complete is an error, logically...even if the server went down gracefully we need to reconnect. + onProviderEvent.accept(new FlagdProviderEvent(ProviderEvent.PROVIDER_STALE)); + } + + private void handleFatalError() { + log.debug("Emitting provider error event"); + onProviderEvent.accept(new FlagdProviderEvent(ProviderEvent.PROVIDER_ERROR)); } } From e6d40578c72c67f2f9366211b7f32585f5de5ab6 Mon Sep 17 00:00:00 2001 From: lea konvalinka Date: Wed, 17 Dec 2025 10:15:55 +0100 Subject: [PATCH 5/8] fix e2e tests Signed-off-by: Konvalinka --- .../contrib/providers/flagd/Config.java | 14 +++++++++++ .../contrib/providers/flagd/FlagdOptions.java | 4 ++-- .../process/storage/StorageState.java | 2 +- .../connector/sync/SyncStreamQueueSource.java | 23 +++++++++---------- .../providers/flagd/e2e/RunInProcessTest.java | 2 +- providers/flagd/test-harness | 2 +- 6 files changed, 30 insertions(+), 17 deletions(-) diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/Config.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/Config.java index 417826437..a2ae3e9ea 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/Config.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/Config.java @@ -1,7 +1,10 @@ package dev.openfeature.contrib.providers.flagd; import dev.openfeature.contrib.providers.flagd.resolver.rpc.cache.CacheType; +import java.util.Arrays; +import java.util.List; import java.util.function.Function; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; /** Helper class to hold configuration default values. */ @@ -36,6 +39,7 @@ public final class Config { static final String FLAGD_RETRY_BACKOFF_MAX_MS_VAR_NAME = "FLAGD_RETRY_BACKOFF_MAX_MS"; static final String STREAM_DEADLINE_MS_ENV_VAR_NAME = "FLAGD_STREAM_DEADLINE_MS"; static final String SOURCE_SELECTOR_ENV_VAR_NAME = "FLAGD_SOURCE_SELECTOR"; + static final String FATAL_STATUS_CODES_ENV_VAR_NAME = "FLAGD_FATAL_STATUS_CODES"; /** * Environment variable to fetch Provider id. * @@ -91,6 +95,16 @@ static long fallBackToEnvOrDefault(String key, long defaultValue) { } } + static List fallBackToEnvOrDefaultList(String key, List defaultValue) { + try { + return System.getenv(key) != null ? Arrays.stream(System.getenv(key).split(",")) + .map(String::trim) + .collect(Collectors.toList()) : defaultValue; + } catch (Exception e) { + return defaultValue; + } + } + static Resolver fromValueProvider(Function provider) { final String resolverVar = provider.apply(RESOLVER_ENV_VAR); if (resolverVar == null) { diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdOptions.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdOptions.java index f537dfb25..1005c4a4c 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdOptions.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdOptions.java @@ -1,6 +1,7 @@ package dev.openfeature.contrib.providers.flagd; import static dev.openfeature.contrib.providers.flagd.Config.fallBackToEnvOrDefault; +import static dev.openfeature.contrib.providers.flagd.Config.fallBackToEnvOrDefaultList; import static dev.openfeature.contrib.providers.flagd.Config.fromValueProvider; import dev.openfeature.contrib.providers.flagd.resolver.process.storage.connector.QueueSource; @@ -10,7 +11,6 @@ import io.grpc.ClientInterceptor; import io.opentelemetry.api.GlobalOpenTelemetry; import io.opentelemetry.api.OpenTelemetry; -import java.util.ArrayList; import java.util.List; import java.util.function.Function; import lombok.Builder; @@ -129,7 +129,7 @@ public class FlagdOptions { * Defaults to empty list */ @Builder.Default - private List fatalStatusCodes = new ArrayList<>(); + private List fatalStatusCodes = fallBackToEnvOrDefaultList(Config.FATAL_STATUS_CODES_ENV_VAR_NAME, List.of()); /** * Selector to be used with flag sync gRPC contract. diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/StorageState.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/StorageState.java index c47670b7d..d6b8b30c5 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/StorageState.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/StorageState.java @@ -1,6 +1,6 @@ package dev.openfeature.contrib.providers.flagd.resolver.process.storage; -/** Satus of the storage. */ +/** Status of the storage. */ public enum StorageState { /** Storage is upto date and working as expected. */ OK, diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java index 61ecca041..c49423c7d 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java @@ -141,13 +141,13 @@ private void observeSyncStream() { observer.metadata = getMetadata(); } catch (StatusRuntimeException metaEx) { if (fatalStatusCodes.contains(metaEx.getStatus().getCode().name())) { - //throw new FatalError("Failed to connect for metadata request, not retrying for error " + metaEx.getStatus()); - enqueueFatal("Fatal: Failed to connect for metadata request, not retrying for error " + metaEx.getStatus().getCode()); + enqueueFatal(String.format("Fatal: Failed to connect for metadata request, not retrying for error %s", metaEx.getStatus().getCode())); + } else { + // retry for other status codes + String message = metaEx.getMessage(); + log.debug("Metadata request error: {}, will restart", message, metaEx); + enqueueError(String.format("Error in getMetadata request: %s", message)); } - // retry for other status codes - String message = metaEx.getMessage(); - log.debug("Metadata request error: {}, will restart", message, metaEx); - enqueueError(String.format("Error in getMetadata request: %s", message)); shouldThrottle.set(true); continue; } @@ -156,13 +156,12 @@ private void observeSyncStream() { syncFlags(observer); } catch (StatusRuntimeException ex) { if (fatalStatusCodes.contains(ex.getStatus().getCode().toString())) { - //throw new FatalError("Failed to connect for metadata request, not retrying for error " + ex.getStatus().getCode()); - enqueueFatal("Fatal: Failed to connect for metadata request, not retrying for error " + ex.getStatus().getCode()); - return; + enqueueFatal(String.format("Fatal: Failed to connect for metadata request, not retrying for error %s", ex.getStatus().getCode())); + } else { + // retry for other status codes + log.error("Unexpected sync stream exception, will restart.", ex); + enqueueError(String.format("Error in syncStream: %s", ex.getMessage())); } - // retry for other status codes - log.error("Unexpected sync stream exception, will restart.", ex); - enqueueError(String.format("Error in syncStream: %s", ex.getMessage())); shouldThrottle.set(true); } } catch (InterruptedException ie) { diff --git a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/RunInProcessTest.java b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/RunInProcessTest.java index c694aa9ef..3f859d984 100644 --- a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/RunInProcessTest.java +++ b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/RunInProcessTest.java @@ -28,7 +28,7 @@ @ConfigurationParameter(key = GLUE_PROPERTY_NAME, value = "dev.openfeature.contrib.providers.flagd.e2e.steps") @ConfigurationParameter(key = OBJECT_FACTORY_PROPERTY_NAME, value = "io.cucumber.picocontainer.PicoFactory") @IncludeTags("in-process") -@ExcludeTags({"unixsocket"}) +@ExcludeTags({"unixsocket","sync-port"}) @Testcontainers public class RunInProcessTest { diff --git a/providers/flagd/test-harness b/providers/flagd/test-harness index 6948dcbab..9b73b3a95 160000 --- a/providers/flagd/test-harness +++ b/providers/flagd/test-harness @@ -1 +1 @@ -Subproject commit 6948dcbabef284fae4a4c1d03ce5e0bd9ea34c17 +Subproject commit 9b73b3a95cd9e0885937d244b118713b26374b1d From 95a880c68e488774e0eef03152c87d0a08524786 Mon Sep 17 00:00:00 2001 From: lea konvalinka Date: Wed, 17 Dec 2025 10:44:26 +0100 Subject: [PATCH 6/8] clean up Signed-off-by: Konvalinka --- .../openfeature/contrib/providers/flagd/FlagdProvider.java | 2 +- .../process/storage/connector/sync/SyncStreamQueueSource.java | 2 ++ .../contrib/providers/flagd/resolver/rpc/RpcResolver.java | 4 ++++ .../contrib/providers/flagd/e2e/steps/ProviderSteps.java | 2 +- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdProvider.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdProvider.java index ddd87f949..88cd71fa7 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdProvider.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdProvider.java @@ -136,7 +136,7 @@ public void initialize(EvaluationContext evaluationContext) throws Exception { public void shutdown() { synchronized (syncResources) { try { - if (syncResources.isShutDown()) { + if (!syncResources.isInitialized() || syncResources.isShutDown()) { return; } diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java index 5613f50cb..8545519d4 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java @@ -138,6 +138,7 @@ private void observeSyncStream() { observer.metadata = getMetadata(); } catch (StatusRuntimeException metaEx) { if (fatalStatusCodes.contains(metaEx.getStatus().getCode().name())) { + log.debug("Fatal status code for metadata request: {}, not retrying", metaEx.getStatus().getCode()); enqueueFatal(String.format("Fatal: Failed to connect for metadata request, not retrying for error %s", metaEx.getStatus().getCode())); } else { // retry for other status codes @@ -153,6 +154,7 @@ private void observeSyncStream() { syncFlags(observer); } catch (StatusRuntimeException ex) { if (fatalStatusCodes.contains(ex.getStatus().getCode().toString())) { + log.debug("Fatal status code during sync stream: {}, not retrying", ex.getStatus().getCode()); enqueueFatal(String.format("Fatal: Failed to connect for metadata request, not retrying for error %s", ex.getStatus().getCode())); } else { // retry for other status codes diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/rpc/RpcResolver.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/rpc/RpcResolver.java index 5df2d7844..15fea898b 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/rpc/RpcResolver.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/rpc/RpcResolver.java @@ -424,6 +424,10 @@ private void handleErrorOrComplete() { onProviderEvent.accept(new FlagdProviderEvent(ProviderEvent.PROVIDER_STALE)); } + /** + * Handles fatal error events (i.e. error codes defined in fatalStatusCodes) by transitioning the provider into + * fatal state + */ private void handleFatalError() { log.debug("Emitting provider error event"); diff --git a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/ProviderSteps.java b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/ProviderSteps.java index 489002f68..90d082292 100644 --- a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/ProviderSteps.java +++ b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/steps/ProviderSteps.java @@ -53,7 +53,7 @@ public static void beforeAll() throws IOException { .withExposedService("flagd", 8015, Wait.forListeningPort()) .withExposedService("flagd", 8080, Wait.forListeningPort()) .withExposedService("envoy", 9211, Wait.forListeningPort()) - .withExposedService("envoy", 9212, Wait.forListeningPort()) + .withExposedService("envoy", FORBIDDEN_PORT, Wait.forListeningPort()) .withStartupTimeout(Duration.ofSeconds(45)); container.start(); } From 45a9822a8555ba9dcca3ba2dc753bd1308f018d1 Mon Sep 17 00:00:00 2001 From: lea konvalinka Date: Wed, 17 Dec 2025 11:27:58 +0100 Subject: [PATCH 7/8] fatal only on first connection Signed-off-by: Konvalinka --- .../storage/connector/sync/SyncStreamQueueSource.java | 6 ++++-- .../providers/flagd/resolver/rpc/RpcResolver.java | 11 +++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java index 8545519d4..1118dc7e0 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java @@ -38,6 +38,7 @@ public class SyncStreamQueueSource implements QueueSource { private final AtomicBoolean shutdown = new AtomicBoolean(false); private final AtomicBoolean shouldThrottle = new AtomicBoolean(false); + private final AtomicBoolean successfulSync = new AtomicBoolean(false); private final int streamDeadline; private final int deadline; private final int maxBackoffMs; @@ -137,7 +138,7 @@ private void observeSyncStream() { try { observer.metadata = getMetadata(); } catch (StatusRuntimeException metaEx) { - if (fatalStatusCodes.contains(metaEx.getStatus().getCode().name())) { + if (fatalStatusCodes.contains(metaEx.getStatus().getCode().name()) && !successfulSync.get()) { log.debug("Fatal status code for metadata request: {}, not retrying", metaEx.getStatus().getCode()); enqueueFatal(String.format("Fatal: Failed to connect for metadata request, not retrying for error %s", metaEx.getStatus().getCode())); } else { @@ -152,8 +153,9 @@ private void observeSyncStream() { try { syncFlags(observer); + successfulSync.set(true); } catch (StatusRuntimeException ex) { - if (fatalStatusCodes.contains(ex.getStatus().getCode().toString())) { + if (fatalStatusCodes.contains(ex.getStatus().getCode().toString()) && !successfulSync.get()) { log.debug("Fatal status code during sync stream: {}, not retrying", ex.getStatus().getCode()); enqueueFatal(String.format("Fatal: Failed to connect for metadata request, not retrying for error %s", ex.getStatus().getCode())); } else { diff --git a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/rpc/RpcResolver.java b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/rpc/RpcResolver.java index 15fea898b..1f601151e 100644 --- a/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/rpc/RpcResolver.java +++ b/providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/rpc/RpcResolver.java @@ -60,6 +60,7 @@ public final class RpcResolver implements Resolver { private static final int QUEUE_SIZE = 5; private final AtomicBoolean shutdown = new AtomicBoolean(false); + private final AtomicBoolean successfulConnection = new AtomicBoolean(false); private final ChannelConnector connector; private final Cache cache; private final ResolveStrategy strategy; @@ -351,18 +352,20 @@ private void observeEventStream() throws InterruptedException { Throwable streamException = taken.getError(); if (streamException != null) { - log.debug( - "Exception in event stream connection, streamException {}, will reconnect", - streamException); if (streamException instanceof StatusRuntimeException && fatalStatusCodes.contains( - ((StatusRuntimeException) streamException).getStatus().getCode().name())) { + ((StatusRuntimeException) streamException).getStatus().getCode().name()) && !successfulConnection.get()) { + log.debug("Fatal error code received: {}", ((StatusRuntimeException) streamException).getStatus().getCode()); this.handleFatalError(); } else { + log.debug( + "Exception in event stream connection, streamException {}, will reconnect", + streamException); this.handleErrorOrComplete(); } break; } + successfulConnection.set(true); final EventStreamResponse response = taken.getResponse(); log.debug("Got stream response: {}", response); From e50aa7f69fe619d3407cf678abb91ba01b947b8b Mon Sep 17 00:00:00 2001 From: lea konvalinka Date: Wed, 17 Dec 2025 12:45:54 +0100 Subject: [PATCH 8/8] remove exclusion of sync e2e test tag Signed-off-by: Konvalinka --- .../contrib/providers/flagd/e2e/RunInProcessTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/RunInProcessTest.java b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/RunInProcessTest.java index 3f859d984..c694aa9ef 100644 --- a/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/RunInProcessTest.java +++ b/providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/RunInProcessTest.java @@ -28,7 +28,7 @@ @ConfigurationParameter(key = GLUE_PROPERTY_NAME, value = "dev.openfeature.contrib.providers.flagd.e2e.steps") @ConfigurationParameter(key = OBJECT_FACTORY_PROPERTY_NAME, value = "io.cucumber.picocontainer.PicoFactory") @IncludeTags("in-process") -@ExcludeTags({"unixsocket","sync-port"}) +@ExcludeTags({"unixsocket"}) @Testcontainers public class RunInProcessTest {