diff --git a/aiopslab/generators/fault/chaos-yaml/pod-kill.yaml b/aiopslab/generators/fault/chaos-yaml/pod-kill.yaml index e85729c9..7c6cfc3d 100644 --- a/aiopslab/generators/fault/chaos-yaml/pod-kill.yaml +++ b/aiopslab/generators/fault/chaos-yaml/pod-kill.yaml @@ -4,7 +4,7 @@ metadata: name: pod-kill-mesh namespace: default spec: - action: pod-kill + action: pod-failure mode: one selector: namespaces: diff --git a/aiopslab/generators/fault/inject_symp.py b/aiopslab/generators/fault/inject_symp.py index cf04feb8..1c56bdf4 100644 --- a/aiopslab/generators/fault/inject_symp.py +++ b/aiopslab/generators/fault/inject_symp.py @@ -164,6 +164,11 @@ def recover_network_delay(self): def inject_pod_kill(self, microservices: List[str], duration: str = "200s"): """ Inject a pod kill fault targeting specified microservices by label in the configured namespace. + + Note: This uses 'pod-failure' action instead of 'pod-kill' to prevent Kubernetes from immediately + recreating the pod. The 'pod-kill' action forcefully deletes pods, causing Kubernetes controllers + (Deployment/ReplicaSet) to immediately recreate them. The 'pod-failure' action makes pods unavailable + for the specified duration without deletion, allowing proper fault injection testing. Args: microservices (List[str]): A list of microservices labels to target for the pod kill experiment. @@ -174,7 +179,7 @@ def inject_pod_kill(self, microservices: List[str], duration: str = "200s"): "kind": "PodChaos", "metadata": {"name": "pod-kill", "namespace": self.namespace}, "spec": { - "action": "pod-kill", + "action": "pod-failure", "mode": "one", "duration": duration, "selector": {