我在由 Helm 管理的 EKS 集群中遇到了这个问题:每次通过 GitHub Actions 触发新的部署时,都会生成两个 ReplicaSet,其中第二个 ReplicaSet 的 Pod 会存活约 1000 秒后才被终止。由于我们使用滚动更新(RollingUpdate)作为部署策略,该 ReplicaSet 仍然可以继续运行。我在所有微服务中都观察到了相同的行为。我尝试过使用 deployment.yaml 文件在本地重现此问题,也尝试过使用 Helm chart,但都没有发现任何错误。
kubectl describe pods eun-6f58xxxxdb-28ddd -n staging
Name: eun-6f58xxxxdb-28ddd
Namespace: staging
Priority: 0
Service Account: default
Node: ip-10-40.us-east-2.compute.internal/10.40
Start Time: Tue, 21 Feb 2023 12:59:59 +0530
Labels: app.kubernetes.io/component=loud
app.kubernetes.io/instance=eun
app.kubernetes.io/managed-by=Helm
app.kubernetes.io/name=eun
app.kubernetes.io/version=PR-xxx-e13xx1a
helm.sh/chart=cloud-0.0.47
pod-template-hash=6f58xxxxdb
Annotations: kubernetes.io/psp: eks.privileged
Status: Running
IP: 10.xx.xx.214
IPs:
IP: 10.xx.xx.214
Controlled By: ReplicaSet/eun-6f58xxxxdb
Containers:
eun:
Container ID: docker://e3fed29f82836b7b4b2
Image: 940xxxx.dkr.ecr.us-east-2.amazonaws.com/eun:PR-xxx-e13xx1a
Image ID: docker-pullable://940xxxx.dkr.ecr.us-west-2.amazonaws.com/eun@sha256:bff318
Port: 10001/TCP
Host Port: 0/TCP
State: Running
Started: Tue, 21 Feb 2023 13:00:07 +0530
Ready: True
Restart Count: 0
Limits:
cpu: 600m
memory: 628Mi
Requests:
cpu: 200m
memory: 248Mi
Liveness: tcp-socket :10001 delay=20s timeout=1s period=60s #success=1 #failure=3
Readiness: http-get http://:10001/ delay=20s timeout=60s period=60s #success=1 #failure=3
Environment Variables from:
eunomia Secret Optional: false
Environment:
DD_ENTITY_ID: (v1:metadata.uid)
DD_AGENT_HOST: (v1:status.hostIP)
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-qdssj (ro)
Readiness Gates:
Type Status
target-health.elbv2.k8s.aws/k8s-staging-eunomia-eddd6c8379 True
Conditions:
Type Status
target-health.elbv2.k8s.aws/k8s-staging-eunomia-eddd6c8379 True
Initialized True
Ready True
ContainersReady True
PodScheduled True
Volumes:
kube-api-access-qdssj:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 3607
ConfigMapName: kube-root-ca.crt
ConfigMapOptional: <nil>
DownwardAPI: true
QoS Class: Burstable
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Topology Spread Constraints: kubernetes.io/hostname:ScheduleAnyway when max skew 1 is exceeded
Events: <none>
kubectl describe deployment enu -n staging
Name: enu
Namespace: staging
CreationTimestamp: Fri, 17 Dec 2021 20:14:58 +0530
Labels: app.kubernetes.io/component=cloud
app.kubernetes.io/instance=enu
app.kubernetes.io/managed-by=Helm
app.kubernetes.io/name=enu
app.kubernetes.io/version=PR-xxx-e13xx1a
argocd.argoproj.io/instance=stg-enu
helm.sh/chart=cloud-0.0.47
Annotations: deployment.kubernetes.io/revision: 104
Selector: app.kubernetes.io/component=cloud,app.kubernetes.io/instance=enu,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/name=enu
Replicas: 1 desired | 1 updated | 1 total | 1 available | 0 unavailable
StrategyType: RollingUpdate
MinReadySeconds: 0
RollingUpdateStrategy: 25% max unavailable, 25% max surge
Pod Template:
Labels: app.kubernetes.io/component=cloud
app.kubernetes.io/instance=enu
app.kubernetes.io/managed-by=Helm
app.kubernetes.io/name=enu
app.kubernetes.io/version=PR-xxx-e13xx1a
helm.sh/chart=cloud-0.0.47
Containers:
enu:
Image: 940xxxx.dkr.ecr.us-east-2.amazonaws.com/enu:PR-xxx-e13xx1a
Port: 10001/TCP
Host Port: 0/TCP
Limits:
cpu: 600m
memory: 628Mi
Requests:
cpu: 200m
memory: 248Mi
Liveness: tcp-socket :10001 delay=20s timeout=1s period=60s #success=1 #failure=3
Readiness: http-get http://:10001/ delay=20s timeout=60s period=60s #success=1 #failure=3
Environment Variables from:
enu Secret Optional: false
Environment: <none>
Mounts: <none>
Volumes: <none>
Topology Spread Constraints: kubernetes.io/hostname:ScheduleAnyway when max skew 1 is exceeded
Conditions:
Type Status Reason
---- ------ ------
Available True MinimumReplicasAvailable
Progressing True NewReplicaSetAvailable
OldReplicaSets: <none>
NewReplicaSet: enu-6f5897d6db (1/1 replicas created)
Events: <none>