5.3.
5.3.1.
- 주의참고
5.3.2.
5.3.2.1.
5.3.2.2.
- 중요
- 참고
$ sudo mv -v /etc/kubernetes/manifests/etcd-pod.yaml /tmp
$ sudo crictl ps | grep etcd | egrep -v "operator|etcd-guard"
$ sudo mv -v /etc/kubernetes/manifests/kube-apiserver-pod.yaml /tmp
$ sudo crictl ps | grep kube-apiserver | egrep -v "operator|guard"
$ sudo mv -v /etc/kubernetes/manifests/kube-controller-manager-pod.yaml /tmp
$ sudo crictl ps | grep kube-controller-manager | egrep -v "operator|guard"
$ sudo mv -v /etc/kubernetes/manifests/kube-scheduler-pod.yaml /tmp
$ sudo crictl ps | grep kube-scheduler | egrep -v "operator|guard"
$ sudo mv -v /var/lib/etcd/ /tmp
$ sudo mv -v /etc/kubernetes/manifests/keepalived.yaml /tmp
$ sudo crictl ps --name keepalived
$ ip -o address | egrep '<api_vip>|<ingress_vip>'
$ sudo ip address del <reported_vip> dev <reported_vip_device>
$ ip -o address | grep <api_vip>
- 작은 정보
$ sudo -E /usr/local/bin/cluster-restore.sh /home/core/assets/backup
...stopping kube-scheduler-pod.yaml ...stopping kube-controller-manager-pod.yaml ...stopping etcd-pod.yaml ...stopping kube-apiserver-pod.yaml Waiting for container etcd to stop .complete Waiting for container etcdctl to stop .............................complete Waiting for container etcd-metrics to stop complete Waiting for container kube-controller-manager to stop complete Waiting for container kube-apiserver to stop ..........................................................................................complete Waiting for container kube-scheduler to stop complete Moving etcd data-dir /var/lib/etcd/member to /var/lib/etcd-backup starting restore-etcd static pod starting kube-apiserver-pod.yaml static-pod-resources/kube-apiserver-pod-7/kube-apiserver-pod.yaml starting kube-controller-manager-pod.yaml static-pod-resources/kube-controller-manager-pod-7/kube-controller-manager-pod.yaml starting kube-scheduler-pod.yaml static-pod-resources/kube-scheduler-pod-8/kube-scheduler-pod.yaml
참고$ oc get nodes -w
NAME STATUS ROLES AGE VERSION host-172-25-75-28 Ready master 3d20h v1.29.4 host-172-25-75-38 Ready infra,worker 3d20h v1.29.4 host-172-25-75-40 Ready master 3d20h v1.29.4 host-172-25-75-65 Ready master 3d20h v1.29.4 host-172-25-75-74 Ready infra,worker 3d20h v1.29.4 host-172-25-75-79 Ready worker 3d20h v1.29.4 host-172-25-75-86 Ready worker 3d20h v1.29.4 host-172-25-75-98 Ready infra,worker 3d20h v1.29.4
$ ssh -i <ssh-key-path> core@<master-hostname>
sh-4.4# pwd /var/lib/kubelet/pki sh-4.4# ls kubelet-client-2022-04-28-11-24-09.pem kubelet-server-2022-04-28-11-24-15.pem kubelet-client-current.pem kubelet-server-current.pem
$ sudo systemctl restart kubelet.service
- 참고
$ oc get csr
NAME AGE SIGNERNAME REQUESTOR CONDITION csr-2s94x 8m3s kubernetes.io/kubelet-serving system:node:<node_name> Pending 1 csr-4bd6t 8m3s kubernetes.io/kubelet-serving system:node:<node_name> Pending 2 csr-4hl85 13m kubernetes.io/kube-apiserver-client-kubelet system:serviceaccount:openshift-machine-config-operator:node-bootstrapper Pending 3 csr-zhhhp 3m8s kubernetes.io/kube-apiserver-client-kubelet system:serviceaccount:openshift-machine-config-operator:node-bootstrapper Pending 4 ...
$ oc describe csr <csr_name> 1
$ oc adm certificate approve <csr_name>
$ oc adm certificate approve <csr_name>
$ sudo crictl ps | grep etcd | egrep -v "operator|etcd-guard"
3ad41b7908e32 36f86e2eeaaffe662df0d21041eb22b8198e0e58abeeae8c743c3e6e977e8009 About a minute ago Running etcd 0 7c05f8af362f0
$ oc -n openshift-etcd get pods -l k8s-app=etcd
NAME READY STATUS RESTARTS AGE etcd-ip-10-0-143-125.ec2.internal 1/1 Running 1 2m47s
$ oc -n openshift-ovn-kubernetes delete pod -l app=ovnkube-control-plane
$ oc -n openshift-ovn-kubernetes get pod -l app=ovnkube-control-plane
- 중요참고
$ sudo rm -f /var/lib/ovn-ic/etc/*.db
$ sudo systemctl restart ovs-vswitchd ovsdb-server
$ oc -n openshift-ovn-kubernetes delete pod -l app=ovnkube-node --field-selector=spec.nodeName==<node>
$ oc -n openshift-ovn-kubernetes get pod -l app=ovnkube-node --field-selector=spec.nodeName==<node>
참고
- 주의
- 주의
$ oc get machines -n openshift-machine-api -o wide
NAME PHASE TYPE REGION ZONE AGE NODE PROVIDERID STATE clustername-8qw5l-master-0 Running m4.xlarge us-east-1 us-east-1a 3h37m ip-10-0-131-183.ec2.internal aws:///us-east-1a/i-0ec2782f8287dfb7e stopped 1 clustername-8qw5l-master-1 Running m4.xlarge us-east-1 us-east-1b 3h37m ip-10-0-143-125.ec2.internal aws:///us-east-1b/i-096c349b700a19631 running clustername-8qw5l-master-2 Running m4.xlarge us-east-1 us-east-1c 3h37m ip-10-0-154-194.ec2.internal aws:///us-east-1c/i-02626f1dba9ed5bba running clustername-8qw5l-worker-us-east-1a-wbtgd Running m4.large us-east-1 us-east-1a 3h28m ip-10-0-129-226.ec2.internal aws:///us-east-1a/i-010ef6279b4662ced running clustername-8qw5l-worker-us-east-1b-lrdxb Running m4.large us-east-1 us-east-1b 3h28m ip-10-0-144-248.ec2.internal aws:///us-east-1b/i-0cb45ac45a166173b running clustername-8qw5l-worker-us-east-1c-pkg26 Running m4.large us-east-1 us-east-1c 3h28m ip-10-0-170-181.ec2.internal aws:///us-east-1c/i-06861c00007751b0a running
$ oc delete machine -n openshift-machine-api clustername-8qw5l-master-0 1
$ oc get machines -n openshift-machine-api -o wide
NAME PHASE TYPE REGION ZONE AGE NODE PROVIDERID STATE clustername-8qw5l-master-1 Running m4.xlarge us-east-1 us-east-1b 3h37m ip-10-0-143-125.ec2.internal aws:///us-east-1b/i-096c349b700a19631 running clustername-8qw5l-master-2 Running m4.xlarge us-east-1 us-east-1c 3h37m ip-10-0-154-194.ec2.internal aws:///us-east-1c/i-02626f1dba9ed5bba running clustername-8qw5l-master-3 Provisioning m4.xlarge us-east-1 us-east-1a 85s ip-10-0-173-171.ec2.internal aws:///us-east-1a/i-015b0888fe17bc2c8 running 1 clustername-8qw5l-worker-us-east-1a-wbtgd Running m4.large us-east-1 us-east-1a 3h28m ip-10-0-129-226.ec2.internal aws:///us-east-1a/i-010ef6279b4662ced running clustername-8qw5l-worker-us-east-1b-lrdxb Running m4.large us-east-1 us-east-1b 3h28m ip-10-0-144-248.ec2.internal aws:///us-east-1b/i-0cb45ac45a166173b running clustername-8qw5l-worker-us-east-1c-pkg26 Running m4.large us-east-1 us-east-1c 3h28m ip-10-0-170-181.ec2.internal aws:///us-east-1c/i-06861c00007751b0a running
$ oc patch etcd/cluster --type=merge -p '{"spec": {"unsupportedConfigOverrides": {"useUnsupportedUnsafeNonHANonProductionUnstableEtcd": true}}}'
$ export KUBECONFIG=/etc/kubernetes/static-pod-resources/kube-apiserver-certs/secrets/node-kubeconfigs/localhost-recovery.kubeconfig
$ oc patch etcd cluster -p='{"spec": {"forceRedeploymentReason": "recovery-'"$( date --rfc-3339=ns )"'"}}' --type=merge 1
$ oc patch etcd/cluster --type=merge -p '{"spec": {"unsupportedConfigOverrides": null}}'
$ oc get etcd/cluster -oyaml
$ oc get etcd -o=jsonpath='{range .items[0].status.conditions[?(@.type=="NodeInstallerProgressing")]}{.reason}{"\n"}{.message}{"\n"}'
AllNodesAtLatestRevision 3 nodes are at revision 7 1
$ oc patch kubeapiserver cluster -p='{"spec": {"forceRedeploymentReason": "recovery-'"$( date --rfc-3339=ns )"'"}}' --type=merge
$ oc get kubeapiserver -o=jsonpath='{range .items[0].status.conditions[?(@.type=="NodeInstallerProgressing")]}{.reason}{"\n"}{.message}{"\n"}'
AllNodesAtLatestRevision 3 nodes are at revision 7 1
$ oc patch kubecontrollermanager cluster -p='{"spec": {"forceRedeploymentReason": "recovery-'"$( date --rfc-3339=ns )"'"}}' --type=merge
$ oc get kubecontrollermanager -o=jsonpath='{range .items[0].status.conditions[?(@.type=="NodeInstallerProgressing")]}{.reason}{"\n"}{.message}{"\n"}'
AllNodesAtLatestRevision 3 nodes are at revision 7 1
$ oc patch kubescheduler cluster -p='{"spec": {"forceRedeploymentReason": "recovery-'"$( date --rfc-3339=ns )"'"}}' --type=merge
$ oc get kubescheduler -o=jsonpath='{range .items[0].status.conditions[?(@.type=="NodeInstallerProgressing")]}{.reason}{"\n"}{.message}{"\n"}'
AllNodesAtLatestRevision 3 nodes are at revision 7 1
$ oc adm wait-for-stable-cluster
$ oc -n openshift-etcd get pods -l k8s-app=etcd
etcd-ip-10-0-143-125.ec2.internal 2/2 Running 0 9h etcd-ip-10-0-154-194.ec2.internal 2/2 Running 0 9h etcd-ip-10-0-173-171.ec2.internal 2/2 Running 0 9h
$ export KUBECONFIG=<installation_directory>/auth/kubeconfig
$ oc whoami
5.3.2.3.
5.3.2.4.
5.3.3.
5.3.3.1.
$ oc get csr
NAME AGE SIGNERNAME REQUESTOR CONDITION csr-2s94x 8m3s kubernetes.io/kubelet-serving system:node:<node_name> Pending 1 csr-4bd6t 8m3s kubernetes.io/kubelet-serving system:node:<node_name> Pending csr-4hl85 13m kubernetes.io/kube-apiserver-client-kubelet system:serviceaccount:openshift-machine-config-operator:node-bootstrapper Pending 2 csr-zhhhp 3m8s kubernetes.io/kube-apiserver-client-kubelet system:serviceaccount:openshift-machine-config-operator:node-bootstrapper Pending ...
$ oc describe csr <csr_name> 1
$ oc adm certificate approve <csr_name>
$ oc adm certificate approve <csr_name>