4.14. Troubleshooting

You can debug Velero custom resources (CRs) by using the OpenShift CLI tool or the Velero CLI tool. The Velero CLI tool provides more detailed logs and information. You can check installation issues, backup and restore CR issues, and Restic issues, and you can collect logs and CR information by using the must-gather tool.
4.14.1. Downloading the Velero CLI tool

You can download and install the Velero CLI tool by following the instructions on the Velero documentation page.
4.14.1.1. Velero-OADP version relationship

Install the Velero CLI version that matches the Velero version shipped with your OADP release.
[Table: Velero version shipped with each OADP release]
4.14.2. Accessing the Velero binary in the cluster deployment

You can use a shell alias to run the Velero binary that ships in the cluster deployment:
$ alias velero='oc -n openshift-adp exec deployment/velero -c velero -it -- ./velero'
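With the alias in place, Velero subcommands run inside the cluster deployment. For example, to list backups (a usage sketch; it assumes at least one Backup CR exists in the cluster):

$ velero backup get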
4.14.3. Debugging Velero resources with the OpenShift CLI tool

You can debug a failed backup or restore by checking the Velero custom resources and the Velero pod logs with the OpenShift CLI tool.
Velero CRs:

$ oc describe <velero_cr> <cr_name>

Velero pod logs:

$ oc logs pod/<velero>

Velero pod debug logs — set the Velero log level in the DataProtectionApplication resource:

apiVersion: oadp.openshift.io/v1alpha1
kind: DataProtectionApplication
metadata:
  name: velero-sample
spec:
  configuration:
    velero:
      logLevel: warning

The available logLevel values are trace, debug, info, warning, error, fatal, and panic; debug is recommended for most troubleshooting.
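To confirm that the new log level is active, you can inspect the arguments of the Velero container after the Operator reconciles the DPA. This is a quick check, assuming the default openshift-adp namespace and that OADP renders spec.configuration.velero.logLevel as the --log-level server argument (verify this on your OADP version):

$ oc -n openshift-adp get deployment/velero \
  -o jsonpath='{.spec.template.spec.containers[0].args}'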
4.14.4. Debugging Velero resources with the Velero CLI tool

You can debug Backup and Restore CRs and retrieve logs by using the Velero CLI tool, which provides more detailed information than the OpenShift CLI tool.
oc exec command syntax — run a Velero CLI command against the deployment:

$ oc -n openshift-adp exec deployment/velero -c velero -- ./velero \
  <backup_restore_cr> <command> <cr_name>

Example:

$ oc -n openshift-adp exec deployment/velero -c velero -- ./velero \
  backup describe 0e44ae00-5dc3-11eb-9ca8-df7e5254778b-2d8ql

Help option — list all Velero CLI commands:

$ oc -n openshift-adp exec deployment/velero -c velero -- ./velero \
  --help

describe command — retrieve a summary of warnings and errors associated with a Backup or Restore CR:

$ oc -n openshift-adp exec deployment/velero -c velero -- ./velero \
  <backup_restore_cr> describe <cr_name>

Example:

$ oc -n openshift-adp exec deployment/velero -c velero -- ./velero \
  backup describe 0e44ae00-5dc3-11eb-9ca8-df7e5254778b-2d8ql

logs command — retrieve the logs of a Backup or Restore CR:

$ oc -n openshift-adp exec deployment/velero -c velero -- ./velero \
  <backup_restore_cr> logs <cr_name>

Example:

$ oc -n openshift-adp exec deployment/velero -c velero -- ./velero \
  restore logs ccc7c2d0-6017-11eb-afab-85d0007f5a19-x4lbf
4.14.5. Pods crash or restart due to lack of memory or CPU

If a Velero or Restic pod crashes due to a lack of memory or CPU, you can set specific resource requests for either of those resources.
4.14.5.1. Setting resource requests for a Velero pod

You can use the configuration.velero.podConfig.resourceAllocations specification field in the DataProtectionApplication CR manifest to set specific resource requests for a Velero pod:
apiVersion: oadp.openshift.io/v1alpha1
kind: DataProtectionApplication
...
configuration:
  velero:
    podConfig:
      resourceAllocations: 1
        requests:
          cpu: 200m
          memory: 256Mi
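Because resourceAllocations follows the standard Kubernetes resource-requirements schema, you can also cap consumption with limits alongside requests. A minimal sketch, with illustrative values:

configuration:
  velero:
    podConfig:
      resourceAllocations:
        limits:
          cpu: "1"
          memory: 1Gi
        requests:
          cpu: 200m
          memory: 256Mi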
4.14.5.2. Setting resource requests for a Restic pod

You can use the configuration.restic.podConfig.resourceAllocations specification field to set specific resource requests for a Restic pod:
apiVersion: oadp.openshift.io/v1alpha1
kind: DataProtectionApplication
...
configuration:
  restic:
    podConfig:
      resourceAllocations: 1
        requests:
          cpu: 1000m
          memory: 16Gi
The values for the resource request fields must follow the same format as Kubernetes resource requirements. If you do not specify configuration.restic.podConfig.resourceAllocations, the following default resource requests apply:

requests:
  cpu: 500m
  memory: 128Mi
4.14.6. PodVolumeRestore fails with a "no such file or directory" error on NFS storage

When volumes are provisioned by the nfs-subdir-external-provisioner, restoring a pod volume can fail with an error similar to the following:
Velero: pod volume restore failed: data path restore failed: \
Failed to run kopia restore: Failed to copy snapshot data to the target: \
restore error: copy file: error creating file: \
open /host_pods/b4d...6/volumes/kubernetes.io~nfs/pvc-53...4e5/userdata/base/13493/2681: \
no such file or directory
You can avoid the failure by configuring the pathPattern parameter in the provisioner's StorageClass, so that the provisioned directory is derived from PVC metadata instead of an auto-generated name:

apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: nfs-client
provisioner: k8s-sigs.io/nfs-subdir-external-provisioner
parameters:
  pathPattern: "${.PVC.namespace}/${.PVC.annotations.nfs.io/storage-path}" 1
  onDelete: delete
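The pathPattern above is resolved from PVC metadata at provisioning time. For illustration, a hypothetical claim such as the following would be provisioned under my-app/test-path on the NFS export (the claim name, namespace, and the nfs.io/storage-path value are example values):

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: test-claim
  namespace: my-app
  annotations:
    nfs.io/storage-path: "test-path"   # consumed by the StorageClass pathPattern
spec:
  storageClassName: nfs-client
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi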
4.14.7. Issues with Velero and admission webhooks

Velero has limited abilities to resolve admission webhook issues during a restore. If you have workloads with admission webhooks, you might need to use an additional Velero plugin or make changes to how you restore the workload.
4.14.7.1. Restoring workarounds for Velero backups that use admission webhooks

Typically, workloads with admission webhooks require you to create a resource of a specific kind first, because admission webhooks can otherwise block child resources.
4.14.7.1.1. Restoring Knative resources

If you back up Knative workloads, restore the top-level Service resource first:
$ velero restore <restore_name> \
  --from-backup=<backup_name> --include-resources \
  service.serving.knative.dev
4.14.7.1.2. Restoring IBM AppConnect resources

If you experience issues when you use Velero to restore an IBM AppConnect resource that has a mutating admission webhook, check whether a MutatingWebhookConfiguration exists on the cluster:
$ oc get mutatingwebhookconfigurations
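If the output lists a webhook configuration that blocks the resources you are restoring (something to confirm case by case), you can remove it before the restore and re-create it afterward. A hedged sketch:

$ oc delete mutatingwebhookconfiguration <webhook_configuration_name>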
4.14.7.2. OADP plugins known issues

The following sections describe known issues in OpenShift API for Data Protection (OADP) plugins.
4.14.7.2.1. Velero plugin panic during imagestream backups

If a backup and its Backup Storage Location (BSL) are managed outside the scope of the Data Protection Application (DPA), the OADP controller does not create the relevant oadp-<bsl_name>-<bsl_provider>-registry-secret, and imagestream backups panic with an error similar to the following:
2024-02-27T10:46:50.028951744Z time="2024-02-27T10:46:50Z" level=error msg="Error backing up item" backup=openshift-adp/<backup name> error="error executing custom action (groupResource=imagestreams.image.openshift.io, namespace=<BSL Name>, name=postgres): rpc error: code = Aborted desc = plugin panicked: runtime error: index out of range with length 1, stack trace: goroutine 94…
4.14.7.2.1.1. Workaround to avoid the panic error

Label the custom BSL so that the OADP controller reconciles it and creates the registry secret:
$ oc label backupstoragelocations.velero.io <bsl_name> app.kubernetes.io/component=bsl
Note

After the BSL is labeled, wait until the OADP controller reconciles it, and then verify that the registry secret and its data exist:

$ oc -n openshift-adp get secret/oadp-<bsl_name>-<bsl_provider>-registry-secret -o json | jq -r '.data'
4.14.7.2.2. OpenShift ADP Controller segmentation fault

If you configure a DPA with both cloudstorage and restic enabled, the openshift-adp-controller-manager pod can crash and restart indefinitely until it fails with a crash loop segmentation fault.
4.14.7.2.2.1. OpenShift ADP Controller segmentation fault workaround

Define either velero or cloudstorage when you create the DPA; the fields are mutually exclusive.
4.14.7.3. Velero plugins returning "received EOF, stopping recv loop" message

Velero plugins are started as separate processes. After a plugin operation completes, the plugin process exits, and the Velero server logs a "received EOF, stopping recv loop" message. This message means only that the plugin operation has completed; it does not indicate an error.
4.14.8. Installation issues

You might encounter issues caused by using invalid directories or incorrect credentials when you install the Data Protection Application.
4.14.8.1. Backup storage contains invalid directories

The Velero pod log displays the error message "Backup storage contains invalid top-level directories." The cause is that the object storage contains top-level directories that are not Velero directories. If the object storage is not dedicated to Velero, you must specify a prefix for the bucket in the spec.backupLocations.velero.objectStorage.prefix parameter of the DataProtectionApplication manifest.
4.14.8.2. Incorrect AWS credentials

The oadp-aws-registry pod log displays the error message "InvalidAccessKeyId: The AWS Access Key Id you provided does not exist in our records." The Velero pod log displays the error message "NoCredentialProviders: no valid providers in chain." The cause is an incorrectly formatted credentials file used to create the Secret object. The credentials file must have the following format:
[default] 1
aws_access_key_id=AKIAIOSFODNN7EXAMPLE 2
aws_secret_access_key=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
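If the credentials file was wrong when the Secret was created, the Secret must be recreated to match the corrected file. A sketch, assuming the default cloud-credentials Secret name, the cloud key, and a corrected local file named credentials-velero:

$ oc delete secret cloud-credentials -n openshift-adp
$ oc create secret generic cloud-credentials -n openshift-adp \
  --from-file cloud=credentials-velero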
4.14.9. OADP Operator issues

The OpenShift API for Data Protection (OADP) Operator might encounter issues caused by problems it is not able to resolve.
4.14.9.1. OADP Operator fails silently

The S3 buckets of an OADP Operator might be empty even though no errors are reported. To find the cause, check the BackupStorageLocation resources:
$ oc get backupstoragelocations.velero.io -A
$ velero backup-location get -n <OADP_Operator_namespace>
$ oc get backupstoragelocations.velero.io -n <namespace> -o yaml
apiVersion: v1
items:
- apiVersion: velero.io/v1
  kind: BackupStorageLocation
  metadata:
    creationTimestamp: "2023-11-03T19:49:04Z"
    generation: 9703
    name: example-dpa-1
    namespace: openshift-adp-operator
    ownerReferences:
    - apiVersion: oadp.openshift.io/v1alpha1
      blockOwnerDeletion: true
      controller: true
      kind: DataProtectionApplication
      name: example-dpa
      uid: 0beeeaff-0287-4f32-bcb1-2e3c921b6e82
    resourceVersion: "24273698"
    uid: ba37cd15-cf17-4f7d-bf03-8af8655cea83
  spec:
    config:
      enableSharedConfig: "true"
      region: us-west-2
    credential:
      key: credentials
      name: cloud-credentials
    default: true
    objectStorage:
      bucket: example-oadp-operator
      prefix: example
    provider: aws
  status:
    lastValidationTime: "2023-11-10T22:06:46Z"
    message: "BackupStorageLocation \"example-dpa-1\" is unavailable: rpc error: code = Unknown desc = WebIdentityErr: failed to retrieve credentials\ncaused by: AccessDenied: Not authorized to perform sts:AssumeRoleWithWebIdentity\n\tstatus code: 403, request id: d3f2e099-70a0-467b-997e-ff62345e3b54"
    phase: Unavailable
kind: List
metadata:
  resourceVersion: ""
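In this example output, the BSL is Unavailable because STS role assumption was denied. After you fix the underlying credentials or IAM configuration, you can watch the location until its phase returns to Available:

$ oc get backupstoragelocations.velero.io -n openshift-adp -w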
4.14.10. OADP timeouts

Extending the timeouts allows complex or resource-intensive processes to complete successfully without premature termination. This configuration can reduce the likelihood of timeout errors, failures, and lost data.
4.14.10.1. Restic timeout

The spec.configuration.nodeAgent.timeout parameter sets the Restic timeout. The default value is 1h. If a Restic operation runs longer than the timeout, the log displays an error similar to the following:
level=error msg="Error backing up item" backup=velero/monitoring error="timed out waiting for all PodVolumeBackups to complete"
To extend the timeout, edit the spec.configuration.nodeAgent.timeout value in the DataProtectionApplication CR:

apiVersion: oadp.openshift.io/v1alpha1
kind: DataProtectionApplication
metadata:
  name: <dpa_name>
spec:
  configuration:
    nodeAgent:
      enable: true
      uploaderType: restic
      timeout: 1h
# ...
4.14.10.2. Velero resource timeout

resourceTimeout defines how long to wait for several Velero resources before a timeout occurs, such as Velero CRD availability, volumeSnapshot deletion, and repository availability. The default value is 10m. Set it in the DataProtectionApplication CR:
apiVersion: oadp.openshift.io/v1alpha1
kind: DataProtectionApplication
metadata:
  name: <dpa_name>
spec:
  configuration:
    velero:
      resourceTimeout: 10m
# ...
4.14.10.3. Data Mover timeout

spec.features.dataMover.timeout defines how long VolumeSnapshotBackup and VolumeSnapshotRestore operations can take to complete. The default value is 10m. Set it in the DataProtectionApplication CR:
apiVersion: oadp.openshift.io/v1alpha1
kind: DataProtectionApplication
metadata:
  name: <dpa_name>
spec:
  features:
    dataMover:
      timeout: 10m
# ...
4.14.10.4. CSI snapshot timeout

CSISnapshotTimeout specifies how long to wait during creation for the CSI VolumeSnapshot status to become ready before returning a timeout error. The default value is 10m. Set it in the Backup CR:
apiVersion: velero.io/v1
kind: Backup
metadata:
  name: <backup_name>
spec:
  csiSnapshotTimeout: 10m
# ...
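When a CSI snapshot repeatedly hits this timeout, inspecting the VolumeSnapshot objects in the application namespace often shows why the snapshot never becomes ready. A quick check, assuming the CSI snapshot CRDs are installed on the cluster:

$ oc get volumesnapshots -n <application_namespace>
$ oc describe volumesnapshot <snapshot_name> -n <application_namespace>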
4.14.10.5. Velero default item operation timeout

defaultItemOperationTimeout defines how long to wait on asynchronous BackupItemActions and RestoreItemActions to complete before timing out. The default value is 1h. Set it in the DataProtectionApplication CR:
apiVersion: oadp.openshift.io/v1alpha1
kind: DataProtectionApplication
metadata:
  name: <dpa_name>
spec:
  configuration:
    velero:
      defaultItemOperationTimeout: 1h
# ...
4.14.10.6. Item operation timeout - restore

ItemOperationTimeout defines how long to wait for RestoreItemAction operations to complete. The default value is 1h. Set it in the Restore CR:
apiVersion: velero.io/v1
kind: Restore
metadata:
  name: <restore_name>
spec:
  itemOperationTimeout: 1h
# ...
4.14.10.7. Item operation timeout - backup

ItemOperationTimeout defines how long to wait for asynchronous BackupItemAction operations to complete. The default value is 1h. Set it in the Backup CR:
apiVersion: velero.io/v1
kind: Backup
metadata:
  name: <backup_name>
spec:
  itemOperationTimeout: 1h
# ...
4.14.11. Backup and Restore CR issues

You might encounter the following common issues with Backup and Restore custom resources (CRs).
4.14.11.1. Backup CR cannot retrieve volume

If the backed-up volume and the snapshot location are in different regions, the Backup CR displays an error similar to "InvalidVolume.NotFound: The volume ... does not exist". Edit the snapshot location region in the DataProtectionApplication manifest so that it matches the volume's region, and then create a new Backup CR.
4.14.11.2. Backup CR status remains in progress

If a backup is interrupted, it cannot be resumed, and the Backup CR status remains InProgress and does not complete.
Retrieve the details of the Backup CR:

$ oc -n <namespace> exec deployment/velero -c velero -- ./velero \
  backup describe <backup>

Delete the Backup CR and create a new backup:

$ oc delete backups.velero.io <backup> -n openshift-adp

You can inspect backup details, including any in-progress operations, with the --details flag:

$ velero backup describe <backup-name> --details
4.14.11.3. Backup CR status remains in PartiallyFailed

If the default VolumeSnapshotClass for the storage provisioner is missing the velero.io/csi-volumesnapshot-class label, a backup of volumes on that provisioner remains in the PartiallyFailed phase, and the Velero log shows an error similar to the following:
time="2023-02-17T16:33:13Z" level=error msg="Error backing up item" backup=openshift-adp/user1-backup-check5 error="error executing custom action (groupResource=persistentvolumeclaims, namespace=busy1, name=pvc1-user1): rpc error: code = Unknown desc = failed to get volumesnapshotclass for storageclass ocs-storagecluster-ceph-rbd: failed to get volumesnapshotclass for provisioner openshift-storage.rbd.csi.ceph.com, ensure that the desired volumesnapshot class has the velero.io/csi-volumesnapshot-class label" logSource="/remote-source/velero/app/pkg/backup/backup.go:417" name=busybox-79799557b5-vprq
Delete the Backup CR:

$ oc delete backups.velero.io <backup> -n openshift-adp

Add the velero.io/csi-volumesnapshot-class=true label to the default VolumeSnapshotClass, and then create a new Backup CR:

$ oc label volumesnapshotclass/<snapclass_name> velero.io/csi-volumesnapshot-class=true
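You can confirm that the label was applied:

$ oc get volumesnapshotclass <snapclass_name> --show-labels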
4.14.12. Restic issues

You might encounter the following errors when you back up applications with Restic.
4.14.12.1. Restic permission error for NFS data volumes with root_squash enabled

If the root_squash parameter is enabled on your NFS data volumes, Restic is mapped to nfsnobody and does not have permission to create backups; the Restic pod log displays a permission error. You can resolve the issue by creating a supplemental group for Restic and adding the group ID to the DataProtectionApplication manifest:
apiVersion: oadp.openshift.io/v1alpha1
kind: DataProtectionApplication
# ...
spec:
  configuration:
    nodeAgent:
      enable: true
      uploaderType: restic
      supplementalGroups:
      - <group_id> 1
# ...

Wait for the Restic pods to restart so that the change is applied.
4.14.12.2. Restic Backup CR cannot be recreated after bucket is emptied

If you create a Restic Backup CR for a namespace, empty the object storage bucket, and then re-create the Backup CR for the same namespace, the re-created Backup CR fails, because Velero does not re-create or update the Restic repository.
Remove the related Restic repository CR from the openshift-adp namespace so that Velero can re-create it:

$ oc delete resticrepository -n openshift-adp <name_of_the_restic_repository>
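If you do not know the repository name, you can list the repository CRs first. A sketch; depending on the Velero version bundled with your OADP release, the resource is named resticrepositories.velero.io or backuprepositories.velero.io:

$ oc get resticrepositories.velero.io -n openshift-adp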
The Restic repository error that appears in the log is similar to the following:

time="2021-12-29T18:29:14Z" level=info msg="1 errors encountered backup up item" backup=velero/backup65 logSource="pkg/backup/backup.go:431" name=mysql-7d99fc949-qbkds
time="2021-12-29T18:29:14Z" level=error msg="Error backing up item" backup=velero/backup65 error="pod volume backup failed: error running restic backup, stderr=Fatal: unable to open config file: Stat: The specified key does not exist.\nIs there a repository at the following location?\ns3:http://minio-minio.apps.mayap-oadp-veleo-1234.qe.devcluster.openshift.com/mayapvelerooadp2/velero1/restic/mysql-persistent\n: exit status 1" error.file="/remote-source/src/github.com/vmware-tanzu/velero/pkg/restic/backupper.go:184" error.function="github.com/vmware-tanzu/velero/pkg/restic.(*backupper).BackupPodVolumes" logSource="pkg/backup/backup.go:435" name=mysql-7d99fc949-qbkds
4.14.12.3. Restic restore partially failing on OCP 4.14 due to changed PSA policy

OpenShift Container Platform 4.14 enforces a pod security admission (PSA) policy that can hinder the readiness of pods during a Restic restore. If a SecurityContextConstraints (SCC) resource is not found when a pod is created, and the PSA policy on the pod does not meet the required standards, pod admission is denied and the restore is marked PartiallyFailed with errors similar to the following:
\"level=error\" in line#2273: time=\"2023-06-12T06:50:04Z\" level=error msg=\"error restoring mysql-869f9f44f6-tp5lv: pods\\\ "mysql-869f9f44f6-tp5lv\\\" is forbidden: violates PodSecurity\\\ "restricted:v1.24\\\": privil eged (container \\\"mysql\\\ " must not set securityContext.privileged=true), allowPrivilegeEscalation != false (containers \\\ "restic-wait\\\", \\\"mysql\\\" must set securityContext.allowPrivilegeEscalation=false), unrestricted capabilities (containers \\\ "restic-wait\\\", \\\"mysql\\\" must set securityContext.capabilities.drop=[\\\"ALL\\\"]), seccompProfile (pod or containers \\\ "restic-wait\\\", \\\"mysql\\\" must set securityContext.seccompProfile.type to \\\ "RuntimeDefault\\\" or \\\"Localhost\\\")\" logSource=\"/remote-source/velero/app/pkg/restore/restore.go:1388\" restore=openshift-adp/todolist-backup-0780518c-08ed-11ee-805c-0a580a80e92c\n velero container contains \"level=error\" in line#2447: time=\"2023-06-12T06:50:05Z\" level=error msg=\"Namespace todolist-mariadb, resource restore error: error restoring pods/todolist-mariadb/mysql-869f9f44f6-tp5lv: pods \\\ "mysql-869f9f44f6-tp5lv\\\" is forbidden: violates PodSecurity \\\"restricted:v1.24\\\": privileged (container \\\ "mysql\\\" must not set securityContext.privileged=true), allowPrivilegeEscalation != false (containers \\\ "restic-wait\\\",\\\"mysql\\\" must set securityContext.allowPrivilegeEscalation=false), unrestricted capabilities (containers \\\ "restic-wait\\\", \\\"mysql\\\" must set securityContext.capabilities.drop=[\\\"ALL\\\"]), seccompProfile (pod or containers \\\ "restic-wait\\\", \\\"mysql\\\" must set securityContext.seccompProfile.type to \\\ "RuntimeDefault\\\" or \\\"Localhost\\\")\" logSource=\"/remote-source/velero/app/pkg/controller/restore_controller.go:510\" restore=openshift-adp/todolist-backup-0780518c-08ed-11ee-805c-0a580a80e92c\n]",
Check the restore-resource-priorities field in the DPA to confirm that securitycontextconstraints is listed first in the priority list:

$ oc get dpa -o yaml
# ...
configuration:
  restic:
    enable: true
  velero:
    args:
      restore-resource-priorities: 'securitycontextconstraints,customresourcedefinitions,namespaces,storageclasses,volumesnapshotclass.snapshot.storage.k8s.io,volumesnapshotcontents.snapshot.storage.k8s.io,volumesnapshots.snapshot.storage.k8s.io,datauploads.velero.io,persistentvolumes,persistentvolumeclaims,serviceaccounts,secrets,configmaps,limitranges,pods,replicasets.apps,clusterclasses.cluster.x-k8s.io,endpoints,services,-,clusterbootstraps.run.tanzu.vmware.com,clusters.cluster.x-k8s.io,clusterresourcesets.addons.cluster.x-k8s.io' 1
    defaultPlugins:
    - gcp
    - openshift
4.14.13. Using the must-gather tool

You can collect logs, metrics, and information about OADP custom resources by using the must-gather tool.
To collect the default must-gather data for all workloads that use OADP, run the following command:

$ oc adm must-gather --image=registry.redhat.io/oadp/oadp-mustgather-rhel9:v1.4

To collect only the essential data for a given time frame, run the following command:

$ oc adm must-gather --image=registry.redhat.io/oadp/oadp-mustgather-rhel9:v1.4 \
  -- /usr/bin/gather_<time>_essential 1

To run the gather with a timeout, run the following command:

$ oc adm must-gather --image=registry.redhat.io/oadp/oadp-mustgather-rhel9:v1.4 \
  -- /usr/bin/gather_with_timeout <timeout> 1

To collect a dump of Prometheus metrics, run the following command:

$ oc adm must-gather --image=registry.redhat.io/oadp/oadp-mustgather-rhel9:v1.4 -- /usr/bin/gather_metrics_dump
4.14.13.1. Using must-gather with insecure TLS connections

If a custom CA certificate is used, the must-gather pod fails to grab the output for commands such as velero logs. To use the must-gather tool with insecure TLS connections, pass the gather_without_tls flag:

$ oc adm must-gather --image=registry.redhat.io/oadp/oadp-mustgather-rhel9:v1.4 -- /usr/bin/gather_without_tls <true/false>

The default value of the flag is false. Set the value to true to allow insecure TLS connections.
4.14.13.2. Combining options when using the must-gather tool

Currently, it is not possible to combine must-gather scripts, for example specifying a timeout threshold while permitting insecure TLS connections. You can work around this limitation by setting up internal variables on the command line:

$ oc adm must-gather --image=registry.redhat.io/oadp/oadp-mustgather-rhel9:v1.4 -- skip_tls=true /usr/bin/gather_with_timeout <timeout_value_in_seconds>

In this example, set the skip_tls variable before running the gather_with_timeout script to combine both functions. You can also run gather_without_tls directly with its flag set to true:

$ oc adm must-gather --image=registry.redhat.io/oadp/oadp-mustgather-rhel9:v1.4 -- /usr/bin/gather_without_tls true
4.14.14. OADP Monitoring

The OpenShift Container Platform monitoring stack lets users and administrators observe clusters with Prometheus-based queries, alerting, and visualization. OADP exposes Velero service metrics that can be scraped through user workload monitoring.
4.14.14.1. OADP monitoring setup

OADP relies on monitoring for user-defined projects. To enable it, edit the cluster-monitoring-config ConfigMap in the openshift-monitoring namespace:

$ oc edit configmap cluster-monitoring-config -n openshift-monitoring

Set the enableUserWorkload option to true:

apiVersion: v1
data:
  config.yaml: |
    enableUserWorkload: true 1
kind: ConfigMap
metadata:
# ...
$ oc get pods -n openshift-user-workload-monitoring
NAME                                   READY   STATUS    RESTARTS   AGE
prometheus-operator-6844b4b99c-b57j9   2/2     Running   0          43s
prometheus-user-workload-0             5/5     Running   0          32s
prometheus-user-workload-1             5/5     Running   0          32s
thanos-ruler-user-workload-0           3/3     Running   0          32s
thanos-ruler-user-workload-1           3/3     Running   0          32s
Check that the user-workload-monitoring-config ConfigMap exists:

$ oc get configmap user-workload-monitoring-config -n openshift-user-workload-monitoring

Example output:

Error from server (NotFound): configmaps "user-workload-monitoring-config" not found

If it is missing, create a user-workload-monitoring-config ConfigMap manifest, for example 2_configure_user_workload_monitoring.yaml, and apply it:

apiVersion: v1
kind: ConfigMap
metadata:
  name: user-workload-monitoring-config
  namespace: openshift-user-workload-monitoring
data:
  config.yaml: |
$ oc apply -f 2_configure_user_workload_monitoring.yaml

configmap/user-workload-monitoring-config created
4.14.14.2. Creating OADP service monitor

OADP provides an openshift-adp-velero-metrics-svc service, which is created when the DPA is configured. The service monitor used by user workload monitoring must point to this service. Verify that the service exists:
$ oc get svc -n openshift-adp -l app.kubernetes.io/name=velero
NAME                               TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)    AGE
openshift-adp-velero-metrics-svc   ClusterIP   172.30.38.244   <none>        8085/TCP   1h
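Before wiring up the ServiceMonitor, you can spot-check that the service is serving Prometheus metrics. One way is a local port-forward, run in a second terminal (a sketch, assuming network access to the cluster from your workstation):

$ oc -n openshift-adp port-forward service/openshift-adp-velero-metrics-svc 8085:8085
$ curl -s http://localhost:8085/metrics | grep '^velero_backup'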
Create a ServiceMonitor YAML file, for example 3_create_oadp_service_monitor.yaml, that selects the service by its app.kubernetes.io/name=velero label, and apply it:

apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app: oadp-service-monitor
  name: oadp-service-monitor
  namespace: openshift-adp
spec:
  endpoints:
  - interval: 30s
    path: /metrics
    targetPort: 8085
    scheme: http
  selector:
    matchLabels:
      app.kubernetes.io/name: "velero"
$ oc apply -f 3_create_oadp_service_monitor.yaml
servicemonitor.monitoring.coreos.com/oadp-service-monitor created
In the web console, confirm that the new metrics target is in the Up state under Observe → Targets.

Figure 4.1. OADP metrics targets
4.14.14.3. Creating an alerting rule

The OpenShift Container Platform monitoring stack supports alerts configured as alerting rules. To create an alert for OADP, use one of the exposed Velero metrics. For example, the following PrometheusRule fires an OADPBackupFailing alert when the number of failed backups increased over the previous two hours and the condition persists for at least five minutes:
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: sample-oadp-alert
  namespace: openshift-adp
spec:
  groups:
  - name: sample-oadp-backup-alert
    rules:
    - alert: OADPBackupFailing
      annotations:
        description: 'OADP had {{$value | humanize}} backup failures over the last 2 hours.'
        summary: OADP has issues creating backups
      expr: |
        increase(velero_backup_failure_total{job="openshift-adp-velero-metrics-svc"}[2h]) > 0
      for: 5m
      labels:
        severity: warning
$ oc apply -f 4_create_oadp_alert_rule.yaml
prometheusrule.monitoring.coreos.com/sample-oadp-alert created
After the evaluation period, the alert is displayed under Observe → Alerting in the web console.

Figure 4.2. OADP backup failing alert
4.14.14.4. List of available metrics

These are the metrics provided by OADP, together with their types.
[Table: available OADP metrics, with the description and metric type of each]
4.14.14.5. Viewing metrics using the Observe UI

You can view metrics in the OpenShift Container Platform web console from the Administrator or Developer perspective. Go to Observe → Metrics and enter a query for a specific metric, for example velero_backup_total.
Figure 4.3. OADP metrics