From 6f674981e004f7c499102622f18b256c13ab193a Mon Sep 17 00:00:00 2001 From: wayblink Date: Thu, 22 Aug 2024 11:44:27 +0800 Subject: [PATCH] Refine cross storage backup and add doc (#407) Signed-off-by: wayblink --- configs/backup.yaml | 11 ++-- core/backup_context.go | 32 +++++++--- core/backup_impl_create_backup.go | 11 ++-- core/backup_impl_restore_backup.go | 4 +- core/paramtable/params.go | 25 ++++---- core/storage/copier.go | 6 +- docs/cross_storage_backup_restore.md | 92 ++++++++++++++++++++++++++++ 7 files changed, 142 insertions(+), 39 deletions(-) create mode 100644 docs/cross_storage_backup_restore.md diff --git a/configs/backup.yaml b/configs/backup.yaml index 286ab555..ae5a4bb2 100644 --- a/configs/backup.yaml +++ b/configs/backup.yaml @@ -42,6 +42,12 @@ minio: backupBucketName: "a-bucket" # Bucket name to store backup data. Backup data will store to backupBucketName/backupRootPath backupRootPath: "backup" # Rootpath to store backup data. Backup data will store to backupBucketName/backupRootPath + # If you need to back up or restore data between two different storage systems, direct client-side copying is not supported. + # Set this option to true to enable data transfer through Milvus Backup. + # Note: This option will be automatically set to true if `minio.storageType` and `minio.backupStorageType` differ. + # However, if they are the same but belong to different services, you must manually set this option to `true`. + crossStorage: "false" + backup: maxSegmentGroupSize: 2G @@ -61,8 +67,3 @@ backup: enable: true seconds: 7200 address: http://localhost:9091 - - # If you need to backup or restore data between two different storage systems, - # direct client-side copying is not supported. - # Set this option to true to enable data transfer through Milvus Backup. 
- copyByServer: "false" diff --git a/core/backup_context.go b/core/backup_context.go index 96ec0649..d7441836 100644 --- a/core/backup_context.go +++ b/core/backup_context.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "path" "sync" "time" @@ -217,6 +218,10 @@ func (b *BackupContext) getBackupStorageClient() storage.ChunkManager { } func (b *BackupContext) getBackupCopier() *storage.Copier { + crossStorage := b.params.MinioCfg.CrossStorage + if b.getBackupStorageClient().Config().StorageType != b.getMilvusStorageClient().Config().StorageType { + crossStorage = true + } if b.backupCopier == nil { b.backupCopier = storage.NewCopier( b.getMilvusStorageClient(), @@ -224,13 +229,18 @@ func (b *BackupContext) getBackupCopier() *storage.Copier { storage.CopyOption{ WorkerNum: b.params.BackupCfg.BackupCopyDataParallelism, RPS: RPS, - CopyByServer: b.params.BackupCfg.CopyByServer, + CopyByServer: crossStorage, }) } return b.backupCopier } func (b *BackupContext) getRestoreCopier() *storage.Copier { + crossStorage := b.params.MinioCfg.CrossStorage + // force set copyByServer is true if two storage type is different + if b.getBackupStorageClient().Config().StorageType != b.getMilvusStorageClient().Config().StorageType { + crossStorage = true + } if b.restoreCopier == nil { b.restoreCopier = storage.NewCopier( b.getBackupStorageClient(), @@ -238,7 +248,7 @@ func (b *BackupContext) getRestoreCopier() *storage.Copier { storage.CopyOption{ WorkerNum: b.params.BackupCfg.BackupCopyDataParallelism, RPS: RPS, - CopyByServer: b.params.BackupCfg.CopyByServer, + CopyByServer: crossStorage, }) } return b.restoreCopier @@ -670,36 +680,38 @@ func (b *BackupContext) Check(ctx context.Context) string { "backup-rootpath: %s\n", version, b.milvusBucketName, b.milvusRootPath, b.backupBucketName, b.backupRootPath) - paths, _, err := b.getMilvusStorageClient().ListWithPrefix(ctx, b.milvusBucketName, b.milvusRootPath+SEPERATOR, false) + milvusFiles, _, err := 
b.getMilvusStorageClient().ListWithPrefix(ctx, b.milvusBucketName, b.milvusRootPath+SEPERATOR, false) if err != nil { return "Failed to connect to storage milvus path\n" + info + err.Error() } - if len(paths) == 0 { + if len(milvusFiles) == 0 { return "Milvus storage is empty. Please verify whether your cluster is really empty. If not, the configs(minio address, port, bucket, rootPath) may be wrong\n" + info } - paths, _, err = b.getBackupStorageClient().ListWithPrefix(ctx, b.backupBucketName, b.backupRootPath+SEPERATOR, false) + _, _, err = b.getBackupStorageClient().ListWithPrefix(ctx, b.backupBucketName, b.backupRootPath+SEPERATOR, false) if err != nil { return "Failed to connect to storage backup path " + info + err.Error() } - CHECK_PATH := "milvus_backup_check_" + time.Now().String() + checkSrcPath := path.Join(b.milvusRootPath, "milvus_backup_check_src_"+string(time.Now().Unix())) + checkDstPath := path.Join(b.backupRootPath, "milvus_backup_check_dst_"+string(time.Now().Unix())) - err = b.getMilvusStorageClient().Write(ctx, b.milvusBucketName, b.milvusRootPath+SEPERATOR+CHECK_PATH, []byte{1}) + err = b.getMilvusStorageClient().Write(ctx, b.milvusBucketName, checkSrcPath, []byte{1}) if err != nil { return "Failed to connect to storage milvus path\n" + info + err.Error() } defer func() { - b.getMilvusStorageClient().Remove(ctx, b.milvusBucketName, b.milvusRootPath+SEPERATOR+CHECK_PATH) + b.getMilvusStorageClient().Remove(ctx, b.milvusBucketName, checkSrcPath) }() - err = b.getMilvusStorageClient().Copy(ctx, b.milvusBucketName, b.backupBucketName, b.milvusRootPath+SEPERATOR+CHECK_PATH, b.backupRootPath+SEPERATOR+CHECK_PATH) + log.Debug("check copy", zap.String("srcBucket", b.milvusBucketName), zap.String("destBucket", b.backupBucketName), zap.String("key", checkSrcPath), zap.String("destKey", checkDstPath)) + err = b.getBackupCopier().Copy(ctx, checkSrcPath, checkDstPath, b.milvusBucketName, b.backupBucketName) if err != nil { return "Failed to copy file from 
milvus storage to backup storage\n" + info + err.Error() } defer func() { - b.getBackupStorageClient().Remove(ctx, b.backupBucketName, b.backupRootPath+SEPERATOR+CHECK_PATH) + b.getBackupStorageClient().Remove(ctx, b.backupBucketName, checkDstPath) }() return "Succeed to connect to milvus and storage.\n" + info diff --git a/core/backup_impl_create_backup.go b/core/backup_impl_create_backup.go index 672d7d56..9c5727c4 100644 --- a/core/backup_impl_create_backup.go +++ b/core/backup_impl_create_backup.go @@ -19,7 +19,6 @@ import ( "go.uber.org/zap" "github.com/zilliztech/milvus-backup/core/proto/backuppb" - "github.com/zilliztech/milvus-backup/core/storage" "github.com/zilliztech/milvus-backup/core/utils" "github.com/zilliztech/milvus-backup/internal/log" "github.com/zilliztech/milvus-backup/internal/util/retry" @@ -856,8 +855,8 @@ func (b *BackupContext) copySegment(ctx context.Context, backupBinlogPath string } err = retry.Do(ctx, func() error { - attr := storage.ObjectAttr{Key: binlog.GetLogPath()} - return b.getBackupCopier().Copy(ctx, attr, targetPath, b.milvusBucketName, b.backupBucketName) + path := binlog.GetLogPath() + return b.getBackupCopier().Copy(ctx, path, targetPath, b.milvusBucketName, b.backupBucketName) }, retry.Sleep(2*time.Second), retry.Attempts(5)) if err != nil { log.Info("Fail to copy file after retry", @@ -901,8 +900,8 @@ func (b *BackupContext) copySegment(ctx context.Context, backupBinlogPath string return errors.New("Binlog file not exist " + binlog.GetLogPath()) } err = retry.Do(ctx, func() error { - attr := storage.ObjectAttr{Key: binlog.GetLogPath()} - return b.getBackupCopier().Copy(ctx, attr, targetPath, b.milvusBucketName, b.backupBucketName) + path := binlog.GetLogPath() + return b.getBackupCopier().Copy(ctx, path, targetPath, b.milvusBucketName, b.backupBucketName) }, retry.Sleep(2*time.Second), retry.Attempts(5)) if err != nil { log.Info("Fail to copy file after retry", @@ -1076,7 +1075,7 @@ func (b *BackupContext) backupRBAC(ctx 
context.Context, backupInfo *backuppb.Bac Roles: roles, Grants: grants, } - + log.Info("backup RBAC", zap.Int("users", len(users)), zap.Int("roles", len(roles)), zap.Int("grants", len(grants))) b.meta.UpdateBackup(backupInfo.Id, setRBACMeta(rbacPb)) return nil diff --git a/core/backup_impl_restore_backup.go b/core/backup_impl_restore_backup.go index bf08571f..21064f59 100644 --- a/core/backup_impl_restore_backup.go +++ b/core/backup_impl_restore_backup.go @@ -16,7 +16,6 @@ import ( "go.uber.org/zap" "github.com/zilliztech/milvus-backup/core/proto/backuppb" - "github.com/zilliztech/milvus-backup/core/storage" "github.com/zilliztech/milvus-backup/core/utils" "github.com/zilliztech/milvus-backup/internal/common" "github.com/zilliztech/milvus-backup/internal/log" @@ -590,8 +589,7 @@ func (b *BackupContext) executeRestoreCollectionTask(ctx context.Context, backup tempFilekey := path.Join(tempDir, strings.Replace(file, b.params.MinioCfg.BackupRootPath, "", 1)) log.Debug("Copy temporary restore file", zap.String("from", file), zap.String("to", tempFilekey)) err := retry.Do(ctx, func() error { - attr := storage.ObjectAttr{Key: file} - return b.getRestoreCopier().Copy(ctx, attr, tempFilekey, backupBucketName, b.milvusBucketName) + return b.getRestoreCopier().Copy(ctx, file, tempFilekey, backupBucketName, b.milvusBucketName) }, retry.Sleep(2*time.Second), retry.Attempts(5)) if err != nil { log.Error("fail to copy backup date from backup bucket to restore target milvus bucket after retry", zap.Error(err)) diff --git a/core/paramtable/params.go b/core/paramtable/params.go index 80994dce..769f893f 100644 --- a/core/paramtable/params.go +++ b/core/paramtable/params.go @@ -43,8 +43,6 @@ type BackupConfig struct { GcPauseEnable bool GcPauseSeconds int GcPauseAddress string - - CopyByServer bool } func (p *BackupConfig) init(base *BaseTable) { @@ -58,7 +56,6 @@ func (p *BackupConfig) init(base *BaseTable) { p.initGcPauseEnable() p.initGcPauseSeconds() p.initGcPauseAddress() - 
p.initCopyByServer() } func (p *BackupConfig) initMaxSegmentGroupSize() { @@ -104,15 +101,6 @@ func (p *BackupConfig) initGcPauseAddress() { p.GcPauseAddress = address } -func (p *BackupConfig) initCopyByServer() { - copyByServer := p.Base.LoadWithDefault("backup.copyByServer", "false") - var err error - p.CopyByServer, err = strconv.ParseBool(copyByServer) - if err != nil { - panic("parse bool CopyByServer:" + err.Error()) - } -} - type MilvusConfig struct { Base *BaseTable @@ -229,6 +217,8 @@ type MinioConfig struct { BackupRootPath string BackupUseIAM bool BackupIAMEndpoint string + + CrossStorage bool } func (p *MinioConfig) init(base *BaseTable) { @@ -256,6 +246,8 @@ func (p *MinioConfig) init(base *BaseTable) { p.initBackupRootPath() p.initBackupUseIAM() p.initBackupIAMEndpoint() + + p.initCrossStorage() } func (p *MinioConfig) initAddress() { @@ -400,6 +392,15 @@ func (p *MinioConfig) initBackupRootPath() { p.BackupRootPath = rootPath } +func (p *MinioConfig) initCrossStorage() { + crossStorage := p.Base.LoadWithDefault("backup.crossStorage", "false") + var err error + p.CrossStorage, err = strconv.ParseBool(crossStorage) + if err != nil { + panic("parse bool CrossStorage:" + err.Error()) + } +} + type HTTPConfig struct { Base *BaseTable diff --git a/core/storage/copier.go b/core/storage/copier.go index 3d0afccb..e0404dd9 100644 --- a/core/storage/copier.go +++ b/core/storage/copier.go @@ -214,14 +214,14 @@ func (c *Copier) CopyPrefix(ctx context.Context, i CopyPathInput) error { return nil } -func (c *Copier) Copy(ctx context.Context, attr ObjectAttr, destPrefix, srcBucket, destBucket string) error { +func (c *Copier) Copy(ctx context.Context, srcPrefix, destPrefix, srcBucket, destBucket string) error { fn := c.selectCopyFn() - srcAttrs, err := c.getAttrs(ctx, srcBucket, attr.Key, "") + srcAttrs, err := c.getAttrs(ctx, srcBucket, srcPrefix, "") if err != nil { return fmt.Errorf("storage: copier get src attrs %w", err) } for _, srcAttr := range srcAttrs { - 
destKey := strings.Replace(srcAttr.Key, attr.Key, destPrefix, 1) + destKey := strings.Replace(srcAttr.Key, srcPrefix, destPrefix, 1) err := fn(ctx, srcAttr, destKey, srcBucket, destBucket) if err != nil { return err diff --git a/docs/cross_storage_backup_restore.md b/docs/cross_storage_backup_restore.md new file mode 100644 index 00000000..67af8196 --- /dev/null +++ b/docs/cross_storage_backup_restore.md @@ -0,0 +1,92 @@ +# Cross storage backup & restore + +Previously, Milvus-backup utilized the Copy API of the storage client to back up data. +This limited the backup capability to the same storage type as the Milvus cluster. +However, there's a significant demand for cross-storage backups—for instance, +backing up data from Minio to a local disk, or from in-house storage to cloud storage. + +Starting from version v0.4.21, Milvus-backup supports cross-storage backups. +In this process, data is read from the source storage and written to the target storage through the Milvus-backup service. + +This feature is currently in Beta. `azure` is not supported. Not all storage types are fully tested. + +## Usage + +To enable cross-storage backup, you only need to adjust the configurations in backup.yaml. + +You can run `./milvus-backup check` first to verify that cross-storage copying works. + +For example: + +*Back up data from Minio to a local disk*: + +```yaml +# Related configuration of minio, which is responsible for data persistence for Milvus. 
+minio: + storageType: "minio" + address: localhost + port: 9000 + accessKeyID: minioadmin + secretAccessKey: minioadmin + bucketName: "a-bucket" + rootPath: "files" + + backupStorageType: "local" + backupRootPath: "/root/backup/" +``` + +*Backup from Minio to S3* + +```yaml +minio: + storageType: "minio" + address: localhost + port: 9000 + accessKeyID: minioadmin + secretAccessKey: minioadmin + useSSL: false + useIAM: false + iamEndpoint: "" + bucketName: "a-bucket" + rootPath: "files" + + backupStorageType: "s3" + backupAddress: s3Address + backupPort: 443 + backupAccessKeyID: s3AccessKey + backupSecretAccessKey: s3SecretAccessKey + backupBucketName: "s3-bucket" + backupRootPath: "s3-backup-path" +``` + +*Backup from Minio A to Minio B* + +If the two storage locations are of the same type but belong to different services, +you need to add the additional configuration `crossStorage: "true"` to explicitly indicate that it is a cross-storage backup or restore operation. +```yaml +minio: + storageType: "minio" + address: addressA + port: 9000 + accessKeyID: userA + secretAccessKey: passwdA + useSSL: false + useIAM: false + iamEndpoint: "" + bucketName: "a-bucket" + rootPath: "files" + + backupStorageType: "minio" + backupAddress: addressB + backupPort: 9000 + backupAccessKeyID: userB + backupSecretAccessKey: passwdB + backupBucketName: "b-bucket" + backupRootPath: "backup" + + # If you need to back up or restore data between two different storage systems, direct client-side copying is not supported. + # Set this option to true to enable data transfer through Milvus Backup. + # Note: This option will be automatically set to true if `minio.storageType` and `minio.backupStorageType` differ. + # However, if they are the same but belong to different services, you must manually set this option to `true`. + crossStorage: "true" +```