Skip to content

Commit

Permalink
feat: Adjust remote config dependency for Google Cloud Storage to be …
Browse files Browse the repository at this point in the history
…more in line with git.

Instead of copying the skaffold config from GCS the user specifies a source that signifies the bucket and directory structure to copy recursively. For example, specifying the source "gs://my-bucket/dir1/*" will copy all the files and subdirectories stored in "gs://my-bucket/dir1/". This requires the user to also specify the path to the skaffold config relative to the provided source. In the example mentioned, if the skaffold config was stored in "gs://my-bucket/dir1/configs/skaffold.yaml" then the path required would be "configs/skaffold.yaml".
  • Loading branch information
mattsanta committed Aug 30, 2023
1 parent 57a13dc commit 42f7e02
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 54 deletions.
5 changes: 3 additions & 2 deletions docs-v2/content/en/docs/design/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,11 @@ requires:
ref: main
- configs: ["cfg3"]
googleCloudStorage:
path: gs://storage-bucket/dir/skaffold.yaml
source: gs://my-bucket/dir1/*
path: configs/skaffold.yaml
```

The environment variable `SKAFFOLD_REMOTE_CACHE_DIR` or flag `--remote-cache-dir` specifies the download location for all remote dependency contents. If undefined then it defaults to `~/.skaffold/remote-cache`. The remote cache directory consists of subdirectories with the contents retrieved from the remote dependency. For git dependencies the subdirectory name is a hash of the repo `uri` and the `branch/ref`. For Google Cloud Storage dependencies the subdirectory name is a hash of the `path`.
The environment variable `SKAFFOLD_REMOTE_CACHE_DIR` or flag `--remote-cache-dir` specifies the download location for all remote dependency contents. If undefined then it defaults to `~/.skaffold/remote-cache`. The remote cache directory consists of subdirectories with the contents retrieved from the remote dependency. For git dependencies the subdirectory name is a hash of the repo `uri` and the `branch/ref`. For Google Cloud Storage dependencies the subdirectory name is a hash of the `source`.

The remote config gets treated like a local config after substituting the path with the actual path in the cache directory.

Expand Down
51 changes: 23 additions & 28 deletions pkg/skaffold/gcs/gsutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
"os"
"os/exec"
"path/filepath"
"strings"

"github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/config"
"github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/output/log"
Expand Down Expand Up @@ -56,33 +55,30 @@ func (g *Gsutil) Copy(ctx context.Context, src, dst string, recursive bool) erro
return nil
}

// SyncObject syncs the target Google Cloud Storage object with skaffold's local cache and returns the local path to the object.
func SyncObject(ctx context.Context, g latest.GoogleCloudStorageInfo, opts config.SkaffoldOptions) (string, error) {
// SyncObjects syncs the target Google Cloud Storage objects with skaffold's local cache and returns the local path to the objects.
func SyncObjects(ctx context.Context, g latest.GoogleCloudStorageInfo, opts config.SkaffoldOptions) (string, error) {
remoteCacheDir, err := config.GetRemoteCacheDir(opts)
if err != nil {
return "", fmt.Errorf("failed determining Google Cloud Storage object cache directory: %w", err)
return "", fmt.Errorf("failed determining remote cache directory: %w", err)
}
if err := os.MkdirAll(remoteCacheDir, 0700); err != nil {
return "", fmt.Errorf("failed creating Google Cloud Storage object cache directory: %w", err)
return "", fmt.Errorf("failed creating remote cache directory: %w", err)
}

subDir, err := getPerObjectCacheDir(g)
sourceDir, err := getPerSourceDir(g)
if err != nil {
return "", fmt.Errorf("failed determining Google Cloud Storage object cache directory for %s: %w", g.Path, err)
return "", fmt.Errorf("failed determining Google Cloud Storage remote cache directory for %q: %w", g.Source, err)
}
// Determine the name of the file to preserve it in the cache.
parts := strings.Split(g.Path, "/")
if len(parts) == 0 {
return "", fmt.Errorf("failed parsing Google Cloud Storage object %s", g.Path)
}
fileName := parts[len(parts)-1]
if len(fileName) == 0 {
return "", fmt.Errorf("failed parsing Google Cloud Storage object %s", g.Path)
}
cacheDir := filepath.Join(remoteCacheDir, subDir, fileName)
// If cache doesn't exist and cloning is disabled then we can't move forward.
if _, err := os.Stat(cacheDir); os.IsNotExist(err) && opts.SyncRemoteCache.CloneDisabled() {
return "", syncDisabledErr(g, cacheDir)
cacheDir := filepath.Join(remoteCacheDir, sourceDir)
if _, err := os.Stat(cacheDir); os.IsNotExist(err) {
// If cache doesn't exist and cloning is disabled then we can't move forward.
if opts.SyncRemoteCache.CloneDisabled() {
return "", syncDisabledErr(g, cacheDir)
}
// The subdirectory needs to exist to work with gsutil.
if err := os.MkdirAll(cacheDir, 0700); err != nil {
return "", fmt.Errorf("failed creating Google Cloud Storage cache directory for %q: %w", g.Source, err)
}
}
// If sync property is false then skip fetching latest object from remote storage.
if g.Sync != nil && !*g.Sync {
Expand All @@ -94,16 +90,15 @@ func SyncObject(ctx context.Context, g latest.GoogleCloudStorageInfo, opts confi
}

gcs := Gsutil{}
// Non-recursive since this should only download a single file - the remote Skaffold Config.
if err := gcs.Copy(ctx, g.Path, cacheDir, false); err != nil {
return "", fmt.Errorf("failed to cache Google Cloud Storage object %s: %w", g.Path, err)
if err := gcs.Copy(ctx, g.Source, cacheDir, true); err != nil {
return "", fmt.Errorf("failed to cache Google Cloud Storage objects from %q: %w", g.Source, err)
}
return cacheDir, nil
}

// getPerObjectCacheDir returns the directory used per Google Cloud Storage Object. Directory is a hash of the path provided.
func getPerObjectCacheDir(g latest.GoogleCloudStorageInfo) (string, error) {
inputs := []string{g.Path}
// getPerSourceDir returns the directory used per Google Cloud Storage source. Directory is a hash of the source provided.
func getPerSourceDir(g latest.GoogleCloudStorageInfo) (string, error) {
inputs := []string{g.Source}
hasher := sha256.New()
enc := json.NewEncoder(hasher)
if err := enc.Encode(inputs); err != nil {
Expand All @@ -115,15 +110,15 @@ func getPerObjectCacheDir(g latest.GoogleCloudStorageInfo) (string, error) {

// syncDisabledErr returns error to use when remote sync is turned off by the user and the Google Cloud Storage object doesn't exist inside the cache directory.
func syncDisabledErr(g latest.GoogleCloudStorageInfo, cacheDir string) error {
msg := fmt.Sprintf("cache directory %q for Google Cloud Storage object %q does not exist and remote cache sync is explicitly disabled via flag `--sync-remote-cache`", cacheDir, g.Path)
msg := fmt.Sprintf("cache directory %q for Google Cloud Storage source %q does not exist and remote cache sync is explicitly disabled via flag `--sync-remote-cache`", cacheDir, g.Source)
return sErrors.NewError(fmt.Errorf(msg),
&proto.ActionableErr{
Message: msg,
ErrCode: proto.StatusCode(proto.StatusCode_CONFIG_REMOTE_REPO_CACHE_NOT_FOUND_ERR),

Check failure on line 117 in pkg/skaffold/gcs/gsutil.go

View workflow job for this annotation

GitHub Actions / PR linters, checks

unnecessary conversion (unconvert)
Suggestions: []*proto.Suggestion{
{
SuggestionCode: proto.SuggestionCode_CONFIG_ENABLE_REMOTE_REPO_SYNC,
Action: fmt.Sprintf("Either download the Google Cloud Storage object manually to %q or set flag `--sync-remote-cache` to `always` or `missing`", cacheDir),
Action: fmt.Sprintf("Either download the Google Cloud Storage objects manually to %q or set flag `--sync-remote-cache` to `always` or `missing`", cacheDir),
},
},
})
Expand Down
34 changes: 17 additions & 17 deletions pkg/skaffold/gcs/gsutil_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ func TestCopy(t *testing.T) {
}

func TestSyncObject(t *testing.T) {
gcsPath := "gs://bucket/skaffold.yaml"
hash := "uJqp7MPtzRQ0g7nR-pH-O7RBEfCEt6aY"
localObjectCache := fmt.Sprintf("%s/skaffold.yaml", hash)
source := "gs://my-bucket/dir1/*"
path := "configs/skaffold.yaml"
sourceHash := "B7fSU6BUuHFJuLenty9ErOwyxPVqO5cB"

tests := []struct {
description string
Expand All @@ -93,51 +93,51 @@ func TestSyncObject(t *testing.T) {
}{
{
description: "first time copy succeeds",
g: latest.GoogleCloudStorageInfo{Path: gcsPath},
g: latest.GoogleCloudStorageInfo{Source: source, Path: path},
syncFlag: "always",
expected: localObjectCache,
expected: sourceHash,
},
{
description: "first time copy fails",
g: latest.GoogleCloudStorageInfo{Path: gcsPath},
g: latest.GoogleCloudStorageInfo{Source: source, Path: path},
gsutilErr: fmt.Errorf("not found"),
syncFlag: "always",
shouldErr: true,
},
{
description: "first time copy with sync off via flag fails",
g: latest.GoogleCloudStorageInfo{Path: gcsPath},
g: latest.GoogleCloudStorageInfo{Source: source, Path: path},
syncFlag: "never",
shouldErr: true,
},
{
description: "existing copy update succeeds",
g: latest.GoogleCloudStorageInfo{Path: gcsPath},
g: latest.GoogleCloudStorageInfo{Source: source, Path: path},
syncFlag: "always",
existing: true,
expected: localObjectCache,
expected: sourceHash,
},
{
description: "existing copy with sync off via flag succeeds",
g: latest.GoogleCloudStorageInfo{Path: gcsPath},
g: latest.GoogleCloudStorageInfo{Source: source, Path: path},
syncFlag: "never",
existing: true,
expected: localObjectCache,
expected: sourceHash,
},
{
description: "existing copy with sync off succeeds",
g: latest.GoogleCloudStorageInfo{Path: gcsPath, Sync: util.Ptr(false)},
g: latest.GoogleCloudStorageInfo{Source: source, Path: path, Sync: util.Ptr(false)},
syncFlag: "always",
existing: true,
expected: localObjectCache,
expected: sourceHash,
},
}

for _, test := range tests {
testutil.Run(t, test.description, func(t *testutil.T) {
td := t.NewTempDir()
if test.existing {
td.Touch(localObjectCache)
td.Touch(sourceHash)
}

syncRemote := &config.SyncRemoteCacheOption{}
Expand All @@ -146,13 +146,13 @@ func TestSyncObject(t *testing.T) {

var cmd *testutil.FakeCmd
if test.gsutilErr == nil {
cmd = testutil.CmdRunOut(fmt.Sprintf("gsutil cp %s %s", gcsPath, td.Path(localObjectCache)), "logs")
cmd = testutil.CmdRunOut(fmt.Sprintf("gsutil cp -r %s %s", source, td.Path(sourceHash)), "logs")
} else {
cmd = testutil.CmdRunOutErr(fmt.Sprintf("gsutil cp %s %s", gcsPath, td.Path(localObjectCache)), "logs", test.gsutilErr)
cmd = testutil.CmdRunOutErr(fmt.Sprintf("gsutil cp -r %s %s", source, td.Path(sourceHash)), "logs", test.gsutilErr)
}
t.Override(&util.DefaultExecCommand, cmd)

path, err := SyncObject(context.Background(), test.g, opts)
path, err := SyncObjects(context.Background(), test.g, opts)
var expected string
if !test.shouldErr {
expected = filepath.Join(td.Root(), test.expected)
Expand Down
10 changes: 5 additions & 5 deletions pkg/skaffold/parser/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,25 +353,25 @@ func cacheRepo(ctx context.Context, g latest.GitInfo, opts config.SkaffoldOption

// cacheGCSObject downloads the referenced Google Cloud Storage object to skaffold's cache if required and returns the path to the target configuration file.
func cacheGCSObject(ctx context.Context, g latest.GoogleCloudStorageInfo, opts config.SkaffoldOptions, r *record) (string, error) {
key := g.Path
key := g.Source
if p, found := r.cachedObjects[key]; found {
switch v := p.(type) {
case string:
return v, nil
return filepath.Join(v, g.Path), nil
case error:
return "", v
default:
log.Entry(context.TODO()).Fatalf("unable to check download status of Google Cloud Storage object at %s", g.Path)
log.Entry(context.TODO()).Fatalf("unable to check download status of Google Cloud Storage objects at %s", g.Source)
return "", nil
}
}
p, err := gcs.SyncObject(ctx, g, opts)
p, err := gcs.SyncObjects(ctx, g, opts)
if err != nil {
r.cachedObjects[key] = err
return "", err
}
r.cachedObjects[key] = p
return p, nil
return filepath.Join(p, g.Path), nil
}

// checkRevisit ensures that each config is activated with the same set of active profiles
Expand Down
7 changes: 5 additions & 2 deletions pkg/skaffold/schema/latest/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,12 @@ type GitInfo struct {
Sync *bool `yaml:"sync,omitempty"`
}

// GoogleCloudStorageInfo contains information on the origin of skaffold configurations stored in Google Cloud Storage.
// GoogleCloudStorageInfo contains information on the origin of skaffold configurations copied from Google Cloud Storage.
type GoogleCloudStorageInfo struct {
// Google Cloud Storage path to the skaffold configuration file. e.g. `gs://bucket/dir/skaffold.yaml`
// Source is the Google Cloud Storage objects to copy. e.g. `gs://my-bucket/dir1/dir2/*`.
Source string `yaml:"source,omitempty"`

// Path is the relative path from the source to the skaffold configuration file. e.g. `configs/skaffold.yaml`.
Path string `yaml:"path,omitempty"`

// Sync when set to `true` will reset the cached object to the latest remote version on every run.
Expand Down

0 comments on commit 42f7e02

Please sign in to comment.