Skip to content

Commit

Permalink
Add support for normalizing the archived files metadata
Browse files Browse the repository at this point in the history
When archiving files on different machines, different archive files may be generated even if the source content is the same, due to a variety of factors such as compression library differences or file metadata differences (permissions, etc.).

This creates problems in situations where it's expected that the exact same archive should always be generated from a given source content (typically, a Lambda function resource).

One example of a context that causes differences is Git on different operating systems: filesystems may be mounted with different permission masks, so files that are the same as far as Git is concerned (it doesn't distinguish 0644 from 0664) still end up generating a different archive.

The `normalize_files_metadata` option makes the archiver produce a "normalized" archive, which solves the problem. In the Zip case, this is obtained by setting:

- the compression method to Store
- the modification date to a fixed one
- the permissions to 0644

Disabling compression is a tradeoff; however, in the context where this functionality is used (e.g. Lambda function resources), the difference is negligible.
  • Loading branch information
64kramsystem committed Jul 5, 2019
1 parent 51d0ab4 commit 0bfacd0
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 32 deletions.
8 changes: 4 additions & 4 deletions archive/archiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ import (
)

type Archiver interface {
ArchiveContent(content []byte, infilename string) error
ArchiveFile(infilename string) error
ArchiveDir(indirname string, excludes []string) error
ArchiveMultiple(content map[string][]byte) error
ArchiveContent(content []byte, infilename string, normalizeFilesMetadata bool) error
ArchiveFile(infilename string, normalizeFilesMetadata bool) error
ArchiveDir(indirname string, excludes []string, normalizeFilesMetadata bool) error
ArchiveMultiple(content map[string][]byte, normalizeFilesMetadata bool) error
}

type ArchiverBuilder func(filepath string) Archiver
Expand Down
17 changes: 12 additions & 5 deletions archive/data_source_archive_file.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ func dataSourceFile() *schema.Resource {
Required: true,
ForceNew: true,
},
"normalize_files_metadata": {
Type: schema.TypeBool,
Optional: true,
ForceNew: true,
Default: false,
},
"source": {
Type: schema.TypeSet,
Optional: true,
Expand Down Expand Up @@ -165,6 +171,7 @@ func expandStringList(configured []interface{}) []string {
func archive(d *schema.ResourceData) error {
archiveType := d.Get("type").(string)
outputPath := d.Get("output_path").(string)
normalizeFilesMetadata := d.Get("normalize_files_metadata").(bool)

archiver := getArchiver(archiveType, outputPath)
if archiver == nil {
Expand All @@ -175,21 +182,21 @@ func archive(d *schema.ResourceData) error {
if excludes, ok := d.GetOk("excludes"); ok {
excludeList := expandStringList(excludes.(*schema.Set).List())

if err := archiver.ArchiveDir(dir.(string), excludeList); err != nil {
if err := archiver.ArchiveDir(dir.(string), excludeList, normalizeFilesMetadata); err != nil {
return fmt.Errorf("error archiving directory: %s", err)
}
} else {
if err := archiver.ArchiveDir(dir.(string), []string{""}); err != nil {
if err := archiver.ArchiveDir(dir.(string), []string{""}, normalizeFilesMetadata); err != nil {
return fmt.Errorf("error archiving directory: %s", err)
}
}
} else if file, ok := d.GetOk("source_file"); ok {
if err := archiver.ArchiveFile(file.(string)); err != nil {
if err := archiver.ArchiveFile(file.(string), normalizeFilesMetadata); err != nil {
return fmt.Errorf("error archiving file: %s", err)
}
} else if filename, ok := d.GetOk("source_content_filename"); ok {
content := d.Get("source_content").(string)
if err := archiver.ArchiveContent([]byte(content), filename.(string)); err != nil {
if err := archiver.ArchiveContent([]byte(content), filename.(string), normalizeFilesMetadata); err != nil {
return fmt.Errorf("error archiving content: %s", err)
}
} else if v, ok := d.GetOk("source"); ok {
Expand All @@ -199,7 +206,7 @@ func archive(d *schema.ResourceData) error {
src := v.(map[string]interface{})
content[src["filename"].(string)] = []byte(src["content"].(string))
}
if err := archiver.ArchiveMultiple(content); err != nil {
if err := archiver.ArchiveMultiple(content, normalizeFilesMetadata); err != nil {
return fmt.Errorf("error archiving content: %s", err)
}
} else {
Expand Down
105 changes: 89 additions & 16 deletions archive/zip_archiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@ package archive
import (
"archive/zip"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"sort"
"time"
)

// uint32max is the largest value representable in an unsigned 32-bit integer.
// It is used to saturate the 32-bit size field of a zip file header.
const uint32max = 1<<32 - 1

type ZipArchiver struct {
filepath string
filewriter *os.File
Expand All @@ -22,22 +27,35 @@ func NewZipArchiver(filepath string) Archiver {
}
}

func (a *ZipArchiver) ArchiveContent(content []byte, infilename string) error {
func (a *ZipArchiver) ArchiveContent(content []byte, infilename string, normalizeFilesMetadata bool) error {
if err := a.open(); err != nil {
return err
}
defer a.close()

f, err := a.writer.Create(filepath.ToSlash(infilename))
if err != nil {
return err
var f io.Writer
var err error

if normalizeFilesMetadata {
fh := prepareEmptyHeader(content, infilename)
normalizeCompressingFile(fh)

f, err = a.writer.CreateHeader(fh)
if err != nil {
return fmt.Errorf("error creating file inside archive: %s", err)
}
} else {
f, err = a.writer.Create(filepath.ToSlash(infilename))
if err != nil {
return err
}
}

_, err = f.Write(content)
return err
}

func (a *ZipArchiver) ArchiveFile(infilename string) error {
func (a *ZipArchiver) ArchiveFile(infilename string, normalizeFilesMetadata bool) error {
fi, err := assertValidFile(infilename)
if err != nil {
return err
Expand All @@ -58,9 +76,14 @@ func (a *ZipArchiver) ArchiveFile(infilename string) error {
return fmt.Errorf("error creating file header: %s", err)
}
fh.Name = filepath.ToSlash(fi.Name())
fh.Method = zip.Deflate
// fh.Modified alone isn't enough when using a zero value
fh.SetModTime(time.Time{})

if normalizeFilesMetadata {
normalizeCompressingFile(fh)
} else {
fh.Method = zip.Deflate
// fh.Modified alone isn't enough when using a zero value
fh.SetModTime(time.Time{})
}

f, err := a.writer.CreateHeader(fh)
if err != nil {
Expand All @@ -84,7 +107,38 @@ func checkMatch(fileName string, excludes []string) (value bool) {
return false
}

func (a *ZipArchiver) ArchiveDir(indirname string, excludes []string) error {
// prepareEmptyHeader builds a minimal zip.FileHeader for content that is held
// in memory rather than read from disk.
//
// The entry name is infilename converted to forward slashes. The 64-bit
// uncompressed size is the length of content; the 32-bit size field carries
// the same value, saturated at uint32max when the content is too large to fit
// (not a realistic case here, but handled for completeness).
//
// For reference, see golang/src/archive/zip/struct.go.
func prepareEmptyHeader(content []byte, infilename string) *zip.FileHeader {
	size := uint64(len(content))

	// Start from the saturated value and narrow only when the size fits.
	var size32 uint32 = uint32max
	if size <= uint32max {
		size32 = uint32(size)
	}

	return &zip.FileHeader{
		Name:               filepath.ToSlash(infilename),
		UncompressedSize:   size32,
		UncompressedSize64: size,
	}
}

// normalizeCompressingFile overwrites the header fields that this package
// normalizes, so that the same content yields the same header values:
//
//   - Method is set to zip.Store (no compression);
//   - the modification time is pinned to 1981-04-10 00:00:00 UTC;
//   - the file mode is set to 0644.
//
// The header is modified in place.
func normalizeCompressingFile(fh *zip.FileHeader) {
	fixedModTime := time.Date(1981, time.April, 10, 0, 0, 0, 0, time.UTC)

	fh.SetMode(0644)
	fh.SetModTime(fixedModTime)
	fh.Method = zip.Store
}

func (a *ZipArchiver) ArchiveDir(indirname string, excludes []string, normalizeFilesMetadata bool) error {
_, err := assertValidDir(indirname)
if err != nil {
return err
Expand Down Expand Up @@ -128,9 +182,14 @@ func (a *ZipArchiver) ArchiveDir(indirname string, excludes []string) error {
return fmt.Errorf("error creating file header: %s", err)
}
fh.Name = filepath.ToSlash(relname)
fh.Method = zip.Deflate
// fh.Modified alone isn't enough when using a zero value
fh.SetModTime(time.Time{})

if normalizeFilesMetadata {
normalizeCompressingFile(fh)
} else {
fh.Method = zip.Deflate
// fh.Modified alone isn't enough when using a zero value
fh.SetModTime(time.Time{})
}

f, err := a.writer.CreateHeader(fh)
if err != nil {
Expand All @@ -145,7 +204,7 @@ func (a *ZipArchiver) ArchiveDir(indirname string, excludes []string) error {
})
}

func (a *ZipArchiver) ArchiveMultiple(content map[string][]byte) error {
func (a *ZipArchiver) ArchiveMultiple(content map[string][]byte, normalizeFilesMetadata bool) error {
if err := a.open(); err != nil {
return err
}
Expand All @@ -161,10 +220,24 @@ func (a *ZipArchiver) ArchiveMultiple(content map[string][]byte) error {
sort.Strings(keys)

for _, filename := range keys {
f, err := a.writer.Create(filepath.ToSlash(filename))
if err != nil {
return err
var f io.Writer
var err error

if normalizeFilesMetadata {
fh := prepareEmptyHeader(content[filename], filename)
normalizeCompressingFile(fh)

f, err = a.writer.CreateHeader(fh)
if err != nil {
return fmt.Errorf("error creating file inside archive: %s", err)
}
} else {
f, err = a.writer.Create(filepath.ToSlash(filename))
if err != nil {
return err
}
}

_, err = f.Write(content[filename])
if err != nil {
return err
Expand Down
60 changes: 53 additions & 7 deletions archive/zip_archiver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (
func TestZipArchiver_Content(t *testing.T) {
zipfilepath := "archive-content.zip"
archiver := NewZipArchiver(zipfilepath)
if err := archiver.ArchiveContent([]byte("This is some content"), "content.txt"); err != nil {
if err := archiver.ArchiveContent([]byte("This is some content"), "content.txt", false); err != nil {
t.Fatalf("unexpected error: %s", err)
}

Expand All @@ -25,10 +25,20 @@ func TestZipArchiver_Content(t *testing.T) {
})
}

// TestZipArchiver_Content_WithNormalizedFilesMetadata archives an in-memory
// payload with metadata normalization enabled and checks the resulting zip
// file against a fixed expected checksum.
func TestZipArchiver_Content_WithNormalizedFilesMetadata(t *testing.T) {
	const (
		target   = "archive-content.zip"
		wantHash = "952e89afb0435cd5e01e3e4cdf22c5b0"
	)

	payload := []byte("This is some content")

	err := NewZipArchiver(target).ArchiveContent(payload, "content.txt", true)
	if err != nil {
		t.Fatalf("unexpected error: %s", err)
	}

	ensureFileChecksum(t, target, wantHash)
}

func TestZipArchiver_File(t *testing.T) {
zipfilepath := "archive-file.zip"
archiver := NewZipArchiver(zipfilepath)
if err := archiver.ArchiveFile("./test-fixtures/test-file.txt"); err != nil {
if err := archiver.ArchiveFile("./test-fixtures/test-file.txt", false); err != nil {
t.Fatalf("unexpected error: %s", err)
}

Expand All @@ -45,7 +55,7 @@ func TestZipArchiver_FileModified(t *testing.T) {

var zip = func() {
archiver := NewZipArchiver(zipFilePath)
if err := archiver.ArchiveFile(toZipPath); err != nil {
if err := archiver.ArchiveFile(toZipPath, false); err != nil {
t.Fatalf("unexpected error: %s", err)
}
}
Expand Down Expand Up @@ -75,10 +85,20 @@ func TestZipArchiver_FileModified(t *testing.T) {
}
}

// TestZipArchiver_File_WithNormalizedFilesMetadata archives a single fixture
// file with metadata normalization enabled and checks the resulting zip file
// against a fixed expected checksum.
func TestZipArchiver_File_WithNormalizedFilesMetadata(t *testing.T) {
	const (
		target   = "archive-file.zip"
		source   = "./test-fixtures/test-file.txt"
		wantHash = "86f7cb871bc437b8174fca96bf7a464f"
	)

	err := NewZipArchiver(target).ArchiveFile(source, true)
	if err != nil {
		t.Fatalf("unexpected error: %s", err)
	}

	ensureFileChecksum(t, target, wantHash)
}

func TestZipArchiver_Dir(t *testing.T) {
zipfilepath := "archive-dir.zip"
archiver := NewZipArchiver(zipfilepath)
if err := archiver.ArchiveDir("./test-fixtures/test-dir", []string{""}); err != nil {
if err := archiver.ArchiveDir("./test-fixtures/test-dir", []string{""}, false); err != nil {
t.Fatalf("unexpected error: %s", err)
}

Expand All @@ -92,7 +112,7 @@ func TestZipArchiver_Dir(t *testing.T) {
func TestZipArchiver_Dir_Exclude(t *testing.T) {
zipfilepath := "archive-dir.zip"
archiver := NewZipArchiver(zipfilepath)
if err := archiver.ArchiveDir("./test-fixtures/test-dir", []string{"file2.txt"}); err != nil {
if err := archiver.ArchiveDir("./test-fixtures/test-dir", []string{"file2.txt"}, false); err != nil {
t.Fatalf("unexpected error: %s", err)
}

Expand All @@ -105,7 +125,7 @@ func TestZipArchiver_Dir_Exclude(t *testing.T) {
func TestZipArchiver_Dir_Exclude_With_Directory(t *testing.T) {
zipfilepath := "archive-dir.zip"
archiver := NewZipArchiver(zipfilepath)
if err := archiver.ArchiveDir("./test-fixtures/", []string{"test-dir", "test-dir2/file2.txt"}); err != nil {
if err := archiver.ArchiveDir("./test-fixtures/", []string{"test-dir", "test-dir2/file2.txt"}, false); err != nil {
t.Fatalf("unexpected error: %s", err)
}

Expand All @@ -116,6 +136,16 @@ func TestZipArchiver_Dir_Exclude_With_Directory(t *testing.T) {
})
}

// TestZipArchiver_Dir_WithNormalizedFilesMetadata archives a fixture directory
// with metadata normalization enabled and checks the resulting zip file
// against a fixed expected checksum.
func TestZipArchiver_Dir_WithNormalizedFilesMetadata(t *testing.T) {
	const (
		target   = "archive-dir.zip"
		source   = "./test-fixtures/test-dir"
		wantHash = "dfb9a8da8c73034f51a5c3c5d822e64b"
	)

	// A single empty pattern is passed as the exclude list, as in the other
	// directory tests.
	noExcludes := []string{""}

	err := NewZipArchiver(target).ArchiveDir(source, noExcludes, true)
	if err != nil {
		t.Fatalf("unexpected error: %s", err)
	}

	ensureFileChecksum(t, target, wantHash)
}

func TestZipArchiver_Multiple(t *testing.T) {
zipfilepath := "archive-content.zip"
content := map[string][]byte{
Expand All @@ -125,14 +155,30 @@ func TestZipArchiver_Multiple(t *testing.T) {
}

archiver := NewZipArchiver(zipfilepath)
if err := archiver.ArchiveMultiple(content); err != nil {
if err := archiver.ArchiveMultiple(content, false); err != nil {
t.Fatalf("unexpected error: %s", err)
}

ensureContents(t, zipfilepath, content)

}

// TestZipArchiver_Multiple_WithNormalizedFilesMetadata archives several
// in-memory files with metadata normalization enabled and checks the
// resulting zip file against a fixed expected checksum.
func TestZipArchiver_Multiple_WithNormalizedFilesMetadata(t *testing.T) {
	const (
		target   = "archive-content.zip"
		wantHash = "dfb9a8da8c73034f51a5c3c5d822e64b"
	)

	files := map[string][]byte{
		"file1.txt": []byte("This is file 1"),
		"file2.txt": []byte("This is file 2"),
		"file3.txt": []byte("This is file 3"),
	}

	err := NewZipArchiver(target).ArchiveMultiple(files, true)
	if err != nil {
		t.Fatalf("unexpected error: %s", err)
	}

	ensureFileChecksum(t, target, wantHash)
}

func ensureContents(t *testing.T, zipfilepath string, wants map[string][]byte) {
r, err := zip.OpenReader(zipfilepath)
if err != nil {
Expand Down

0 comments on commit 0bfacd0

Please sign in to comment.