diff --git a/README.md b/README.md index cfe2f4c..746e696 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,19 @@ func main() { panic(err) } + // read the replacement image + bs, err := os.ReadFile("./test/cameraman.jpg") + if err != nil { + panic(err) + } + + // replace the lenna image with the cameraman image + // image attributes (e.g. size, position) remain unchanged + err = doc.SetFile("word/media/image1.jpg", bs) + if err != nil { + panic(err) + } + // write out a new file err = doc.WriteToFile("replaced.docx") if err != nil { @@ -98,6 +111,13 @@ Although I do not recommend to do that as the WordprocessingML spec is somewhat But, for whatever reason there might be, you can do that. +#### Image replace +Replacing an image is slightly different from replacing text. To replace an image, you need to know its path within the docx archive, rather than using a placeholder. + +To find the path, use unzip or a similar tool to extract the contents of the docx file, then locate the image to be replaced inside the `word/media/` folder. Assuming the path is `word/media/image1.jpg`, you can then use `SetFile()` to overwrite the old image with a new one. Note that: +- The image format (encoding) should remain the same as that of the image being replaced. +- Since only the image file itself is replaced and its metadata is not changed, the new image will appear in its original location, with its original size. In other words, the image attributes remain unchanged. + ### ➤ Terminology To not cause too much confusion, here is a list of terms which you might come across. diff --git a/document.go b/document.go index 75ff8c1..e721727 100644 --- a/document.go +++ b/document.go @@ -26,6 +26,8 @@ var ( HeaderPathRegex = regexp.MustCompile(`word/header[0-9]*.xml`) // FooterPathRegex matches all footer files inside the docx-archive. FooterPathRegex = regexp.MustCompile(`word/footer[0-9]*.xml`) + // MediaPathRegex matches all media files inside the docx-archive. 
+ MediaPathRegex = regexp.MustCompile(`word/media/*`) ) // Document exposes the main API of the library. It represents the actual docx document which is going to be modified. @@ -42,6 +44,8 @@ type Document struct { headerFiles []string // paths to all footer files inside the zip archive footerFiles []string + // paths to all media files inside the zip archive + mediaFiles []string // The document contains multiple files which eventually need a parser each. // The map key is the file path inside the document to which the parser belongs. runParsers map[string]*RunParser @@ -168,7 +172,7 @@ func (d *Document) GetPlaceHoldersList() ([]string, error) { for file := range d.files { if _, ok := d.runParsers[file]; !ok { return nil, fmt.Errorf("no parser for file %s", file) - } + } replacer := d.fileReplacers[file] placeholders := replacer.placeholders for _, placeholder := range placeholders { @@ -178,6 +182,7 @@ func (d *Document) GetPlaceHoldersList() ([]string, error) { return placeholdersTextList, nil } + // replace will create a parser on the given bytes, execute it and replace every placeholders found with the data // from the placeholderMap. 
func (d *Document) replace(placeholderMap PlaceholderMap, file string) ([]byte, error) { @@ -294,6 +299,7 @@ func (d *Document) SetFile(fileName string, fileBytes []byte) error { // - word/document.xml // - word/header*.xml // - word/footer*.xml +// - word/media/* func (d *Document) parseArchive() error { readZipFile := func(file *zip.File) []byte { readCloser, err := file.Open() @@ -320,6 +326,10 @@ func (d *Document) parseArchive() error { d.files[file.Name] = readZipFile(file) d.footerFiles = append(d.footerFiles, file.Name) } + if MediaPathRegex.MatchString(file.Name) { + d.files[file.Name] = readZipFile(file) + d.mediaFiles = append(d.mediaFiles, file.Name) + } } return nil } @@ -402,6 +412,7 @@ func (d *Document) Write(writer io.Writer) error { // isModifiedFile will look through all modified files and check if the searchFileName exists func (d *Document) isModifiedFile(searchFileName string) bool { allFiles := append(d.headerFiles, d.footerFiles...) + allFiles = append(allFiles, d.mediaFiles...) 
allFiles = append(allFiles, DocumentXml) for _, file := range allFiles { diff --git a/examples/simple/cameraman.jpg b/examples/simple/cameraman.jpg new file mode 100644 index 0000000..684c644 Binary files /dev/null and b/examples/simple/cameraman.jpg differ diff --git a/examples/simple/main.go b/examples/simple/main.go index c1ea63e..b188f7d 100644 --- a/examples/simple/main.go +++ b/examples/simple/main.go @@ -3,6 +3,7 @@ package main import ( "flag" "log" + "os" "time" "github.com/lukasjarosch/go-docx" @@ -48,6 +49,18 @@ func main() { log.Printf("replace took: %s", time.Since(startTime)) + bs, err := os.ReadFile("./cameraman.jpg") + if err != nil { + panic(err) + } + + err = doc.SetFile("word/media/image1.jpg", bs) + if err != nil { + panic(err) + } + + log.Printf("replace image took: %s", time.Since(startTime)) + err = doc.WriteToFile(outputPath) if err != nil { panic(err) diff --git a/examples/simple/template.docx b/examples/simple/template.docx index 3521cbd..a128b25 100644 Binary files a/examples/simple/template.docx and b/examples/simple/template.docx differ diff --git a/replace_test.go b/replace_test.go index 2e21081..4734e36 100644 --- a/replace_test.go +++ b/replace_test.go @@ -32,6 +32,18 @@ func TestReplacer_Replace(t *testing.T) { return } + bs, err := os.ReadFile("./test/cameraman.jpg") + if err != nil { + t.Error(err) + return + } + + err = doc.SetFile("word/media/image1.jpg", bs) + if err != nil { + t.Error("replacing image failed", err) + return + } + err = doc.WriteToFile("./test/out.docx") if err != nil { t.Error("unable to write", err) diff --git a/test/cameraman.jpg b/test/cameraman.jpg new file mode 100644 index 0000000..684c644 Binary files /dev/null and b/test/cameraman.jpg differ diff --git a/test/template.docx b/test/template.docx index 141e6bf..368d428 100644 Binary files a/test/template.docx and b/test/template.docx differ