2024-09-19 08:03:23 +00:00
|
|
|
// Copyright 2024 Matthew Rich <matthewrich.conf@gmail.com>. All rights reserved.
|
|
|
|
|
|
|
|
package fan
|
|
|
|
|
|
|
|
import (
|
|
|
|
_ "context"
|
|
|
|
_ "encoding/json"
|
|
|
|
"fmt"
|
|
|
|
_ "gopkg.in/yaml.v3"
|
|
|
|
"net/url"
|
|
|
|
"decl/internal/transport"
|
|
|
|
"decl/internal/data"
|
|
|
|
"decl/internal/folio"
|
|
|
|
"archive/tar"
|
|
|
|
_ "regexp"
|
|
|
|
"io"
|
|
|
|
"io/fs"
|
|
|
|
"log"
|
|
|
|
"log/slog"
|
|
|
|
"path/filepath"
|
|
|
|
)
|
|
|
|
|
|
|
|
type Tar struct {
|
|
|
|
Uri folio.URI `yaml:"uri" json:"uri"`
|
|
|
|
parsedURI *url.URL `yaml:"-" json:"-"`
|
|
|
|
emitResource data.Resource `yaml:"-" json:"-"`
|
|
|
|
reader io.ReadCloser `yaml:"-" json:"-"`
|
|
|
|
writer io.WriteCloser `yaml:"-" json:"-"`
|
|
|
|
targetArchive *tar.Writer `yaml:"-" json:"-"`
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewTar() *Tar {
|
|
|
|
return &Tar{}
|
|
|
|
}
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
folio.DocumentRegistry.ConverterTypes.Register([]string{"tar"}, func(u *url.URL) data.Converter {
|
|
|
|
t := NewTar()
|
|
|
|
t.SetURI(u)
|
|
|
|
return t
|
|
|
|
})
|
|
|
|
|
|
|
|
folio.DocumentRegistry.ConverterTypes.RegisterContentType([]string{"tar", "tar.gz", "tgz"}, func(u *url.URL) data.Converter {
|
|
|
|
t := NewTar()
|
|
|
|
t.SetURI(u)
|
|
|
|
return t
|
|
|
|
})
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// Type reports the converter type name, "tar".
func (t *Tar) Type() data.TypeName {
	const name = "tar"
	return name
}
|
|
|
|
|
|
|
|
func (t *Tar) SetURI(u *url.URL) {
|
|
|
|
slog.Info("Tar.SetURI()", "tar", t)
|
|
|
|
u.Scheme = "file"
|
|
|
|
if u.Path == "" || u.Path == "-" {
|
|
|
|
} else {
|
|
|
|
fileAbsolutePath, _ := filepath.Abs(filepath.Join(u.Hostname(), u.Path))
|
|
|
|
u.Path = fileAbsolutePath
|
|
|
|
}
|
|
|
|
t.Uri.SetURL(u)
|
|
|
|
t.parsedURI = u
|
2024-10-04 00:30:49 +00:00
|
|
|
/*
|
|
|
|
exttype, fileext := t.Uri.Extension()
|
|
|
|
if exttype == "tgz" || fileext == "tgz" {
|
2024-09-19 08:03:23 +00:00
|
|
|
q := u.Query()
|
|
|
|
q.Set("gzip", string("true"))
|
|
|
|
u.RawQuery = q.Encode()
|
|
|
|
}
|
2024-10-04 00:30:49 +00:00
|
|
|
*/
|
2024-09-19 08:03:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (t *Tar) targetResource() (target data.Resource, err error) {
|
|
|
|
if t.emitResource == nil {
|
|
|
|
|
|
|
|
if t.emitResource, err = t.Uri.NewResource(nil); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
var tarTargetFile data.FileResource = t.emitResource.(data.FileResource)
|
|
|
|
tarTargetFile.SetContentSourceRef(t.Uri.String())
|
2024-10-04 00:30:49 +00:00
|
|
|
tarTargetFile.SetGzipContent(true)
|
2024-09-19 08:03:23 +00:00
|
|
|
|
|
|
|
t.writer, err = tarTargetFile.(data.ContentReadWriter).ContentWriterStream()
|
|
|
|
if err == io.EOF {
|
|
|
|
slog.Info("Tar.targetResource() ContentWriterStream", "target", tarTargetFile, "tar", t.writer.(*transport.Writer), "error", err)
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
t.targetArchive = tar.NewWriter(t.writer)
|
|
|
|
slog.Info("Tar.targetResource() SetContentSourceRef", "target", tarTargetFile, "uri", t.Uri.String(), "tar", t.targetArchive, "error", err)
|
2024-10-04 00:30:49 +00:00
|
|
|
|
2024-09-19 08:03:23 +00:00
|
|
|
}
|
|
|
|
target = t.emitResource
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Convert a document of file resources to a tar file resource
|
|
|
|
func (t *Tar) Emit(document data.Document, filter data.ElementSelector) (resourceTarget data.Resource, err error) {
|
|
|
|
|
|
|
|
resourceTarget, err = t.targetResource()
|
|
|
|
|
|
|
|
slog.Info("Tar.Emit()", "writer", t.writer.(*transport.Writer), "error", err)
|
|
|
|
|
|
|
|
for _,res := range document.Filter(func(d data.Declaration) bool {
|
|
|
|
return d.ResourceType() == "file"
|
|
|
|
}) {
|
|
|
|
|
|
|
|
var f data.FileResource = res.(*folio.Declaration).Attributes.(data.FileResource)
|
|
|
|
|
|
|
|
//f.PathNormalization(true)
|
|
|
|
//err = f.NormalizePath()
|
|
|
|
|
|
|
|
fileInfo := f.FileInfo()
|
|
|
|
slog.Info("Tar.Emit() FileInfo", "fileinfo", fileInfo, "size", fileInfo.Size(), "file", f)
|
|
|
|
if fileInfo.Size() < 1 {
|
|
|
|
if len(f.GetContentSourceRef()) > 0 {
|
|
|
|
rs, _ := f.(data.ContentReader).ContentReaderStream()
|
|
|
|
info, _ := rs.Stat()
|
|
|
|
err = f.SetFileInfo(info)
|
|
|
|
slog.Info("Tar.Emit() Set FileInfo from ContentSourceRef", "fileinfo", f.FileInfo(), "file", f)
|
|
|
|
rs.Close()
|
|
|
|
} else {
|
|
|
|
if err = f.(data.Info).ReadStat(); err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
slog.Info("Tar.Emit", "file", f, "size", fileInfo.Size(), "error", err)
|
|
|
|
hdr, fiErr := tar.FileInfoHeader(fileInfo, "")
|
|
|
|
|
|
|
|
if fileInfo.Mode() & fs.ModeSymlink != 0 {
|
|
|
|
hdr.Linkname = f.GetTarget()
|
|
|
|
}
|
|
|
|
|
|
|
|
slog.Info("Tar.Emit", "header", hdr, "size", fileInfo.Size(), "err", fiErr)
|
|
|
|
if err := t.targetArchive.WriteHeader(hdr); err != nil {
|
|
|
|
slog.Error("Tar.Emit() WriteHeader", "target", t.targetArchive, "header", hdr, "resource", f, "fileinfo", fileInfo, "error", err)
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if fileInfo.IsDir() {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
slog.Info("Tar.Emit - writing resource to target archive", "target", t.targetArchive, "resource", f, "err", err)
|
|
|
|
if _, err := f.GetContent(t.targetArchive); err != nil {
|
|
|
|
slog.Error("Tar.Emit() Content", "target", t.targetArchive, "resource", f, "fileinfo", fileInfo, "error", err)
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
slog.Info("Tar.Emit - wrote", "resource", f, "err", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Convert a tar file resource to a document of file resources
|
|
|
|
func (t *Tar) Extract(resourceSource data.Resource, filter data.ElementSelector) (document data.Document, err error) {
|
|
|
|
document = folio.DocumentRegistry.NewDocument("")
|
|
|
|
var tarSourceFile data.FileResource = resourceSource.(data.FileResource)
|
|
|
|
//tarSourceFile := resourceSource.(*resource.File)
|
|
|
|
|
2024-10-04 00:30:49 +00:00
|
|
|
tarSourceFile.SetGzipContent(true)
|
2024-09-19 08:03:23 +00:00
|
|
|
t.reader, err = tarSourceFile.GetContent(nil)
|
|
|
|
sourceArchive := tar.NewReader(t.reader)
|
|
|
|
|
|
|
|
defer t.reader.Close()
|
|
|
|
|
|
|
|
for {
|
|
|
|
var hdr *tar.Header
|
|
|
|
hdr, err = sourceArchive.Next()
|
|
|
|
if err == io.EOF {
|
2024-10-04 00:30:49 +00:00
|
|
|
slog.Info("Tar.Extract() EOF", "source", sourceArchive)
|
2024-09-19 08:03:23 +00:00
|
|
|
err = nil
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if err != nil {
|
2024-10-04 00:30:49 +00:00
|
|
|
slog.Info("Tar.Extract() ERROR", "source", sourceArchive, "error", err)
|
2024-09-19 08:03:23 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
var fileResource data.Resource
|
|
|
|
uri := fmt.Sprintf("file://%s", hdr.Name)
|
|
|
|
if fileResource, err = document.(*folio.Document).NewResource(uri); err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
var f data.FileResource = fileResource.(data.FileResource)
|
|
|
|
|
|
|
|
if err = f.SetFileInfo(hdr.FileInfo()); err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
err = f.SetContent(sourceArchive)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *Tar) Close() (err error) {
|
|
|
|
if t.reader != nil {
|
|
|
|
if err = t.reader.Close(); err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if err = t.targetArchive.Close(); err == nil {
|
|
|
|
if t.writer != nil {
|
|
|
|
err = t.writer.Close()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|