jx/internal/fan/tar.go

213 lines
5.6 KiB
Go
Raw Permalink Normal View History

// Copyright 2024 Matthew Rich <matthewrich.conf@gmail.com>. All rights reserved.
package fan
import (
_ "context"
_ "encoding/json"
"fmt"
_ "gopkg.in/yaml.v3"
"net/url"
"decl/internal/transport"
"decl/internal/data"
"decl/internal/folio"
"archive/tar"
_ "regexp"
"io"
"io/fs"
"log"
"log/slog"
"path/filepath"
)
// Tar converts between a document of file resources and a tar archive.
//
// Only Uri is serialized; the remaining fields hold per-conversion state.
type Tar struct {
	// Uri is the location of the archive, serialized under the "uri" key.
	Uri folio.URI `yaml:"uri" json:"uri"`

	// parsedURI is the URL most recently handed to SetURI.
	parsedURI *url.URL `yaml:"-" json:"-"`

	// emitResource is the archive's target resource; targetResource creates
	// it on first use and returns the same value afterwards.
	emitResource data.Resource `yaml:"-" json:"-"`

	// reader is the content stream Extract reads the archive from.
	reader io.ReadCloser `yaml:"-" json:"-"`

	// writer is the content writer stream of emitResource.
	writer io.WriteCloser `yaml:"-" json:"-"`

	// targetArchive is the tar writer layered on top of writer.
	targetArchive *tar.Writer `yaml:"-" json:"-"`
}
// NewTar returns an empty Tar converter; call SetURI before using it.
func NewTar() *Tar {
	return new(Tar)
}
func init() {
folio.DocumentRegistry.ConverterTypes.Register([]string{"tar"}, func(u *url.URL) data.Converter {
t := NewTar()
t.SetURI(u)
return t
})
folio.DocumentRegistry.ConverterTypes.RegisterContentType([]string{"tar", "tar.gz", "tgz"}, func(u *url.URL) data.Converter {
t := NewTar()
t.SetURI(u)
return t
})
}
// Type returns the converter type name, "tar".
func (t *Tar) Type() data.TypeName {
	return data.TypeName("tar")
}
// SetURI binds the converter to the archive identified by u.
//
// u is modified in place: its scheme is forced to "file" and, unless the path
// is empty or "-", the path is replaced by the absolute form of
// <hostname>/<path>. The host component is therefore treated as the leading
// path element, so a relative location such as tar://dir/a.tar resolves
// against the current working directory.
func (t *Tar) SetURI(u *url.URL) {
	slog.Info("Tar.SetURI()", "tar", t)
	u.Scheme = "file"

	// An empty path or "-" is passed through untouched.
	// NOTE(review): "-" presumably denotes a standard stream - confirm.
	if u.Path != "" && u.Path != "-" {
		joined := filepath.Join(u.Hostname(), u.Path)
		if fileAbsolutePath, err := filepath.Abs(joined); err != nil {
			// filepath.Abs returns "" on failure; keep the path we were
			// given rather than silently erasing it.
			slog.Warn("Tar.SetURI() unable to resolve absolute path", "path", joined, "error", err)
		} else {
			u.Path = fileAbsolutePath
		}
	}

	t.Uri.SetURL(u)
	t.parsedURI = u
	/*
		exttype, fileext := t.Uri.Extension()
		if exttype == "tgz" || fileext == "tgz" {
			q := u.Query()
			q.Set("gzip", string("true"))
			u.RawQuery = q.Encode()
		}
	*/
}
// targetResource returns the resource the archive is written to.
//
// On first use it creates the resource from t.Uri, enables gzip content on
// it, opens its content writer stream and wraps that stream in a tar writer.
// Later calls return the same resource.
//
// Nothing is stored on t unless every step succeeds, so a failed call can be
// retried and never leaves a resource cached without a usable archive writer.
// Failures are returned to the caller instead of panicking.
func (t *Tar) targetResource() (target data.Resource, err error) {
	if t.emitResource != nil {
		return t.emitResource, nil
	}

	var res data.Resource
	if res, err = t.Uri.NewResource(nil); err != nil {
		return nil, err
	}

	tarTargetFile, ok := res.(data.FileResource)
	if !ok {
		return nil, fmt.Errorf("tar target %q: %T is not a file resource", t.Uri.String(), res)
	}
	tarTargetFile.SetContentSourceRef(t.Uri.String())
	tarTargetFile.SetGzipContent(true)

	streamer, ok := tarTargetFile.(data.ContentReadWriter)
	if !ok {
		return nil, fmt.Errorf("tar target %q: %T does not provide a content writer stream", t.Uri.String(), res)
	}

	var w io.WriteCloser
	if w, err = streamer.ContentWriterStream(); err != nil {
		return nil, fmt.Errorf("tar target %q: opening content writer: %w", t.Uri.String(), err)
	}

	// Commit the state only now that every step has succeeded.
	t.emitResource = res
	t.writer = w
	t.targetArchive = tar.NewWriter(w)
	slog.Info("Tar.targetResource() SetContentSourceRef", "target", tarTargetFile, "uri", t.Uri.String(), "tar", t.targetArchive)

	return t.emitResource, nil
}
// Convert a document of file resources to a tar file resource
func (t *Tar) Emit(document data.Document, filter data.ElementSelector) (resourceTarget data.Resource, err error) {
resourceTarget, err = t.targetResource()
slog.Info("Tar.Emit()", "writer", t.writer.(*transport.Writer), "error", err)
for _,res := range document.Filter(func(d data.Declaration) bool {
return d.ResourceType() == "file"
}) {
var f data.FileResource = res.(*folio.Declaration).Attributes.(data.FileResource)
//f.PathNormalization(true)
//err = f.NormalizePath()
fileInfo := f.FileInfo()
slog.Info("Tar.Emit() FileInfo", "fileinfo", fileInfo, "size", fileInfo.Size(), "file", f)
if fileInfo.Size() < 1 {
if len(f.GetContentSourceRef()) > 0 {
rs, _ := f.(data.ContentReader).ContentReaderStream()
info, _ := rs.Stat()
err = f.SetFileInfo(info)
slog.Info("Tar.Emit() Set FileInfo from ContentSourceRef", "fileinfo", f.FileInfo(), "file", f)
rs.Close()
} else {
if err = f.(data.Info).ReadStat(); err != nil {
return
}
}
}
slog.Info("Tar.Emit", "file", f, "size", fileInfo.Size(), "error", err)
hdr, fiErr := tar.FileInfoHeader(fileInfo, "")
if fileInfo.Mode() & fs.ModeSymlink != 0 {
hdr.Linkname = f.GetTarget()
}
slog.Info("Tar.Emit", "header", hdr, "size", fileInfo.Size(), "err", fiErr)
if err := t.targetArchive.WriteHeader(hdr); err != nil {
slog.Error("Tar.Emit() WriteHeader", "target", t.targetArchive, "header", hdr, "resource", f, "fileinfo", fileInfo, "error", err)
log.Fatal(err)
}
if fileInfo.IsDir() {
continue
}
slog.Info("Tar.Emit - writing resource to target archive", "target", t.targetArchive, "resource", f, "err", err)
if _, err := f.GetContent(t.targetArchive); err != nil {
slog.Error("Tar.Emit() Content", "target", t.targetArchive, "resource", f, "fileinfo", fileInfo, "error", err)
log.Fatal(err)
}
slog.Info("Tar.Emit - wrote", "resource", f, "err", err)
}
return
}
// Extract converts a tar file resource to a document of file resources.
//
// resourceSource must be a file resource; gzip content is enabled on it
// before its content is read. For every archive entry a resource with the
// URI "file://<entry name>" is added to a new document, its file info is set
// from the tar header and its content is read from the archive. The filter
// argument is currently not consulted.
//
// The document built so far is returned together with any error.
func (t *Tar) Extract(resourceSource data.Resource, filter data.ElementSelector) (document data.Document, err error) {
	document = folio.DocumentRegistry.NewDocument("")
	folioDocument, ok := document.(*folio.Document)
	if !ok {
		return document, fmt.Errorf("tar extract: unexpected document type %T", document)
	}

	tarSourceFile, ok := resourceSource.(data.FileResource)
	if !ok {
		return document, fmt.Errorf("tar extract: %T is not a file resource", resourceSource)
	}
	tarSourceFile.SetGzipContent(true)

	// Use a local so a failed open never leaves a non-nil interface around a
	// nil reader in t.reader.
	var reader io.ReadCloser
	if reader, err = tarSourceFile.GetContent(nil); err != nil {
		return document, err
	}
	t.reader = reader
	defer func() {
		// The archive has been consumed (or abandoned) by now; a close error
		// on the read side cannot change the result, so it is dropped.
		_ = reader.Close()
		// Clear the field so a later Close does not close the stream twice.
		t.reader = nil
	}()

	sourceArchive := tar.NewReader(reader)
	for {
		hdr, nextErr := sourceArchive.Next()
		if nextErr == io.EOF {
			slog.Info("Tar.Extract() EOF", "source", sourceArchive)
			return document, nil
		}
		if nextErr != nil {
			slog.Info("Tar.Extract() ERROR", "source", sourceArchive, "error", nextErr)
			return document, nextErr
		}

		var fileResource data.Resource
		uri := fmt.Sprintf("file://%s", hdr.Name)
		if fileResource, err = folioDocument.NewResource(uri); err != nil {
			return document, err
		}
		f, ok := fileResource.(data.FileResource)
		if !ok {
			return document, fmt.Errorf("tar extract: resource for %q has type %T, not a file resource", hdr.Name, fileResource)
		}
		if err = f.SetFileInfo(hdr.FileInfo()); err != nil {
			return document, err
		}
		// The tar reader yields only the bytes of the current entry.
		if err = f.SetContent(sourceArchive); err != nil {
			return document, err
		}
	}
}
// Close releases the streams held by the converter.
//
// The reader (if any) is closed first, then the tar writer - which writes the
// archive trailer - and finally the underlying content writer. Every handle
// that is set is closed even when an earlier close fails; the first error
// encountered is returned. Calling Close on a converter that never emitted
// an archive is safe.
func (t *Tar) Close() (err error) {
	if t.reader != nil {
		err = t.reader.Close()
	}
	// targetArchive is nil unless Emit ran; tar.Writer.Close must not be
	// called on a nil writer.
	if t.targetArchive != nil {
		if archiveErr := t.targetArchive.Close(); err == nil {
			err = archiveErr
		}
	}
	// Close the underlying stream after the archive so the trailer reaches it.
	if t.writer != nil {
		if writerErr := t.writer.Close(); err == nil {
			err = writerErr
		}
	}
	return err
}