// Copyright 2024 Matthew Rich . All rights reserved. package fan import ( _ "context" _ "encoding/json" "fmt" _ "gopkg.in/yaml.v3" "net/url" "decl/internal/transport" "decl/internal/data" "decl/internal/folio" "archive/tar" _ "regexp" "io" "io/fs" "log" "log/slog" "path/filepath" ) type Tar struct { Uri folio.URI `yaml:"uri" json:"uri"` parsedURI *url.URL `yaml:"-" json:"-"` emitResource data.Resource `yaml:"-" json:"-"` reader io.ReadCloser `yaml:"-" json:"-"` writer io.WriteCloser `yaml:"-" json:"-"` targetArchive *tar.Writer `yaml:"-" json:"-"` } func NewTar() *Tar { return &Tar{} } func init() { folio.DocumentRegistry.ConverterTypes.Register([]string{"tar"}, func(u *url.URL) data.Converter { t := NewTar() t.SetURI(u) return t }) folio.DocumentRegistry.ConverterTypes.RegisterContentType([]string{"tar", "tar.gz", "tgz"}, func(u *url.URL) data.Converter { t := NewTar() t.SetURI(u) return t }) } func (t *Tar) Type() data.TypeName { return "tar" } func (t *Tar) SetURI(u *url.URL) { slog.Info("Tar.SetURI()", "tar", t) u.Scheme = "file" if u.Path == "" || u.Path == "-" { } else { fileAbsolutePath, _ := filepath.Abs(filepath.Join(u.Hostname(), u.Path)) u.Path = fileAbsolutePath } t.Uri.SetURL(u) t.parsedURI = u /* exttype, fileext := t.Uri.Extension() if exttype == "tgz" || fileext == "tgz" { q := u.Query() q.Set("gzip", string("true")) u.RawQuery = q.Encode() } */ } func (t *Tar) targetResource() (target data.Resource, err error) { if t.emitResource == nil { if t.emitResource, err = t.Uri.NewResource(nil); err != nil { return nil, err } var tarTargetFile data.FileResource = t.emitResource.(data.FileResource) tarTargetFile.SetContentSourceRef(t.Uri.String()) tarTargetFile.SetGzipContent(true) t.writer, err = tarTargetFile.(data.ContentReadWriter).ContentWriterStream() if err == io.EOF { slog.Info("Tar.targetResource() ContentWriterStream", "target", tarTargetFile, "tar", t.writer.(*transport.Writer), "error", err) panic(err) } t.targetArchive = tar.NewWriter(t.writer) slog.Info("Tar.targetResource() SetContentSourceRef", "target", tarTargetFile, "uri", t.Uri.String(), "tar", t.targetArchive, "error", err) } target = t.emitResource return } // Convert a document of file resources to a tar file resource func (t *Tar) Emit(document data.Document, filter data.ElementSelector) (resourceTarget data.Resource, err error) { resourceTarget, err = t.targetResource() slog.Info("Tar.Emit()", "writer", t.writer.(*transport.Writer), "error", err) for _,res := range document.Filter(func(d data.Declaration) bool { return d.ResourceType() == "file" }) { var f data.FileResource = res.(*folio.Declaration).Attributes.(data.FileResource) //f.PathNormalization(true) //err = f.NormalizePath() fileInfo := f.FileInfo() slog.Info("Tar.Emit() FileInfo", "fileinfo", fileInfo, "size", fileInfo.Size(), "file", f) if fileInfo.Size() < 1 { if len(f.GetContentSourceRef()) > 0 { rs, _ := f.(data.ContentReader).ContentReaderStream() info, _ := rs.Stat() err = f.SetFileInfo(info) slog.Info("Tar.Emit() Set FileInfo from ContentSourceRef", "fileinfo", f.FileInfo(), "file", f) rs.Close() } else { if err = f.(data.Info).ReadStat(); err != nil { return } } } slog.Info("Tar.Emit", "file", f, "size", fileInfo.Size(), "error", err) hdr, fiErr := tar.FileInfoHeader(fileInfo, "") if fileInfo.Mode() & fs.ModeSymlink != 0 { hdr.Linkname = f.GetTarget() } slog.Info("Tar.Emit", "header", hdr, "size", fileInfo.Size(), "err", fiErr) if err := t.targetArchive.WriteHeader(hdr); err != nil { slog.Error("Tar.Emit() WriteHeader", "target", t.targetArchive, "header", hdr, "resource", f, "fileinfo", fileInfo, "error", err) log.Fatal(err) } if fileInfo.IsDir() { continue } slog.Info("Tar.Emit - writing resource to target archive", "target", t.targetArchive, "resource", f, "err", err) if _, err := f.GetContent(t.targetArchive); err != nil { slog.Error("Tar.Emit() Content", "target", t.targetArchive, "resource", f, "fileinfo", fileInfo, "error", err) log.Fatal(err) } slog.Info("Tar.Emit - wrote", "resource", f, "err", err) } return } // Convert a tar file resource to a document of file resources func (t *Tar) Extract(resourceSource data.Resource, filter data.ElementSelector) (document data.Document, err error) { document = folio.DocumentRegistry.NewDocument("") var tarSourceFile data.FileResource = resourceSource.(data.FileResource) //tarSourceFile := resourceSource.(*resource.File) tarSourceFile.SetGzipContent(true) t.reader, err = tarSourceFile.GetContent(nil) sourceArchive := tar.NewReader(t.reader) defer t.reader.Close() for { var hdr *tar.Header hdr, err = sourceArchive.Next() if err == io.EOF { slog.Info("Tar.Extract() EOF", "source", sourceArchive) err = nil break } if err != nil { slog.Info("Tar.Extract() ERROR", "source", sourceArchive, "error", err) return } var fileResource data.Resource uri := fmt.Sprintf("file://%s", hdr.Name) if fileResource, err = document.(*folio.Document).NewResource(uri); err != nil { return } var f data.FileResource = fileResource.(data.FileResource) if err = f.SetFileInfo(hdr.FileInfo()); err != nil { return } err = f.SetContent(sourceArchive) if err != nil { return } } return } func (t *Tar) Close() (err error) { if t.reader != nil { if err = t.reader.Close(); err != nil { return } } if err = t.targetArchive.Close(); err == nil { if t.writer != nil { err = t.writer.Close() } } return }