// Copyright 2024 Matthew Rich . All rights reserved. package source import ( _ "context" _ "encoding/json" _ "fmt" _ "gopkg.in/yaml.v3" "net/url" "path/filepath" "decl/internal/resource" "compress/gzip" "archive/tar" "regexp" "os" "io" ) type Tar struct { Path string `yaml:"path" json:"path"` } func NewTar() *Tar { return &Tar{} } func init() { SourceTypes.Register([]string{"tar"}, func(u *url.URL) DocSource { t := NewTar() t.Path,_ = filepath.Abs(filepath.Join(u.Hostname(), u.Path)) return t }) SourceTypes.Register([]string{"tar.gz", "tgz"}, func(u *url.URL) DocSource { t := NewTar() if u.Scheme == "file" { fileAbsolutePath, _ := filepath.Abs(filepath.Join(u.Hostname(), u.RequestURI())) t.Path = fileAbsolutePath } else { t.Path = filepath.Join(u.Hostname(), u.Path) } return t }) } func (t *Tar) Type() string { return "tar" } func (t *Tar) ExtractResources(filter ResourceSelector) ([]*resource.Document, error) { documents := make([]*resource.Document, 0, 100) d := resource.NewDocument() documents = append(documents, d) TarGzipFileName := regexp.MustCompile(`^.*\.(tar\.gz|tgz)$`) TarFileName := regexp.MustCompile(`^.*\.tar$`) file, fileErr := os.Open(t.Path) if fileErr != nil { return documents, fileErr } var gzipReader io.Reader switch t.Path { case TarGzipFileName.FindString(t.Path): zr, err := gzip.NewReader(file) if err != nil { return documents, err } gzipReader = zr fallthrough case TarFileName.FindString(t.Path): var fileReader io.Reader if gzipReader == nil { fileReader = file } else { fileReader = gzipReader } tarReader := tar.NewReader(fileReader) for { hdr, err := tarReader.Next() if err == io.EOF { break } if err != nil { return documents, err } f := resource.NewFile() f.Path = hdr.Name if fiErr := f.UpdateAttributesFromFileInfo(hdr.FileInfo()); fiErr != nil { return documents, fiErr } readFileData, readErr := io.ReadAll(tarReader) if readErr != nil { return documents, readErr } f.Content = string(readFileData) d.AddResourceDeclaration("file", f) } } return documents, nil }