// Copyright 2024 Matthew Rich . All rights reserved. package fan import ( "context" _ "encoding/json" "fmt" _ "gopkg.in/yaml.v3" "net/url" "path/filepath" "decl/internal/codec" "decl/internal/folio" "decl/internal/data" _ "os" "io" "errors" "log/slog" "strings" ) /* Converts a file container an encoded (yaml, json, etc) JX document into a Document by using `Extract` or `ExtractMany`. Converts a JX Document structure into a yaml, json, etc encoded resource. */ type JxFile struct { Uri folio.URI `yaml:"uri,omitempty" json:"uri,omitempty"` url *url.URL `yaml:"-" json:"-"` emitResource data.Resource `yaml:"-" json:"-"` Path string `yaml:"path" json:"path"` Format codec.Format `yaml:"format,omitempty" json:"format,omitempty"` reader io.ReadCloser `yaml:"-" json:"-"` writer io.WriteCloser `yaml:"-" json:"-"` decoder codec.Decoder `yaml:"-" json:"-"` encoder codec.Encoder `yaml:"-" json:"-"` closer func() error `yaml:"-" json:"-"` index int `yaml:"-" json:"-"` } func NewJxFile() *JxFile { return &JxFile{ Format: codec.FormatYaml, index: 0, closer: func() error { return nil } } } func init() { folio.DocumentRegistry.ConverterTypes.Register([]string{"decl", "jx", "yaml", "yml", "json"}, func(u *url.URL) data.Converter { j := NewJxFile() j.SetURI(u) return j }) folio.DocumentRegistry.ConverterTypes.RegisterContentType([]string{"jx.yaml","jx.yml","jx.yaml.gz","jx.yml.gz", "jx.json", "jx.json.gz"}, func(u *url.URL) data.Converter { j := NewJxFile() slog.Info("JxFile.Factory", "jx", j) j.SetURI(u) slog.Info("JxFile.Factory", "jx", j) return j }) } /* Schemes: file, json, yaml, yml, decl, jx, http, https, other transport schemes? Format: URL scheme name, `format` query param, file extension If the input url is a file Detect Format */ func (j *JxFile) SetURI(u *url.URL) { slog.Info("JxFile.SetURI()", "jx", j) if ! errors.Is(j.Format.Set(u.Scheme), codec.ErrInvalidFormat) { u.Scheme = "file" q := u.Query() q.Set("format", string(j.Format)) u.RawQuery = q.Encode() } else { if format,ok := u.Query()["format"]; ok { _ = j.Format.Set(format[0]) } } if u.Scheme == "file" { if u.Path == "" || u.Path == "-" { j.Path = "-" } else { fileAbsolutePath, _ := filepath.Abs(filepath.Join(u.Hostname(), u.Path)) j.Path = fileAbsolutePath if _, err := u.Parse(j.Path); err != nil { panic(err) } } } else { j.Path = filepath.Join(u.Hostname(), u.RequestURI()) } j.Uri.SetURL(u) if j.Format == codec.FormatYaml { exttype, ext := j.Uri.Extension() if j.Format.Set(exttype) != nil { _ = j.Format.Set(ext) } } } func (j *JxFile) setencoder(target data.ContentIdentifier) { if formatErr := j.Format.Set(target.ContentType()); formatErr != nil { j.Format = codec.FormatYaml if format,ok := j.url.Query()["format"]; ok { if queryFormatErr := j.Format.Set(format[0]); queryFormatErr != nil { j.Format = codec.FormatYaml } } } if j.encoder == nil { j.encoder = codec.NewEncoder(j.writer, j.Format) } } func (j *JxFile) setdecoder(source data.ContentIdentifier) { if j.decoder == nil { for _,v := range strings.Split(source.ContentType(), ".") { _ = j.Format.Set(v) } slog.Info("JxFile.setdecoder()", "type", source.ContentType(), "format", j.Format) j.decoder = codec.NewDecoder(j.reader, j.Format) } slog.Info("JxFile.setdecoder()", "decoder", j.decoder) } func (j *JxFile) Type() data.TypeName { return "jx" } func (j *JxFile) Extract(resourceSource data.Resource, filter data.ElementSelector) (doc data.Document, err error) { if j.index == 0 { if resourceSource == nil { if len(j.Uri) > 0 { resourceSource, err = j.Uri.NewResource(nil) } else { return nil, ErrInvalidSource } } slog.Info("JxFile.Extract()", "source", resourceSource, "error", err) var jxSourceFile data.FileResource = resourceSource.(data.FileResource) j.reader, err = jxSourceFile.(data.ContentGetter).GetContent(nil) slog.Info("JxFile.Extract()", "jxfile", j, "error", err) if err != nil { return } j.setdecoder(jxSourceFile.(data.ContentIdentifier)) slog.Info("JxFile.Extract()", "jxfile", j) } uri := resourceSource.URI() if folio.DocumentRegistry.HasDocument(folio.URI(uri)) { uri = fmt.Sprintf("%s?index=%d", uri, j.index) } doc = folio.DocumentRegistry.NewDocument(folio.URI(uri)) err = j.decoder.Decode(doc) slog.Info("JxFile.Extract()", "doc", doc, "jxfile", j, "error", err) j.index++ if err != nil { return } if err = doc.Validate(); err != nil { return } return } func (j *JxFile) ExtractMany(resourceSource data.Resource, filter data.ElementSelector) (documents []data.Document, err error) { documents = make([]data.Document, 0, 100) defer j.Close() j.index = 0 for { var doc data.Document if doc, err = j.Extract(resourceSource, filter); err == nil { documents = append(documents, doc) } else { if errors.Is(err, io.EOF) { err = nil //documents = append(documents, doc) } break } slog.Info("JxFile.ExtractMany() loading", "document", j.index) } slog.Info("JxFile.ExtractMany()", "jxfile", j, "error", err) return } func (j *JxFile) targetResource() (target data.Resource, err error) { if j.emitResource == nil { targetUrl := j.Uri.Parse().URL() targetUrl.Scheme = "file" q := targetUrl.Query() q.Set("format", string(j.Format)) targetUrl.RawQuery = q.Encode() j.Uri.SetURL(targetUrl) slog.Info("JxFile.targetResource() SetURI", "uri", j.Uri, "targetUrl", targetUrl) j.url = targetUrl slog.Info("JxFile.targetResource()", "target", targetUrl, "jxfile", j) if j.emitResource, err = j.Uri.NewResource(nil); err != nil { return nil, err } var jxTargetFile data.FileResource = j.emitResource.(data.FileResource) jxTargetFile.SetContentSourceRef(j.Uri.String()) slog.Info("JxFile.targetResource() SetContentSourceRef", "target", jxTargetFile, "uri", j.Uri.String()) j.writer, err = jxTargetFile.(data.ContentReadWriter).ContentWriterStream() j.setencoder(j.emitResource.(data.ContentIdentifier)) } target = j.emitResource return } func (j *JxFile) Emit(document data.Document, filter data.ElementSelector) (resourceTarget data.Resource, err error) { ctx := context.Background() resourceTarget, err = j.targetResource() if err != nil { return } emitDoc := folio.DocumentRegistry.NewDocument("") if err = document.Validate(); err != nil { return } slog.Info("JxFile.Emit()", "document", document, "context", ctx) for _, declaration := range document.Filter(func (d data.Declaration) bool { if filter != nil { return filter(d.(*folio.Declaration).Attributes) } return true }) { //declaration.(*folio.Declaration).Resource().Read(ctx) // XXX added read here since it was removed from SetURI emitDoc.ResourceDeclarations = append(emitDoc.ResourceDeclarations, declaration.(*folio.Declaration)) } document.(*folio.Document).Format = j.Format slog.Info("Emit", "target", j, "encoder", j.encoder, "emit", emitDoc) if err = j.encoder.Encode(document); err != nil { slog.Info("Emit", "err", err) return } return } func (j *JxFile) EmitMany(documents []data.Document, filter data.ElementSelector) (resourceTarget data.Resource, err error) { for _, doc := range documents { if resourceTarget, err = j.Emit(doc, filter); err != nil { return } } return } func (j *JxFile) Close() (err error) { if j.closer != nil { err = j.closer() } if j.reader != nil { j.reader.Close() } if j.encoder != nil { j.encoder.Close() } if j.writer != nil { j.writer.Close() } return }