tagger/cmd/cli/main.go
Matthew Rich 14fda44c41
Some checks failed
Lint / golangci-lint (push) Failing after 10m25s
Declarative Tests / test (push) Failing after 1m59s
add artifacts
2024-04-04 13:37:54 -07:00

172 lines
3.6 KiB
Go

// Copyright 2024 Matthew Rich <matthewrich.conf@gmail.com>. All rights reserved.
package main
import (
"io"
_ "io/ioutil"
"fmt"
"strings"
"regexp"
"tagger/internal/models"
"tagger/internal/client"
"flag"
"net/http"
"golang.org/x/net/html"
"container/list"
_ "unicode"
"net/url"
)
// htmlNode is a defined type over html.Node. Declaring it locally lets this
// file attach the search helpers Find, FindAll and FindNodes to parsed HTML
// nodes; values convert to and from *html.Node with a plain type conversion.
type htmlNode html.Node
// metaTermFilter matches every run of characters that is not allowed in a tag
// term: anything other than '-', ',', ASCII word characters and whitespace.
// It is compiled once at package scope instead of on every metaKeywords call.
var metaTermFilter = regexp.MustCompile(`[^-,\w\s]+`)

// metaKeywords turns a <meta> element into a list of normalized tag terms.
//
// Only elements whose name attribute is "keywords", "title" or "description"
// produce terms; any other name, or a missing name, yields no terms. The
// content attribute is percent-unescaped, stripped of disallowed characters,
// and then:
//   - for "keywords", split on commas into one term per entry;
//   - for "title" and "description", treated as a single term with commas
//     removed.
//
// Each term is lower-cased and its whitespace-separated words are joined with
// '-'. Terms that end up empty are dropped, so the result never contains "".
//
// The attributes of every inspected element are echoed to standard output as
// a diagnostic line.
func metaKeywords(n *htmlNode) []string {
	var name, content string
	fmt.Printf("meta ")
	for _, attribute := range n.Attr {
		fmt.Printf(" %s=%s", attribute.Key, attribute.Val)
		switch attribute.Key {
		case "name":
			switch attribute.Val {
			case "keywords", "title", "description":
				name = attribute.Val
			default:
				// Not a meta element we extract terms from.
				fmt.Printf("\n")
				return nil
			}
		case "content":
			unescaped, err := url.PathUnescape(attribute.Val)
			if err != nil {
				// Content with a stray '%' (e.g. "50% off") is not valid
				// percent-encoding; use it verbatim rather than losing it.
				unescaped = attribute.Val
			}
			content = unescaped
		}
	}
	fmt.Printf("\n")
	if name == "" {
		return nil
	}

	cleaned := metaTermFilter.ReplaceAllString(content, "")
	var candidates []string
	if name == "keywords" {
		candidates = strings.Split(cleaned, ",")
	} else {
		candidates = []string{strings.ReplaceAll(cleaned, ",", "")}
	}

	terms := make([]string, 0, len(candidates))
	for _, candidate := range candidates {
		// Fields trims the ends and collapses any whitespace run (spaces,
		// tabs, newlines) so words are separated by exactly one '-'.
		term := strings.ToLower(strings.Join(strings.Fields(candidate), "-"))
		if term != "" {
			terms = append(terms, term)
		}
	}
	return terms
}
// FindAll returns every element node named tagName in the subtree rooted at
// h, h itself included. A nil receiver yields nil.
func (h *htmlNode) FindAll(tagName string) []*htmlNode {
	if h != nil {
		return h.FindNodes(tagName, false)
	}
	return nil
}
// Find returns the first element node named tagName that FindNodes reports
// for the subtree rooted at h, or nil when the receiver is nil or nothing
// matches.
func (h *htmlNode) Find(tagName string) *htmlNode {
	if h == nil {
		return nil
	}
	if matches := h.FindNodes(tagName, true); len(matches) != 0 {
		return matches[0]
	}
	return nil
}
// FindNodes walks the subtree rooted at h breadth-first and collects the
// element nodes whose tag name equals tagName. When first is true the walk
// stops as soon as one match has been collected. A nil receiver yields nil.
func (h *htmlNode) FindNodes(tagName string, first bool) []*htmlNode {
	if h == nil {
		return nil
	}

	var matches []*htmlNode
	pending := list.New()
	pending.PushBack((*html.Node)(h))

	for front := pending.Front(); front != nil; front = pending.Front() {
		node := pending.Remove(front).(*html.Node)

		if node.Type == html.ElementNode && node.Data == tagName {
			matches = append(matches, (*htmlNode)(node))
			if first {
				return matches
			}
		}

		// Queue the children so they are visited after the current level.
		for child := node.FirstChild; child != nil; child = child.NextSibling {
			pending.PushBack(child)
		}
	}
	return matches
}
// extractTagsMetaDataFromUrl parses resource as an HTML document and returns
// the tag terms produced by metaKeywords for every <meta> element found under
// the first <head> element. A document without a <head> yields no terms.
//
// A parse failure is returned as an error rather than raised as a panic, so
// the caller decides how to react. The function reads from resource but does
// not close it; closing remains the caller's responsibility.
func extractTagsMetaDataFromUrl(resource io.ReadCloser) ([]string, error) {
	doc, err := html.Parse(resource)
	if err != nil {
		return nil, fmt.Errorf("parsing HTML document: %w", err)
	}

	var results []string
	// Find returns nil when there is no <head>; FindAll on a nil node returns
	// nil, so the loop is simply skipped in that case.
	for _, meta := range (*htmlNode)(doc).Find("head").FindAll("meta") {
		results = append(results, metaKeywords(meta)...)
	}
	return results, nil
}
// main is the CLI entry point. It always pings the API at -endpoint first and
// then runs in one of two modes:
//
//   - with -extract set, it downloads that URL, extracts tag terms from the
//     page's <meta> elements and registers each term against the URL through
//     the API client;
//   - otherwise it fetches the tag named by -tag from the API and prints the
//     decoded model.
//
// Any failure aborts the program with a panic.
func main() {
	tag := flag.String("tag", "tag-name", "Tag name")
	//resource := flag.String("resource", "resource URL", "Resource URL")
	endpoint := flag.String("endpoint", "http://localhost:8080/api/v1", "API endpoint URL")
	extractUrl := flag.String("extract", "", "Extract tags from resource URL")
	flag.Parse()

	// Print the flag's value; formatting the *string itself would only show a
	// pointer address.
	fmt.Printf("%#v\n", *extractUrl)

	cli := client.New(*endpoint)
	if e := cli.Ping(); e != nil {
		panic(e)
	}

	if len(*extractUrl) > 0 {
		s, e := http.Get(*extractUrl)
		if e != nil {
			panic(e)
		}
		// The response body must be closed once the document has been read.
		defer s.Body.Close()

		terms, e := extractTagsMetaDataFromUrl(s.Body)
		if e != nil {
			panic(e)
		}
		// TODO: filter terms
		fmt.Printf("%s\n", terms)
		for _, t := range terms {
			cli.AddTagResource(t, *extractUrl)
		}
		return
	}

	// Escape the tag so names containing spaces, '/' or '?' stay a single
	// path segment of the request URL.
	requestUrl := fmt.Sprintf("%s/tags/%s", *endpoint, url.PathEscape(*tag))
	r, e := http.Get(requestUrl)
	if e != nil {
		panic(e)
	}
	defer r.Body.Close()

	tagModel, e := models.NewTagFromJson(r.Body)
	if e != nil {
		// Report a decode failure instead of printing a meaningless model.
		panic(e)
	}
	fmt.Printf("%#v\n", tagModel)
}