package main import ( "io" _ "io/ioutil" "fmt" "strings" "regexp" "tagger/internal/models" "tagger/internal/client" "flag" "net/http" "golang.org/x/net/html" "container/list" _ "unicode" "net/url" ) type htmlNode html.Node func metaKeywords(n *htmlNode) []string { var name, content string re := regexp.MustCompile(`[^-,\w\s]+`) fmt.Printf("meta ") for _, attribute := range n.Attr { fmt.Printf(" %s=%s", attribute.Key, attribute.Val) if attribute.Key == "name" { if attribute.Val != "keywords" && attribute.Val != "title" && attribute.Val != "description" { fmt.Printf("\n") return []string{} } else { name = attribute.Val } } if attribute.Key == "content" { content,_ = url.PathUnescape(attribute.Val) } } fmt.Printf("\n") if name != "" { var terms []string if name == "keywords" { terms = strings.Split(re.ReplaceAllString(content, ""), ",") } else { terms = []string{strings.ReplaceAll(re.ReplaceAllString(content, ""), ",", "")} } for i,t := range(terms) { terms[i] = strings.ToLower(strings.ReplaceAll(strings.TrimSpace(t), " ", "-")) } return terms //return strings.FieldsFunc(content, func(r rune) bool { return ! ( unicode.IsLetter(r) || unicode.IsNumber(r) || r == '-' ) }) } return []string{} } func (h *htmlNode) FindAll(tagName string) []*htmlNode { if h == nil { return nil } return h.FindNodes(tagName, false) } func (h *htmlNode) Find(tagName string) *htmlNode { if h == nil { return nil } results := h.FindNodes(tagName, true) if len(results) > 0 { return results[0] } return nil } func (h *htmlNode) FindNodes(tagName string, first bool) []*htmlNode { if h == nil { return nil } n := (*html.Node)(h) q := list.New() var results []*htmlNode q.PushBack(n) for q.Len() > 0 { v := (*html.Node)(q.Remove(q.Front()).(*html.Node)) if v.Type == html.ElementNode && v.Data == tagName { results = append(results, (*htmlNode)(v)) if first { break } } for c := v.FirstChild; c != nil; c = c.NextSibling { q.PushBack(c) } } return results } func extractTagsMetaDataFromUrl(resource io.ReadCloser) ([]string, error) { var results []string doc,e := html.Parse(resource) if e != nil { panic(e) } for _,v := range (*htmlNode)(doc).Find("head").FindAll("meta") { results = append(results, metaKeywords(v)...) } return results, nil } func main() { tag := flag.String("tag", "tag-name", "Tag name") //resource := flag.String("resource", "resource URL", "Resource URL") endpoint := flag.String("endpoint", "http://localhost:8080/api/v1", "API endpoint URL") extractUrl := flag.String("extract", "", "Extract tags from resource URL") flag.Parse() fmt.Printf("%#v\n", extractUrl) cli := client.New(*endpoint) if e := cli.Ping(); e != nil { panic(e) } if len(*extractUrl) > 0 { s,e := http.Get(*extractUrl) if e != nil { panic(e) } terms, e := extractTagsMetaDataFromUrl(s.Body) // filter terms fmt.Printf("%s\n", terms) for _,t := range(terms) { cli.AddTagResource(t, *extractUrl) } return } requestUrl := fmt.Sprintf("%s/tags/%s", *endpoint, *tag) r,e := http.Get(requestUrl) if e != nil { panic(e) } defer r.Body.Close() tagModel, e := models.NewTagFromJson(r.Body) if e != nil { //http.Error(w, err.Error(), http.StatusBadRequest) } fmt.Printf("%#v\n", tagModel) }