// Source: org2newsboat/tokenizer.go (2021-03-04 17:04:17 +01:00)
// 131 lines, 2.5 KiB, Go.
// NOTE: this header was reconstructed from file-browser residue that
// preceded the package clause and made the file uncompilable.
package main
import (
"fmt"
"regexp"
"strings"
)
// Feed holds a single subscription parsed from the org file: its URL
// plus an optional description and a list of tags.
type Feed struct {
	URL         string   // feed address
	Description string   // optional human-readable description
	Tags        []string // newsboat tags attached to the feed
}

// String renders the feed as a newsboat urls-file line: the URL,
// followed by the space-joined tags, and — when a description is
// present — a trailing " # description" suffix.
func (f Feed) String() string {
	tagPart := strings.TrimSpace(strings.Join(f.Tags, " "))
	out := fmt.Sprintf("%s %s", f.URL, tagPart)
	if f.Description != "" {
		out = fmt.Sprintf("%s # %s", strings.TrimSpace(out), f.Description)
	}
	return strings.TrimSpace(out)
}
// Token is one lexical unit produced while scanning an org line: a
// Type tag ("URL", "DESC", "TAG") and the text it covers.
type Token struct {
	Type  string // token category
	Value string // token text
}

// String formats the token as `TYPE : 'value'` for debugging output.
func (t Token) String() string {
	return t.Type + " : '" + t.Value + "'"
}
// Lexer embeds a temporary buffer to store "content"
// (url, description, tags) and an array of tokens.
type Lexer struct {
	// Buf is the scratch content buffer. None of the methods visible
	// in this file touch it — presumably used elsewhere; TODO confirm.
	Buf string
	// Tokens holds every token emitted so far, in input order.
	Tokens []Token
}
// AddToken appends a token of type t with value s to the stream.
func (l *Lexer) AddToken(t string, s string) {
	tok := Token{Type: t, Value: s}
	l.Tokens = append(l.Tokens, tok)
}
// LastToken returns the last tokenized item, useful to determine
// context for a symbol. The zero Token is returned when nothing has
// been tokenized yet.
func (l Lexer) LastToken() Token {
	if n := len(l.Tokens); n > 0 {
		return l.Tokens[n-1]
	}
	return Token{}
}
// CountToken returns how many tokens in the stream have a Value equal
// to t.
//
// NOTE(review): the parameter name t mirrors AddToken's "type"
// argument, yet the comparison here is against Token.Value, not
// Token.Type — confirm with callers whether Value is really intended.
func (l Lexer) CountToken(t string) int {
	counter := 0
	for i := range l.Tokens {
		if l.Tokens[i].Value == t {
			counter++
		}
	}
	return counter
}
// ProcessSimpleLink tokenizes a line holding a bare (non-bracket) org
// link: everything after the 3-character org prefix is split on
// spaces, the first word becomes the URL token and each remaining
// non-empty word becomes a TAG token with its ':' delimiters removed.
func (l *Lexer) ProcessSimpleLink(s string) {
	// Guard: lines shorter than the org prefix carry no link content;
	// the previous code sliced s[3:] unconditionally and panicked.
	if len(s) < 3 {
		return
	}
	words := strings.Split(s[3:], " ")
	for i, w := range words {
		if i == 0 {
			l.AddToken("URL", strings.TrimSpace(w))
			continue
		}
		// strings.Split on " " can never yield a " " element, so the
		// old `!= " "` check was dead and has been dropped.
		if w != "" {
			l.AddToken("TAG", strings.ReplaceAll(w, ":", ""))
		}
	}
}
// bracketLinkRe matches org bracket links of the form
// [[url][description]], optionally followed by trailing tag text.
// Compiled once at package scope instead of on every Process call.
var bracketLinkRe = regexp.MustCompile(`(?:\[\[)(?P<url>\S+)(?:\]\[)(?P<desc>.+)(?:\]\])(?P<tags>.+)?`)

// Process tokenizes a given line s from the org file and returns the
// tokens gathered during the call. A line whose 4th byte is not '['
// is handed to ProcessSimpleLink; otherwise the bracket-link regexp
// extracts the URL, the description, and any trailing ':'-delimited
// tags.
//
// NOTE(review): the value receiver means tokens are appended to a
// per-call copy of the Lexer and are only visible through the
// returned slice — confirm no caller expects l.Tokens itself to grow.
func (l Lexer) Process(s string) []Token {
	// Guard: the code below indexes s[3]; short lines used to panic.
	if len(s) < 4 {
		return l.Tokens
	}
	if s[3] != '[' {
		l.ProcessSimpleLink(s)
		return l.Tokens
	}
	matches := bracketLinkRe.FindStringSubmatch(s)
	if len(matches) > 1 {
		l.AddToken("URL", strings.TrimSpace(matches[1]))
	}
	if len(matches) > 2 {
		l.AddToken("DESC", strings.TrimSpace(matches[2]))
	}
	if len(matches) > 3 {
		// The tags group is optional; an unmatched group yields "",
		// which splits to [""] and is filtered out below.
		for _, raw := range strings.Split(matches[3], " ") {
			if raw != "" {
				tag := strings.ReplaceAll(raw, ":", "")
				l.AddToken("TAG", strings.TrimSpace(tag))
			}
		}
	}
	return l.Tokens
}
// Parse folds a token stream into a Feed: URL and DESC tokens set the
// corresponding fields (last one wins) and every TAG token is
// appended to Tags.
func Parse(t []Token) Feed {
	var feed Feed
	for _, tok := range t {
		switch tok.Type {
		case "URL":
			feed.URL = tok.Value
		case "DESC":
			feed.Description = tok.Value
		case "TAG":
			feed.Tags = append(feed.Tags, tok.Value)
		}
	}
	return feed
}