package main

import (
	"fmt"
	"strings"
)

// Feed is a single feed entry: its URL, an optional description and an
// optional list of tags.
type Feed struct {
	URL         string
	Description string
	Tags        []string
}

// String renders the feed as one line, depending on whether the link has a
// description and/or tags:
//
//	"URL tag1 tag2"            (no description)
//	"URL description # tags"   (with description)
func (f Feed) String() string {
	// Join (instead of prefixing each tag with a space) avoids the double
	// space the old concatenation produced between URL/"#" and the tags.
	tags := strings.Join(f.Tags, " ")
	var line string
	if f.Description == "" {
		line = fmt.Sprintf("%s %s", f.URL, tags)
	} else {
		line = fmt.Sprintf("%s %s # %s", f.URL, f.Description, tags)
	}
	// Trim so entries without tags carry no trailing space.
	return strings.TrimSpace(line)
}

// Token is one lexed item: a type label (URL, DESC, TAG, HEADER, OBRACKET,
// CBRACKET, WHITESPACE) and the raw text it was built from.
type Token struct {
	Type  string
	Value string
}

// String formats the token for debugging, e.g. `URL : 'http://…'`.
func (t Token) String() string {
	return fmt.Sprintf("%s : '%s'", t.Type, t.Value)
}

// Lexer embeds a temporary buffer accumulating "content"
// (url, description, tags) and the slice of tokens produced so far.
type Lexer struct {
	Buf    string
	Tokens []Token
}

// AddToken appends a token of type t with value s to the token stream.
func (l *Lexer) AddToken(t string, s string) {
	l.Tokens = append(l.Tokens, Token{t, s})
}

// LastToken returns the most recently produced token, or the zero Token when
// nothing has been tokenized yet; useful to determine context for a symbol.
func (l Lexer) LastToken() Token {
	if n := len(l.Tokens); n > 0 {
		return l.Tokens[n-1]
	}
	return Token{}
}

// CountToken returns how many tokens have a Value (not Type) equal to t.
// IdentifyContent relies on this to count "[" occurrences by value.
func (l Lexer) CountToken(t string) int {
	var counter int
	for _, tok := range l.Tokens {
		if tok.Value == t {
			counter++
		}
	}
	return counter
}

// IdentifyContent infers the type of the buffered content from how many
// opening brackets have been emitted so far: two means we are closing the
// URL part of an org link, three means the description part. Anything else
// yields an empty type.
func (l Lexer) IdentifyContent() string {
	switch l.CountToken("[") {
	case 2:
		return "URL"
	case 3:
		return "DESC"
	default:
		return ""
	}
}

// Process tokenizes a single line s from the org file and returns the tokens.
//
// Lines without org-link brackets are treated as "URL tag tag ...". Bracketed
// lines ("* [[url][desc]] ...") are scanned character by character.
//
// NOTE(review): the value receiver is kept on purpose — Process mutates only
// its own copy of the Lexer and callers consume the returned slice.
func (l Lexer) Process(s string) []Token {
	// Fast path: no brackets. Whitespace-separated words, first one is the
	// URL and every following word is a tag.
	if strings.Count(s, "[") <= 0 {
		for i, word := range strings.Split(s, " ") {
			if i == 0 {
				l.AddToken("URL", word)
			} else {
				l.AddToken("TAG", word)
			}
		}
		return l.Tokens
	}
	for i := range s {
		char := string(s[i])
		switch char {
		case "*":
			l.AddToken("HEADER", char)
		case "[":
			l.AddToken("OBRACKET", char)
		case "]":
			// A non-empty buffer at a closing bracket means the current
			// state leaves "content" context and the buffer can be
			// tokenized; an empty buffer drops the bracket entirely.
			if len(l.Buf) > 0 {
				tokenType := l.IdentifyContent()
				l.AddToken("CBRACKET", char)
				l.AddToken(tokenType, l.Buf)
				l.Buf = ""
			}
		case " ":
			// Whitespace is either a separator or part of a content string,
			// depending on context.
			// TODO(review): a space inside a bracketed description flushes
			// the buffer, so multi-word descriptions are split into separate
			// tokens — confirm whether that is intended.
			lt := l.LastToken()
			if len(l.Buf) > 0 {
				l.AddToken(l.IdentifyContent(), l.Buf)
				l.Buf = ""
				l.AddToken("WHITESPACE", char)
			}
			// A space directly following another space is ignored.
			if i > 0 && string(s[i-1]) == " " {
				break
			}
			// The buffer is necessarily empty here (flushed above), so the
			// old `else { l.Buf += char }` branch was dead code.
			if lt.Type != "WHITESPACE" {
				l.AddToken("WHITESPACE", char)
			}
		default:
			l.Buf += char
		}
	}
	// Flush whatever content is still buffered at end of line.
	if len(l.Buf) > 0 {
		l.AddToken(l.IdentifyContent(), l.Buf)
	}
	return l.Tokens
}

// Parse extracts the content tokens (URL, DESC, TAG) into a Feed, ignoring
// unneeded separators and brackets. Later URL/DESC tokens overwrite earlier
// ones; TAG tokens accumulate.
func Parse(tokens []Token) Feed {
	var f Feed
	for _, tok := range tokens {
		switch tok.Type {
		case "URL":
			f.URL = tok.Value
		case "DESC":
			f.Description = tok.Value
		case "TAG":
			f.Tags = append(f.Tags, tok.Value)
		}
	}
	return f
}