package main import ( "regexp" "strings" ) // Lexer embbed a temporary buffer to store "content" // (url, description, tags) and an array of tokens type Lexer struct { Buf string Tokens []Token } func (l *Lexer) AddToken(t string, s string) { l.Tokens = append(l.Tokens, Token{t, s}) } // Return last tokenized item, useful to determine context for a symbol func (l Lexer) LastToken() Token { var lastToken Token if len(l.Tokens) > 0 { lastToken = l.Tokens[len(l.Tokens)-1] } return lastToken } func (l Lexer) CountToken(t string) int { var counter int for i := range l.Tokens { if l.Tokens[i].Value == t { counter ++ } } return counter } func (l *Lexer) ProcessSimpleLink(s string) { ss := strings.Split(s[3:], " ") for w := range ss { if w == 0 { url := strings.TrimSpace(ss[w]) l.AddToken("URL", url) } else { if ss[w] != "" && ss[w] != " " { tag := strings.ReplaceAll(ss[w], ":", "") l.AddToken("TAG", tag) } } } } // Tokenize a given line s from the org file func (l Lexer) Process(s string) []Token { if s[3] != '[' { l.ProcessSimpleLink(s) return l.Tokens } re := regexp.MustCompile(`(?:\[\[)(?P\S+)(?:\]\[)(?P.+)(?:\]\])(?P.+)?`) matches := re.FindStringSubmatch(s) if len(matches) > 1 { l.AddToken("URL", strings.TrimSpace(matches[1])) } if len(matches) > 2 { l.AddToken("DESC", strings.TrimSpace(matches[2])) } if len(matches) > 3 { tags := strings.Split(matches[3], " ") for t := range tags { if tags[t] != "" && tags[t] != " " { tag := strings.ReplaceAll(tags[t], ":", "") l.AddToken("TAG", strings.TrimSpace(tag)) } } } return l.Tokens }