package main import ( "regexp" "strings" ) // Lexer embbed an array of token to retrieve them later for building urls type Lexer struct { Tokens []Token } func (l *Lexer) AddToken(t int, s string) { l.Tokens = append(l.Tokens, Token{t, s}) } // Return last tokenized item, useful to determine context for a symbol func (l Lexer) LastToken() Token { var lastToken Token if len(l.Tokens) > 0 { lastToken = l.Tokens[len(l.Tokens)-1] } return lastToken } func (l Lexer) CountToken(t string) int { var counter int for i := range l.Tokens { if l.Tokens[i].Value == t { counter ++ } } return counter } func (l *Lexer) ProcessSimpleLink(s string) { ss := strings.Split(s[3:], " ") for w := range ss { if w == 0 { url := strings.TrimSpace(ss[w]) l.AddToken(URL, url) } else { if ss[w] != "" && ss[w] != " " { tag := strings.ReplaceAll(ss[w], ":", "") l.AddToken(TAG, tag) } } } } // Tokenize a given line s from the org file func (l Lexer) Process(s string) []Token { if s[3] != '[' { l.ProcessSimpleLink(s) return l.Tokens } re := regexp.MustCompile(`(?:\[\[)(?P\S+)(?:\]\[)(?P.+)(?:\]\])(?P.+)?`) matches := re.FindStringSubmatch(s) if len(matches) > 1 { l.AddToken(URL, strings.TrimSpace(matches[1])) } if len(matches) > 2 { l.AddToken(DESCRIPTION, strings.TrimSpace(matches[2])) } if len(matches) > 3 { tags := strings.Split(matches[3], " ") for t := range tags { if tags[t] != "" && tags[t] != " " { tag := strings.ReplaceAll(tags[t], ":", "") l.AddToken(TAG, strings.TrimSpace(tag)) } } } return l.Tokens }