more tests for good boy points, split entities into files
This commit is contained in:
79
lexer.go
Normal file
79
lexer.go
Normal file
@ -0,0 +1,79 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Lexer embeds a temporary buffer to store "content"
// (url, description, tags) and an array of tokens.
type Lexer struct {
	Buf    string   // scratch buffer for the content currently being lexed
	Tokens []Token  // tokens accumulated so far, in encounter order
}
|
||||
|
||||
func (l *Lexer) AddToken(t string, s string) {
|
||||
l.Tokens = append(l.Tokens, Token{t, s})
|
||||
}
|
||||
|
||||
// Return last tokenized item, useful to determine context for a symbol
|
||||
func (l Lexer) LastToken() Token {
|
||||
var lastToken Token
|
||||
if len(l.Tokens) > 0 {
|
||||
lastToken = l.Tokens[len(l.Tokens)-1]
|
||||
}
|
||||
return lastToken
|
||||
}
|
||||
|
||||
func (l Lexer) CountToken(t string) int {
|
||||
var counter int
|
||||
for i := range l.Tokens {
|
||||
if l.Tokens[i].Value == t {
|
||||
counter ++
|
||||
}
|
||||
}
|
||||
return counter
|
||||
}
|
||||
|
||||
func (l *Lexer) ProcessSimpleLink(s string) {
|
||||
ss := strings.Split(s[3:], " ")
|
||||
for w := range ss {
|
||||
if w == 0 {
|
||||
url := strings.TrimSpace(ss[w])
|
||||
l.AddToken("URL", url)
|
||||
} else {
|
||||
if ss[w] != "" && ss[w] != " " {
|
||||
tag := strings.ReplaceAll(ss[w], ":", "")
|
||||
l.AddToken("TAG", tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Tokenize a given line s from the org file
|
||||
func (l Lexer) Process(s string) []Token {
|
||||
if s[3] != '[' {
|
||||
l.ProcessSimpleLink(s)
|
||||
return l.Tokens
|
||||
}
|
||||
|
||||
re := regexp.MustCompile(`(?:\[\[)(?P<url>\S+)(?:\]\[)(?P<desc>.+)(?:\]\])(?P<tags>.+)?`)
|
||||
matches := re.FindStringSubmatch(s)
|
||||
if len(matches) > 1 {
|
||||
l.AddToken("URL", strings.TrimSpace(matches[1]))
|
||||
}
|
||||
if len(matches) > 2 {
|
||||
l.AddToken("DESC", strings.TrimSpace(matches[2]))
|
||||
}
|
||||
if len(matches) > 3 {
|
||||
tags := strings.Split(matches[3], " ")
|
||||
for t := range tags {
|
||||
if tags[t] != "" && tags[t] != " " {
|
||||
tag := strings.ReplaceAll(tags[t], ":", "")
|
||||
l.AddToken("TAG", strings.TrimSpace(tag))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return l.Tokens
|
||||
}
|
||||
Reference in New Issue
Block a user