more tests for good boy points, split entities into files
This commit is contained in:
79
lexer.go
Normal file
79
lexer.go
Normal file
@ -0,0 +1,79 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Lexer embeds a temporary buffer to store "content"
// (url, description, tags) and an array of tokens.
type Lexer struct {
	Buf    string   // scratch buffer for the content currently being lexed
	Tokens []Token  // tokens accumulated so far, in encounter order
}
|
||||
|
||||
func (l *Lexer) AddToken(t string, s string) {
|
||||
l.Tokens = append(l.Tokens, Token{t, s})
|
||||
}
|
||||
|
||||
// Return last tokenized item, useful to determine context for a symbol
|
||||
func (l Lexer) LastToken() Token {
|
||||
var lastToken Token
|
||||
if len(l.Tokens) > 0 {
|
||||
lastToken = l.Tokens[len(l.Tokens)-1]
|
||||
}
|
||||
return lastToken
|
||||
}
|
||||
|
||||
func (l Lexer) CountToken(t string) int {
|
||||
var counter int
|
||||
for i := range l.Tokens {
|
||||
if l.Tokens[i].Value == t {
|
||||
counter ++
|
||||
}
|
||||
}
|
||||
return counter
|
||||
}
|
||||
|
||||
func (l *Lexer) ProcessSimpleLink(s string) {
|
||||
ss := strings.Split(s[3:], " ")
|
||||
for w := range ss {
|
||||
if w == 0 {
|
||||
url := strings.TrimSpace(ss[w])
|
||||
l.AddToken("URL", url)
|
||||
} else {
|
||||
if ss[w] != "" && ss[w] != " " {
|
||||
tag := strings.ReplaceAll(ss[w], ":", "")
|
||||
l.AddToken("TAG", tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Tokenize a given line s from the org file
|
||||
func (l Lexer) Process(s string) []Token {
|
||||
if s[3] != '[' {
|
||||
l.ProcessSimpleLink(s)
|
||||
return l.Tokens
|
||||
}
|
||||
|
||||
re := regexp.MustCompile(`(?:\[\[)(?P<url>\S+)(?:\]\[)(?P<desc>.+)(?:\]\])(?P<tags>.+)?`)
|
||||
matches := re.FindStringSubmatch(s)
|
||||
if len(matches) > 1 {
|
||||
l.AddToken("URL", strings.TrimSpace(matches[1]))
|
||||
}
|
||||
if len(matches) > 2 {
|
||||
l.AddToken("DESC", strings.TrimSpace(matches[2]))
|
||||
}
|
||||
if len(matches) > 3 {
|
||||
tags := strings.Split(matches[3], " ")
|
||||
for t := range tags {
|
||||
if tags[t] != "" && tags[t] != " " {
|
||||
tag := strings.ReplaceAll(tags[t], ":", "")
|
||||
l.AddToken("TAG", strings.TrimSpace(tag))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return l.Tokens
|
||||
}
|
||||
Reference in New Issue
Block a user