diff --git a/main.go b/main.go index 747dbf4..a72c698 100644 --- a/main.go +++ b/main.go @@ -4,128 +4,8 @@ import ( "bufio" "fmt" "os" - "strings" ) -type Token struct { - Type string - Value string -} - -func (t Token) String() string { - return fmt.Sprintf("%s : '%s'", t.Type, t.Value) -} - -// Lexer embbed a temporary buffer to store "content" -// (url, description, tags) and an array of tokens -type Lexer struct { - Buf string - Tokens []Token -} - -func (l *Lexer) AddToken(t string, s string) { - l.Tokens = append(l.Tokens, Token{t, s}) -} - -// Return last tokenized item, useful to determine context for a symbol -func (l Lexer) LastToken() Token { - var lastToken Token - if len(l.Tokens) > 0 { - lastToken = l.Tokens[len(l.Tokens)-1] - } - return lastToken -} - -// Tokenize a given line s from the org file -func (l Lexer) Process(s string) []Token { - for i := range s { - char := string(s[i]) - switch char { - case "*": - l.AddToken("HEADER", char) - - case "[": - l.AddToken("OBRACKET", char) // this doesnt - - case "]": - // non-empty buffer and closing bracket means current state is out of "content" context and buffer can be tokenized - if len(l.Buf) > 0 { - l.AddToken("CONTENT", l.Buf) - l.Buf = "" - l.AddToken("CBRACKET", char) - } - - // whitespaces have different meaning given the context : Either separator or part of a content string - case " ": - lt := l.LastToken() - if len(l.Buf) > 0 && lt.Type != "OBRACKET" { - l.AddToken("CONTENT", l.Buf) - l.Buf = "" - l.AddToken("WHITESPACE", char) - } - - if i > 0 { - if string(s[i-1]) == " " { - break - } - } - - if lt.Type != "WHITESPACE" { - if len(l.Buf) == 0 { - l.AddToken("WHITESPACE", char) - } else { - l.Buf += char - } - } - - default: - l.Buf += char - } - } - - if len(l.Buf) > 0 { - l.AddToken("CONTENT", l.Buf) - } - - return l.Tokens -} - -// Only retrieve content tokens, ignores uneeded separators and brackets -func Parse(t []Token) []string { - var content []string - for i := range t { - token := 
t[i] - if token.Type == "CONTENT" { - content = append(content, token.Value) - } - } - - return content -} - -// Return the final feed string, depending on either the link has a description, tags or not -func FormatFeed(content []string) string { - var feed string - - if len(content) > 1 { - url := content[0] - feed = fmt.Sprintf("%s\n", url) - } - - if len(content) > 2 { - url := content[0] - tag := strings.ReplaceAll(content[2], ":", "") - comment := content[1] - feed = fmt.Sprintf("%s %s # %s\n", url, tag, comment) - } else { - url := content[0] - tag := strings.ReplaceAll(content[1], ":", "") - feed = fmt.Sprintf("%s %s\n", url, tag) - } - - return feed -} - func IsExistFile(path string) error { if _, err := os.Stat(path); os.IsNotExist(err) { message := fmt.Sprintf("File does not exist : %s", err) @@ -170,7 +50,7 @@ func main() { } tokens := lexer.Process(scanner.Text()) - feed := FormatFeed(Parse(tokens)) + feed := Parse(tokens).String() file.WriteString(feed) } diff --git a/main_test.go b/main_test.go new file mode 100644 index 0000000..752bb4b --- /dev/null +++ b/main_test.go @@ -0,0 +1,57 @@ +package main + +import ( + "fmt" + "testing" +) + +func LexerTestWrapper(message string, expected string) string { + lexer := Lexer{} + return Parse(lexer.Process(message)).String() +} + +func LexerTestWrapperFail(expected string, result string) { + fmt.Println("Expected :", expected) + fmt.Println("Got : ", result) +} + +func TestLinkNoTag(t *testing.T) { + var message string = "** https://pleroma.social/announcements/feed.xml" + var expected string = "https://pleroma.social/announcements/feed.xml" + result := LexerTestWrapper(message, expected) + if result != expected { + LexerTestWrapperFail(expected, result) + t.Fail() + } +} + +func TestLinkTag(t *testing.T) { + var message string = "** https://pleroma.social/announcements/feed.xml :software:" + var expected string = "https://pleroma.social/announcements/feed.xml software" + result := LexerTestWrapper(message, 
expected) + if result != expected { + LexerTestWrapperFail(expected, result) + t.Fail() + } +} + +func TestLinkDescTag(t *testing.T) { + var message string = "** [[https://pleroma.social/announcements/feed.xml][Pleroma Social]] :software:" + var expected string = "https://pleroma.social/announcements/feed.xml # Pleroma Social" + var result string = LexerTestWrapper(message, expected) + if result != expected { + LexerTestWrapperFail(expected, result) + t.Fail() + } +} + + +func TestLinkDescNoTag(t *testing.T) { + var message string = "** [[https://pleroma.social/announcements/feed.xml][Pleroma Social]]" + var expected string = "https://pleroma.social/announcements/feed.xml # Pleroma Social" + var result string = LexerTestWrapper(message, expected) + if result != expected { + LexerTestWrapperFail(expected, result) + t.Fail() + } +} diff --git a/tokenizer.go b/tokenizer.go new file mode 100644 index 0000000..efa1b92 --- /dev/null +++ b/tokenizer.go @@ -0,0 +1,177 @@ +package main + +import ( + "fmt" + "strings" +) + +type Feed struct { + URL string + Description string + Tags []string +} + +// String returns the final feed line: the URL followed by the tags or, when a description is set, the URL, the description, " # " and the tags; leading and trailing whitespace is trimmed. NOTE(review): the description lands before "#" and the tags after it - confirm this order is intended +func (f Feed) String() string { + var ff string + var tags string + + if len(f.Tags) > 0 { + for i := range f.Tags { + tags += " " + f.Tags[i] + } + } + + if f.Description == "" { + ff = fmt.Sprintf("%s %s", f.URL, tags) + } else { + ff = fmt.Sprintf("%s %s # %s", f.URL, f.Description, tags) + } + + return strings.TrimSpace(ff) +} + + +type Token struct { + Type string + Value string +} + +func (t Token) String() string { + return fmt.Sprintf("%s : '%s'", t.Type, t.Value) +} + +// Lexer holds a temporary buffer to store "content" +// (url, description, tags) and a slice of tokens +type Lexer struct { + Buf string + Tokens []Token +} + +func (l *Lexer) AddToken(t string, s string) { + l.Tokens = append(l.Tokens, Token{t, s}) +} + +// LastToken returns the last tokenized item (the zero Token when there is none), useful to determine context 
for a symbol +func (l Lexer) LastToken() Token { + var lastToken Token + if len(l.Tokens) > 0 { + lastToken = l.Tokens[len(l.Tokens)-1] + } + return lastToken +} + +func (l Lexer) CountToken(t string) int { + var counter int + for i := range l.Tokens { + if l.Tokens[i].Value == t { + counter ++ + } + } + return counter +} + +func (l Lexer) IdentifyContent() string { + var tokenType string + if l.CountToken("[") == 2 { + tokenType = "URL" + } + if l.CountToken("[") == 3 { + tokenType = "DESC" + } else { + + } + return tokenType +} + +// Process tokenizes a given line s from the org file. A line without "[" is split on single spaces (the words are also printed to standard output): the first word becomes a URL token and each following word a TAG token. NOTE(review): for a line starting with "** " the first word is "**" rather than the link - confirm this is intended +func (l Lexer) Process(s string) []Token { + if strings.Count(s, "[") <= 0 { + ss := strings.Split(s, " ") + fmt.Println(ss) + for w := range ss { + if w == 0 { + l.AddToken("URL", ss[w]) + } else { + l.AddToken("TAG", ss[w]) + } + } + + return l.Tokens + } + + for i := range s { + char := string(s[i]) + switch char { + case "*": + l.AddToken("HEADER", char) + + case "[": + l.AddToken("OBRACKET", char) + + case "]": + // non-empty buffer and closing bracket means + // current state is out of "content" context and buffer can be tokenized + if len(l.Buf) > 0 { + var tokenType string = l.IdentifyContent() + l.AddToken("CBRACKET", char) + l.AddToken(tokenType, l.Buf) + l.Buf = "" + } + + // whitespaces have different meaning given the context : + // Either separator or part of a content string + case " ": + lt := l.LastToken() + if len(l.Buf) > 0 { + l.AddToken(l.IdentifyContent(), l.Buf) + l.Buf = "" + l.AddToken("WHITESPACE", char) + } + + if i > 0 { + if string(s[i-1]) == " " { + break + } + } + + if lt.Type != "WHITESPACE" { + if len(l.Buf) == 0 { + l.AddToken("WHITESPACE", char) + } else { + l.Buf += char + } + } + + default: + l.Buf += char + } + } + + if len(l.Buf) > 0 { + l.AddToken(l.IdentifyContent(), l.Buf) + } + + return l.Tokens +} + +// Parse builds a Feed from the URL, DESC and TAG tokens (a later URL or DESC token overwrites an earlier one, TAG values are appended) and ignores unneeded separators and brackets +func Parse(t []Token) Feed { + var f Feed + for i := range t { + token := 
t[i] + if token.Type == "URL" { + f.URL = token.Value + } + + if token.Type == "DESC" { + f.Description = token.Value + } + + if token.Type == "TAG" { + f.Tags = append(f.Tags, token.Value) + } + } + + return f +}