182 lines
3.5 KiB
Go
182 lines
3.5 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"os"
|
|
"strings"
|
|
)
|
|
|
|
// Token is one lexical unit of an org line. Type names the category of the
// token (Process emits "HEADER", "OBRACKET", "CBRACKET", "WHITESPACE" and
// "CONTENT") and Value holds the text the token was built from.
type Token struct {
	Type, Value string
}
|
|
|
|
func (t Token) String() string {
|
|
return fmt.Sprintf("%s : '%s'", t.Type, t.Value)
|
|
}
|
|
|
|
// Lexer embbed a temporary buffer to store "content"
|
|
// (url, description, tags) and an array of tokens
|
|
type Lexer struct {
|
|
Buf string
|
|
Tokens []Token
|
|
}
|
|
|
|
func (l *Lexer) AddToken(t string, s string) {
|
|
l.Tokens = append(l.Tokens, Token{t, s})
|
|
}
|
|
|
|
// Return last tokenized item, useful to determine context for a symbol
|
|
func (l Lexer) LastToken() Token {
|
|
var lastToken Token
|
|
if len(l.Tokens) > 0 {
|
|
lastToken = l.Tokens[len(l.Tokens)-1]
|
|
}
|
|
return lastToken
|
|
}
|
|
|
|
// Tokenize a given line s from the org file
|
|
func (l Lexer) Process(s string) []Token {
|
|
for i := range s {
|
|
char := string(s[i])
|
|
switch char {
|
|
case "*":
|
|
l.AddToken("HEADER", char)
|
|
|
|
case "[":
|
|
l.AddToken("OBRACKET", char) // this doesnt
|
|
|
|
case "]":
|
|
// non-empty buffer and closing bracket means current state is out of "content" context and buffer can be tokenized
|
|
if len(l.Buf) > 0 {
|
|
l.AddToken("CONTENT", l.Buf)
|
|
l.Buf = ""
|
|
l.AddToken("CBRACKET", char)
|
|
}
|
|
|
|
// whitespaces have different meaning given the context : Either separator or part of a content string
|
|
case " ":
|
|
lt := l.LastToken()
|
|
if len(l.Buf) > 0 && lt.Type != "OBRACKET" {
|
|
l.AddToken("CONTENT", l.Buf)
|
|
l.Buf = ""
|
|
l.AddToken("WHITESPACE", char)
|
|
}
|
|
|
|
if i > 0 {
|
|
if string(s[i-1]) == " " {
|
|
break
|
|
}
|
|
}
|
|
|
|
if lt.Type != "WHITESPACE" {
|
|
if len(l.Buf) == 0 {
|
|
l.AddToken("WHITESPACE", char)
|
|
} else {
|
|
l.Buf += char
|
|
}
|
|
}
|
|
|
|
default:
|
|
l.Buf += char
|
|
}
|
|
}
|
|
|
|
if len(l.Buf) > 0 {
|
|
l.AddToken("CONTENT", l.Buf)
|
|
}
|
|
|
|
return l.Tokens
|
|
}
|
|
|
|
// Only retrieve content tokens, ignores uneeded separators and brackets
|
|
func Parse(t []Token) []string {
|
|
var content []string
|
|
for i := range t {
|
|
token := t[i]
|
|
if token.Type == "CONTENT" {
|
|
content = append(content, token.Value)
|
|
}
|
|
}
|
|
|
|
return content
|
|
}
|
|
|
|
// FormatFeed builds the line written to the feed file from the content
// values of one org line. The result depends on how many values there are:
//
//   - none:          "" (nothing to write)
//   - url:           "url\n"
//   - url, tag:      "url tag\n"
//   - url, comment, tag (extra values are ignored): "url tag # comment\n"
//
// Every ":" is removed from the tag value.
func FormatFeed(content []string) string {
	switch len(content) {
	case 0:
		// Blank or content-less line: previously this indexed content[1]
		// and panicked.
		return ""

	case 1:
		return content[0] + "\n"

	case 2:
		tag := strings.ReplaceAll(content[1], ":", "")
		return fmt.Sprintf("%s %s\n", content[0], tag)

	default:
		comment := content[1]
		tag := strings.ReplaceAll(content[2], ":", "")
		return fmt.Sprintf("%s %s # %s\n", content[0], tag, comment)
	}
}
|
|
|
|
// IsExistFile checks that path exists. When os.Stat reports that it does
// not, a message is printed and the program exits with status 1; in every
// other case (including other Stat errors) it returns nil.
func IsExistFile(path string) error {
	_, err := os.Stat(path)
	if !os.IsNotExist(err) {
		return nil
	}

	fmt.Printf("File does not exist : %s\n", err)
	os.Exit(1)
	return nil
}
|
|
|
|
// CheckErrorFile terminates the program with status 1 after printing err
// when err is non-nil; otherwise it returns nil.
func CheckErrorFile(err error) error {
	if err == nil {
		return nil
	}

	fmt.Printf("Error creating file : %s\n", err)
	os.Exit(1)
	return nil
}
|
|
|
|
func main() {
|
|
home := os.Getenv("HOME")
|
|
orgFile := home + "/Documents/orgfiles/rss.org"
|
|
newsboatFile := home + "/Config/private/newsboat/urls"
|
|
|
|
IsExistFile(orgFile)
|
|
data, err := os.Open(orgFile)
|
|
CheckErrorFile(err)
|
|
|
|
IsExistFile(newsboatFile)
|
|
file, err := os.Create(newsboatFile)
|
|
CheckErrorFile(err)
|
|
|
|
var counter int
|
|
lexer := Lexer{}
|
|
scanner := bufio.NewScanner(data)
|
|
|
|
for scanner.Scan() {
|
|
counter++
|
|
|
|
// skip the first heading
|
|
if counter == 1 {
|
|
continue
|
|
}
|
|
|
|
tokens := lexer.Process(scanner.Text())
|
|
feed := FormatFeed(Parse(tokens))
|
|
file.WriteString(feed)
|
|
}
|
|
|
|
file.Close()
|
|
data.Close()
|
|
|
|
fmt.Printf("%d feed(s) written to the file (%s)\n", counter, newsboatFile)
|
|
}
|