// Command rss2newsboat converts an org-mode file of RSS links into a
// newsboat url file: one "<url> <tags> # <description>" line per feed.
package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

// Token is a single lexed item: a Type tag (HEADER, OBRACKET, CBRACKET,
// WHITESPACE, CONTENT) and the raw text it was built from.
type Token struct {
	Type  string
	Value string
}

// String renders the token for debugging.
func (t Token) String() string {
	return fmt.Sprintf("%s : '%s'", t.Type, t.Value)
}

// Lexer embeds a temporary buffer to store "content" (url, description,
// tags) and the slice of tokens produced so far.
//
// NOTE: Process uses a value receiver on purpose — each call works on a
// copy, so one Lexer can be reused across lines without tokens leaking
// from one line into the next.
type Lexer struct {
	Buf    string
	Tokens []Token
}

// AddToken appends a token of the given type and value.
func (l *Lexer) AddToken(t string, s string) {
	l.Tokens = append(l.Tokens, Token{t, s})
}

// LastToken returns the last tokenized item, or the zero Token when
// nothing has been tokenized yet. Useful to determine context for a
// symbol.
func (l Lexer) LastToken() Token {
	if n := len(l.Tokens); n > 0 {
		return l.Tokens[n-1]
	}
	return Token{}
}

// Process tokenizes a given line s from the org file and returns the
// resulting tokens. The value receiver leaves the caller's Lexer
// untouched, so each line starts from a clean state.
func (l Lexer) Process(s string) []Token {
	for i := range s {
		char := string(s[i])
		switch char {
		case "*":
			l.AddToken("HEADER", char)
		case "[":
			l.AddToken("OBRACKET", char)
		case "]":
			// A closing bracket means the current state is out of
			// "content" context: flush the pending buffer first.
			if len(l.Buf) > 0 {
				l.AddToken("CONTENT", l.Buf)
				l.Buf = ""
			}
			// Always record the bracket itself. (Previously it was only
			// added when the buffer was non-empty, so the second "]" of
			// "]]" was silently dropped.)
			l.AddToken("CBRACKET", char)
		case " ":
			// Whitespace has different meanings given the context:
			// either a separator or part of a content string.
			last := l.LastToken()
			if len(l.Buf) > 0 && last.Type != "OBRACKET" {
				// Outside brackets a space terminates pending content.
				// (break here also removes the duplicate WHITESPACE
				// token the original could emit right after a flush.)
				l.AddToken("CONTENT", l.Buf)
				l.Buf = ""
				l.AddToken("WHITESPACE", char)
				break
			}
			if i > 0 && s[i-1] == ' ' {
				break // collapse runs of spaces
			}
			if last.Type != "WHITESPACE" {
				if len(l.Buf) == 0 {
					l.AddToken("WHITESPACE", char)
				} else {
					// Right after "[": the space belongs to the content
					// (e.g. a multi-word description).
					l.Buf += char
				}
			}
		default:
			l.Buf += char
		}
	}
	// Flush whatever is left (e.g. a trailing ":tag:" run).
	if len(l.Buf) > 0 {
		l.AddToken("CONTENT", l.Buf)
	}
	return l.Tokens
}

// Parse retrieves only the content tokens, ignoring unneeded separators
// and brackets.
func Parse(t []Token) []string {
	var content []string
	for _, token := range t {
		if token.Type == "CONTENT" {
			content = append(content, token.Value)
		}
	}
	return content
}

// FormatFeed returns the final feed line. The layout depends on how many
// content items the line produced:
//
//	no items             -> ""            (nothing to write)
//	[url]                -> "url\n"
//	[url, tags]          -> "url tags\n"
//	[url, desc, tags...] -> "url tags # desc\n"
//
// The previous version indexed content[0] and content[1] unconditionally
// and panicked on lines with fewer than two content items; the len>1
// branch was also dead code, always overwritten below it.
func FormatFeed(content []string) string {
	switch len(content) {
	case 0:
		return ""
	case 1:
		return fmt.Sprintf("%s\n", content[0])
	case 2:
		tag := strings.ReplaceAll(content[1], ":", "")
		return fmt.Sprintf("%s %s\n", content[0], tag)
	default:
		tag := strings.ReplaceAll(content[2], ":", "")
		return fmt.Sprintf("%s %s # %s\n", content[0], tag, content[1])
	}
}

// IsExistFile prints a message and exits the program when path does not
// exist. The error return is always nil and kept only for interface
// compatibility with existing callers.
func IsExistFile(path string) error {
	if _, err := os.Stat(path); os.IsNotExist(err) {
		message := fmt.Sprintf("File does not exist : %s", err)
		fmt.Println(message)
		os.Exit(1)
	}
	return nil
}

// CheckErrorFile prints a message and exits the program when err is
// non-nil. The error return is always nil and kept only for interface
// compatibility with existing callers.
func CheckErrorFile(err error) error {
	if err != nil {
		message := fmt.Sprintf("Error creating file : %s", err)
		fmt.Println(message)
		os.Exit(1)
	}
	return nil
}

func main() {
	home := os.Getenv("HOME")
	orgFile := home + "/Documents/orgfiles/rss.org"
	newsboatFile := home + "/Config/private/newsboat/urls"

	IsExistFile(orgFile)
	data, err := os.Open(orgFile)
	CheckErrorFile(err)
	defer data.Close()

	// NOTE(review): this requires the *output* file to already exist even
	// though os.Create below would create it — presumably deliberate, to
	// avoid writing into an unconfigured location; confirm with the author.
	IsExistFile(newsboatFile)
	file, err := os.Create(newsboatFile)
	CheckErrorFile(err)
	defer file.Close()

	// Buffer the per-line writes; flushed (and error-checked) below.
	w := bufio.NewWriter(file)

	lexer := Lexer{}
	var line, written int
	scanner := bufio.NewScanner(data)
	for scanner.Scan() {
		line++
		// skip the first heading
		if line == 1 {
			continue
		}
		feed := FormatFeed(Parse(lexer.Process(scanner.Text())))
		// Blank or content-free lines produce no feed entry.
		if feed == "" {
			continue
		}
		_, err := w.WriteString(feed)
		CheckErrorFile(err)
		written++
	}
	CheckErrorFile(scanner.Err())
	CheckErrorFile(w.Flush())

	// Report feeds actually written, not lines scanned (the old counter
	// included the skipped heading).
	fmt.Printf("%d feed(s) written to the file (%s)\n", written, newsboatFile)
}