initial commit
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
bin/
|
||||
10
Makefile
Normal file
10
Makefile
Normal file
@ -0,0 +1,10 @@
|
||||
# Compile the org2nb binary into bin/
build:
	go build -o bin/org2nb main.go

# Copy the compiled binary into the user's local bin directory
install:
	cp bin/org2nb ~/.local/bin/

# Run from source without producing a binary
run:
	go run main.go

# Default pipeline: compile, then install
all: build install
|
||||
7
README.md
Normal file
7
README.md
Normal file
@ -0,0 +1,7 @@
|
||||
# org2newsboat
|
||||
|
||||
Parse an [elfeed-org](https://github.com/remyhonig/elfeed-org) RSS file and write a `urls` file readable by Newsboat (Newsbeuter's successor). It exports the URL, tags and link description as a comment if it exists.
|
||||
|
||||
# Installation
|
||||
|
||||
`make install` to compile the binary and copy it into `~/.local/bin`
|
||||
181
main.go
Normal file
181
main.go
Normal file
@ -0,0 +1,181 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Token is one lexical unit produced while scanning a line of the org file.
type Token struct {
	Type  string // token class: "HEADER", "OBRACKET", "CBRACKET", "WHITESPACE" or "CONTENT"
	Value string // raw text the token was built from
}
|
||||
|
||||
func (t Token) String() string {
|
||||
return fmt.Sprintf("%s : '%s'", t.Type, t.Value)
|
||||
}
|
||||
|
||||
// Lexer embeds a temporary buffer to store "content"
// (url, description, tags) and an array of tokens.
type Lexer struct {
	Buf    string  // scratch accumulator for the content currently being read
	Tokens []Token // tokens emitted so far, in input order
}
|
||||
|
||||
func (l *Lexer) AddToken(t string, s string) {
|
||||
l.Tokens = append(l.Tokens, Token{t, s})
|
||||
}
|
||||
|
||||
// Return last tokenized item, useful to determine context for a symbol
|
||||
func (l Lexer) LastToken() Token {
|
||||
var lastToken Token
|
||||
if len(l.Tokens) > 0 {
|
||||
lastToken = l.Tokens[len(l.Tokens)-1]
|
||||
}
|
||||
return lastToken
|
||||
}
|
||||
|
||||
// Process tokenizes a given line s from the org file, character by character,
// and returns every token accumulated during this pass.
//
// NOTE(review): the value receiver means this method works on a copy of the
// Lexer, so Buf/Tokens never persist between calls on the same variable —
// each line starts from a clean state. Confirm this is intentional.
func (l Lexer) Process(s string) []Token {
	for i := range s {
		char := string(s[i])
		switch char {
		case "*":
			// org heading marker
			l.AddToken("HEADER", char)

		case "[":
			// opening bracket of an org link
			l.AddToken("OBRACKET", char)

		case "]":
			// non-empty buffer and closing bracket means current state is out of "content" context and buffer can be tokenized
			if len(l.Buf) > 0 {
				l.AddToken("CONTENT", l.Buf)
				l.Buf = ""
				l.AddToken("CBRACKET", char)
			}

		// whitespaces have different meaning given the context : Either separator or part of a content string
		case " ":
			// NOTE(review): lt is captured before the tokens emitted below,
			// so the lt.Type check further down sees the pre-space context.
			lt := l.LastToken()
			if len(l.Buf) > 0 && lt.Type != "OBRACKET" {
				// flush buffered content, then record the separator
				l.AddToken("CONTENT", l.Buf)
				l.Buf = ""
				l.AddToken("WHITESPACE", char)
			}

			// skip runs of consecutive spaces
			if i > 0 {
				if string(s[i-1]) == " " {
					break
				}
			}

			if lt.Type != "WHITESPACE" {
				if len(l.Buf) == 0 {
					l.AddToken("WHITESPACE", char)
				} else {
					// inside bracketed content, a space is part of the text
					l.Buf += char
				}
			}

		default:
			// any other character accumulates into the content buffer
			l.Buf += char
		}
	}

	// flush whatever is still buffered at end of line
	if len(l.Buf) > 0 {
		l.AddToken("CONTENT", l.Buf)
	}

	return l.Tokens
}
|
||||
|
||||
// Only retrieve content tokens, ignores uneeded separators and brackets
|
||||
func Parse(t []Token) []string {
|
||||
var content []string
|
||||
for i := range t {
|
||||
token := t[i]
|
||||
if token.Type == "CONTENT" {
|
||||
content = append(content, token.Value)
|
||||
}
|
||||
}
|
||||
|
||||
return content
|
||||
}
|
||||
|
||||
// FormatFeed returns the final feed line for one parsed org entry, depending
// on whether the link has a description, tags or neither.
//
// Expected layout of content (as produced by Parse):
//
//	content[0] = feed URL
//	content[1] = tags (":tag1:tag2:") when len == 2, or the description when len >= 3
//	content[2] = tags when len >= 3
//
// Short slices no longer panic: a lone URL is emitted on its own line and
// empty input (e.g. a blank org line) yields an empty string. The previous
// version indexed content[0]/content[1] unconditionally and crashed on them.
func FormatFeed(content []string) string {
	switch {
	case len(content) == 0:
		// nothing parsed on this line: emit nothing
		return ""
	case len(content) == 1:
		// URL without tags or description
		return fmt.Sprintf("%s\n", content[0])
	case len(content) == 2:
		// URL followed by its tags; strip the org ":" tag delimiters
		tag := strings.ReplaceAll(content[1], ":", "")
		return fmt.Sprintf("%s %s\n", content[0], tag)
	default:
		// URL, description and tags: export the description as a comment
		tag := strings.ReplaceAll(content[2], ":", "")
		return fmt.Sprintf("%s %s # %s\n", content[0], tag, content[1])
	}
}
|
||||
|
||||
// IsExistFile terminates the whole program with exit status 1 when no file
// exists at path; otherwise it returns nil. The error return is vestigial —
// it is always nil when the function returns at all.
func IsExistFile(path string) error {
	_, statErr := os.Stat(path)
	if os.IsNotExist(statErr) {
		message := fmt.Sprintf("File does not exist : %s", statErr)
		fmt.Println(message)
		os.Exit(1)
	}
	return nil
}
|
||||
|
||||
// CheckErrorFile prints err and terminates the program with exit status 1
// when err is non-nil; it returns nil otherwise.
func CheckErrorFile(err error) error {
	if err == nil {
		return nil
	}
	message := fmt.Sprintf("Error creating file : %s", err)
	fmt.Println(message)
	os.Exit(1)
	return nil
}
|
||||
|
||||
func main() {
|
||||
home := os.Getenv("HOME")
|
||||
orgFile := home + "/Documents/orgfiles/rss.org"
|
||||
newsboatFile := home + "/Config/private/newsboat/urls"
|
||||
|
||||
IsExistFile(orgFile)
|
||||
data, err := os.Open(orgFile)
|
||||
CheckErrorFile(err)
|
||||
|
||||
IsExistFile(newsboatFile)
|
||||
file, err := os.Create(newsboatFile)
|
||||
CheckErrorFile(err)
|
||||
|
||||
var counter int
|
||||
lexer := Lexer{}
|
||||
scanner := bufio.NewScanner(data)
|
||||
|
||||
for scanner.Scan() {
|
||||
counter++
|
||||
|
||||
// skip the first heading
|
||||
if counter == 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
tokens := lexer.Process(scanner.Text())
|
||||
feed := FormatFeed(Parse(tokens))
|
||||
file.WriteString(feed)
|
||||
}
|
||||
|
||||
file.Close()
|
||||
data.Close()
|
||||
|
||||
fmt.Printf("%d feed(s) written to the file (%s)\n", counter, newsboatFile)
|
||||
}
|
||||
Reference in New Issue
Block a user