commit e013b25bbb2abf4df31605cf24f0af59ed0ce37f Author: Ismo Vuorinen Date: Sun Feb 25 03:41:11 2024 +0200 Initial commit diff --git a/.github/LICENSE.md b/.github/LICENSE.md new file mode 100644 index 0000000..bffa737 --- /dev/null +++ b/.github/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Ismo Vuorinen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/.github/README.md b/.github/README.md new file mode 100644 index 0000000..0dacac6 --- /dev/null +++ b/.github/README.md @@ -0,0 +1,41 @@ +# PaperBoy + +CLI tool to generate a weekly grouped list of articles from defined RSS feeds. + +## Usage + +Create `template.md` and `config.yaml` in the same directory as the binary. + +There are examples of both of these files in the `examples` directory. + +## Configuration (`config.yaml`) + +```yaml +feeds: + - https://xkcd.com/atom.xml + - https://www.foxtrot.com/feed/ + - https://news.ycombinator.com/rss + - https://feeds.feedburner.com/codinghorror + - https://internetofshit.net/rss/ + - https://techcrunch.com/feed/ +template: template.md +output: output.md +``` + +## Template (`template.md`) + +```markdown +# Paperboy weekly newsletter + +--- + +--- + +Generated by [paperboy](https://github.com/ivuorinen/paperboy) +``` + +The contents is rendered between the two `---` lines. + +## License + +The MIT License (MIT) - see [LICENSE](LICENSE.md) for more details diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..df7f9f2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +output.md +config.yaml +template.md +!examples/* +paperboy diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..80cf87d --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +default: help + +VERSION?=dev + +.PHONY: help +help: # Show help for each of the Makefile recipes. + @grep -E '^[a-zA-Z0-9 -]+:.*#' Makefile | sort | while read -r l; do printf "\033[1;32m$$(echo $$l | cut -f 1 -d':')\033[00m:$$(echo $$l | cut -f 2- -d'#')\n"; done + +build: # Build the binary. Use VERSION (make build VERSION=1.2.3) to set the build version. + go build -o paperboy \ + -ldflags "-X main.version=${VERSION}" \ + main.go + diff --git a/examples/config.yaml b/examples/config.yaml new file mode 100644 index 0000000..0509fc1 --- /dev/null +++ b/examples/config.yaml @@ -0,0 +1,9 @@ +feeds: + - https://xkcd.com/atom.xml + - https://www.foxtrot.com/feed/ + - https://news.ycombinator.com/rss + - https://feeds.feedburner.com/codinghorror + - https://internetofshit.net/rss/ + - https://techcrunch.com/feed/ +template: template.md +output: output.md diff --git a/examples/template.md b/examples/template.md new file mode 100644 index 0000000..f876636 --- /dev/null +++ b/examples/template.md @@ -0,0 +1,7 @@ +# Paperboy weekly newsletter + +--- + +--- + +Generated by [paperboy](https://github.com/ivuorinen/paperboy) diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..01a6b00 --- /dev/null +++ b/go.mod @@ -0,0 +1,16 @@ +module github.com/ivuorinen/paperboy + +go 1.22.0 + +require ( + github.com/PuerkitoBio/goquery v1.8.0 // indirect + github.com/andybalholm/cascadia v1.3.1 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mmcdole/gofeed v1.2.1 // indirect + github.com/mmcdole/goxpp v1.1.0 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + golang.org/x/net v0.4.0 // indirect + golang.org/x/text v0.5.0 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..41986e7 --- /dev/null +++ b/go.sum @@ -0,0 +1,34 @@ +github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= +github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI= +github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/mmcdole/gofeed v1.2.1 h1:tPbFN+mfOLcM1kDF1x2c/N68ChbdBatkppdzf/vDe1s= +github.com/mmcdole/gofeed v1.2.1/go.mod h1:2wVInNpgmC85q16QTTuwbuKxtKkHLCDDtf0dCmnrNr4= +github.com/mmcdole/goxpp v1.1.0 h1:WwslZNF7KNAXTFuzRtn/OKZxFLJAAyOA9w82mDz2ZGI= +github.com/mmcdole/goxpp v1.1.0/go.mod h1:v+25+lT2ViuQ7mVxcncQ8ch1URund48oH+jhjiwEgS8= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.4.0 h1:Q5QPcMlvfxFTAPV0+07Xz/MpK9NTXu2VDUuy0FeMfaU= +golang.org/x/net v0.4.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.5.0 h1:OLmvp0KP+FVG99Ct/qFiL/Fhk4zp4QQnZ7b2U+5piUM= +golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= diff --git a/main.go b/main.go new file mode 100644 index 0000000..4b8e6c0 --- /dev/null +++ b/main.go @@ -0,0 +1,203 @@ +// Copyright 2024 Ismo Vuorinen. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT +// +// Paperboy is a simple RSS feed reader that generates +// a Markdown file with the latest articles from multiple feeds. + +//go:build go1.16 +// +build go1.16 + +package main + +import ( + "cmp" + "fmt" + "log" + "net/url" + "os" + "regexp" + "slices" + "sort" + "strings" + "time" + + "github.com/mmcdole/gofeed" + "gopkg.in/yaml.v2" +) + +// Version and Build information +// These variables are set during build time +var ( + version string = "dev" + build string = time.Now().Format("20060102") +) + +// Config represents the structure of the YAML configuration file +type Config struct { + Template string `yaml:"template"` + Output string `yaml:"output"` + Feeds []string `yaml:"feeds"` +} + +// Article represents a feed article +type Article struct { + PublishAt time.Time + Title string + URL string + URLDomain string +} + +func main() { + log.Printf("Paperboy v.%s (build %s)", version, build) + + // Read YAML configuration file + configFile := "config.yaml" + configData, err := os.ReadFile(configFile) + if err != nil { + log.Fatalf("Error reading config.yaml file: %v", err) + } + + // Parse YAML configuration + var config Config + err = yaml.Unmarshal(configData, &config) + if err != nil { + log.Fatalf("Error parsing config.yaml file: %v", err) + } + + log.Printf("Feeds: %d", len(config.Feeds)) + + // Fetch articles from each feed URL + articlesByWeek := make(map[string][]Article) + var weeks []string + + for _, feedURL := range config.Feeds { + + log.Printf("Fetching articles from %s", feedURL) + + articles, err := fetchArticles(feedURL) + if err != nil { + log.Printf("Error fetching articles from %s: %v", feedURL, err) + continue + } + + log.Printf("-> Got %d articles", len(articles)) + + // Group articles by publish week + for _, article := range articles { + year, week := article.PublishAt.UTC().ISOWeek() + // Format week in the format "YYYY-WW" + // e.g. 2021-01 + id := fmt.Sprintf("%d-%02d", year, week) + articlesByWeek[id] = append(articlesByWeek[id], article) + + if !slices.Contains(weeks, id) { + weeks = append(weeks, id) + } + } + } + + // Sort weeks + sort.Strings(weeks) + slices.Reverse(weeks) + + // Generate Markdown output + output := generateMarkdown(config.Template, articlesByWeek, weeks) + + // Write Markdown output to file + outputFile := config.Output + err = os.WriteFile(outputFile, []byte(output), 0644) + if err != nil { + log.Fatalf("Error writing output file: %v", err) + } +} + +// fetchArticles fetches articles from a given feed URL +func fetchArticles(feedURL string) ([]Article, error) { + fp := gofeed.NewParser() + feed, err := fp.ParseURL(feedURL) + if err != nil { + return nil, fmt.Errorf("error fetching feed: %v", err) + } + + var articles []Article + for _, item := range feed.Items { + // Parse publish date + publishAt := item.PublishedParsed.UTC() + articleDomain := getURLDomain(item.Link) + + articles = append(articles, Article{ + Title: item.Title, + URL: item.Link, + PublishAt: publishAt, + URLDomain: articleDomain, + }) + } + + return articles, nil +} + +// generateMarkdown generates Markdown output with header and footer +func generateMarkdown(templateFile string, articlesByWeek map[string][]Article, weeks []string) string { + // Read template file + templateData, err := os.ReadFile(templateFile) + if err != nil { + log.Fatalf("Error reading template file: %v", err) + } + + // Split template into header and footer sections + templateParts := strings.SplitN(string(templateData), "---", 3) + if len(templateParts) != 3 { + log.Fatalf("Invalid template format") + } + + header := strings.TrimSpace(templateParts[0]) + footer := strings.TrimSpace(templateParts[2]) + + // Generate Markdown output + var output strings.Builder + output.WriteString(header) + output.WriteString("\n\n") + + for _, week := range weeks { + articles := articlesByWeek[week] + if len(articles) == 0 { + continue + } + + // Sort articles by publish date + slices.SortFunc(articles, func(a, b Article) int { + return cmp.Compare(a.PublishAt.Unix(), b.PublishAt.Unix()) + }) + + output.WriteString(fmt.Sprintf("## Week: %s\n\n", week)) + for _, article := range articles { + output.WriteString(fmt.Sprintf("- %s @ %s: [%s](%s)\n", article.PublishAt.Format("2006-01-02"), article.URLDomain, article.Title, article.URL)) + } + output.WriteString("\n") + + } + + output.WriteString(footer) + output.WriteString("\n") + + return output.String() +} + +// getURLDomain extracts the domain from a URL-like string +// e.g. "https://example.com" -> "example.com" +func getURLDomain(urlString string) string { + urlString = strings.TrimSpace(urlString) + + if regexp.MustCompile(`^https?`).MatchString(urlString) { + read, _ := url.Parse(urlString) + urlString = read.Host + } + + if regexp.MustCompile(`^www\.`).MatchString(urlString) { + urlString = regexp.MustCompile(`^www\.`).ReplaceAllString(urlString, "") + } + + return regexp.MustCompile(`([a-z0-9\-]+\.)+[a-z0-9\-]+`).FindString(urlString) +}