Initial commit

This commit is contained in:
2024-02-25 03:41:11 +02:00
commit e013b25bbb
9 changed files with 349 additions and 0 deletions

21
.github/LICENSE.md vendored Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 Ismo Vuorinen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

41
.github/README.md vendored Normal file
View File

@@ -0,0 +1,41 @@
# PaperBoy
CLI tool to generate a weekly grouped list of articles from defined RSS feeds.
## Usage
Create `template.md` and `config.yaml` in the directory you run the binary from (both are read relative to the current working directory).
There are examples of both of these files in the `examples` directory.
## Configuration (`config.yaml`)
```yaml
feeds:
- https://xkcd.com/atom.xml
- https://www.foxtrot.com/feed/
- https://news.ycombinator.com/rss
- https://feeds.feedburner.com/codinghorror
- https://internetofshit.net/rss/
- https://techcrunch.com/feed/
template: template.md
output: output.md
```
## Template (`template.md`)
```markdown
# Paperboy weekly newsletter
---
---
Generated by [paperboy](https://github.com/ivuorinen/paperboy)
```
The content is rendered between the two `---` lines.
## License
The MIT License (MIT) - see [LICENSE](LICENSE.md) for more details

5
.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
output.md
config.yaml
template.md
!examples/*
paperboy

13
Makefile Normal file
View File

@@ -0,0 +1,13 @@
default: help

# Version string embedded into the binary; override with `make build VERSION=1.2.3`.
VERSION?=dev

# None of these targets produce a file of the same name, so declare them phony;
# otherwise a file or directory called e.g. "build" would turn the target into a no-op.
.PHONY: default help build

help: # Show help for each of the Makefile recipes.
	@grep -E '^[a-zA-Z0-9 -]+:.*#' Makefile | sort | while read -r l; do printf "\033[1;32m$$(echo $$l | cut -f 1 -d':')\033[00m:$$(echo $$l | cut -f 2- -d'#')\n"; done

build: # Build the binary. Use VERSION (make build VERSION=1.2.3) to set the build version.
	go build -o paperboy \
		-ldflags "-X main.version=${VERSION}" \
		main.go

9
examples/config.yaml Normal file
View File

@@ -0,0 +1,9 @@
feeds:
- https://xkcd.com/atom.xml
- https://www.foxtrot.com/feed/
- https://news.ycombinator.com/rss
- https://feeds.feedburner.com/codinghorror
- https://internetofshit.net/rss/
- https://techcrunch.com/feed/
template: template.md
output: output.md

7
examples/template.md Normal file
View File

@@ -0,0 +1,7 @@
# Paperboy weekly newsletter
---
---
Generated by [paperboy](https://github.com/ivuorinen/paperboy)

16
go.mod Normal file
View File

@@ -0,0 +1,16 @@
module github.com/ivuorinen/paperboy

go 1.22.0

// Imported directly by main.go.
require (
	github.com/mmcdole/gofeed v1.2.1
	gopkg.in/yaml.v2 v2.4.0
)

// Pulled in transitively by the direct dependencies above.
require (
	github.com/PuerkitoBio/goquery v1.8.0 // indirect
	github.com/andybalholm/cascadia v1.3.1 // indirect
	github.com/json-iterator/go v1.1.12 // indirect
	github.com/mmcdole/goxpp v1.1.0 // indirect
	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
	github.com/modern-go/reflect2 v1.0.2 // indirect
	golang.org/x/net v0.4.0 // indirect
	golang.org/x/text v0.5.0 // indirect
)

34
go.sum Normal file
View File

@@ -0,0 +1,34 @@
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/mmcdole/gofeed v1.2.1 h1:tPbFN+mfOLcM1kDF1x2c/N68ChbdBatkppdzf/vDe1s=
github.com/mmcdole/gofeed v1.2.1/go.mod h1:2wVInNpgmC85q16QTTuwbuKxtKkHLCDDtf0dCmnrNr4=
github.com/mmcdole/goxpp v1.1.0 h1:WwslZNF7KNAXTFuzRtn/OKZxFLJAAyOA9w82mDz2ZGI=
github.com/mmcdole/goxpp v1.1.0/go.mod h1:v+25+lT2ViuQ7mVxcncQ8ch1URund48oH+jhjiwEgS8=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.4.0 h1:Q5QPcMlvfxFTAPV0+07Xz/MpK9NTXu2VDUuy0FeMfaU=
golang.org/x/net v0.4.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.5.0 h1:OLmvp0KP+FVG99Ct/qFiL/Fhk4zp4QQnZ7b2U+5piUM=
golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=

203
main.go Normal file
View File

@@ -0,0 +1,203 @@
// Copyright 2024 Ismo Vuorinen. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT
//
// Paperboy is a simple RSS feed reader that generates
// a Markdown file with the latest articles from multiple feeds.
//go:build go1.16
// +build go1.16
package main
import (
"cmp"
"fmt"
"log"
"net/url"
"os"
"regexp"
"slices"
"sort"
"strings"
"time"
"github.com/mmcdole/gofeed"
"gopkg.in/yaml.v2"
)
// Version and build information.
//
// version defaults to "dev" and can be overridden at link time with
// -ldflags "-X main.version=<value>".
// build is not injected by the linker: it is computed when the program
// starts, as the current date formatted YYYYMMDD.
var (
	version = "dev"
	build   = time.Now().Format("20060102")
)
// Config represents the structure of the YAML configuration file
// (config.yaml) that main reads at startup.
type Config struct {
	// Template is the path of the Markdown template file handed to generateMarkdown.
	Template string `yaml:"template"`
	// Output is the path the generated Markdown is written to.
	Output string `yaml:"output"`
	// Feeds lists the feed URLs to fetch articles from.
	Feeds []string `yaml:"feeds"`
}
// Article represents a feed article
type Article struct {
	// PublishAt is the article's publish time; fetchArticles stores it in UTC.
	PublishAt time.Time
	// Title is the item title as delivered by the feed.
	Title string
	// URL is the item's link.
	URL string
	// URLDomain is the domain derived from URL by getURLDomain.
	URLDomain string
}
// main loads config.yaml from the current working directory, fetches every
// configured feed, groups the articles by ISO week and writes the rendered
// Markdown to the configured output file.
//
// A feed that fails to fetch is logged and skipped. Failing to read or parse
// the configuration, or to write the output, terminates the program.
func main() {
	log.Printf("Paperboy v.%s (build %s)", version, build)

	// Read YAML configuration file
	configFile := "config.yaml"
	configData, err := os.ReadFile(configFile)
	if err != nil {
		log.Fatalf("Error reading config.yaml file: %v", err)
	}

	// Parse YAML configuration
	var config Config
	if err := yaml.Unmarshal(configData, &config); err != nil {
		log.Fatalf("Error parsing config.yaml file: %v", err)
	}

	log.Printf("Feeds: %d", len(config.Feeds))

	// Fetch articles from each feed URL
	articlesByWeek := make(map[string][]Article)
	var weeks []string

	for _, feedURL := range config.Feeds {
		log.Printf("Fetching articles from %s", feedURL)
		articles, err := fetchArticles(feedURL)
		if err != nil {
			// One broken feed must not prevent the others from being rendered.
			log.Printf("Error fetching articles from %s: %v", feedURL, err)
			continue
		}

		log.Printf("-> Got %d articles", len(articles))

		// Group articles by publish week
		for _, article := range articles {
			year, week := article.PublishAt.UTC().ISOWeek()

			// Format week in the format "YYYY-WW"
			// e.g. 2021-01
			id := fmt.Sprintf("%d-%02d", year, week)

			// The map already tells us whether this week has been seen, so
			// there is no need to scan the weeks slice for every article.
			if _, seen := articlesByWeek[id]; !seen {
				weeks = append(weeks, id)
			}
			articlesByWeek[id] = append(articlesByWeek[id], article)
		}
	}

	// Sort weeks, newest first. The zero-padded "YYYY-WW" ids sort
	// chronologically as plain strings.
	sort.Strings(weeks)
	slices.Reverse(weeks)

	// Generate Markdown output
	output := generateMarkdown(config.Template, articlesByWeek, weeks)

	// Write Markdown output to file
	if err := os.WriteFile(config.Output, []byte(output), 0644); err != nil {
		log.Fatalf("Error writing output file: %v", err)
	}
}
// fetchArticles downloads and parses the feed at feedURL and converts its
// items into Articles.
//
// The publish time is taken from the item's published date, falling back to
// its updated date. Both are pointers that may be nil (feeds are not required
// to carry dates, and a date may fail to parse), so they are checked before
// use. Items with neither date are logged and skipped, since they cannot be
// assigned to a week.
//
// It returns an error wrapping the parser's error if the feed cannot be
// fetched or parsed.
func fetchArticles(feedURL string) ([]Article, error) {
	fp := gofeed.NewParser()
	feed, err := fp.ParseURL(feedURL)
	if err != nil {
		return nil, fmt.Errorf("error fetching feed: %w", err)
	}

	articles := make([]Article, 0, len(feed.Items))
	for _, item := range feed.Items {
		// Pick the best available timestamp without dereferencing a nil pointer.
		publishedAt := item.PublishedParsed
		if publishedAt == nil {
			publishedAt = item.UpdatedParsed
		}
		if publishedAt == nil {
			log.Printf("Skipping %q from %s: no publish date", item.Title, feedURL)
			continue
		}

		articles = append(articles, Article{
			Title:     item.Title,
			URL:       item.Link,
			PublishAt: publishedAt.UTC(),
			URLDomain: getURLDomain(item.Link),
		})
	}

	return articles, nil
}
// generateMarkdown renders the newsletter as a Markdown string.
//
// templateFile is read and split on "---" into at most three parts: the text
// before the first separator becomes the header, everything after the second
// separator becomes the footer, and whatever sits between the two separators
// is discarded. The program exits via log.Fatalf when the file cannot be read
// or does not contain two separators.
//
// Weeks are emitted in the order given by weeks; weeks with no articles are
// skipped. Within a week, articles are listed oldest first. The slices stored
// in articlesByWeek are sorted in place.
func generateMarkdown(templateFile string, articlesByWeek map[string][]Article, weeks []string) string {
	raw, err := os.ReadFile(templateFile)
	if err != nil {
		log.Fatalf("Error reading template file: %v", err)
	}

	// header | (ignored) | footer
	sections := strings.SplitN(string(raw), "---", 3)
	if len(sections) != 3 {
		log.Fatalf("Invalid template format")
	}
	header, footer := strings.TrimSpace(sections[0]), strings.TrimSpace(sections[2])

	// Orders articles by ascending publish time, compared at second resolution.
	byPublishTime := func(a, b Article) int {
		return cmp.Compare(a.PublishAt.Unix(), b.PublishAt.Unix())
	}

	var md strings.Builder
	md.WriteString(header + "\n\n")

	for _, week := range weeks {
		entries := articlesByWeek[week]
		if len(entries) == 0 {
			continue
		}

		slices.SortFunc(entries, byPublishTime)

		fmt.Fprintf(&md, "## Week: %s\n\n", week)
		for _, entry := range entries {
			fmt.Fprintf(&md, "- %s @ %s: [%s](%s)\n",
				entry.PublishAt.Format("2006-01-02"), entry.URLDomain, entry.Title, entry.URL)
		}
		md.WriteString("\n")
	}

	md.WriteString(footer + "\n")
	return md.String()
}
// Patterns used by getURLDomain, compiled once instead of on every call.
var (
	// urlSchemeRe matches only real http(s) URLs; a bare host that merely
	// starts with "http" (e.g. "httpbin.org") must not be treated as one.
	urlSchemeRe = regexp.MustCompile(`^https?://`)
	// domainRe matches a dotted host name made of lowercase labels.
	domainRe = regexp.MustCompile(`([a-z0-9\-]+\.)+[a-z0-9\-]+`)
)

// getURLDomain extracts the domain from a URL-like string
// e.g. "https://example.com" -> "example.com"
//
// The input is trimmed and lowercased first, since host names are
// case-insensitive and the domain pattern only accepts lowercase. For http(s)
// URLs the host part is taken from the parsed URL; any other input is searched
// for a domain directly. A leading "www." is dropped, and a port, if present,
// is not part of the result.
//
// It returns "" when no domain can be found, including when an http(s) URL
// fails to parse.
func getURLDomain(urlString string) string {
	urlString = strings.ToLower(strings.TrimSpace(urlString))

	if urlSchemeRe.MatchString(urlString) {
		parsed, err := url.Parse(urlString)
		if err != nil {
			// A malformed URL has no trustworthy host; report "no domain"
			// rather than dereferencing a nil *url.URL.
			return ""
		}
		urlString = parsed.Host
	}

	urlString = strings.TrimPrefix(urlString, "www.")

	return domainRe.FindString(urlString)
}