mirror of
https://github.com/ivuorinen/paperboy.git
synced 2026-01-26 03:34:02 +00:00
// Copyright 2024 Ismo Vuorinen. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT
//
// Paperboy is a simple RSS feed reader that generates
// a Markdown file with the latest articles from multiple feeds.

//go:build go1.22

package main

import (
	"cmp"
	"fmt"
	"log"
	"net/url"
	"os"
	"regexp"
	"slices"
	"strings"
	"time"

	"github.com/mmcdole/gofeed"
	"gopkg.in/yaml.v3"
)

// Version and build information. These are intended to be set at build
// time; version defaults to "dev" and build falls back to the current
// date at startup.
var (
	version = "dev"
	build   = time.Now().Format("20060102")
)
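
// A minimal sketch of overriding version at build time (value hypothetical):
//
//	go build -ldflags "-X main.version=1.2.3"
//
// Note that -X only affects string variables initialized to a constant,
// so build, whose initializer runs at startup, cannot be set this way.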

// Config represents the structure of the YAML configuration file
type Config struct {
	Template string   `yaml:"template"`
	Output   string   `yaml:"output"`
	Feeds    []string `yaml:"feeds"`
}
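
// An illustrative config.yaml matching the fields above (all values are
// hypothetical examples, not defaults):
//
//	template: template.md
//	output: articles.md
//	feeds:
//	  - https://example.com/feed.xml
//	  - https://example.org/rss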

// Article represents a feed article
type Article struct {
	PublishAt time.Time
	Title     string
	URL       string
	URLDomain string
}

func main() {
	log.Printf("Paperboy v.%s (build %s)", version, build)

	// Read YAML configuration file
	configFile := "config.yaml"
	configData, err := os.ReadFile(configFile)
	if err != nil {
		log.Fatalf("Error reading config.yaml file: %v", err)
	}

	// Parse YAML configuration
	var config Config
	err = yaml.Unmarshal(configData, &config)
	if err != nil {
		log.Fatalf("Error parsing config.yaml file: %v", err)
	}

	log.Printf("Feeds: %d", len(config.Feeds))

	// Fetch articles from each feed URL
	articlesByWeek := make(map[string][]Article)
	var weeks []string

	for _, feedURL := range config.Feeds {
		log.Printf("Fetching articles from %s", feedURL)

		articles, err := fetchArticles(feedURL)
		if err != nil {
			log.Printf("Error fetching articles from %s: %v", feedURL, err)
			continue
		}

		log.Printf("-> Got %d articles", len(articles))

		// Group articles by publish week, keyed in the format "YYYY-WW",
		// e.g. 2021-01. ISOWeek follows ISO 8601, so a date near the year
		// boundary may belong to the adjacent year's week (2021-01-01
		// falls in ISO week 2020-53).
		for _, article := range articles {
			year, week := article.PublishAt.UTC().ISOWeek()
			id := fmt.Sprintf("%d-%02d", year, week)
			articlesByWeek[id] = append(articlesByWeek[id], article)

			if !slices.Contains(weeks, id) {
				weeks = append(weeks, id)
			}
		}
	}

	// Sort weeks newest first. Lexicographic order matches chronological
	// order here because the week number is zero-padded.
	slices.Sort(weeks)
	slices.Reverse(weeks)

	log.Printf("-> Sorted and reversed %d weeks", len(weeks))

	// Generate Markdown output
	output := generateMarkdown(config.Template, articlesByWeek, weeks)

	log.Printf("-> Generated Markdown output")

	// Write Markdown output to file
	outputFile := config.Output
	err = os.WriteFile(outputFile, []byte(output), 0o644)
	if err != nil {
		log.Fatalf("Error writing output file: %v", err)
	}

	log.Printf("-> Wrote output to %s", outputFile)
	log.Printf("Paperboy finished")
}

// fetchArticles fetches articles from the given feed URL.
func fetchArticles(feedURL string) ([]Article, error) {
	fp := gofeed.NewParser()
	feed, err := fp.ParseURL(feedURL)
	if err != nil {
		return nil, fmt.Errorf("error fetching feed: %w", err)
	}

	var articles []Article
	for _, item := range feed.Items {
		// PublishedParsed is nil when the feed item has no parseable
		// publish date; skip such items instead of dereferencing nil.
		if item.PublishedParsed == nil {
			continue
		}
		publishAt := item.PublishedParsed.UTC()
		articleDomain := getURLDomain(item.Link)

		articles = append(articles, Article{
			Title:     item.Title,
			URL:       item.Link,
			PublishAt: publishAt,
			URLDomain: articleDomain,
		})
	}

	return articles, nil
}

// generateMarkdown renders the grouped articles between the header and
// footer sections of the given template file.
func generateMarkdown(templateFile string, articlesByWeek map[string][]Article, weeks []string) string {
	// Read template file
	templateData, err := os.ReadFile(templateFile)
	if err != nil {
		log.Fatalf("Error reading template file: %v", err)
	}

	// Split template into header and footer sections
	templateParts := strings.SplitN(string(templateData), "---", 3)
	if len(templateParts) != 3 {
		log.Fatalf("Invalid template format")
	}
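
	// Illustrative template layout (contents hypothetical): everything
	// before the first "---" becomes the header, everything after the
	// second becomes the footer, and the middle section is discarded:
	//
	//	# Reading List
	//	---
	//	(the generated article list replaces this section)
	//	---
	//	Maintained with Paperboy.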

	header := strings.TrimSpace(templateParts[0])
	footer := strings.TrimSpace(templateParts[2])

	// Generate Markdown output
	var output strings.Builder
	output.WriteString(header)
	output.WriteString("\n\n")

	for _, week := range weeks {
		articles := articlesByWeek[week]
		if len(articles) == 0 {
			continue
		}

		// Sort articles within the week by publish date, oldest first
		slices.SortFunc(articles, func(a, b Article) int {
			return cmp.Compare(a.PublishAt.Unix(), b.PublishAt.Unix())
		})

		output.WriteString(fmt.Sprintf("## Week: %s\n\n", week))
		for _, article := range articles {
			output.WriteString(fmt.Sprintf("- %s @ %s: [%s](%s)\n",
				article.PublishAt.Format("2006-01-02"), article.URLDomain, article.Title, article.URL))
		}
		output.WriteString("\n")
	}

	output.WriteString(footer)
	output.WriteString("\n")

	return output.String()
}
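
// An illustrative run with the hypothetical template above might produce
// output shaped like:
//
//	# Reading List
//
//	## Week: 2024-01
//
//	- 2024-01-03 @ example.com: [Some Title](https://example.com/some-title)
//
//	Maintained with Paperboy.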

// getURLDomain extracts the domain from a URL-like string,
// e.g. "https://example.com" -> "example.com".
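//
// Further illustrative inputs (values hypothetical):
//
//	getURLDomain(" https://www.example.com/post/1 ") // "example.com"
//	getURLDomain("feeds.example.com")                // "feeds.example.com"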
func getURLDomain(urlString string) string {
	urlString = strings.TrimSpace(urlString)

	// If the string carries an http(s) scheme, reduce it to the host part.
	// url.Parse returns nil on error, so check before using the result.
	if regexp.MustCompile(`^https?`).MatchString(urlString) {
		parsed, err := url.Parse(urlString)
		if err != nil {
			return ""
		}
		urlString = parsed.Host
	}

	// Drop a leading "www." prefix; ReplaceAllString is a no-op otherwise.
	urlString = regexp.MustCompile(`^www\.`).ReplaceAllString(urlString, "")

	return regexp.MustCompile(`([a-z0-9\-]+\.)+[a-z0-9\-]+`).FindString(urlString)
}