feat: relative paths, ignore-aware walker, JSON/YAML/Markdown output, config-driven filtering

2025-02-08 22:36:28 +02:00
parent 7c09552196
commit 01210aaebe
13 changed files with 356 additions and 155 deletions

View File

@@ -3,25 +3,34 @@ package fileproc
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"github.com/sirupsen/logrus"
)
// WriteRequest represents the content to be written.
type WriteRequest struct {
Path string
Content string
}
// ProcessFile reads the file at filePath and sends a formatted output to outCh.
// The optional wg parameter is used when the caller wants to wait on file-level processing.
func ProcessFile(filePath string, outCh chan<- WriteRequest, wg *interface{}) {
content, err := ioutil.ReadFile(filePath)
func ProcessFile(filePath string, outCh chan<- WriteRequest, rootPath string) {
content, err := os.ReadFile(filePath)
if err != nil {
logrus.Errorf("Failed to read file %s: %v", filePath, err)
return
}
// Format: separator, file path, then content.
formatted := fmt.Sprintf("\n---\n%s\n%s\n", filePath, string(content))
outCh <- WriteRequest{Content: formatted}
// Compute path relative to rootPath, so /a/b/c/d.c becomes c/d.c
relPath, err := filepath.Rel(rootPath, filePath)
if err != nil {
// Fallback if something unexpected happens
relPath = filePath
}
// Format: separator, then relative path, then content
formatted := fmt.Sprintf("\n---\n%s\n%s\n", relPath, string(content))
outCh <- WriteRequest{Path: relPath, Content: formatted}
}
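
Example (not part of the diff): a minimal sketch of how a caller could fan ProcessFile out over a shared channel with the new rootPath argument. The import path github.com/ivuorinen/gibidify/fileproc, the sample root, and the file list are assumptions for illustration only.

package main

import (
	"fmt"
	"sync"

	"github.com/ivuorinen/gibidify/fileproc"
)

func main() {
	rootPath := "/tmp/project"                    // assumed root; normally the CLI argument
	paths := []string{"/tmp/project/cmd/main.go"} // would normally come from the walker
	outCh := make(chan fileproc.WriteRequest, len(paths))

	var wg sync.WaitGroup
	for _, p := range paths {
		wg.Add(1)
		go func(p string) {
			defer wg.Done()
			fileproc.ProcessFile(p, outCh, rootPath)
		}(p)
	}
	wg.Wait()
	close(outCh)

	for req := range outCh {
		fmt.Printf("%s (%d bytes)\n", req.Path, len(req.Content)) // Path is root-relative, e.g. cmd/main.go
	}
}

Buffering outCh to len(paths) lets every goroutine send its result without blocking before the channel is drained.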

View File

@@ -13,20 +13,29 @@ func TestProcessFile(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer os.Remove(tmpFile.Name())
defer func(name string) {
err := os.Remove(name)
if err != nil {
t.Fatal(err)
}
}(tmpFile.Name())
content := "Test content"
if _, err := tmpFile.WriteString(content); err != nil {
t.Fatal(err)
}
tmpFile.Close()
if err := tmpFile.Close(); err != nil {
t.Fatal(err)
}
ch := make(chan WriteRequest, 1)
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
ProcessFile(tmpFile.Name(), ch, nil)
ProcessFile(tmpFile.Name(), ch, "")
}()
wg.Wait()
close(ch)

View File

@@ -2,8 +2,12 @@
package fileproc
import (
"github.com/boyter/gocodewalker"
"github.com/sirupsen/logrus"
"os"
"path/filepath"
"strings"
"github.com/ivuorinen/gibidify/config"
ignore "github.com/sabhiram/go-gitignore"
)
// Walker defines an interface for scanning directories.
@@ -11,30 +15,148 @@ type Walker interface {
Walk(root string) ([]string, error)
}
// ProdWalker implements Walker using gocodewalker.
// ProdWalker implements Walker using a custom directory walker that
// respects .gitignore and .ignore files and configuration-defined ignore directories,
// and skips binary and image files by default.
type ProdWalker struct{}
// Walk scans the given root directory using gocodewalker and returns a slice of file paths.
func (pw ProdWalker) Walk(root string) ([]string, error) {
fileListQueue := make(chan *gocodewalker.File, 100)
fileWalker := gocodewalker.NewFileWalker(root, fileListQueue)
// ignoreRule holds an ignore matcher along with the base directory where it was loaded.
type ignoreRule struct {
base string
gi *ignore.GitIgnore
}
errorHandler := func(err error) bool {
logrus.Errorf("error walking directory: %s", err.Error())
// Walk scans the given root directory recursively and returns a slice of file paths
// that are not ignored based on .gitignore/.ignore files, the configuration, or the default binary/image filter.
func (pw ProdWalker) Walk(root string) ([]string, error) {
absRoot, err := filepath.Abs(root)
if err != nil {
return nil, err
}
return walkDir(absRoot, absRoot, []ignoreRule{})
}
// walkDir recursively walks the directory tree starting at currentDir.
// It loads any .gitignore and .ignore files found in each directory and
// appends the corresponding rules to the inherited list. Each file/directory is
// then checked against the accumulated ignore rules, the configuration's list of ignored directories,
// and a default filter that ignores binary and image files.
func walkDir(root string, currentDir string, parentRules []ignoreRule) ([]string, error) {
var results []string
entries, err := os.ReadDir(currentDir)
if err != nil {
return nil, err
}
// Start with the parent's ignore rules.
rules := make([]ignoreRule, len(parentRules))
copy(rules, parentRules)
// Check for .gitignore and .ignore files in the current directory.
for _, fileName := range []string{".gitignore", ".ignore"} {
ignorePath := filepath.Join(currentDir, fileName)
if info, err := os.Stat(ignorePath); err == nil && !info.IsDir() {
gi, err := ignore.CompileIgnoreFile(ignorePath)
if err == nil {
rules = append(rules, ignoreRule{
base: currentDir,
gi: gi,
})
}
}
}
// Get the list of directories to ignore from configuration.
ignoredDirs := config.GetIgnoredDirectories()
sizeLimit := config.GetFileSizeLimit() // e.g., 5242880 for 5 MB
entryLoop:
for _, entry := range entries {
fullPath := filepath.Join(currentDir, entry.Name())
if entry.IsDir() {
// For directories, skip entirely if the directory name is in the config ignore list.
for _, d := range ignoredDirs {
if entry.Name() == d {
continue entryLoop
}
}
} else {
// Skip files that exceed the configured size limit.
info, err := entry.Info()
if err == nil && info.Size() > sizeLimit {
continue
}
// Apply the default filter that ignores binary and image files.
if isBinaryOrImage(fullPath) {
continue
}
}
// Check the accumulated ignore rules.
for _, rule := range rules {
// Compute the path relative to the base where the ignore rule was defined.
rel, err := filepath.Rel(rule.base, fullPath)
if err != nil {
continue
}
// If a rule matches, skip this entry.
if rule.gi.MatchesPath(rel) {
continue entryLoop
}
}
// Not ignored: recurse into directories, collect files.
if entry.IsDir() {
subFiles, err := walkDir(root, fullPath, rules)
if err != nil {
return nil, err
}
results = append(results, subFiles...)
} else {
results = append(results, fullPath)
}
}
return results, nil
}
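// Illustrative note (not part of this commit): because matching is done relative to the
// rule's base directory, a "*.log" pattern loaded from /repo/.gitignore is checked against
// "logs/app.log" for the file /repo/logs/app.log (rule.gi.MatchesPath("logs/app.log") reports
// true), and rules loaded in a subdirectory only ever see paths beneath that subdirectory.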
// isBinaryOrImage checks if a file should be considered binary or an image based on its extension.
// The check is case-insensitive.
func isBinaryOrImage(filePath string) bool {
ext := strings.ToLower(filepath.Ext(filePath))
// Common image file extensions.
imageExtensions := map[string]bool{
".png": true,
".jpg": true,
".jpeg": true,
".gif": true,
".bmp": true,
".tiff": true,
".ico": true,
".svg": true,
".webp": true,
}
// Common binary file extensions.
binaryExtensions := map[string]bool{
".exe": true,
".dll": true,
".so": true,
".bin": true,
".dat": true,
".zip": true,
".tar": true,
".gz": true,
".7z": true,
".rar": true,
".ds_store": true, // lowercase so it matches the strings.ToLower(ext) lookup
}
if imageExtensions[ext] || binaryExtensions[ext] {
return true
}
fileWalker.SetErrorHandler(errorHandler)
go func() {
err := fileWalker.Start()
if err != nil {
logrus.Errorf("error walking directory: %s", err.Error())
}
}()
var files []string
for f := range fileListQueue {
files = append(files, f.Location)
}
return files, nil
return false
}
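
Example (not part of the diff): a minimal usage sketch for the new walker, assuming ProdWalker and the Walker interface are exported from the gibidify fileproc package shown above; the import path is an assumption.

package main

import (
	"fmt"

	"github.com/ivuorinen/gibidify/fileproc"
)

func main() {
	var w fileproc.Walker = fileproc.ProdWalker{}
	files, err := w.Walk(".")
	if err != nil {
		panic(err)
	}
	for _, f := range files {
		fmt.Println(f) // absolute paths that survived the ignore, size, and binary/image filters
	}
}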

View File

@@ -1,21 +1,94 @@
// Package fileproc provides functions for writing file contents concurrently.
package fileproc
import (
"io"
"encoding/json"
"fmt"
"os"
"github.com/sirupsen/logrus"
"gopkg.in/yaml.v3"
)
// StartWriter listens on the write channel and writes content to outFile.
// When finished, it signals on the done channel.
func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}) {
writer := io.Writer(outFile)
for req := range writeCh {
if _, err := writer.Write([]byte(req.Content)); err != nil {
logrus.Errorf("Error writing to file: %v", err)
}
}
done <- struct{}{}
// FileData represents a single file's path and content.
type FileData struct {
Path string `json:"path" yaml:"path"`
Content string `json:"content" yaml:"content"`
}
// OutputData represents the full output structure.
type OutputData struct {
Prefix string `json:"prefix,omitempty" yaml:"prefix,omitempty"`
Files []FileData `json:"files" yaml:"files"`
Suffix string `json:"suffix,omitempty" yaml:"suffix,omitempty"`
}
// StartWriter collects all incoming write requests, serializes them together with the
// optional prefix and suffix in the requested format (json, yaml, or markdown),
// writes the result to outFile, and closes done when finished.
func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format string, prefix, suffix string) {
var files []FileData
// Read from channel until closed
for req := range writeCh {
files = append(files, FileData{Path: req.Path, Content: req.Content})
}
// Create output struct
output := OutputData{Prefix: prefix, Files: files, Suffix: suffix}
// Serialize based on format
var outputData []byte
var err error
switch format {
case "json":
outputData, err = json.MarshalIndent(output, "", " ")
case "yaml":
outputData, err = yaml.Marshal(output)
case "markdown":
outputData = []byte(formatMarkdown(output))
default:
err = fmt.Errorf("unsupported format: %s", format)
}
if err != nil {
logrus.Errorf("Error encoding output: %v", err)
close(done)
return
}
// Write to file
if _, err := outFile.Write(outputData); err != nil {
logrus.Errorf("Error writing to file: %v", err)
}
close(done)
}
// formatMarkdown renders the collected files as Markdown; empty prefix/suffix headings are omitted.
func formatMarkdown(output OutputData) string {
markdown := ""
if output.Prefix != "" {
markdown = "# " + output.Prefix + "\n\n"
}
for _, file := range output.Files {
markdown += fmt.Sprintf("## File: `%s`\n```%s\n%s\n```\n\n", file.Path, detectLanguage(file.Path), file.Content)
}
if output.Suffix != "" {
markdown += "# " + output.Suffix + "\n"
}
return markdown
}
// detectLanguage tries to infer the code block language from the file extension.
func detectLanguage(filename string) string {
switch {
case len(filename) >= 3 && filename[len(filename)-3:] == ".go":
return "go"
case len(filename) >= 3 && filename[len(filename)-3:] == ".py":
return "python"
case len(filename) >= 2 && filename[len(filename)-2:] == ".c":
return "c"
case len(filename) >= 3 && filename[len(filename)-3:] == ".js":
return "javascript"
default:
return ""
}
}
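
Example (not part of the diff): a minimal end-to-end sketch of the new StartWriter signature, assuming the same fileproc import path; the output filename, prefix, and suffix are placeholder values.

package main

import (
	"os"

	"github.com/ivuorinen/gibidify/fileproc"
)

func main() {
	out, err := os.Create("bundle.md")
	if err != nil {
		panic(err)
	}
	defer out.Close()

	writeCh := make(chan fileproc.WriteRequest)
	done := make(chan struct{})
	go fileproc.StartWriter(out, writeCh, done, "markdown", "My Project", "End of bundle")

	writeCh <- fileproc.WriteRequest{Path: "main.go", Content: "package main\n"}
	close(writeCh) // the writer serializes and writes everything once the channel is drained
	<-done
}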

View File

@@ -1,31 +1,45 @@
package fileproc
import (
"bytes"
"sync"
"encoding/json"
"os"
"testing"
)
func TestStartWriter(t *testing.T) {
var buf bytes.Buffer
func TestStartWriter_JSONOutput(t *testing.T) {
outFile, err := os.CreateTemp("", "output.json")
if err != nil {
t.Fatal(err)
}
defer func(name string) {
err := os.Remove(name)
if err != nil {
t.Fatal(err)
}
}(outFile.Name())
writeCh := make(chan WriteRequest)
done := make(chan struct{})
go StartWriter(&buf, writeCh, done)
go StartWriter(outFile, writeCh, done, "json", "Prefix", "Suffix")
writeCh <- WriteRequest{Path: "file1.go", Content: "package main"}
writeCh <- WriteRequest{Path: "file2.py", Content: "def hello(): print('Hello')"}
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
writeCh <- WriteRequest{Content: "Hello"}
writeCh <- WriteRequest{Content: " World"}
}()
wg.Wait()
close(writeCh)
<-done
if buf.String() != "Hello World" {
t.Errorf("Expected 'Hello World', got '%s'", buf.String())
data, err := os.ReadFile(outFile.Name())
if err != nil {
t.Fatal(err)
}
var output OutputData
if err := json.Unmarshal(data, &output); err != nil {
t.Fatalf("JSON output is invalid: %v", err)
}
if len(output.Files) != 2 {
t.Errorf("Expected 2 files, got %d", len(output.Files))
}
}