Mirror of https://github.com/ivuorinen/gibidify.git (synced 2026-01-26 03:24:05 +00:00)
feat: more features, output formats, configs, etc
@@ -3,25 +3,34 @@ package fileproc
 import (
 	"fmt"
-	"io/ioutil"
+	"os"
+	"path/filepath"

 	"github.com/sirupsen/logrus"
 )

 // WriteRequest represents the content to be written.
 type WriteRequest struct {
+	Path    string
 	Content string
 }

 // ProcessFile reads the file at filePath and sends a formatted output to outCh.
-// The optional wg parameter is used when the caller wants to wait on file-level processing.
-func ProcessFile(filePath string, outCh chan<- WriteRequest, wg *interface{}) {
-	content, err := ioutil.ReadFile(filePath)
+func ProcessFile(filePath string, outCh chan<- WriteRequest, rootPath string) {
+	content, err := os.ReadFile(filePath)
 	if err != nil {
 		logrus.Errorf("Failed to read file %s: %v", filePath, err)
 		return
 	}
-	// Format: separator, file path, then content.
-	formatted := fmt.Sprintf("\n---\n%s\n%s\n", filePath, string(content))
-	outCh <- WriteRequest{Content: formatted}
+
+	// Compute path relative to rootPath, so /a/b/c/d.c becomes c/d.c
+	relPath, err := filepath.Rel(rootPath, filePath)
+	if err != nil {
+		// Fallback if something unexpected happens
+		relPath = filePath
+	}
+
+	// Format: separator, then relative path, then content
+	formatted := fmt.Sprintf("\n---\n%s\n%s\n", relPath, string(content))
+	outCh <- WriteRequest{Path: relPath, Content: formatted}
 }
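For orientation, here is a minimal, self-contained sketch of the relative-path computation the reworked ProcessFile relies on; the example paths are made up for illustration and mirror the comment in the diff above.

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// filepath.Rel is what ProcessFile now uses to turn an absolute file
	// path into a path relative to the scanned root directory.
	rel, err := filepath.Rel("/a/b", "/a/b/c/d.c")
	if err != nil {
		// Same fallback behaviour as ProcessFile: keep the original path.
		rel = "/a/b/c/d.c"
	}
	fmt.Println(rel) // prints "c/d.c"
}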
@@ -13,20 +13,29 @@ func TestProcessFile(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer os.Remove(tmpFile.Name())
+	defer func(name string) {
+		err := os.Remove(name)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}(tmpFile.Name())

 	content := "Test content"
 	if _, err := tmpFile.WriteString(content); err != nil {
 		t.Fatal(err)
 	}
-	tmpFile.Close()
+	errTmpFile := tmpFile.Close()
+	if errTmpFile != nil {
+		t.Fatal(errTmpFile)
+		return
+	}

 	ch := make(chan WriteRequest, 1)
 	var wg sync.WaitGroup
 	wg.Add(1)
 	go func() {
 		defer wg.Done()
-		ProcessFile(tmpFile.Name(), ch, nil)
+		ProcessFile(tmpFile.Name(), ch, "")
 	}()
 	wg.Wait()
 	close(ch)
@@ -2,8 +2,12 @@
 package fileproc

 import (
-	"github.com/boyter/gocodewalker"
-	"github.com/sirupsen/logrus"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/ivuorinen/gibidify/config"
+	ignore "github.com/sabhiram/go-gitignore"
 )

 // Walker defines an interface for scanning directories.
@@ -11,30 +15,148 @@ type Walker interface {
 	Walk(root string) ([]string, error)
 }

-// ProdWalker implements Walker using gocodewalker.
+// ProdWalker implements Walker using a custom directory walker that
+// respects .gitignore and .ignore files, configuration-defined ignore directories,
+// and ignores binary and image files by default.
 type ProdWalker struct{}

-// Walk scans the given root directory using gocodewalker and returns a slice of file paths.
-func (pw ProdWalker) Walk(root string) ([]string, error) {
-	fileListQueue := make(chan *gocodewalker.File, 100)
-	fileWalker := gocodewalker.NewFileWalker(root, fileListQueue)
+// ignoreRule holds an ignore matcher along with the base directory where it was loaded.
+type ignoreRule struct {
+	base string
+	gi   *ignore.GitIgnore
+}

-	errorHandler := func(err error) bool {
-		logrus.Errorf("error walking directory: %s", err.Error())
+// Walk scans the given root directory recursively and returns a slice of file paths
+// that are not ignored based on .gitignore/.ignore files, the configuration, or the default binary/image filter.
+func (pw ProdWalker) Walk(root string) ([]string, error) {
+	absRoot, err := filepath.Abs(root)
+	if err != nil {
+		return nil, err
+	}
+	return walkDir(absRoot, absRoot, []ignoreRule{})
+}
+
+// walkDir recursively walks the directory tree starting at currentDir.
+// It loads any .gitignore and .ignore files found in each directory and
+// appends the corresponding rules to the inherited list. Each file/directory is
+// then checked against the accumulated ignore rules, the configuration's list of ignored directories,
+// and a default filter that ignores binary and image files.
+func walkDir(root string, currentDir string, parentRules []ignoreRule) ([]string, error) {
+	var results []string
+
+	entries, err := os.ReadDir(currentDir)
+	if err != nil {
+		return nil, err
+	}
+
+	// Start with the parent's ignore rules.
+	rules := make([]ignoreRule, len(parentRules))
+	copy(rules, parentRules)
+
+	// Check for .gitignore and .ignore files in the current directory.
+	for _, fileName := range []string{".gitignore", ".ignore"} {
+		ignorePath := filepath.Join(currentDir, fileName)
+		if info, err := os.Stat(ignorePath); err == nil && !info.IsDir() {
+			gi, err := ignore.CompileIgnoreFile(ignorePath)
+			if err == nil {
+				rules = append(rules, ignoreRule{
+					base: currentDir,
+					gi:   gi,
+				})
+			}
+		}
+	}
+
+	// Get the list of directories to ignore from configuration.
+	ignoredDirs := config.GetIgnoredDirectories()
+	sizeLimit := config.GetFileSizeLimit() // e.g., 5242880 for 5 MB
+
+	for _, entry := range entries {
+		fullPath := filepath.Join(currentDir, entry.Name())
+
+		// For directories, check if its name is in the config ignore list.
+		if entry.IsDir() {
+			for _, d := range ignoredDirs {
+				if entry.Name() == d {
+					// Skip this directory entirely.
+					goto SkipEntry
+				}
+			}
+		} else {
+			// Check if file exceeds the configured size limit.
+			info, err := entry.Info()
+			if err == nil && info.Size() > sizeLimit {
+				goto SkipEntry
+			}
+
+			// For files, apply the default filter to ignore binary and image files.
+			if isBinaryOrImage(fullPath) {
+				goto SkipEntry
+			}
+		}
+
+		// Check accumulated ignore rules.
+		for _, rule := range rules {
+			// Compute the path relative to the base where the ignore rule was defined.
+			rel, err := filepath.Rel(rule.base, fullPath)
+			if err != nil {
+				continue
+			}
+			// If the rule matches, skip this entry.
+			if rule.gi.MatchesPath(rel) {
+				goto SkipEntry
+			}
+		}
+
+		// If not ignored, then process the entry.
+		if entry.IsDir() {
+			subFiles, err := walkDir(root, fullPath, rules)
+			if err != nil {
+				return nil, err
+			}
+			results = append(results, subFiles...)
+		} else {
+			results = append(results, fullPath)
+		}
+	SkipEntry:
+		continue
+	}
+
+	return results, nil
+}
+
+// isBinaryOrImage checks if a file should be considered binary or an image based on its extension.
+// The check is case-insensitive.
+func isBinaryOrImage(filePath string) bool {
+	ext := strings.ToLower(filepath.Ext(filePath))
+	// Common image file extensions.
+	imageExtensions := map[string]bool{
+		".png":  true,
+		".jpg":  true,
+		".jpeg": true,
+		".gif":  true,
+		".bmp":  true,
+		".tiff": true,
+		".ico":  true,
+		".svg":  true,
+		".webp": true,
+	}
+	// Common binary file extensions.
+	binaryExtensions := map[string]bool{
+		".exe": true,
+		".dll": true,
+		".so":  true,
+		".bin": true,
+		".dat": true,
+		".zip": true,
+		".tar": true,
+		".gz":  true,
+		".7z":  true,
+		".rar": true,
".DS_Store": true,
|
||||
}
|
||||
if imageExtensions[ext] || binaryExtensions[ext] {
|
||||
return true
|
||||
}
|
||||
fileWalker.SetErrorHandler(errorHandler)
|
||||
go func() {
|
||||
err := fileWalker.Start()
|
||||
if err != nil {
|
||||
logrus.Errorf("error walking directory: %s", err.Error())
|
||||
}
|
||||
}()
|
||||
|
||||
var files []string
|
||||
for f := range fileListQueue {
|
||||
files = append(files, f.Location)
|
||||
}
|
||||
|
||||
return files, nil
|
||||
return false
|
||||
}
|
||||
|
||||
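The skip decisions above hinge on go-gitignore matching paths relative to the directory that defined the rule. The following standalone sketch (the temporary directory and the two patterns are made up for illustration) exercises the same CompileIgnoreFile and MatchesPath calls that walkDir uses:

package main

import (
	"fmt"
	"os"
	"path/filepath"

	ignore "github.com/sabhiram/go-gitignore"
)

func main() {
	// Write a throwaway .gitignore so we can exercise the same matching
	// logic walkDir applies to every entry it visits.
	dir, err := os.MkdirTemp("", "walker-demo")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	giPath := filepath.Join(dir, ".gitignore")
	if err := os.WriteFile(giPath, []byte("*.log\nbuild/\n"), 0o644); err != nil {
		panic(err)
	}

	gi, err := ignore.CompileIgnoreFile(giPath)
	if err != nil {
		panic(err)
	}

	// Paths are given relative to the directory holding the .gitignore,
	// mirroring the filepath.Rel(rule.base, fullPath) call in walkDir.
	for _, rel := range []string{"app.log", "build/out.bin", "main.go"} {
		fmt.Printf("%-14s ignored=%v\n", rel, gi.MatchesPath(rel))
	}
}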
@@ -1,21 +1,94 @@
 // Package fileproc provides functions for writing file contents concurrently.
 package fileproc

 import (
-	"io"
+	"encoding/json"
+	"fmt"
 	"os"

 	"github.com/sirupsen/logrus"
+	"gopkg.in/yaml.v3"
 )

-// StartWriter listens on the write channel and writes content to outFile.
-// When finished, it signals on the done channel.
-func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}) {
-	writer := io.Writer(outFile)
-	for req := range writeCh {
-		if _, err := writer.Write([]byte(req.Content)); err != nil {
-			logrus.Errorf("Error writing to file: %v", err)
-		}
-	}
-	done <- struct{}{}
+// FileData represents a single file's path and content.
+type FileData struct {
+	Path    string `json:"path" yaml:"path"`
+	Content string `json:"content" yaml:"content"`
+}
+
+// OutputData represents the full output structure.
+type OutputData struct {
+	Prefix string     `json:"prefix,omitempty" yaml:"prefix,omitempty"`
+	Files  []FileData `json:"files" yaml:"files"`
+	Suffix string     `json:"suffix,omitempty" yaml:"suffix,omitempty"`
+}
+
+// StartWriter writes the output in the specified format.
+func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format string, prefix, suffix string) {
+	var files []FileData
+
+	// Read from channel until closed.
+	for req := range writeCh {
+		files = append(files, FileData{Path: req.Path, Content: req.Content})
+	}
+
+	// Create output struct.
+	output := OutputData{Prefix: prefix, Files: files, Suffix: suffix}
+
+	// Serialize based on format.
+	var outputData []byte
+	var err error
+
+	switch format {
+	case "json":
+		outputData, err = json.MarshalIndent(output, "", " ")
+	case "yaml":
+		outputData, err = yaml.Marshal(output)
+	case "markdown":
+		outputData = []byte(formatMarkdown(output))
+	default:
+		err = fmt.Errorf("unsupported format: %s", format)
+	}
+
+	if err != nil {
+		logrus.Errorf("Error encoding output: %v", err)
+		close(done)
+		return
+	}
+
+	// Write to file.
+	if _, err := outFile.Write(outputData); err != nil {
+		logrus.Errorf("Error writing to file: %v", err)
+	}
+
+	close(done)
 }
+
+func formatMarkdown(output OutputData) string {
+	markdown := "# " + output.Prefix + "\n\n"
+
+	for _, file := range output.Files {
+		markdown += fmt.Sprintf("## File: `%s`\n```%s\n%s\n```\n\n", file.Path, detectLanguage(file.Path), file.Content)
+	}
+
+	markdown += "# " + output.Suffix
+	return markdown
+}
+
+// detectLanguage tries to infer code block language from file extension.
+func detectLanguage(filename string) string {
+	if len(filename) < 3 {
+		return ""
+	}
+	switch {
+	case len(filename) >= 3 && filename[len(filename)-3:] == ".go":
+		return "go"
+	case len(filename) >= 3 && filename[len(filename)-3:] == ".py":
+		return "python"
+	case len(filename) >= 2 && filename[len(filename)-2:] == ".c":
+		return "c"
+	case len(filename) >= 3 && filename[len(filename)-3:] == ".js":
+		return "javascript"
+	default:
+		return ""
+	}
+}
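Pulling the pieces together: the new StartWriter drains the channel, buffers every WriteRequest, and only serializes once the channel closes, so a caller must close writeCh and then wait on done. A rough sketch of that wiring follows; the import path, output file name, channel buffer size, and prefix/suffix strings are assumptions for illustration, not part of this commit.

package main

import (
	"os"
	"path/filepath"
	"sync"

	"github.com/ivuorinen/gibidify/fileproc" // assumed import path
)

func main() {
	// Hypothetical root directory to bundle; made absolute so that
	// ProcessFile's filepath.Rel(rootPath, filePath) call succeeds.
	root, err := filepath.Abs(".")
	if err != nil {
		panic(err)
	}

	// Collect the files the walker did not filter out.
	walker := fileproc.ProdWalker{}
	files, err := walker.Walk(root)
	if err != nil {
		panic(err)
	}

	outFile, err := os.Create("output.json") // hypothetical destination
	if err != nil {
		panic(err)
	}
	defer outFile.Close()

	writeCh := make(chan fileproc.WriteRequest, 64)
	done := make(chan struct{})
	go fileproc.StartWriter(outFile, writeCh, done, "json", "My prefix", "My suffix")

	// Fan out the reads; each ProcessFile call sends one WriteRequest.
	var wg sync.WaitGroup
	for _, f := range files {
		wg.Add(1)
		go func(path string) {
			defer wg.Done()
			fileproc.ProcessFile(path, writeCh, root)
		}(f)
	}
	wg.Wait()

	// StartWriter only serializes and writes after the channel is closed.
	close(writeCh)
	<-done
}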
@@ -1,31 +1,45 @@
 package fileproc

 import (
-	"bytes"
-	"sync"
+	"encoding/json"
+	"os"
 	"testing"
 )

-func TestStartWriter(t *testing.T) {
-	var buf bytes.Buffer
+func TestStartWriter_JSONOutput(t *testing.T) {
+	outFile, err := os.CreateTemp("", "output.json")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer func(name string) {
+		err := os.Remove(name)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}(outFile.Name())

 	writeCh := make(chan WriteRequest)
 	done := make(chan struct{})

-	go StartWriter(&buf, writeCh, done)
+	go StartWriter(outFile, writeCh, done, "json", "Prefix", "Suffix")

-	var wg sync.WaitGroup
-	wg.Add(1)
-	go func() {
-		defer wg.Done()
-		writeCh <- WriteRequest{Content: "Hello"}
-		writeCh <- WriteRequest{Content: " World"}
-	}()
-	wg.Wait()
+	writeCh <- WriteRequest{Path: "file1.go", Content: "package main"}
+	writeCh <- WriteRequest{Path: "file2.py", Content: "def hello(): print('Hello')"}
+
 	close(writeCh)
 	<-done

-	if buf.String() != "Hello World" {
-		t.Errorf("Expected 'Hello World', got '%s'", buf.String())
+	data, err := os.ReadFile(outFile.Name())
+	if err != nil {
+		t.Fatal(err)
 	}
+
+	var output OutputData
+	if err := json.Unmarshal(data, &output); err != nil {
+		t.Fatalf("JSON output is invalid: %v", err)
+	}
+
+	if len(output.Files) != 2 {
+		t.Errorf("Expected 2 files, got %d", len(output.Files))
+	}
 }
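For orientation (not part of the diff), the JSON this test reads back should look roughly like the following, given the single-space indent passed to MarshalIndent and the two requests sent above:

{
 "prefix": "Prefix",
 "files": [
  {
   "path": "file1.go",
   "content": "package main"
  },
  {
   "path": "file2.py",
   "content": "def hello(): print('Hello')"
  }
 ],
 "suffix": "Suffix"
}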