feat: many features, check TODO.md

This commit is contained in:
2025-07-19 00:45:21 +03:00
parent 3556b06bb9
commit e35126856d
50 changed files with 6996 additions and 674 deletions

196
fileproc/backpressure.go Normal file
View File

@@ -0,0 +1,196 @@
// Package fileproc provides back-pressure management for memory optimization.
package fileproc
import (
"context"
"runtime"
"sync"
"sync/atomic"
"time"
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/config"
)
// BackpressureManager manages memory usage and applies back-pressure when needed.
// All tunables are read from configuration once at construction time
// (see NewBackpressureManager); the struct is safe for concurrent use.
type BackpressureManager struct {
	enabled             bool  // feature flag from config; all methods no-op when false
	maxMemoryUsage      int64 // memory ceiling in bytes before back-pressure is applied
	memoryCheckInterval int   // sample memory usage every N processed files
	maxPendingFiles     int   // buffer capacity for the file channel
	maxPendingWrites    int   // buffer capacity for the write channel
	filesProcessed      int64 // total files seen; accessed atomically, not under mu
	mu                  sync.RWMutex // guards memoryWarningLogged and lastMemoryCheck
	memoryWarningLogged bool      // true while usage is over the limit (rate-limits warnings)
	lastMemoryCheck     time.Time // timestamp of the most recent memory sample
}
// NewBackpressureManager builds a back-pressure manager, snapshotting all
// tunables from the global configuration at construction time.
func NewBackpressureManager() *BackpressureManager {
	bp := &BackpressureManager{
		enabled:             config.GetBackpressureEnabled(),
		maxMemoryUsage:      config.GetMaxMemoryUsage(),
		memoryCheckInterval: config.GetMemoryCheckInterval(),
		maxPendingFiles:     config.GetMaxPendingFiles(),
		maxPendingWrites:    config.GetMaxPendingWrites(),
	}
	bp.lastMemoryCheck = time.Now()
	return bp
}
// CreateChannels returns the file and write channels used by the pipeline.
// With back-pressure enabled the channels are buffered to the configured
// limits; otherwise they are unbuffered (the default behavior).
func (bp *BackpressureManager) CreateChannels() (chan string, chan WriteRequest) {
	if !bp.enabled {
		logrus.Debug("Created unbuffered channels (back-pressure disabled)")
		return make(chan string), make(chan WriteRequest)
	}
	fileCh := make(chan string, bp.maxPendingFiles)
	writeCh := make(chan WriteRequest, bp.maxPendingWrites)
	logrus.Debugf("Created buffered channels: files=%d, writes=%d", bp.maxPendingFiles, bp.maxPendingWrites)
	return fileCh, writeCh
}
// ShouldApplyBackpressure reports whether back-pressure should be applied.
// It samples actual memory usage only once every memoryCheckInterval
// processed files, keeping runtime.ReadMemStats off the per-file hot path.
// The ctx parameter is currently unused but kept for interface stability.
func (bp *BackpressureManager) ShouldApplyBackpressure(ctx context.Context) bool {
	if !bp.enabled {
		return false
	}
	filesProcessed := atomic.AddInt64(&bp.filesProcessed, 1)
	// Guard against a zero or negative interval coming from configuration:
	// the modulo below would otherwise panic with a division by zero.
	// A non-positive interval is treated as "check on every file".
	if bp.memoryCheckInterval > 0 && int(filesProcessed)%bp.memoryCheckInterval != 0 {
		return false
	}
	// Get current memory usage.
	var m runtime.MemStats
	runtime.ReadMemStats(&m)
	currentMemory := int64(m.Alloc)
	bp.mu.Lock()
	defer bp.mu.Unlock()
	bp.lastMemoryCheck = time.Now()
	// Over the limit: warn once, then stay silent until usage normalizes.
	if currentMemory > bp.maxMemoryUsage {
		if !bp.memoryWarningLogged {
			logrus.Warnf("Memory usage (%d bytes) exceeds limit (%d bytes), applying back-pressure",
				currentMemory, bp.maxMemoryUsage)
			bp.memoryWarningLogged = true
		}
		return true
	}
	// Reset the warning flag only once usage drops below 80% of the limit,
	// providing hysteresis so we don't flap around the threshold.
	if bp.memoryWarningLogged && currentMemory < bp.maxMemoryUsage*8/10 {
		logrus.Infof("Memory usage normalized (%d bytes), removing back-pressure", currentMemory)
		bp.memoryWarningLogged = false
	}
	return false
}
// ApplyBackpressure forces a garbage-collection cycle and pauses briefly so
// freed memory can be reclaimed before more work is scheduled. The pause
// honors ctx cancellation.
func (bp *BackpressureManager) ApplyBackpressure(ctx context.Context) {
	if !bp.enabled {
		return
	}
	runtime.GC()
	// Short pause to let the collector finish its work.
	select {
	case <-time.After(10 * time.Millisecond):
	case <-ctx.Done():
		return
	}
	var stats runtime.MemStats
	runtime.ReadMemStats(&stats)
	logrus.Debugf("Applied back-pressure: memory after GC = %d bytes", stats.Alloc)
}
// GetStats returns a point-in-time snapshot of back-pressure state and the
// process's current heap allocation.
func (bp *BackpressureManager) GetStats() BackpressureStats {
	var mem runtime.MemStats
	runtime.ReadMemStats(&mem)
	bp.mu.RLock()
	defer bp.mu.RUnlock()
	return BackpressureStats{
		Enabled:             bp.enabled,
		FilesProcessed:      atomic.LoadInt64(&bp.filesProcessed),
		CurrentMemoryUsage:  int64(mem.Alloc),
		MaxMemoryUsage:      bp.maxMemoryUsage,
		MemoryWarningActive: bp.memoryWarningLogged,
		LastMemoryCheck:     bp.lastMemoryCheck,
		MaxPendingFiles:     bp.maxPendingFiles,
		MaxPendingWrites:    bp.maxPendingWrites,
	}
}
// BackpressureStats represents back-pressure manager statistics.
// It is a read-only snapshot produced by GetStats and is JSON-serializable.
type BackpressureStats struct {
	Enabled             bool      `json:"enabled"`               // whether back-pressure is active
	FilesProcessed      int64     `json:"files_processed"`       // total files counted so far
	CurrentMemoryUsage  int64     `json:"current_memory_usage"`  // heap bytes allocated at snapshot time
	MaxMemoryUsage      int64     `json:"max_memory_usage"`      // configured memory ceiling in bytes
	MemoryWarningActive bool      `json:"memory_warning_active"` // true while usage exceeds the limit
	LastMemoryCheck     time.Time `json:"last_memory_check"`     // timestamp of the latest memory sample
	MaxPendingFiles     int       `json:"max_pending_files"`     // file channel buffer capacity
	MaxPendingWrites    int       `json:"max_pending_writes"`    // write channel buffer capacity
}
// WaitForChannelSpace briefly blocks when either pipeline channel is nearly
// full (above 90% of capacity), giving consumers a chance to drain. It
// returns immediately when back-pressure is disabled or ctx is cancelled.
func (bp *BackpressureManager) WaitForChannelSpace(ctx context.Context, fileCh chan string, writeCh chan WriteRequest) {
	if !bp.enabled {
		return
	}
	bp.waitIfNearlyFull(ctx, "File", len(fileCh), bp.maxPendingFiles)
	bp.waitIfNearlyFull(ctx, "Write", len(writeCh), bp.maxPendingWrites)
}

// waitIfNearlyFull sleeps for a short interval when used exceeds 90% of
// capacity. A non-positive capacity (unbuffered channel or misconfiguration)
// is ignored — the original code would divide by zero in the percentage log.
func (bp *BackpressureManager) waitIfNearlyFull(ctx context.Context, name string, used, capacity int) {
	if capacity <= 0 || used <= capacity*9/10 {
		return
	}
	logrus.Debugf("%s channel is %d%% full, waiting for space", name, used*100/capacity)
	// Wait a bit for the channel to drain.
	select {
	case <-ctx.Done():
	case <-time.After(5 * time.Millisecond):
	}
}
// LogBackpressureInfo logs the active back-pressure configuration, or a
// one-line notice when the feature is disabled.
func (bp *BackpressureManager) LogBackpressureInfo() {
	if !bp.enabled {
		logrus.Info("Back-pressure disabled")
		return
	}
	logrus.Infof("Back-pressure enabled: maxMemory=%dMB, fileBuffer=%d, writeBuffer=%d, checkInterval=%d",
		bp.maxMemoryUsage/1024/1024, bp.maxPendingFiles, bp.maxPendingWrites, bp.memoryCheckInterval)
}

127
fileproc/cache.go Normal file
View File

@@ -0,0 +1,127 @@
package fileproc
// getNormalizedExtension returns the normalized (lower-cased) extension for
// filename, memoizing the result keyed by the full filename. The cache is
// bounded: once it reaches twice maxCacheSize it is shrunk via clearExtCache.
func (r *FileTypeRegistry) getNormalizedExtension(filename string) string {
	// Fast path: read-locked cache lookup.
	r.cacheMutex.RLock()
	ext, ok := r.extCache[filename]
	r.cacheMutex.RUnlock()
	if ok {
		return ext
	}
	// Compute outside any lock; normalizeExtension is pure.
	ext = normalizeExtension(filename)
	r.cacheMutex.Lock()
	defer r.cacheMutex.Unlock()
	// Double-check under the write lock: another goroutine may have cached
	// this filename (and possibly triggered an eviction) between the read
	// unlock above and acquiring the write lock. The original code skipped
	// this check, causing redundant inserts and spurious eviction cycles.
	if cached, exists := r.extCache[filename]; exists {
		return cached
	}
	if len(r.extCache) >= r.maxCacheSize*2 {
		r.clearExtCache()
		r.stats.CacheEvictions++
	}
	r.extCache[filename] = ext
	return ext
}
// getFileTypeResult returns the cached detection result for filename's
// extension, computing and caching it on a miss. Filename-specific special
// cases (handled by isSpecialFile) are applied AFTER the cache step, on the
// local copy only: the original code cached the overridden result under the
// extension key, which poisoned the entry for every other file sharing that
// extension (e.g. an extension-less special file marking "" as binary).
func (r *FileTypeRegistry) getFileTypeResult(filename string) FileTypeResult {
	ext := r.getNormalizedExtension(filename)
	// Count every lookup.
	r.updateStats(func() {
		r.stats.TotalLookups++
	})
	// Fast path: read-locked cache lookup keyed by extension.
	r.cacheMutex.RLock()
	result, exists := r.resultCache[ext]
	r.cacheMutex.RUnlock()
	if exists {
		r.updateStats(func() {
			r.stats.CacheHits++
		})
	} else {
		r.updateStats(func() {
			r.stats.CacheMisses++
		})
		// Compute only the extension-derived portion of the result.
		result = FileTypeResult{
			Extension: ext,
			IsImage:   r.imageExts[ext],
			IsBinary:  r.binaryExts[ext],
			Language:  r.languageMap[ext],
		}
		// Cache the extension-derived result (write lock), evicting first
		// if the cache is full.
		r.cacheMutex.Lock()
		if len(r.resultCache) >= r.maxCacheSize {
			r.clearResultCache()
			r.stats.CacheEvictions++
		}
		r.resultCache[ext] = result
		r.cacheMutex.Unlock()
	}
	// Apply the filename-specific binary override (like .DS_Store) to the
	// returned copy only — never to the shared cached entry.
	if !result.IsBinary && isSpecialFile(filename, r.binaryExts) {
		result.IsBinary = true
	}
	return result
}
// clearExtCache shrinks the extension cache, retaining at most half of
// maxCacheSize entries. Survivors are chosen by Go's random map iteration
// order — this is cheap bounded eviction, not true LRU.
func (r *FileTypeRegistry) clearExtCache() {
	r.clearCache(&r.extCache, r.maxCacheSize)
}
// clearResultCache shrinks the result cache, retaining at most half of
// maxCacheSize entries (map iteration order makes the survivors arbitrary).
func (r *FileTypeRegistry) clearResultCache() {
	keep := r.maxCacheSize / 2
	trimmed := make(map[string]FileTypeResult, r.maxCacheSize)
	for ext, res := range r.resultCache {
		if len(trimmed) >= keep {
			break
		}
		trimmed[ext] = res
	}
	r.resultCache = trimmed
}
// clearCache replaces *cache with a new map holding at most maxSize/2 of the
// old entries, picked in (random) map iteration order.
func (r *FileTypeRegistry) clearCache(cache *map[string]string, maxSize int) {
	keep := maxSize / 2
	trimmed := make(map[string]string, maxSize)
	for key, val := range *cache {
		if len(trimmed) >= keep {
			break
		}
		trimmed[key] = val
	}
	*cache = trimmed
}
// invalidateCache drops both lookup caches so stale detection results are
// not served after the registry's extension maps are modified. The whole
// flush is counted as a single cache eviction in the stats.
func (r *FileTypeRegistry) invalidateCache() {
	r.cacheMutex.Lock()
	defer r.cacheMutex.Unlock()
	r.extCache = make(map[string]string, r.maxCacheSize)
	r.resultCache = make(map[string]FileTypeResult, r.maxCacheSize)
	r.stats.CacheEvictions++
}
// updateStats runs fn while holding the cache write lock so statistics
// mutations are race-free. fn must not touch the caches or re-lock the mutex.
func (r *FileTypeRegistry) updateStats(fn func()) {
	r.cacheMutex.Lock()
	defer r.cacheMutex.Unlock()
	fn()
}

View File

@@ -4,6 +4,6 @@ package fileproc
// CollectFiles scans the given root directory using the default walker (ProdWalker)
// and returns a slice of file paths. Any error from the underlying walk is
// returned unchanged to the caller.
func CollectFiles(root string) ([]string, error) {
	w := NewProdWalker()
	return w.Walk(root)
}

View File

@@ -4,7 +4,7 @@ import (
"os"
"testing"
fileproc "github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/fileproc"
)
func TestCollectFilesWithFakeWalker(t *testing.T) {

40
fileproc/config.go Normal file
View File

@@ -0,0 +1,40 @@
package fileproc
import "strings"
// ApplyCustomExtensions merges user-configured image extensions, binary
// extensions, and language mappings into the registry. Empty extension or
// language values are silently skipped; all keys are lower-cased.
func (r *FileTypeRegistry) ApplyCustomExtensions(customImages, customBinary []string, customLanguages map[string]string) {
	r.addExtensions(customImages, r.AddImageExtension)
	r.addExtensions(customBinary, r.AddBinaryExtension)
	for ext, lang := range customLanguages {
		if ext == "" || lang == "" {
			continue
		}
		r.AddLanguageMapping(strings.ToLower(ext), lang)
	}
}
// addExtensions feeds every non-empty extension, lower-cased, to adder.
func (r *FileTypeRegistry) addExtensions(extensions []string, adder func(string)) {
	for _, ext := range extensions {
		if ext == "" {
			continue
		}
		adder(strings.ToLower(ext))
	}
}
// ConfigureFromSettings applies configuration settings to the registry.
// This function is called from main.go after config is loaded to avoid circular imports.
// Custom additions are applied first, then disabled extensions are removed,
// so a key listed in both ends up disabled.
func ConfigureFromSettings(
	customImages, customBinary []string,
	customLanguages map[string]string,
	disabledImages, disabledBinary, disabledLanguages []string,
) {
	registry := GetDefaultRegistry()
	registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
	registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages)
}

99
fileproc/detection.go Normal file
View File

@@ -0,0 +1,99 @@
package fileproc
import "strings"
// Package-level detection functions. Each delegates to the process-wide
// default registry (see getRegistry), so results reflect any configuration
// applied via ConfigureFromSettings.

// IsImage checks if the file extension indicates an image file.
func IsImage(filename string) bool {
	return getRegistry().IsImage(filename)
}

// IsBinary checks if the file extension indicates a binary file.
func IsBinary(filename string) bool {
	return getRegistry().IsBinary(filename)
}

// GetLanguage returns the language identifier for the given filename based on its extension.
func GetLanguage(filename string) string {
	return getRegistry().GetLanguage(filename)
}
// Registry methods for detection. Both go through the cached
// getFileTypeResult lookup.

// IsImage checks if the file extension indicates an image file.
func (r *FileTypeRegistry) IsImage(filename string) bool {
	result := r.getFileTypeResult(filename)
	return result.IsImage
}

// IsBinary checks if the file extension indicates a binary file.
func (r *FileTypeRegistry) IsBinary(filename string) bool {
	result := r.getFileTypeResult(filename)
	return result.IsBinary
}
// GetLanguage returns the language identifier for the given filename based on its extension.
// Unlike IsImage/IsBinary, filenames shorter than minExtensionLength are
// rejected up front and map to the empty string.
func (r *FileTypeRegistry) GetLanguage(filename string) string {
	if len(filename) < minExtensionLength {
		return ""
	}
	result := r.getFileTypeResult(filename)
	return result.Language
}
// Extension management methods.
// NOTE(review): the map writes below are not guarded by cacheMutex (only the
// subsequent cache invalidation is). This appears safe only if configuration
// happens before concurrent lookups begin — confirm against callers.

// AddImageExtension adds a new image extension to the registry.
// The extension is lower-cased and should include the leading dot.
func (r *FileTypeRegistry) AddImageExtension(ext string) {
	r.addExtension(ext, r.imageExts)
}

// AddBinaryExtension adds a new binary extension to the registry.
// The extension is lower-cased and should include the leading dot.
func (r *FileTypeRegistry) AddBinaryExtension(ext string) {
	r.addExtension(ext, r.binaryExts)
}

// AddLanguageMapping adds a new language mapping to the registry and
// invalidates the lookup caches so the change takes effect immediately.
func (r *FileTypeRegistry) AddLanguageMapping(ext, language string) {
	r.languageMap[strings.ToLower(ext)] = language
	r.invalidateCache()
}

// addExtension is a helper to add a lower-cased extension to target and
// invalidate the lookup caches.
func (r *FileTypeRegistry) addExtension(ext string, target map[string]bool) {
	target[strings.ToLower(ext)] = true
	r.invalidateCache()
}
// removeExtension deletes a lower-cased extension from target. It does NOT
// invalidate the lookup caches — callers (e.g. DisableExtensions) are
// expected to invalidate once after a batch of removals.
func (r *FileTypeRegistry) removeExtension(ext string, target map[string]bool) {
	delete(target, strings.ToLower(ext))
}
// DisableExtensions removes the given image, binary, and language extensions
// from the registry, then invalidates the lookup caches exactly once.
// Empty strings in any list are ignored.
func (r *FileTypeRegistry) DisableExtensions(disabledImages, disabledBinary, disabledLanguages []string) {
	removeAll := func(exts []string, target map[string]bool) {
		for _, ext := range exts {
			if ext != "" {
				r.removeExtension(ext, target)
			}
		}
	}
	removeAll(disabledImages, r.imageExts)
	removeAll(disabledBinary, r.binaryExts)
	for _, ext := range disabledLanguages {
		if ext != "" {
			delete(r.languageMap, strings.ToLower(ext))
		}
	}
	// A single invalidation after all removals keeps cache churn minimal.
	r.invalidateCache()
}

161
fileproc/extensions.go Normal file
View File

@@ -0,0 +1,161 @@
package fileproc
// getImageExtensions returns the default set of image file extensions
// (lower-cased, including the leading dot).
func getImageExtensions() map[string]bool {
	exts := []string{
		".png", ".jpg", ".jpeg", ".gif", ".bmp",
		".tiff", ".tif", ".svg", ".webp", ".ico",
	}
	set := make(map[string]bool, len(exts))
	for _, ext := range exts {
		set[ext] = true
	}
	return set
}
// getBinaryExtensions returns the default set of binary file extensions
// (lower-cased, including the leading dot), grouped by category.
func getBinaryExtensions() map[string]bool {
	groups := [][]string{
		// Executables and libraries
		{".exe", ".dll", ".so", ".dylib", ".bin", ".o", ".a", ".lib"},
		// Compiled bytecode
		{".jar", ".class", ".pyc", ".pyo"},
		// Data files
		{".dat", ".db", ".sqlite", ".ds_store"},
		// Documents
		{".pdf"},
		// Archives
		{".zip", ".tar", ".gz", ".bz2", ".xz", ".7z", ".rar"},
		// Fonts
		{".ttf", ".otf", ".woff", ".woff2"},
		// Media files
		{".mp3", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm", ".ogg", ".wav", ".flac"},
	}
	set := make(map[string]bool, 38)
	for _, group := range groups {
		for _, ext := range group {
			set[ext] = true
		}
	}
	return set
}
// getLanguageMap returns the default mapping from lower-cased file
// extensions (including the leading dot) to language identifiers.
func getLanguageMap() map[string]string {
	m := make(map[string]string, 64)
	add := func(lang string, exts ...string) {
		for _, ext := range exts {
			m[ext] = lang
		}
	}
	// Systems programming
	add("go", ".go")
	add("c", ".c", ".h")
	add("cpp", ".cpp", ".hpp")
	add("rust", ".rs")
	// Scripting languages
	add("python", ".py")
	add("ruby", ".rb")
	add("perl", ".pl")
	add("lua", ".lua")
	add("php", ".php")
	// Web technologies
	add("javascript", ".js", ".jsx")
	add("typescript", ".ts", ".tsx")
	add("html", ".html", ".htm")
	add("css", ".css")
	add("scss", ".scss")
	add("sass", ".sass")
	add("less", ".less")
	add("vue", ".vue")
	// JVM languages
	add("java", ".java")
	add("scala", ".scala")
	add("kotlin", ".kt")
	add("clojure", ".clj")
	// .NET languages
	add("csharp", ".cs")
	add("vbnet", ".vb")
	add("fsharp", ".fs")
	// Apple platforms
	add("swift", ".swift")
	add("objc", ".m")
	add("objcpp", ".mm")
	// Shell scripts
	add("bash", ".sh", ".bash")
	add("zsh", ".zsh")
	add("fish", ".fish")
	add("powershell", ".ps1")
	add("batch", ".bat", ".cmd")
	// Data formats
	add("json", ".json")
	add("yaml", ".yaml", ".yml")
	add("toml", ".toml")
	add("xml", ".xml")
	add("sql", ".sql")
	// Documentation
	add("markdown", ".md")
	add("rst", ".rst")
	add("latex", ".tex")
	// Functional languages
	add("haskell", ".hs")
	add("ocaml", ".ml", ".mli")
	add("elm", ".elm")
	add("elixir", ".ex", ".exs")
	add("erlang", ".erl", ".hrl")
	// Other languages
	add("r", ".r")
	add("dart", ".dart")
	add("nim", ".nim", ".nims")
	return m
}

View File

@@ -3,8 +3,8 @@ package fileproc
// FakeWalker implements Walker for testing purposes.
type FakeWalker struct {
Files []string
Err error
Files []string
}
// Walk returns predetermined file paths or an error, depending on FakeWalker's configuration.

55
fileproc/file_filters.go Normal file
View File

@@ -0,0 +1,55 @@
package fileproc
import (
"os"
"github.com/ivuorinen/gibidify/config"
)
// FileFilter defines filtering criteria for files and directories.
// Values are snapshotted from configuration when the filter is created.
type FileFilter struct {
	ignoredDirs []string // directory names (not paths) to skip entirely
	sizeLimit   int64    // maximum file size in bytes; larger files are skipped
}

// NewFileFilter creates a new file filter with the current configuration.
func NewFileFilter() *FileFilter {
	return &FileFilter{
		ignoredDirs: config.GetIgnoredDirectories(),
		sizeLimit:   config.GetFileSizeLimit(),
	}
}
// shouldSkipEntry reports whether entry should be excluded from processing.
// Directories are checked against the ignored-directory list; files are
// checked against the size/type filter first and then the ignore rules.
func (f *FileFilter) shouldSkipEntry(entry os.DirEntry, fullPath string, rules []ignoreRule) bool {
	switch {
	case entry.IsDir():
		return f.shouldSkipDirectory(entry)
	case f.shouldSkipFile(entry, fullPath):
		return true
	default:
		return matchesIgnoreRules(fullPath, rules)
	}
}
// shouldSkipDirectory reports whether the entry's name appears in the
// configured ignored-directories list (exact name match, not path match).
func (f *FileFilter) shouldSkipDirectory(entry os.DirEntry) bool {
	name := entry.Name()
	for _, ignored := range f.ignoredDirs {
		if name == ignored {
			return true
		}
	}
	return false
}
// shouldSkipFile checks if a file should be skipped based on size limit and file type.
// Note: if entry.Info() fails, the size check is silently skipped and only
// the type filter applies.
func (f *FileFilter) shouldSkipFile(entry os.DirEntry, fullPath string) bool {
	// Check if file exceeds the configured size limit.
	if info, err := entry.Info(); err == nil && info.Size() > f.sizeLimit {
		return true
	}
	// Apply the default filter to ignore binary and image files.
	return IsBinary(fullPath) || IsImage(fullPath)
}

827
fileproc/filetypes_test.go Normal file
View File

@@ -0,0 +1,827 @@
package fileproc
import (
"fmt"
"sync"
"testing"
)
// TestFileTypeRegistry_ModificationMethods tests the modification methods of FileTypeRegistry.
func TestFileTypeRegistry_ModificationMethods(t *testing.T) {
// Create a new registry instance for testing
registry := &FileTypeRegistry{
imageExts: make(map[string]bool),
binaryExts: make(map[string]bool),
languageMap: make(map[string]string),
}
// Test AddImageExtension
t.Run("AddImageExtension", func(t *testing.T) {
// Add a new image extension
registry.AddImageExtension(".webp")
if !registry.IsImage("test.webp") {
t.Errorf("Expected .webp to be recognized as image after adding")
}
// Test case insensitive addition
registry.AddImageExtension(".AVIF")
if !registry.IsImage("test.avif") {
t.Errorf("Expected .avif to be recognized as image after adding .AVIF")
}
if !registry.IsImage("test.AVIF") {
t.Errorf("Expected .AVIF to be recognized as image")
}
// Test with dot prefix
registry.AddImageExtension("heic")
if registry.IsImage("test.heic") {
t.Errorf("Expected extension without dot to not work")
}
// Test with proper dot prefix
registry.AddImageExtension(".heic")
if !registry.IsImage("test.heic") {
t.Errorf("Expected .heic to be recognized as image")
}
})
// Test AddBinaryExtension
t.Run("AddBinaryExtension", func(t *testing.T) {
// Add a new binary extension
registry.AddBinaryExtension(".custom")
if !registry.IsBinary("test.custom") {
t.Errorf("Expected .custom to be recognized as binary after adding")
}
// Test case insensitive addition
registry.AddBinaryExtension(".NEWBIN")
if !registry.IsBinary("test.newbin") {
t.Errorf("Expected .newbin to be recognized as binary after adding .NEWBIN")
}
if !registry.IsBinary("test.NEWBIN") {
t.Errorf("Expected .NEWBIN to be recognized as binary")
}
// Test overwriting existing extension
registry.AddBinaryExtension(".custom")
if !registry.IsBinary("test.custom") {
t.Errorf("Expected .custom to still be recognized as binary after re-adding")
}
})
// Test AddLanguageMapping
t.Run("AddLanguageMapping", func(t *testing.T) {
// Add a new language mapping
registry.AddLanguageMapping(".zig", "zig")
if registry.GetLanguage("test.zig") != "zig" {
t.Errorf("Expected .zig to map to 'zig', got '%s'", registry.GetLanguage("test.zig"))
}
// Test case insensitive addition
registry.AddLanguageMapping(".V", "vlang")
if registry.GetLanguage("test.v") != "vlang" {
t.Errorf("Expected .v to map to 'vlang' after adding .V, got '%s'", registry.GetLanguage("test.v"))
}
if registry.GetLanguage("test.V") != "vlang" {
t.Errorf("Expected .V to map to 'vlang', got '%s'", registry.GetLanguage("test.V"))
}
// Test overwriting existing mapping
registry.AddLanguageMapping(".zig", "ziglang")
if registry.GetLanguage("test.zig") != "ziglang" {
t.Errorf("Expected .zig to map to 'ziglang' after update, got '%s'", registry.GetLanguage("test.zig"))
}
// Test empty language
registry.AddLanguageMapping(".empty", "")
if registry.GetLanguage("test.empty") != "" {
t.Errorf("Expected .empty to map to empty string, got '%s'", registry.GetLanguage("test.empty"))
}
})
}
// TestFileTypeRegistry_LanguageDetection tests the language detection functionality.
func TestFileTypeRegistry_LanguageDetection(t *testing.T) {
registry := GetDefaultRegistry()
tests := []struct {
filename string
expected string
}{
// Programming languages
{"main.go", "go"},
{"script.py", "python"},
{"app.js", "javascript"},
{"component.tsx", "typescript"},
{"service.ts", "typescript"},
{"App.java", "java"},
{"program.c", "c"},
{"program.cpp", "cpp"},
{"header.h", "c"},
{"header.hpp", "cpp"},
{"main.rs", "rust"},
{"script.rb", "ruby"},
{"index.php", "php"},
{"app.swift", "swift"},
{"MainActivity.kt", "kotlin"},
{"Main.scala", "scala"},
{"analysis.r", "r"},
{"ViewController.m", "objc"},
{"ViewController.mm", "objcpp"},
{"Program.cs", "csharp"},
{"Module.vb", "vbnet"},
{"program.fs", "fsharp"},
{"script.lua", "lua"},
{"script.pl", "perl"},
// Shell scripts
{"script.sh", "bash"},
{"script.bash", "bash"},
{"script.zsh", "zsh"},
{"script.fish", "fish"},
{"script.ps1", "powershell"},
{"script.bat", "batch"},
{"script.cmd", "batch"},
// Data and markup
{"query.sql", "sql"},
{"index.html", "html"},
{"page.htm", "html"},
{"data.xml", "xml"},
{"style.css", "css"},
{"style.scss", "scss"},
{"style.sass", "sass"},
{"style.less", "less"},
{"data.json", "json"},
{"config.yaml", "yaml"},
{"config.yml", "yaml"},
{"config.toml", "toml"},
{"README.md", "markdown"},
{"doc.rst", "rst"},
{"paper.tex", "latex"},
// Modern languages
{"main.dart", "dart"},
{"Main.elm", "elm"},
{"core.clj", "clojure"},
{"server.ex", "elixir"},
{"test.exs", "elixir"},
{"server.erl", "erlang"},
{"header.hrl", "erlang"},
{"main.hs", "haskell"},
{"module.ml", "ocaml"},
{"interface.mli", "ocaml"},
{"main.nim", "nim"},
{"config.nims", "nim"},
// Web frameworks
{"Component.vue", "vue"},
{"Component.jsx", "javascript"},
// Case sensitivity tests
{"MAIN.GO", "go"},
{"Script.PY", "python"},
{"APP.JS", "javascript"},
// Edge cases
{"", ""}, // Empty filename
{"a", ""}, // Too short (less than minExtensionLength)
{"noext", ""}, // No extension
{".hidden", ""}, // Hidden file with no name
{"file.", ""}, // Extension is just a dot
{"file.unknown", ""}, // Unknown extension
{"file.123", ""}, // Numeric extension
{"a.b", ""}, // Very short filename and extension
}
for _, tt := range tests {
t.Run(tt.filename, func(t *testing.T) {
result := registry.GetLanguage(tt.filename)
if result != tt.expected {
t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected)
}
})
}
}
// TestFileTypeRegistry_ImageDetection tests the image detection functionality.
func TestFileTypeRegistry_ImageDetection(t *testing.T) {
registry := GetDefaultRegistry()
tests := []struct {
filename string
expected bool
}{
// Common image formats
{"photo.png", true},
{"image.jpg", true},
{"picture.jpeg", true},
{"animation.gif", true},
{"bitmap.bmp", true},
{"image.tiff", true},
{"scan.tif", true},
{"vector.svg", true},
{"modern.webp", true},
{"favicon.ico", true},
// Case sensitivity tests
{"PHOTO.PNG", true},
{"IMAGE.JPG", true},
{"PICTURE.JPEG", true},
// Non-image files
{"document.txt", false},
{"script.js", false},
{"data.json", false},
{"archive.zip", false},
{"executable.exe", false},
// Edge cases
{"", false}, // Empty filename
{"image", false}, // No extension
{".png", true}, // Just extension
{"file.png.bak", false}, // Multiple extensions
{"image.unknown", false}, // Unknown extension
}
for _, tt := range tests {
t.Run(tt.filename, func(t *testing.T) {
result := registry.IsImage(tt.filename)
if result != tt.expected {
t.Errorf("IsImage(%q) = %t, expected %t", tt.filename, result, tt.expected)
}
})
}
}
// TestFileTypeRegistry_BinaryDetection tests the binary detection functionality.
func TestFileTypeRegistry_BinaryDetection(t *testing.T) {
registry := GetDefaultRegistry()
tests := []struct {
filename string
expected bool
}{
// Executable files
{"program.exe", true},
{"library.dll", true},
{"libfoo.so", true},
{"framework.dylib", true},
{"data.bin", true},
// Object and library files
{"object.o", true},
{"archive.a", true},
{"library.lib", true},
{"application.jar", true},
{"bytecode.class", true},
{"compiled.pyc", true},
{"optimized.pyo", true},
// System files
{".DS_Store", true},
// Document files (treated as binary)
{"document.pdf", true},
// Archive files
{"archive.zip", true},
{"backup.tar", true},
{"compressed.gz", true},
{"data.bz2", true},
{"package.xz", true},
{"archive.7z", true},
{"backup.rar", true},
// Font files
{"font.ttf", true},
{"font.otf", true},
{"font.woff", true},
{"font.woff2", true},
// Media files
{"song.mp3", true},
{"video.mp4", true},
{"movie.avi", true},
{"clip.mov", true},
{"video.wmv", true},
{"animation.flv", true},
{"modern.webm", true},
{"audio.ogg", true},
{"sound.wav", true},
{"music.flac", true},
// Database files
{"data.dat", true},
{"database.db", true},
{"app.sqlite", true},
// Case sensitivity tests
{"PROGRAM.EXE", true},
{"LIBRARY.DLL", true},
// Non-binary files
{"document.txt", false},
{"script.js", false},
{"data.json", false},
{"style.css", false},
{"page.html", false},
// Edge cases
{"", false}, // Empty filename
{"binary", false}, // No extension
{".exe", true}, // Just extension
{"file.exe.bak", false}, // Multiple extensions
{"file.unknown", false}, // Unknown extension
}
for _, tt := range tests {
t.Run(tt.filename, func(t *testing.T) {
result := registry.IsBinary(tt.filename)
if result != tt.expected {
t.Errorf("IsBinary(%q) = %t, expected %t", tt.filename, result, tt.expected)
}
})
}
}
// TestFileTypeRegistry_DefaultRegistryConsistency verifies that the default
// registry is a process-wide singleton and that the package-level helper
// functions delegate to that same instance.
func TestFileTypeRegistry_DefaultRegistryConsistency(t *testing.T) {
	// Get registry multiple times and ensure it's the same instance
	registry1 := GetDefaultRegistry()
	registry2 := GetDefaultRegistry()
	registry3 := getRegistry()
	if registry1 != registry2 {
		t.Error("GetDefaultRegistry() should return the same instance")
	}
	if registry1 != registry3 {
		t.Error("getRegistry() should return the same instance as GetDefaultRegistry()")
	}
	// Test that global functions use the same registry
	filename := "test.go"
	if IsImage(filename) != registry1.IsImage(filename) {
		t.Error("IsImage() global function should match registry method")
	}
	if IsBinary(filename) != registry1.IsBinary(filename) {
		t.Error("IsBinary() global function should match registry method")
	}
	if GetLanguage(filename) != registry1.GetLanguage(filename) {
		t.Error("GetLanguage() global function should match registry method")
	}
}
// TestFileTypeRegistry_ThreadSafety tests the thread safety of the FileTypeRegistry.
func TestFileTypeRegistry_ThreadSafety(t *testing.T) {
const numGoroutines = 100
const numOperationsPerGoroutine = 100
var wg sync.WaitGroup
// Test concurrent read operations
t.Run("ConcurrentReads", func(t *testing.T) {
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
registry := GetDefaultRegistry()
for j := 0; j < numOperationsPerGoroutine; j++ {
// Test various file detection operations
_ = registry.IsImage("test.png")
_ = registry.IsBinary("test.exe")
_ = registry.GetLanguage("test.go")
// Test global functions too
_ = IsImage("image.jpg")
_ = IsBinary("binary.dll")
_ = GetLanguage("script.py")
}
}(i)
}
wg.Wait()
})
// Test concurrent registry access (singleton creation)
t.Run("ConcurrentRegistryAccess", func(t *testing.T) {
// Reset the registry to test concurrent initialization
// Note: This is not safe in a real application, but needed for testing
registryOnce = sync.Once{}
registry = nil
registries := make([]*FileTypeRegistry, numGoroutines)
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
registries[id] = GetDefaultRegistry()
}(i)
}
wg.Wait()
// Verify all goroutines got the same registry instance
firstRegistry := registries[0]
for i := 1; i < numGoroutines; i++ {
if registries[i] != firstRegistry {
t.Errorf("Registry %d is different from registry 0", i)
}
}
})
// Test concurrent modifications on separate registry instances
t.Run("ConcurrentModifications", func(t *testing.T) {
// Create separate registry instances for each goroutine to test modification thread safety
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
// Create a new registry instance for this goroutine
registry := &FileTypeRegistry{
imageExts: make(map[string]bool),
binaryExts: make(map[string]bool),
languageMap: make(map[string]string),
}
for j := 0; j < numOperationsPerGoroutine; j++ {
// Add unique extensions for this goroutine
extSuffix := fmt.Sprintf("_%d_%d", id, j)
registry.AddImageExtension(".img" + extSuffix)
registry.AddBinaryExtension(".bin" + extSuffix)
registry.AddLanguageMapping(".lang"+extSuffix, "lang"+extSuffix)
// Verify the additions worked
if !registry.IsImage("test.img" + extSuffix) {
t.Errorf("Failed to add image extension .img%s", extSuffix)
}
if !registry.IsBinary("test.bin" + extSuffix) {
t.Errorf("Failed to add binary extension .bin%s", extSuffix)
}
if registry.GetLanguage("test.lang"+extSuffix) != "lang"+extSuffix {
t.Errorf("Failed to add language mapping .lang%s", extSuffix)
}
}
}(i)
}
wg.Wait()
})
}
// TestFileTypeRegistry_EdgeCases tests edge cases and boundary conditions.
// It only asserts that no input panics the detection functions; it makes no
// claim about the returned values.
func TestFileTypeRegistry_EdgeCases(t *testing.T) {
	registry := GetDefaultRegistry()
	// Test various edge cases for filename handling
	edgeCases := []struct {
		name     string
		filename string
		desc     string
	}{
		{"empty", "", "empty filename"},
		{"single_char", "a", "single character filename"},
		{"just_dot", ".", "just a dot"},
		{"double_dot", "..", "double dot"},
		{"hidden_file", ".hidden", "hidden file"},
		{"hidden_with_ext", ".hidden.txt", "hidden file with extension"},
		{"multiple_dots", "file.tar.gz", "multiple extensions"},
		{"trailing_dot", "file.", "trailing dot"},
		{"unicode", "файл.txt", "unicode filename"},
		{"spaces", "my file.txt", "filename with spaces"},
		{"special_chars", "file@#$.txt", "filename with special characters"},
		{"very_long", "very_long_filename_with_many_characters_in_it.extension", "very long filename"},
		{"no_basename", ".gitignore", "dotfile with no basename"},
		{"case_mixed", "FiLe.ExT", "mixed case"},
	}
	for _, tc := range edgeCases {
		t.Run(tc.name, func(t *testing.T) {
			// These should not panic
			_ = registry.IsImage(tc.filename)
			_ = registry.IsBinary(tc.filename)
			_ = registry.GetLanguage(tc.filename)
			// Global functions should also not panic
			_ = IsImage(tc.filename)
			_ = IsBinary(tc.filename)
			_ = GetLanguage(tc.filename)
		})
	}
}
// TestFileTypeRegistry_MinimumExtensionLength tests the minimum extension
// length requirement enforced by GetLanguage: filenames shorter than
// minExtensionLength always map to "".
func TestFileTypeRegistry_MinimumExtensionLength(t *testing.T) {
	registry := GetDefaultRegistry()
	tests := []struct {
		filename string
		expected string
	}{
		{"", ""},             // Empty filename
		{"a", ""},            // Single character (less than minExtensionLength)
		{"ab", ""},           // Two characters, no extension
		{"a.b", ""},          // Extension too short, but filename too short anyway
		{"ab.c", "c"},        // Valid: filename >= minExtensionLength and .c is valid extension
		{"a.go", "go"},       // Valid extension
		{"ab.py", "python"},  // Valid extension
		{"a.unknown", ""},    // Valid length but unknown extension
	}
	for _, tt := range tests {
		t.Run(tt.filename, func(t *testing.T) {
			result := registry.GetLanguage(tt.filename)
			if result != tt.expected {
				t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected)
			}
		})
	}
}
// BenchmarkFileTypeRegistry tests performance of the registry operations.
// BenchmarkFileTypeRegistry_IsImage measures image-extension lookups on a registry instance.
func BenchmarkFileTypeRegistry_IsImage(b *testing.B) {
	registry := GetDefaultRegistry()
	filename := "test.png"
	b.ResetTimer() // exclude registry initialization from the measurement
	for i := 0; i < b.N; i++ {
		_ = registry.IsImage(filename)
	}
}
// BenchmarkFileTypeRegistry_IsBinary measures binary-extension lookups on a registry instance.
func BenchmarkFileTypeRegistry_IsBinary(b *testing.B) {
	registry := GetDefaultRegistry()
	filename := "test.exe"
	b.ResetTimer() // exclude registry initialization from the measurement
	for i := 0; i < b.N; i++ {
		_ = registry.IsBinary(filename)
	}
}
// BenchmarkFileTypeRegistry_GetLanguage measures language-mapping lookups on a registry instance.
func BenchmarkFileTypeRegistry_GetLanguage(b *testing.B) {
	registry := GetDefaultRegistry()
	filename := "test.go"
	b.ResetTimer() // exclude registry initialization from the measurement
	for i := 0; i < b.N; i++ {
		_ = registry.GetLanguage(filename)
	}
}
// BenchmarkFileTypeRegistry_GlobalFunctions measures the package-level wrappers,
// which include the cost of resolving the singleton registry on each call.
func BenchmarkFileTypeRegistry_GlobalFunctions(b *testing.B) {
	filename := "test.go"
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = IsImage(filename)
		_ = IsBinary(filename)
		_ = GetLanguage(filename)
	}
}
// BenchmarkFileTypeRegistry_ConcurrentAccess exercises the package-level helpers
// from multiple goroutines via RunParallel to surface lock contention in the registry.
func BenchmarkFileTypeRegistry_ConcurrentAccess(b *testing.B) {
	filename := "test.go"
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			_ = IsImage(filename)
			_ = IsBinary(filename)
			_ = GetLanguage(filename)
		}
	})
}
// TestFileTypeRegistry_Configuration tests the configuration functionality.
// The first two subtests share one registry instance, so DisableExtensions
// runs against state set up by ApplyCustomExtensions plus its own additions;
// the later subtests build fresh local registries.
func TestFileTypeRegistry_Configuration(t *testing.T) {
	// Create a new registry instance for testing.
	// Note: cache fields are left nil on purpose; only the lookup maps are needed here.
	registry := &FileTypeRegistry{
		imageExts:   make(map[string]bool),
		binaryExts:  make(map[string]bool),
		languageMap: make(map[string]string),
	}
	// Test ApplyCustomExtensions
	t.Run("ApplyCustomExtensions", func(t *testing.T) {
		customImages := []string{".webp", ".avif", ".heic"}
		customBinary := []string{".custom", ".mybin"}
		customLanguages := map[string]string{
			".zig":  "zig",
			".odin": "odin",
			".v":    "vlang",
		}
		registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
		// Test custom image extensions
		for _, ext := range customImages {
			if !registry.IsImage("test" + ext) {
				t.Errorf("Expected %s to be recognized as image", ext)
			}
		}
		// Test custom binary extensions
		for _, ext := range customBinary {
			if !registry.IsBinary("test" + ext) {
				t.Errorf("Expected %s to be recognized as binary", ext)
			}
		}
		// Test custom language mappings
		for ext, expectedLang := range customLanguages {
			if lang := registry.GetLanguage("test" + ext); lang != expectedLang {
				t.Errorf("Expected %s to map to %s, got %s", ext, expectedLang, lang)
			}
		}
	})
	// Test DisableExtensions
	t.Run("DisableExtensions", func(t *testing.T) {
		// Add some extensions first
		registry.AddImageExtension(".png")
		registry.AddImageExtension(".jpg")
		registry.AddBinaryExtension(".exe")
		registry.AddBinaryExtension(".dll")
		registry.AddLanguageMapping(".go", "go")
		registry.AddLanguageMapping(".py", "python")
		// Verify they work
		if !registry.IsImage("test.png") {
			t.Error("Expected .png to be image before disabling")
		}
		if !registry.IsBinary("test.exe") {
			t.Error("Expected .exe to be binary before disabling")
		}
		if registry.GetLanguage("test.go") != "go" {
			t.Error("Expected .go to map to go before disabling")
		}
		// Disable some extensions
		disabledImages := []string{".png"}
		disabledBinary := []string{".exe"}
		disabledLanguages := []string{".go"}
		registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages)
		// Test that disabled extensions no longer work
		if registry.IsImage("test.png") {
			t.Error("Expected .png to not be image after disabling")
		}
		if registry.IsBinary("test.exe") {
			t.Error("Expected .exe to not be binary after disabling")
		}
		if registry.GetLanguage("test.go") != "" {
			t.Error("Expected .go to not map to language after disabling")
		}
		// Test that non-disabled extensions still work
		if !registry.IsImage("test.jpg") {
			t.Error("Expected .jpg to still be image after disabling .png")
		}
		if !registry.IsBinary("test.dll") {
			t.Error("Expected .dll to still be binary after disabling .exe")
		}
		if registry.GetLanguage("test.py") != "python" {
			t.Error("Expected .py to still map to python after disabling .go")
		}
	})
	// Test empty values handling
	t.Run("EmptyValuesHandling", func(t *testing.T) {
		// Fresh local registry so results are independent of the subtests above.
		registry := &FileTypeRegistry{
			imageExts:   make(map[string]bool),
			binaryExts:  make(map[string]bool),
			languageMap: make(map[string]string),
		}
		// Test with empty values: empty extensions and empty language names
		// should be rejected by ApplyCustomExtensions.
		customImages := []string{"", ".valid", ""}
		customBinary := []string{"", ".valid"}
		customLanguages := map[string]string{
			"":       "invalid",
			".valid": "",
			".good":  "good",
		}
		registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
		// Only valid entries should be added
		if registry.IsImage("test.") {
			t.Error("Expected empty extension to not be added as image")
		}
		if !registry.IsImage("test.valid") {
			t.Error("Expected .valid to be added as image")
		}
		if registry.IsBinary("test.") {
			t.Error("Expected empty extension to not be added as binary")
		}
		if !registry.IsBinary("test.valid") {
			t.Error("Expected .valid to be added as binary")
		}
		if registry.GetLanguage("test.") != "" {
			t.Error("Expected empty extension to not be added as language")
		}
		if registry.GetLanguage("test.valid") != "" {
			t.Error("Expected .valid with empty language to not be added")
		}
		if registry.GetLanguage("test.good") != "good" {
			t.Error("Expected .good to map to good")
		}
	})
	// Test case insensitive handling
	t.Run("CaseInsensitiveHandling", func(t *testing.T) {
		// Fresh local registry; extensions are registered in mixed case and
		// looked up in both cases to verify normalization.
		registry := &FileTypeRegistry{
			imageExts:   make(map[string]bool),
			binaryExts:  make(map[string]bool),
			languageMap: make(map[string]string),
		}
		customImages := []string{".WEBP", ".Avif"}
		customBinary := []string{".CUSTOM", ".MyBin"}
		customLanguages := map[string]string{
			".ZIG":  "zig",
			".Odin": "odin",
		}
		registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
		// Test that both upper and lower case work
		if !registry.IsImage("test.webp") {
			t.Error("Expected .webp (lowercase) to work after adding .WEBP")
		}
		if !registry.IsImage("test.WEBP") {
			t.Error("Expected .WEBP (uppercase) to work")
		}
		if !registry.IsBinary("test.custom") {
			t.Error("Expected .custom (lowercase) to work after adding .CUSTOM")
		}
		if !registry.IsBinary("test.CUSTOM") {
			t.Error("Expected .CUSTOM (uppercase) to work")
		}
		if registry.GetLanguage("test.zig") != "zig" {
			t.Error("Expected .zig (lowercase) to work after adding .ZIG")
		}
		if registry.GetLanguage("test.ZIG") != "zig" {
			t.Error("Expected .ZIG (uppercase) to work")
		}
	})
}
// TestConfigureFromSettings tests the global configuration function.
func TestConfigureFromSettings(t *testing.T) {
// Reset registry to ensure clean state
registryOnce = sync.Once{}
registry = nil
// Test configuration application
customImages := []string{".webp", ".avif"}
customBinary := []string{".custom"}
customLanguages := map[string]string{".zig": "zig"}
disabledImages := []string{".gif"} // Disable default extension
disabledBinary := []string{".exe"} // Disable default extension
disabledLanguages := []string{".rb"} // Disable default extension
ConfigureFromSettings(
customImages,
customBinary,
customLanguages,
disabledImages,
disabledBinary,
disabledLanguages,
)
// Test that custom extensions work
if !IsImage("test.webp") {
t.Error("Expected custom image extension .webp to work")
}
if !IsBinary("test.custom") {
t.Error("Expected custom binary extension .custom to work")
}
if GetLanguage("test.zig") != "zig" {
t.Error("Expected custom language .zig to work")
}
// Test that disabled extensions don't work
if IsImage("test.gif") {
t.Error("Expected disabled image extension .gif to not work")
}
if IsBinary("test.exe") {
t.Error("Expected disabled binary extension .exe to not work")
}
if GetLanguage("test.rb") != "" {
t.Error("Expected disabled language extension .rb to not work")
}
// Test that non-disabled defaults still work
if !IsImage("test.png") {
t.Error("Expected non-disabled image extension .png to still work")
}
if !IsBinary("test.dll") {
t.Error("Expected non-disabled binary extension .dll to still work")
}
if GetLanguage("test.go") != "go" {
t.Error("Expected non-disabled language extension .go to still work")
}
}

28
fileproc/formats.go Normal file
View File

@@ -0,0 +1,28 @@
package fileproc
// FileData represents a single file's path and content.
// It is serialized as one entry of OutputData.Files in both JSON and YAML output.
type FileData struct {
	Path     string `json:"path" yaml:"path"`         // file path (relative to the scanned root — see FileProcessor.getRelativePath)
	Content  string `json:"content" yaml:"content"`   // full file contents
	Language string `json:"language" yaml:"language"` // detected language identifier; empty when unknown
}
// OutputData represents the full output structure.
// Prefix and Suffix are optional framing strings around the collected files.
type OutputData struct {
	Prefix string     `json:"prefix,omitempty" yaml:"prefix,omitempty"` // text emitted before the files
	Suffix string     `json:"suffix,omitempty" yaml:"suffix,omitempty"` // text emitted after the files
	Files  []FileData `json:"files" yaml:"files"`                       // collected file entries
}
// FormatWriter defines the interface for format-specific writers.
// Lifecycle: Start once, then WriteFile per file, then Close once.
//
// NOTE(review): MarkdownWriter declares Close(suffix string) error and therefore
// does not satisfy this interface as written (JSONWriter does) — confirm intent.
type FormatWriter interface {
	Start(prefix, suffix string) error
	WriteFile(req WriteRequest) error
	Close() error
}
// detectLanguage tries to infer the code block language from the file extension.
func detectLanguage(filePath string) string {
	return GetDefaultRegistry().GetLanguage(filePath)
}

66
fileproc/ignore_rules.go Normal file
View File

@@ -0,0 +1,66 @@
package fileproc
import (
"os"
"path/filepath"
ignore "github.com/sabhiram/go-gitignore"
)
// ignoreRule holds an ignore matcher along with the base directory where it was loaded.
type ignoreRule struct {
	gi   *ignore.GitIgnore // compiled ignore patterns
	base string            // directory the patterns are evaluated relative to
}
// loadIgnoreRules loads ignore rules from the current directory and combines them with parent rules.
// Parent rules come first so matching preserves the inherited order.
func loadIgnoreRules(currentDir string, parentRules []ignoreRule) []ignoreRule {
	// Reserve room for the inherited rules plus a possible .gitignore and .ignore.
	const expectedIgnoreFiles = 2
	combined := make([]ignoreRule, len(parentRules), len(parentRules)+expectedIgnoreFiles)
	copy(combined, parentRules)
	// Pick up .gitignore and .ignore files from the current directory, if present.
	for _, name := range []string{".gitignore", ".ignore"} {
		if rule := tryLoadIgnoreFile(currentDir, name); rule != nil {
			combined = append(combined, *rule)
		}
	}
	return combined
}
// tryLoadIgnoreFile attempts to load an ignore file from the given directory.
// Returns nil when the file is absent, is a directory, or fails to compile.
func tryLoadIgnoreFile(dir, fileName string) *ignoreRule {
	path := filepath.Join(dir, fileName)
	info, statErr := os.Stat(path)
	if statErr != nil || info.IsDir() {
		return nil
	}
	gi, compileErr := ignore.CompileIgnoreFile(path)
	if compileErr != nil {
		// A malformed ignore file is silently skipped, matching prior behavior.
		return nil
	}
	return &ignoreRule{base: dir, gi: gi}
}
// matchesIgnoreRules checks if a path matches any of the ignore rules.
func matchesIgnoreRules(fullPath string, rules []ignoreRule) bool {
	for i := range rules {
		if matchesRule(fullPath, rules[i]) {
			return true
		}
	}
	return false
}
// matchesRule checks if a path matches a specific ignore rule.
// The path is made relative to the rule's base directory before matching,
// since ignore patterns apply relative to where the ignore file lives.
func matchesRule(fullPath string, rule ignoreRule) bool {
	rel, relErr := filepath.Rel(rule.base, fullPath)
	if relErr != nil {
		// A path that cannot be expressed relative to the base can never match.
		return false
	}
	return rule.gi.MatchesPath(rel)
}

188
fileproc/json_writer.go Normal file
View File

@@ -0,0 +1,188 @@
package fileproc
import (
	"encoding/json"
	"fmt"
	"io"
	"os"
	"unicode/utf8"

	"github.com/ivuorinen/gibidify/utils"
)
// JSONWriter handles JSON format output with streaming support.
// It emits a single JSON object of the form {"prefix":...,"suffix":...,"files":[...]}.
// Not safe for concurrent use; a single writer goroutine drives it (see startJSONWriter).
type JSONWriter struct {
	outFile   *os.File // destination for all JSON output
	firstFile bool     // true until the first file entry is written; controls comma separators
}
// NewJSONWriter creates a new JSON writer that emits to outFile.
// firstFile starts true so the first WriteFile call skips the comma separator.
func NewJSONWriter(outFile *os.File) *JSONWriter {
	return &JSONWriter{
		firstFile: true,
		outFile:   outFile,
	}
}
// Start writes the JSON header: the prefix and suffix fields and the opening
// of the files array, producing exactly
//
//	{"prefix":"<escaped>","suffix":"<escaped>","files":[
//
// The five separate WriteString calls of the original are consolidated into a
// single write, which produces identical bytes with one syscall and one error path.
func (w *JSONWriter) Start(prefix, suffix string) error {
	header := `{"prefix":"` + escapeJSONString(prefix) +
		`","suffix":"` + escapeJSONString(suffix) +
		`","files":[`
	if _, err := w.outFile.WriteString(header); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON start")
	}
	return nil
}
// WriteFile writes a file entry in JSON format, inserting a comma separator
// before every entry except the first. Streamed and inline entries are
// dispatched to their dedicated writers.
func (w *JSONWriter) WriteFile(req WriteRequest) error {
	needSeparator := !w.firstFile
	if needSeparator {
		if _, err := w.outFile.WriteString(","); err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON separator")
		}
	}
	w.firstFile = false
	if !req.IsStream {
		return w.writeInline(req)
	}
	return w.writeStreaming(req)
}
// Close writes the JSON footer, terminating the files array and the root object.
func (w *JSONWriter) Close() error {
	_, err := w.outFile.WriteString("]}")
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON end")
	}
	return nil
}
// writeStreaming writes a large file as JSON in streaming chunks:
// an object header with path and language, the escaped content streamed
// from req.Reader, then the closing quote and brace. The reader is always
// closed, even on error.
func (w *JSONWriter) writeStreaming(req WriteRequest) error {
	defer w.closeReader(req.Reader, req.Path)
	lang := detectLanguage(req.Path)
	header := `{"path":"` + escapeJSONString(req.Path) + `","language":"` + lang + `","content":"`
	if _, err := w.outFile.WriteString(header); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file start").WithFilePath(req.Path)
	}
	if err := w.streamJSONContent(req.Reader, req.Path); err != nil {
		return err
	}
	if _, err := w.outFile.WriteString(`"}`); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file end").WithFilePath(req.Path)
	}
	return nil
}
// writeInline writes a small file directly as a single marshaled JSON object.
func (w *JSONWriter) writeInline(req WriteRequest) error {
	entry := FileData{
		Path:     req.Path,
		Language: detectLanguage(req.Path),
		Content:  req.Content,
	}
	payload, marshalErr := json.Marshal(entry)
	if marshalErr != nil {
		return utils.WrapError(marshalErr, utils.ErrorTypeProcessing, utils.CodeProcessingEncode, "failed to marshal JSON").WithFilePath(req.Path)
	}
	if _, writeErr := w.outFile.Write(payload); writeErr != nil {
		return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file").WithFilePath(req.Path)
	}
	return nil
}
// streamJSONContent streams content with JSON escaping.
//
// Bug fix: the previous version escaped each Read chunk independently. A
// multi-byte UTF-8 rune that happened to straddle a chunk boundary was
// escaped as two invalid fragments, which json.Marshal coerces to U+FFFD
// replacement characters — silently corrupting file content. This version
// holds back an incomplete trailing UTF-8 sequence and prepends it to the
// next chunk, so escaping only ever sees whole runes. Genuinely invalid
// bytes are passed through immediately (escaped as U+FFFD, as before).
func (w *JSONWriter) streamJSONContent(reader io.Reader, path string) error {
	// Room for one chunk plus a carried-over partial rune (at most UTFMax-1 bytes).
	buf := make([]byte, StreamChunkSize+utf8.UTFMax)
	pending := 0 // bytes carried over from the previous chunk
	for {
		n, err := reader.Read(buf[pending : pending+StreamChunkSize])
		total := pending + n
		pending = 0
		if total > 0 {
			cut := total
			if err == nil {
				// More data may follow: hold back a trailing incomplete rune.
				cut = completeUTF8Prefix(buf[:total])
			}
			if cut > 0 {
				escaped := escapeJSONString(string(buf[:cut]))
				if _, writeErr := w.outFile.WriteString(escaped); writeErr != nil {
					return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON chunk").WithFilePath(path)
				}
			}
			pending = copy(buf, buf[cut:total])
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to read JSON chunk").WithFilePath(path)
		}
	}
	return nil
}

// completeUTF8Prefix returns the length of the longest prefix of b that does
// not end in an incomplete UTF-8 sequence. Invalid encodings count as complete
// (utf8.FullRune treats them as width-1 error runes), so bad bytes are never
// held back indefinitely.
func completeUTF8Prefix(b []byte) int {
	end := len(b)
	start := end
	// Walk back at most UTFMax bytes to the start byte of the final rune.
	for i := 0; i < utf8.UTFMax && start > 0; i++ {
		start--
		if utf8.RuneStart(b[start]) {
			break
		}
	}
	if !utf8.FullRune(b[start:end]) {
		return start
	}
	return end
}
// closeReader safely closes a reader if it implements io.Closer.
// Close failures are logged rather than returned, since by this point the
// content has already been handled.
func (w *JSONWriter) closeReader(reader io.Reader, path string) {
	closer, ok := reader.(io.Closer)
	if !ok {
		return
	}
	if err := closer.Close(); err != nil {
		utils.LogError(
			"Failed to close file reader",
			utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
		)
	}
}
// escapeJSONString escapes a string for JSON output by marshaling it and
// stripping the surrounding quotes. Note that json.Marshal also escapes
// <, >, and & as \u003c etc., and coerces invalid UTF-8 to U+FFFD.
func escapeJSONString(s string) string {
	quoted, err := json.Marshal(s)
	if err != nil {
		// Marshaling a string value cannot fail in practice; the previous
		// version ignored the error and would have panicked on the slice
		// below. Keep a defensive fallback instead of indexing blindly.
		return s
	}
	// Drop the surrounding double quotes to keep only the escaped payload.
	return string(quoted[1 : len(quoted)-1])
}
// startJSONWriter handles JSON format output with streaming support.
// It drains writeCh until the channel is closed, then signals completion by
// closing done. Per-file write errors are logged and processing continues;
// a failed Start aborts early (done is still closed via the defer).
func startJSONWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
	defer close(done)
	jw := NewJSONWriter(outFile)
	if startErr := jw.Start(prefix, suffix); startErr != nil {
		utils.LogError("Failed to write JSON start", startErr)
		return
	}
	for req := range writeCh {
		if writeErr := jw.WriteFile(req); writeErr != nil {
			utils.LogError("Failed to write JSON file", writeErr)
		}
	}
	if closeErr := jw.Close(); closeErr != nil {
		utils.LogError("Failed to write JSON end", closeErr)
	}
}

139
fileproc/markdown_writer.go Normal file
View File

@@ -0,0 +1,139 @@
package fileproc
import (
"fmt"
"io"
"os"
"github.com/ivuorinen/gibidify/utils"
)
// MarkdownWriter handles markdown format output with streaming support.
// Each file is rendered as a "## File:" heading followed by a fenced code block.
type MarkdownWriter struct {
	outFile *os.File // destination for all markdown output
}
// NewMarkdownWriter creates a new markdown writer that emits to outFile.
func NewMarkdownWriter(outFile *os.File) *MarkdownWriter {
	return &MarkdownWriter{outFile: outFile}
}
// Start writes the markdown header ("# <prefix>") when prefix is non-empty.
// The suffix parameter is accepted for signature symmetry but not written
// here; Close(suffix) emits it at the end of the document.
func (w *MarkdownWriter) Start(prefix, suffix string) error {
	if prefix == "" {
		return nil
	}
	_, err := fmt.Fprintf(w.outFile, "# %s\n\n", prefix)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write prefix")
	}
	return nil
}
// WriteFile writes a file entry in markdown format, dispatching to the
// streaming or inline path depending on how the request was prepared.
func (w *MarkdownWriter) WriteFile(req WriteRequest) error {
	if !req.IsStream {
		return w.writeInline(req)
	}
	return w.writeStreaming(req)
}
// Close writes the markdown footer ("# <suffix>") when suffix is non-empty.
func (w *MarkdownWriter) Close(suffix string) error {
	if suffix == "" {
		return nil
	}
	if _, err := fmt.Fprintf(w.outFile, "\n# %s\n", suffix); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write suffix")
	}
	return nil
}
// writeStreaming writes a large file in streaming chunks: heading and code
// fence first, then the raw content copied from req.Reader, then the closing
// fence. The reader is always closed, even on error.
func (w *MarkdownWriter) writeStreaming(req WriteRequest) error {
	defer w.closeReader(req.Reader, req.Path)
	lang := detectLanguage(req.Path)
	header := fmt.Sprintf("## File: `%s`\n```%s\n", req.Path, lang)
	if _, err := w.outFile.WriteString(header); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file header").WithFilePath(req.Path)
	}
	if err := w.streamContent(req.Reader, req.Path); err != nil {
		return err
	}
	if _, err := w.outFile.WriteString("\n```\n\n"); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file footer").WithFilePath(req.Path)
	}
	return nil
}
// writeInline writes a small file directly from its in-memory content as a
// heading plus fenced code block.
func (w *MarkdownWriter) writeInline(req WriteRequest) error {
	lang := detectLanguage(req.Path)
	if _, err := fmt.Fprintf(w.outFile, "## File: `%s`\n```%s\n%s\n```\n\n", req.Path, lang, req.Content); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write inline content").WithFilePath(req.Path)
	}
	return nil
}
// streamContent copies file content to the output in fixed-size chunks,
// passing bytes through unmodified (markdown fences need no escaping here).
func (w *MarkdownWriter) streamContent(reader io.Reader, path string) error {
	chunk := make([]byte, StreamChunkSize)
	for {
		n, readErr := reader.Read(chunk)
		if n > 0 {
			if _, writeErr := w.outFile.Write(chunk[:n]); writeErr != nil {
				return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write chunk").WithFilePath(path)
			}
		}
		switch {
		case readErr == io.EOF:
			return nil
		case readErr != nil:
			return utils.WrapError(readErr, utils.ErrorTypeIO, utils.CodeIORead, "failed to read chunk").WithFilePath(path)
		}
	}
}
// closeReader safely closes a reader if it implements io.Closer.
// Close failures are logged rather than returned.
func (w *MarkdownWriter) closeReader(reader io.Reader, path string) {
	closer, ok := reader.(io.Closer)
	if !ok {
		return
	}
	if err := closer.Close(); err != nil {
		utils.LogError(
			"Failed to close file reader",
			utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
		)
	}
}
// startMarkdownWriter handles markdown format output with streaming support.
// It drains writeCh until the channel is closed, then signals completion by
// closing done. Per-file write errors are logged and processing continues;
// a failed Start aborts early (done is still closed via the defer).
func startMarkdownWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
	defer close(done)
	mw := NewMarkdownWriter(outFile)
	if startErr := mw.Start(prefix, suffix); startErr != nil {
		utils.LogError("Failed to write markdown prefix", startErr)
		return
	}
	for req := range writeCh {
		if writeErr := mw.WriteFile(req); writeErr != nil {
			utils.LogError("Failed to write markdown file", writeErr)
		}
	}
	if closeErr := mw.Close(suffix); closeErr != nil {
		utils.LogError("Failed to write markdown suffix", closeErr)
	}
}

View File

@@ -3,34 +3,157 @@ package fileproc
import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/utils"
)
// Streaming tuning knobs shared by the file processor and format writers.
const (
	// StreamChunkSize is the size of chunks when streaming large files (64KB).
	StreamChunkSize = 65536
	// StreamThreshold is the file size above which we use streaming (1MB).
	StreamThreshold = 1048576
	// MaxMemoryBuffer is the maximum memory to use for buffering content (10MB).
	MaxMemoryBuffer = 10485760
)
// WriteRequest represents the content to be written.
type WriteRequest struct {
Path string
Content string
Path string
Content string
IsStream bool
Reader io.Reader
}
// FileProcessor handles file processing operations.
type FileProcessor struct {
	rootPath  string // root used to compute relative output paths
	sizeLimit int64  // maximum file size in bytes, read from configuration at construction
}
// NewFileProcessor creates a new file processor rooted at rootPath, reading
// the file size limit from the active configuration.
func NewFileProcessor(rootPath string) *FileProcessor {
	p := &FileProcessor{rootPath: rootPath}
	p.sizeLimit = config.GetFileSizeLimit()
	return p
}
// ProcessFile reads the file at filePath and sends a formatted output to outCh.
// It automatically chooses between loading the entire file or streaming based on file size.
func ProcessFile(filePath string, outCh chan<- WriteRequest, rootPath string) {
content, err := os.ReadFile(filePath)
processor := NewFileProcessor(rootPath)
processor.Process(filePath, outCh)
}
// Process handles file processing with the configured settings.
func (p *FileProcessor) Process(filePath string, outCh chan<- WriteRequest) {
// Validate file
fileInfo, err := p.validateFile(filePath)
if err != nil {
logrus.Errorf("Failed to read file %s: %v", filePath, err)
return // Error already logged
}
// Get relative path
relPath := p.getRelativePath(filePath)
// Choose processing strategy based on file size
if fileInfo.Size() <= StreamThreshold {
p.processInMemory(filePath, relPath, outCh)
} else {
p.processStreaming(filePath, relPath, outCh)
}
}
// validateFile checks if the file can be processed: it must be stat-able and
// its size must not exceed the configured limit. On failure the problem has
// already been logged; the returned error lets the caller skip the file.
func (p *FileProcessor) validateFile(filePath string) (os.FileInfo, error) {
	fileInfo, err := os.Stat(filePath)
	if err != nil {
		structErr := utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to stat file").WithFilePath(filePath)
		utils.LogErrorf(structErr, "Failed to stat file %s", filePath)
		return nil, err
	}
	// Check size limit
	if fileInfo.Size() > p.sizeLimit {
		utils.LogErrorf(
			utils.NewStructuredError(
				utils.ErrorTypeValidation,
				utils.CodeValidationSize,
				fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", fileInfo.Size(), p.sizeLimit),
			).WithFilePath(filePath).WithContext("file_size", fileInfo.Size()).WithContext("size_limit", p.sizeLimit),
			"Skipping large file %s", filePath,
		)
		// Include the sizes so the error is informative on its own; the
		// previous bare "file too large" carried no context.
		return nil, fmt.Errorf("file too large: %d bytes exceeds limit of %d bytes", fileInfo.Size(), p.sizeLimit)
	}
	return fileInfo, nil
}
// getRelativePath computes the path relative to rootPath, falling back to
// the input path unchanged when it cannot be made relative.
func (p *FileProcessor) getRelativePath(filePath string) string {
	if rel, err := filepath.Rel(p.rootPath, filePath); err == nil {
		return rel
	}
	return filePath
}
// processInMemory loads the entire file into memory (for small files).
func (p *FileProcessor) processInMemory(filePath, relPath string, outCh chan<- WriteRequest) {
content, err := os.ReadFile(filePath) // #nosec G304 - filePath is validated by walker
if err != nil {
structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to read file").WithFilePath(filePath)
utils.LogErrorf(structErr, "Failed to read file %s", filePath)
return
}
// Compute path relative to rootPath, so /a/b/c/d.c becomes c/d.c
relPath, err := filepath.Rel(rootPath, filePath)
if err != nil {
// Fallback if something unexpected happens
relPath = filePath
outCh <- WriteRequest{
Path: relPath,
Content: p.formatContent(relPath, string(content)),
IsStream: false,
}
}
// processStreaming creates a streaming reader for large files.
func (p *FileProcessor) processStreaming(filePath, relPath string, outCh chan<- WriteRequest) {
reader := p.createStreamReader(filePath, relPath)
if reader == nil {
return // Error already logged
}
// Format: separator, then relative path, then content
formatted := fmt.Sprintf("\n---\n%s\n%s\n", relPath, string(content))
outCh <- WriteRequest{Path: relPath, Content: formatted}
outCh <- WriteRequest{
Path: relPath,
Content: "", // Empty since content is in Reader
IsStream: true,
Reader: reader,
}
}
// createStreamReader creates a reader that concatenates the formatted header
// with the file's content. Returns nil (after logging) when the file cannot
// be opened. The returned reader wraps the open file; the downstream writer
// is responsible for closing it.
func (p *FileProcessor) createStreamReader(filePath, relPath string) io.Reader {
	f, openErr := os.Open(filePath) // #nosec G304 - filePath is validated by walker
	if openErr != nil {
		structErr := utils.WrapError(openErr, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to open file for streaming").WithFilePath(filePath)
		utils.LogErrorf(structErr, "Failed to open file for streaming %s", filePath)
		return nil
	}
	return io.MultiReader(p.formatHeader(relPath), f)
}
// formatContent formats the file content with its separator/path header:
// "\n---\n<relPath>\n<content>\n".
func (p *FileProcessor) formatContent(relPath, content string) string {
	return "\n---\n" + relPath + "\n" + content + "\n"
}
// formatHeader creates a reader over the separator/path header that precedes
// streamed file content: "\n---\n<relPath>\n".
func (p *FileProcessor) formatHeader(relPath string) io.Reader {
	header := "\n---\n" + relPath + "\n"
	return strings.NewReader(header)
}

View File

@@ -6,12 +6,15 @@ import (
"sync"
"testing"
fileproc "github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/testutil"
)
func TestProcessFile(t *testing.T) {
// Reset and load default config to ensure proper file size limits
testutil.ResetViperConfig(t, "")
// Create a temporary file with known content.
tmpFile, err := os.CreateTemp("", "testfile")
tmpFile, err := os.CreateTemp(t.TempDir(), "testfile")
if err != nil {
t.Fatal(err)
}

107
fileproc/registry.go Normal file
View File

@@ -0,0 +1,107 @@
// Package fileproc provides file processing utilities.
package fileproc
import (
"path/filepath"
"strings"
"sync"
)
// minExtensionLength is the minimum filename length required before an
// extension lookup can succeed (presumably enforced in GetLanguage — see
// TestFileTypeRegistry_MinimumExtensionLength for the observable behavior).
const minExtensionLength = 2

var (
	// registry is the lazily-initialized singleton; created exactly once via registryOnce.
	registry     *FileTypeRegistry
	registryOnce sync.Once
)
// FileTypeRegistry manages file type detection and classification.
// Lookup maps are keyed by lowercase extension including the leading dot
// (case-insensitivity is exercised by the configuration tests).
type FileTypeRegistry struct {
	imageExts   map[string]bool   // extensions classified as images
	binaryExts  map[string]bool   // extensions classified as binary
	languageMap map[string]string // extension -> language identifier
	// Cache for frequent lookups to avoid repeated string operations
	extCache     map[string]string         // filename -> normalized extension
	resultCache  map[string]FileTypeResult // extension -> cached result
	cacheMutex   sync.RWMutex              // guards both caches and stats
	maxCacheSize int                       // cache size threshold; NOTE(review): confirm whether eviction bounds extCache as well as resultCache
	// Performance statistics
	stats RegistryStats
}
// RegistryStats tracks performance metrics for the registry.
// Counters are cumulative for the lifetime of the registry instance.
type RegistryStats struct {
	TotalLookups   uint64 // total lookups performed
	CacheHits      uint64 // lookups satisfied from cache
	CacheMisses    uint64 // lookups that required a fresh computation
	CacheEvictions uint64 // cache entries evicted
}
// FileTypeResult represents cached file type detection results, bundling all
// classifications for one extension so a single cache hit answers every query.
type FileTypeResult struct {
	IsImage   bool   // extension is classified as an image
	IsBinary  bool   // extension is classified as binary
	Language  string // mapped language identifier; empty if unknown
	Extension string // the normalized extension this result describes
}
// initRegistry initializes the default file type registry with common extensions.
func initRegistry() *FileTypeRegistry {
	return &FileTypeRegistry{
		imageExts:   getImageExtensions(),
		binaryExts:  getBinaryExtensions(),
		languageMap: getLanguageMap(),
		// NOTE(review): extCache is pre-sized to 1000 while maxCacheSize is 500;
		// confirm the eviction logic (not visible here) also bounds extCache.
		extCache:     make(map[string]string, 1000),         // Cache for extension normalization
		resultCache:  make(map[string]FileTypeResult, 500),  // Cache for type results
		maxCacheSize: 500,
	}
}
// getRegistry returns the singleton file type registry, creating it if necessary.
// Safe for concurrent use: initialization runs exactly once via registryOnce.
func getRegistry() *FileTypeRegistry {
	registryOnce.Do(func() {
		registry = initRegistry()
	})
	return registry
}
// GetDefaultRegistry returns the default file type registry.
// It is the exported entry point over the lazily-created singleton.
func GetDefaultRegistry() *FileTypeRegistry {
	return getRegistry()
}
// GetStats returns a snapshot copy of the current registry statistics,
// taken under the read lock so counters are mutually consistent.
func (r *FileTypeRegistry) GetStats() RegistryStats {
	r.cacheMutex.RLock()
	snapshot := r.stats
	r.cacheMutex.RUnlock()
	return snapshot
}
// GetCacheInfo returns current cache size information: the live sizes of the
// extension and result caches plus the configured maximum.
func (r *FileTypeRegistry) GetCacheInfo() (extCacheSize, resultCacheSize, maxCacheSize int) {
	r.cacheMutex.RLock()
	extCacheSize = len(r.extCache)
	resultCacheSize = len(r.resultCache)
	maxCacheSize = r.maxCacheSize
	r.cacheMutex.RUnlock()
	return extCacheSize, resultCacheSize, maxCacheSize
}
// ResetRegistryForTesting resets the registry to its initial state.
// This function should only be used in tests.
// NOTE(review): the package-level registry and registryOnce are reassigned
// without synchronization, so this must not race with concurrent lookups.
func ResetRegistryForTesting() {
	registryOnce = sync.Once{}
	registry = nil
}
// normalizeExtension extracts the file extension (including the leading dot)
// and lowercases it; returns "" when the filename has no extension.
func normalizeExtension(filename string) string {
	ext := filepath.Ext(filename)
	return strings.ToLower(ext)
}
// isSpecialFile checks whether an extension-less filename (e.g. "Makefile")
// matches an entry in the given set by its lowercased basename. Note that
// dotfiles such as ".DS_Store" have a non-empty filepath.Ext and therefore
// always return false here; NOTE(review): such names are presumably handled
// by the normal extension maps instead — confirm.
func isSpecialFile(filename string, extensions map[string]bool) bool {
	if filepath.Ext(filename) != "" {
		return false
	}
	name := strings.ToLower(filepath.Base(filename))
	return extensions[name]
}

View File

@@ -4,10 +4,8 @@ package fileproc
import (
"os"
"path/filepath"
"strings"
"github.com/ivuorinen/gibidify/config"
ignore "github.com/sabhiram/go-gitignore"
"github.com/ivuorinen/gibidify/utils"
)
// Walker defines an interface for scanning directories.
@@ -18,22 +16,25 @@ type Walker interface {
// ProdWalker implements Walker using a custom directory walker that
// respects .gitignore and .ignore files, configuration-defined ignore directories,
// and ignores binary and image files by default.
type ProdWalker struct{}
type ProdWalker struct {
filter *FileFilter
}
// ignoreRule holds an ignore matcher along with the base directory where it was loaded.
type ignoreRule struct {
base string
gi *ignore.GitIgnore
// NewProdWalker creates a new production walker with current configuration.
func NewProdWalker() *ProdWalker {
return &ProdWalker{
filter: NewFileFilter(),
}
}
// Walk scans the given root directory recursively and returns a slice of file paths
// that are not ignored based on .gitignore/.ignore files, the configuration, or the default binary/image filter.
func (pw ProdWalker) Walk(root string) ([]string, error) {
absRoot, err := filepath.Abs(root)
func (w *ProdWalker) Walk(root string) ([]string, error) {
absRoot, err := utils.GetAbsolutePath(root)
if err != nil {
return nil, err
return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSPathResolution, "failed to resolve root path").WithFilePath(root)
}
return walkDir(absRoot, absRoot, []ignoreRule{})
return w.walkDir(absRoot, []ignoreRule{})
}
// walkDir recursively walks the directory tree starting at currentDir.
@@ -41,122 +42,34 @@ func (pw ProdWalker) Walk(root string) ([]string, error) {
// appends the corresponding rules to the inherited list. Each file/directory is
// then checked against the accumulated ignore rules, the configuration's list of ignored directories,
// and a default filter that ignores binary and image files.
func walkDir(root string, currentDir string, parentRules []ignoreRule) ([]string, error) {
func (w *ProdWalker) walkDir(currentDir string, parentRules []ignoreRule) ([]string, error) {
var results []string
entries, err := os.ReadDir(currentDir)
if err != nil {
return nil, err
return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to read directory").WithFilePath(currentDir)
}
// Start with the parent's ignore rules.
rules := make([]ignoreRule, len(parentRules))
copy(rules, parentRules)
// Check for .gitignore and .ignore files in the current directory.
for _, fileName := range []string{".gitignore", ".ignore"} {
ignorePath := filepath.Join(currentDir, fileName)
if info, err := os.Stat(ignorePath); err == nil && !info.IsDir() {
gi, err := ignore.CompileIgnoreFile(ignorePath)
if err == nil {
rules = append(rules, ignoreRule{
base: currentDir,
gi: gi,
})
}
}
}
// Get the list of directories to ignore from configuration.
ignoredDirs := config.GetIgnoredDirectories()
sizeLimit := config.GetFileSizeLimit() // e.g., 5242880 for 5 MB
rules := loadIgnoreRules(currentDir, parentRules)
for _, entry := range entries {
fullPath := filepath.Join(currentDir, entry.Name())
// For directories, check if its name is in the config ignore list.
if entry.IsDir() {
for _, d := range ignoredDirs {
if entry.Name() == d {
// Skip this directory entirely.
goto SkipEntry
}
}
} else {
// Check if file exceeds the configured size limit.
info, err := entry.Info()
if err == nil && info.Size() > sizeLimit {
goto SkipEntry
}
// For files, apply the default filter to ignore binary and image files.
if isBinaryOrImage(fullPath) {
goto SkipEntry
}
if w.filter.shouldSkipEntry(entry, fullPath, rules) {
continue
}
// Check accumulated ignore rules.
for _, rule := range rules {
// Compute the path relative to the base where the ignore rule was defined.
rel, err := filepath.Rel(rule.base, fullPath)
if err != nil {
continue
}
// If the rule matches, skip this entry.
if rule.gi.MatchesPath(rel) {
goto SkipEntry
}
}
// If not ignored, then process the entry.
// Process entry
if entry.IsDir() {
subFiles, err := walkDir(root, fullPath, rules)
subFiles, err := w.walkDir(fullPath, rules)
if err != nil {
return nil, err
return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingTraversal, "failed to traverse subdirectory").WithFilePath(fullPath)
}
results = append(results, subFiles...)
} else {
results = append(results, fullPath)
}
SkipEntry:
continue
}
return results, nil
}
// isBinaryOrImage reports whether a file should be skipped because its
// extension marks it as an image or a binary artifact. The check is
// case-insensitive, so "photo.PNG" and ".DS_Store" are matched too.
func isBinaryOrImage(filePath string) bool {
	ext := strings.ToLower(filepath.Ext(filePath))
	// Common image file extensions.
	imageExtensions := map[string]bool{
		".png":  true,
		".jpg":  true,
		".jpeg": true,
		".gif":  true,
		".bmp":  true,
		".tiff": true,
		".ico":  true,
		".svg":  true,
		".webp": true,
	}
	// Common binary file extensions. Keys must be lowercase because ext
	// has already been lowercased above; the previous ".DS_Store" key
	// could never match ".ds_store" and macOS metadata files slipped
	// through.
	binaryExtensions := map[string]bool{
		".exe":      true,
		".dll":      true,
		".so":       true,
		".bin":      true,
		".dat":      true,
		".zip":      true,
		".tar":      true,
		".gz":       true,
		".7z":       true,
		".rar":      true,
		".ds_store": true, // fix: lowercase so the lookup succeeds
	}
	return imageExtensions[ext] || binaryExtensions[ext]
}

View File

@@ -1,64 +1,42 @@
package fileproc_test
import (
"os"
"path/filepath"
"testing"
"github.com/ivuorinen/gibidify/config"
fileproc "github.com/ivuorinen/gibidify/fileproc"
"github.com/spf13/viper"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/testutil"
)
func TestProdWalkerWithIgnore(t *testing.T) {
// Create a temporary directory structure.
rootDir, err := os.MkdirTemp("", "walker_test_root")
if err != nil {
t.Fatalf("Failed to create temp root directory: %v", err)
}
defer func() {
if err := os.RemoveAll(rootDir); err != nil {
t.Fatalf("cleanup failed: %v", err)
}
}()
rootDir := t.TempDir()
subDir := filepath.Join(rootDir, "vendor")
if err := os.Mkdir(subDir, 0755); err != nil {
t.Fatalf("Failed to create subDir: %v", err)
}
subDir := testutil.CreateTestDirectory(t, rootDir, "vendor")
// Write sample files
filePaths := []string{
filepath.Join(rootDir, "file1.go"),
filepath.Join(rootDir, "file2.txt"),
filepath.Join(subDir, "file_in_vendor.txt"), // should be ignored
}
for _, fp := range filePaths {
if err := os.WriteFile(fp, []byte("content"), 0644); err != nil {
t.Fatalf("Failed to write file %s: %v", fp, err)
}
}
testutil.CreateTestFiles(t, rootDir, []testutil.FileSpec{
{Name: "file1.go", Content: "content"},
{Name: "file2.txt", Content: "content"},
})
testutil.CreateTestFile(t, subDir, "file_in_vendor.txt", []byte("content")) // should be ignored
// .gitignore that ignores *.txt and itself
gitignoreContent := `*.txt
.gitignore
`
gitignorePath := filepath.Join(rootDir, ".gitignore")
if err := os.WriteFile(gitignorePath, []byte(gitignoreContent), 0644); err != nil {
t.Fatalf("Failed to write .gitignore: %v", err)
}
testutil.CreateTestFile(t, rootDir, ".gitignore", []byte(gitignoreContent))
// Initialize config to ignore "vendor" directory
viper.Reset()
config.LoadConfig()
testutil.ResetViperConfig(t, "")
viper.Set("ignoreDirectories", []string{"vendor"})
// Run walker
var w fileproc.Walker = fileproc.ProdWalker{}
w := fileproc.NewProdWalker()
found, err := w.Walk(rootDir)
if err != nil {
t.Fatalf("Walk returned error: %v", err)
}
testutil.MustSucceed(t, err, "walking directory")
// We expect only file1.go to appear
if len(found) != 1 {
@@ -70,38 +48,24 @@ func TestProdWalkerWithIgnore(t *testing.T) {
}
func TestProdWalkerBinaryCheck(t *testing.T) {
rootDir, err := os.MkdirTemp("", "walker_test_bincheck")
if err != nil {
t.Fatalf("Failed to create temp root directory: %v", err)
}
defer func() {
if err := os.RemoveAll(rootDir); err != nil {
t.Fatalf("cleanup failed: %v", err)
}
}()
rootDir := t.TempDir()
// Create a mock binary file
binFile := filepath.Join(rootDir, "somefile.exe")
if err := os.WriteFile(binFile, []byte("fake-binary-content"), 0644); err != nil {
t.Fatalf("Failed to write file %s: %v", binFile, err)
}
// Create a normal file
normalFile := filepath.Join(rootDir, "keep.go")
if err := os.WriteFile(normalFile, []byte("package main"), 0644); err != nil {
t.Fatalf("Failed to write file %s: %v", normalFile, err)
}
// Create test files
testutil.CreateTestFiles(t, rootDir, []testutil.FileSpec{
{Name: "somefile.exe", Content: "fake-binary-content"},
{Name: "keep.go", Content: "package main"},
})
// Reset and load default config
viper.Reset()
config.LoadConfig()
testutil.ResetViperConfig(t, "")
// Reset FileTypeRegistry to ensure clean state
fileproc.ResetRegistryForTesting()
// Run walker
var w fileproc.Walker = fileproc.ProdWalker{}
w := fileproc.NewProdWalker()
found, err := w.Walk(rootDir)
if err != nil {
t.Fatalf("Walk returned error: %v", err)
}
testutil.MustSucceed(t, err, "walking directory")
// Only "keep.go" should be returned
if len(found) != 1 {
@@ -113,34 +77,17 @@ func TestProdWalkerBinaryCheck(t *testing.T) {
}
func TestProdWalkerSizeLimit(t *testing.T) {
rootDir, err := os.MkdirTemp("", "walker_test_sizelimit")
if err != nil {
t.Fatalf("Failed to create temp root directory: %v", err)
}
defer func() {
if err := os.RemoveAll(rootDir); err != nil {
t.Fatalf("cleanup failed: %v", err)
}
}()
rootDir := t.TempDir()
// Create a file exceeding the size limit
largeFilePath := filepath.Join(rootDir, "largefile.txt")
// Create test files
largeFileData := make([]byte, 6*1024*1024) // 6 MB
if err := os.WriteFile(largeFilePath, largeFileData, 0644); err != nil {
t.Fatalf("Failed to write large file: %v", err)
}
// Create a small file
smallFilePath := filepath.Join(rootDir, "smallfile.go")
if err := os.WriteFile(smallFilePath, []byte("package main"), 0644); err != nil {
t.Fatalf("Failed to write small file: %v", err)
}
testutil.CreateTestFile(t, rootDir, "largefile.txt", largeFileData)
testutil.CreateTestFile(t, rootDir, "smallfile.go", []byte("package main"))
// Reset and load default config, which sets size limit to 5 MB
viper.Reset()
config.LoadConfig()
testutil.ResetViperConfig(t, "")
var w fileproc.Walker = fileproc.ProdWalker{}
w := fileproc.NewProdWalker()
found, err := w.Walk(rootDir)
if err != nil {
t.Fatalf("Walk returned error: %v", err)

View File

@@ -1,101 +1,29 @@
// Package fileproc provides a writer for the output of the file processor.
//
// The StartWriter function writes the output in the specified format.
// The formatMarkdown function formats the output in Markdown format.
// The detectLanguage function tries to infer the code block language from the file extension.
// The OutputData struct represents the full output structure.
// The FileData struct represents a single file's path and content.
package fileproc
import (
"encoding/json"
"fmt"
"os"
"github.com/sirupsen/logrus"
"gopkg.in/yaml.v3"
"github.com/ivuorinen/gibidify/utils"
)
// FileData represents a single file's path and content.
// It is the element type collected under OutputData.Files and carries
// matching JSON and YAML serialization tags.
type FileData struct {
Path string `json:"path" yaml:"path"`
Content string `json:"content" yaml:"content"`
}
// OutputData represents the full output structure: an optional prefix,
// the collected files, and an optional suffix. Prefix and Suffix are
// omitted from serialized output when empty (omitempty on both tags).
type OutputData struct {
Prefix string `json:"prefix,omitempty" yaml:"prefix,omitempty"`
Files []FileData `json:"files" yaml:"files"`
Suffix string `json:"suffix,omitempty" yaml:"suffix,omitempty"`
}
// StartWriter writes the output in the specified format.
func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format string, prefix, suffix string) {
var files []FileData
// Read from channel until closed
for req := range writeCh {
files = append(files, FileData(req))
}
// Create output struct
output := OutputData{Prefix: prefix, Files: files, Suffix: suffix}
// Serialize based on format
var outputData []byte
var err error
// StartWriter writes the output in the specified format with memory optimization.
func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format, prefix, suffix string) {
switch format {
case "json":
outputData, err = json.MarshalIndent(output, "", " ")
case "yaml":
outputData, err = yaml.Marshal(output)
case "markdown":
outputData = []byte(formatMarkdown(output))
startMarkdownWriter(outFile, writeCh, done, prefix, suffix)
case "json":
startJSONWriter(outFile, writeCh, done, prefix, suffix)
case "yaml":
startYAMLWriter(outFile, writeCh, done, prefix, suffix)
default:
err = fmt.Errorf("unsupported format: %s", format)
}
if err != nil {
logrus.Errorf("Error encoding output: %v", err)
err := utils.NewStructuredError(
utils.ErrorTypeValidation,
utils.CodeValidationFormat,
fmt.Sprintf("unsupported format: %s", format),
).WithContext("format", format)
utils.LogError("Failed to encode output", err)
close(done)
return
}
// Write to file
if _, err := outFile.Write(outputData); err != nil {
logrus.Errorf("Error writing to file: %v", err)
}
close(done)
}
// formatMarkdown renders the collected output as a Markdown document:
// a prefix heading, one "## File:" section with a fenced code block per
// file (language inferred from the extension), and a suffix heading.
func formatMarkdown(output OutputData) string {
	doc := "# " + output.Prefix + "\n\n"
	for _, f := range output.Files {
		doc += "## File: `" + f.Path + "`\n```" + detectLanguage(f.Path) + "\n" + f.Content + "\n```\n\n"
	}
	doc += "# " + output.Suffix
	return doc
}
// detectLanguage tries to infer the code block language from the file
// extension. It recognizes Go, Python, C, and JavaScript; anything else
// (including extensionless names) yields the empty string so Markdown
// fences are emitted without a language tag.
func detectLanguage(filename string) string {
	// Three-character suffixes. The old code re-checked len(filename)
	// inside every case even though the guard already guaranteed it.
	if n := len(filename); n >= 3 {
		switch filename[n-3:] {
		case ".go":
			return "go"
		case ".py":
			return "python"
		case ".js":
			return "javascript"
		}
	}
	// Two-character suffix, checked separately so even the two-rune
	// name ".c" is classified (the old len < 3 guard rejected it).
	if n := len(filename); n >= 2 && filename[n-2:] == ".c" {
		return "c"
	}
	return ""
}

View File

@@ -7,8 +7,9 @@ import (
"sync"
"testing"
fileproc "github.com/ivuorinen/gibidify/fileproc"
"gopkg.in/yaml.v3"
"github.com/ivuorinen/gibidify/fileproc"
)
func TestStartWriter_Formats(t *testing.T) {
@@ -18,107 +19,109 @@ func TestStartWriter_Formats(t *testing.T) {
format string
expectError bool
}{
{
name: "JSON format",
format: "json",
expectError: false,
},
{
name: "YAML format",
format: "yaml",
expectError: false,
},
{
name: "Markdown format",
format: "markdown",
expectError: false,
},
{
name: "Invalid format",
format: "invalid",
expectError: true,
},
{"JSON format", "json", false},
{"YAML format", "yaml", false},
{"Markdown format", "markdown", false},
{"Invalid format", "invalid", true},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
outFile, err := os.CreateTemp("", "gibidify_test_output")
if err != nil {
t.Fatalf("Failed to create temp file: %v", err)
}
defer func() {
if err := outFile.Close(); err != nil {
t.Errorf("close temp file: %v", err)
}
if err := os.Remove(outFile.Name()); err != nil {
t.Errorf("remove temp file: %v", err)
}
}()
// Prepare channels
writeCh := make(chan fileproc.WriteRequest, 2)
doneCh := make(chan struct{})
// Write a couple of sample requests
writeCh <- fileproc.WriteRequest{Path: "sample.go", Content: "package main"}
writeCh <- fileproc.WriteRequest{Path: "example.py", Content: "def foo(): pass"}
close(writeCh)
// Start the writer
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
fileproc.StartWriter(outFile, writeCh, doneCh, tc.format, "PREFIX", "SUFFIX")
}()
// Wait until writer signals completion
wg.Wait()
<-doneCh // make sure all writes finished
// Read output
data, err := os.ReadFile(outFile.Name())
if err != nil {
t.Fatalf("Error reading output file: %v", err)
}
data := runWriterTest(t, tc.format)
if tc.expectError {
// For an invalid format, we expect StartWriter to log an error
// and produce no content or minimal content. There's no official
// error returned, so check if it's empty or obviously incorrect.
if len(data) != 0 {
t.Errorf("Expected no output for invalid format, got:\n%s", data)
}
verifyErrorOutput(t, data)
} else {
// Valid format: check basic properties in the output
content := string(data)
switch tc.format {
case "json":
// Quick parse check
var outStruct fileproc.OutputData
if err := json.Unmarshal(data, &outStruct); err != nil {
t.Errorf("JSON unmarshal failed: %v", err)
}
case "yaml":
var outStruct fileproc.OutputData
if err := yaml.Unmarshal(data, &outStruct); err != nil {
t.Errorf("YAML unmarshal failed: %v", err)
}
case "markdown":
// Check presence of code fences or "## File: ..."
if !strings.Contains(content, "```") {
t.Error("Expected markdown code fences not found")
}
}
// Prefix and suffix checks (common to JSON, YAML, markdown)
if !strings.Contains(string(data), "PREFIX") {
t.Errorf("Missing prefix in output: %s", data)
}
if !strings.Contains(string(data), "SUFFIX") {
t.Errorf("Missing suffix in output: %s", data)
}
verifyValidOutput(t, data, tc.format)
verifyPrefixSuffix(t, data)
}
})
}
}
// runWriterTest executes the writer with the given format and returns the
// bytes it produced. It queues two sample files on the write channel,
// runs StartWriter, waits for completion, and reads back the temp file.
func runWriterTest(t *testing.T, format string) []byte {
	t.Helper()
	outFile, err := os.CreateTemp(t.TempDir(), "gibidify_test_output")
	if err != nil {
		t.Fatalf("Failed to create temp file: %v", err)
	}
	// t.TempDir() removes the directory (and this file) automatically,
	// so only the descriptor needs closing here; the old explicit
	// os.Remove was redundant.
	defer func() {
		if closeErr := outFile.Close(); closeErr != nil {
			t.Errorf("close temp file: %v", closeErr)
		}
	}()
	// Buffer of 2 lets both requests be queued before the writer starts.
	writeCh := make(chan fileproc.WriteRequest, 2)
	doneCh := make(chan struct{})
	writeCh <- fileproc.WriteRequest{Path: "sample.go", Content: "package main"}
	writeCh <- fileproc.WriteRequest{Path: "example.py", Content: "def foo(): pass"}
	close(writeCh)
	// StartWriter closes doneCh once all output has been written, so the
	// done channel alone is sufficient synchronization; the previous
	// WaitGroup duplicated it.
	go fileproc.StartWriter(outFile, writeCh, doneCh, format, "PREFIX", "SUFFIX")
	<-doneCh
	data, err := os.ReadFile(outFile.Name())
	if err != nil {
		t.Fatalf("Error reading output file: %v", err)
	}
	return data
}
// verifyErrorOutput asserts that an invalid-format run produced no bytes
// at all; the writer is expected to log the error and write nothing.
func verifyErrorOutput(t *testing.T, data []byte) {
	t.Helper()
	if len(data) == 0 {
		return
	}
	t.Errorf("Expected no output for invalid format, got:\n%s", data)
}
// verifyValidOutput checks that the produced bytes are well-formed for
// the given format: JSON and YAML must unmarshal into OutputData, and
// Markdown must contain fenced code blocks.
func verifyValidOutput(t *testing.T, data []byte, format string) {
	t.Helper()
	var parsed fileproc.OutputData
	switch format {
	case "json":
		if err := json.Unmarshal(data, &parsed); err != nil {
			t.Errorf("JSON unmarshal failed: %v", err)
		}
	case "yaml":
		if err := yaml.Unmarshal(data, &parsed); err != nil {
			t.Errorf("YAML unmarshal failed: %v", err)
		}
	case "markdown":
		if !strings.Contains(string(data), "```") {
			t.Error("Expected markdown code fences not found")
		}
	}
}
// verifyPrefixSuffix asserts that the rendered output still carries both
// the PREFIX and SUFFIX markers that were passed to the writer.
func verifyPrefixSuffix(t *testing.T, data []byte) {
	t.Helper()
	out := string(data)
	if !strings.Contains(out, "PREFIX") {
		t.Errorf("Missing prefix in output: %s", data)
	}
	if !strings.Contains(out, "SUFFIX") {
		t.Errorf("Missing suffix in output: %s", data)
	}
}

148
fileproc/yaml_writer.go Normal file
View File

@@ -0,0 +1,148 @@
package fileproc
import (
"bufio"
"fmt"
"io"
"os"
"strings"
"github.com/ivuorinen/gibidify/utils"
)
// YAMLWriter handles YAML format output with streaming support.
type YAMLWriter struct {
outFile *os.File
}
// NewYAMLWriter creates a new YAML writer.
func NewYAMLWriter(outFile *os.File) *YAMLWriter {
return &YAMLWriter{outFile: outFile}
}
// Start writes the YAML document header: the quoted prefix and suffix
// scalars followed by the opening of the files sequence.
func (w *YAMLWriter) Start(prefix, suffix string) error {
	header := "prefix: " + yamlQuoteString(prefix) +
		"\nsuffix: " + yamlQuoteString(suffix) +
		"\nfiles:\n"
	if _, err := w.outFile.WriteString(header); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML header")
	}
	return nil
}
// WriteFile emits one file entry, taking the streaming path for content
// delivered via a reader and the inline path for in-memory content.
func (w *YAMLWriter) WriteFile(req WriteRequest) error {
	if !req.IsStream {
		return w.writeInline(req)
	}
	return w.writeStreaming(req)
}
// Close finalizes the document. YAML needs no trailing footer, so this
// is a no-op kept only to satisfy the writer lifecycle.
func (w *YAMLWriter) Close() error {
	return nil
}
// writeStreaming emits one file entry whose content is pulled from
// req.Reader in chunks instead of being held in memory. The reader is
// closed when this method returns, whether or not writing succeeded.
func (w *YAMLWriter) writeStreaming(req WriteRequest) error {
	defer w.closeReader(req.Reader, req.Path)
	entry := fmt.Sprintf(" - path: %s\n language: %s\n content: |\n", yamlQuoteString(req.Path), detectLanguage(req.Path))
	if _, err := w.outFile.WriteString(entry); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML file start").WithFilePath(req.Path)
	}
	// Stream the body with YAML block-scalar indentation.
	return w.streamYAMLContent(req.Reader, req.Path)
}
// writeInline emits one file entry whose full content is already in
// memory, indenting every line under a YAML block scalar.
func (w *YAMLWriter) writeInline(req WriteRequest) error {
	entry := fmt.Sprintf(" - path: %s\n language: %s\n content: |\n", yamlQuoteString(req.Path), detectLanguage(req.Path))
	if _, err := w.outFile.WriteString(entry); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML entry start").WithFilePath(req.Path)
	}
	// Each content line is indented so it stays inside the block scalar.
	for _, line := range strings.Split(req.Content, "\n") {
		if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML content line").WithFilePath(req.Path)
		}
	}
	return nil
}
// streamYAMLContent copies content from reader to the output file line
// by line, prefixing each line with YAML block-scalar indentation.
//
// Parameters:
//   - reader: source of the file content.
//   - path: original file path, used only for error context.
func (w *YAMLWriter) streamYAMLContent(reader io.Reader, path string) error {
	scanner := bufio.NewScanner(reader)
	// bufio.Scanner rejects tokens longer than 64 KiB by default, which
	// made this path fail with bufio.ErrTooLong on minified or generated
	// files — exactly the large inputs streaming exists for. Allow lines
	// up to 10 MiB.
	scanner.Buffer(make([]byte, 0, 64*1024), 10*1024*1024)
	for scanner.Scan() {
		line := scanner.Text()
		if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML line").WithFilePath(path)
		}
	}
	if err := scanner.Err(); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to scan YAML content").WithFilePath(path)
	}
	return nil
}
// closeReader closes reader when it implements io.Closer, logging (but
// not propagating) any close failure so a bad reader cannot abort the
// rest of the output run.
func (w *YAMLWriter) closeReader(reader io.Reader, path string) {
	closer, ok := reader.(io.Closer)
	if !ok {
		return
	}
	if err := closer.Close(); err != nil {
		utils.LogError(
			"Failed to close file reader",
			utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
		)
	}
}
// yamlQuoteString renders s as a YAML scalar. Plain strings pass through
// unchanged; the empty string and strings containing YAML-significant
// characters are emitted as double-quoted scalars with proper escaping.
func yamlQuoteString(s string) string {
	if s == "" {
		return `""`
	}
	if !strings.ContainsAny(s, "\n\r\t:\"'\\") {
		return s
	}
	// Escape backslashes, quotes, and control characters for a YAML
	// double-quoted scalar. The previous version escaped only double
	// quotes, so backslashes and embedded newlines passed through raw
	// and produced invalid YAML.
	escaper := strings.NewReplacer(
		`\`, `\\`,
		`"`, `\"`,
		"\n", `\n`,
		"\r", `\r`,
		"\t", `\t`,
	)
	return `"` + escaper.Replace(s) + `"`
}
// startYAMLWriter drains writeCh into a YAMLWriter and signals done when
// the channel is closed and all output has been attempted. Per-file
// failures are logged and do not stop processing of later files.
func startYAMLWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
	defer close(done)
	writer := NewYAMLWriter(outFile)
	if startErr := writer.Start(prefix, suffix); startErr != nil {
		utils.LogError("Failed to write YAML header", startErr)
		return
	}
	for req := range writeCh {
		if writeErr := writer.WriteFile(req); writeErr != nil {
			utils.LogError("Failed to write YAML file", writeErr)
		}
	}
	if closeErr := writer.Close(); closeErr != nil {
		utils.LogError("Failed to write YAML end", closeErr)
	}
}