mirror of
https://github.com/ivuorinen/gibidify.git
synced 2026-01-26 03:24:05 +00:00
feat: many features, check TODO.md
This commit is contained in:
196
fileproc/backpressure.go
Normal file
196
fileproc/backpressure.go
Normal file
@@ -0,0 +1,196 @@
|
||||
// Package fileproc provides back-pressure management for memory optimization.
|
||||
package fileproc
|
||||
|
||||
import (
|
||||
"context"
|
||||
"runtime"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/ivuorinen/gibidify/config"
|
||||
)
|
||||
|
||||
// BackpressureManager tracks memory usage and pending-work limits so callers
// can throttle file processing when memory gets tight.
//
// filesProcessed is accessed through sync/atomic and is deliberately the first
// field: on 32-bit platforms only the first word of an allocated struct is
// guaranteed to be 64-bit aligned, which atomic 64-bit operations require.
type BackpressureManager struct {
	filesProcessed int64 // count of enabled ShouldApplyBackpressure calls; atomic access only

	enabled             bool  // whether back-pressure is active at all
	maxMemoryUsage      int64 // allocation limit in bytes
	memoryCheckInterval int   // memory is sampled on every N-th counted call
	maxPendingFiles     int   // buffer size used for the file channel
	maxPendingWrites    int   // buffer size used for the write channel

	mu                  sync.RWMutex // guards memoryWarningLogged and lastMemoryCheck
	memoryWarningLogged bool         // true while the over-limit warning is active
	lastMemoryCheck     time.Time    // time of the most recent memory sample
}
|
||||
|
||||
// NewBackpressureManager creates a new back-pressure manager with configuration.
|
||||
func NewBackpressureManager() *BackpressureManager {
|
||||
return &BackpressureManager{
|
||||
enabled: config.GetBackpressureEnabled(),
|
||||
maxMemoryUsage: config.GetMaxMemoryUsage(),
|
||||
memoryCheckInterval: config.GetMemoryCheckInterval(),
|
||||
maxPendingFiles: config.GetMaxPendingFiles(),
|
||||
maxPendingWrites: config.GetMaxPendingWrites(),
|
||||
lastMemoryCheck: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
// CreateChannels creates properly sized channels based on back-pressure configuration.
|
||||
func (bp *BackpressureManager) CreateChannels() (chan string, chan WriteRequest) {
|
||||
var fileCh chan string
|
||||
var writeCh chan WriteRequest
|
||||
|
||||
if bp.enabled {
|
||||
// Use buffered channels with configured limits
|
||||
fileCh = make(chan string, bp.maxPendingFiles)
|
||||
writeCh = make(chan WriteRequest, bp.maxPendingWrites)
|
||||
logrus.Debugf("Created buffered channels: files=%d, writes=%d", bp.maxPendingFiles, bp.maxPendingWrites)
|
||||
} else {
|
||||
// Use unbuffered channels (default behavior)
|
||||
fileCh = make(chan string)
|
||||
writeCh = make(chan WriteRequest)
|
||||
logrus.Debug("Created unbuffered channels (back-pressure disabled)")
|
||||
}
|
||||
|
||||
return fileCh, writeCh
|
||||
}
|
||||
|
||||
// ShouldApplyBackpressure checks if back-pressure should be applied.
|
||||
func (bp *BackpressureManager) ShouldApplyBackpressure(ctx context.Context) bool {
|
||||
if !bp.enabled {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check if we should evaluate memory usage
|
||||
filesProcessed := atomic.AddInt64(&bp.filesProcessed, 1)
|
||||
if int(filesProcessed)%bp.memoryCheckInterval != 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
// Get current memory usage
|
||||
var m runtime.MemStats
|
||||
runtime.ReadMemStats(&m)
|
||||
currentMemory := int64(m.Alloc)
|
||||
|
||||
bp.mu.Lock()
|
||||
defer bp.mu.Unlock()
|
||||
|
||||
bp.lastMemoryCheck = time.Now()
|
||||
|
||||
// Check if we're over the memory limit
|
||||
if currentMemory > bp.maxMemoryUsage {
|
||||
if !bp.memoryWarningLogged {
|
||||
logrus.Warnf("Memory usage (%d bytes) exceeds limit (%d bytes), applying back-pressure",
|
||||
currentMemory, bp.maxMemoryUsage)
|
||||
bp.memoryWarningLogged = true
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Reset warning flag if we're back under the limit
|
||||
if bp.memoryWarningLogged && currentMemory < bp.maxMemoryUsage*8/10 { // 80% of limit
|
||||
logrus.Infof("Memory usage normalized (%d bytes), removing back-pressure", currentMemory)
|
||||
bp.memoryWarningLogged = false
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// ApplyBackpressure applies back-pressure by triggering garbage collection and adding delay.
|
||||
func (bp *BackpressureManager) ApplyBackpressure(ctx context.Context) {
|
||||
if !bp.enabled {
|
||||
return
|
||||
}
|
||||
|
||||
// Force garbage collection to free up memory
|
||||
runtime.GC()
|
||||
|
||||
// Add a small delay to allow memory to be freed
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(10 * time.Millisecond):
|
||||
// Small delay to allow GC to complete
|
||||
}
|
||||
|
||||
// Log memory usage after GC
|
||||
var m runtime.MemStats
|
||||
runtime.ReadMemStats(&m)
|
||||
logrus.Debugf("Applied back-pressure: memory after GC = %d bytes", m.Alloc)
|
||||
}
|
||||
|
||||
// GetStats returns current back-pressure statistics.
|
||||
func (bp *BackpressureManager) GetStats() BackpressureStats {
|
||||
bp.mu.RLock()
|
||||
defer bp.mu.RUnlock()
|
||||
|
||||
var m runtime.MemStats
|
||||
runtime.ReadMemStats(&m)
|
||||
|
||||
return BackpressureStats{
|
||||
Enabled: bp.enabled,
|
||||
FilesProcessed: atomic.LoadInt64(&bp.filesProcessed),
|
||||
CurrentMemoryUsage: int64(m.Alloc),
|
||||
MaxMemoryUsage: bp.maxMemoryUsage,
|
||||
MemoryWarningActive: bp.memoryWarningLogged,
|
||||
LastMemoryCheck: bp.lastMemoryCheck,
|
||||
MaxPendingFiles: bp.maxPendingFiles,
|
||||
MaxPendingWrites: bp.maxPendingWrites,
|
||||
}
|
||||
}
|
||||
|
||||
// BackpressureStats is the JSON-serializable snapshot returned by
// BackpressureManager.GetStats.
type BackpressureStats struct {
	// Configuration and counters.
	Enabled        bool  `json:"enabled"`
	FilesProcessed int64 `json:"files_processed"`

	// Memory figures, in bytes.
	CurrentMemoryUsage int64 `json:"current_memory_usage"`
	MaxMemoryUsage     int64 `json:"max_memory_usage"`

	// State of the most recent memory check.
	MemoryWarningActive bool      `json:"memory_warning_active"`
	LastMemoryCheck     time.Time `json:"last_memory_check"`

	// Configured channel limits.
	MaxPendingFiles  int `json:"max_pending_files"`
	MaxPendingWrites int `json:"max_pending_writes"`
}
|
||||
|
||||
// WaitForChannelSpace waits for space in channels if they're getting full.
|
||||
func (bp *BackpressureManager) WaitForChannelSpace(ctx context.Context, fileCh chan string, writeCh chan WriteRequest) {
|
||||
if !bp.enabled {
|
||||
return
|
||||
}
|
||||
|
||||
// Check if file channel is getting full (>90% capacity)
|
||||
if len(fileCh) > bp.maxPendingFiles*9/10 {
|
||||
logrus.Debugf("File channel is %d%% full, waiting for space", len(fileCh)*100/bp.maxPendingFiles)
|
||||
|
||||
// Wait a bit for the channel to drain
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(5 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
|
||||
// Check if write channel is getting full (>90% capacity)
|
||||
if len(writeCh) > bp.maxPendingWrites*9/10 {
|
||||
logrus.Debugf("Write channel is %d%% full, waiting for space", len(writeCh)*100/bp.maxPendingWrites)
|
||||
|
||||
// Wait a bit for the channel to drain
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(5 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// LogBackpressureInfo logs back-pressure configuration and status.
|
||||
func (bp *BackpressureManager) LogBackpressureInfo() {
|
||||
if bp.enabled {
|
||||
logrus.Infof("Back-pressure enabled: maxMemory=%dMB, fileBuffer=%d, writeBuffer=%d, checkInterval=%d",
|
||||
bp.maxMemoryUsage/1024/1024, bp.maxPendingFiles, bp.maxPendingWrites, bp.memoryCheckInterval)
|
||||
} else {
|
||||
logrus.Info("Back-pressure disabled")
|
||||
}
|
||||
}
|
||||
127
fileproc/cache.go
Normal file
127
fileproc/cache.go
Normal file
@@ -0,0 +1,127 @@
|
||||
package fileproc
|
||||
|
||||
// getNormalizedExtension efficiently extracts and normalizes the file extension with caching.
|
||||
func (r *FileTypeRegistry) getNormalizedExtension(filename string) string {
|
||||
// Try cache first (read lock)
|
||||
r.cacheMutex.RLock()
|
||||
if ext, exists := r.extCache[filename]; exists {
|
||||
r.cacheMutex.RUnlock()
|
||||
return ext
|
||||
}
|
||||
r.cacheMutex.RUnlock()
|
||||
|
||||
// Compute normalized extension
|
||||
ext := normalizeExtension(filename)
|
||||
|
||||
// Cache the result (write lock)
|
||||
r.cacheMutex.Lock()
|
||||
// Check cache size and clean if needed
|
||||
if len(r.extCache) >= r.maxCacheSize*2 {
|
||||
r.clearExtCache()
|
||||
r.stats.CacheEvictions++
|
||||
}
|
||||
r.extCache[filename] = ext
|
||||
r.cacheMutex.Unlock()
|
||||
|
||||
return ext
|
||||
}
|
||||
|
||||
// getFileTypeResult gets cached file type detection result or computes it.
|
||||
func (r *FileTypeRegistry) getFileTypeResult(filename string) FileTypeResult {
|
||||
ext := r.getNormalizedExtension(filename)
|
||||
|
||||
// Update statistics
|
||||
r.updateStats(func() {
|
||||
r.stats.TotalLookups++
|
||||
})
|
||||
|
||||
// Try cache first (read lock)
|
||||
r.cacheMutex.RLock()
|
||||
if result, exists := r.resultCache[ext]; exists {
|
||||
r.cacheMutex.RUnlock()
|
||||
r.updateStats(func() {
|
||||
r.stats.CacheHits++
|
||||
})
|
||||
return result
|
||||
}
|
||||
r.cacheMutex.RUnlock()
|
||||
|
||||
// Cache miss
|
||||
r.updateStats(func() {
|
||||
r.stats.CacheMisses++
|
||||
})
|
||||
|
||||
// Compute result
|
||||
result := FileTypeResult{
|
||||
Extension: ext,
|
||||
IsImage: r.imageExts[ext],
|
||||
IsBinary: r.binaryExts[ext],
|
||||
Language: r.languageMap[ext],
|
||||
}
|
||||
|
||||
// Handle special cases for binary detection (like .DS_Store)
|
||||
if !result.IsBinary && isSpecialFile(filename, r.binaryExts) {
|
||||
result.IsBinary = true
|
||||
}
|
||||
|
||||
// Cache the result (write lock)
|
||||
r.cacheMutex.Lock()
|
||||
if len(r.resultCache) >= r.maxCacheSize {
|
||||
r.clearResultCache()
|
||||
r.stats.CacheEvictions++
|
||||
}
|
||||
r.resultCache[ext] = result
|
||||
r.cacheMutex.Unlock()
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// clearExtCache clears half of the extension cache (LRU-like behavior).
|
||||
func (r *FileTypeRegistry) clearExtCache() {
|
||||
r.clearCache(&r.extCache, r.maxCacheSize)
|
||||
}
|
||||
|
||||
// clearResultCache clears half of the result cache.
|
||||
func (r *FileTypeRegistry) clearResultCache() {
|
||||
newCache := make(map[string]FileTypeResult, r.maxCacheSize)
|
||||
count := 0
|
||||
for k, v := range r.resultCache {
|
||||
if count >= r.maxCacheSize/2 {
|
||||
break
|
||||
}
|
||||
newCache[k] = v
|
||||
count++
|
||||
}
|
||||
r.resultCache = newCache
|
||||
}
|
||||
|
||||
// clearCache is a generic cache clearing function.
|
||||
func (r *FileTypeRegistry) clearCache(cache *map[string]string, maxSize int) {
|
||||
newCache := make(map[string]string, maxSize)
|
||||
count := 0
|
||||
for k, v := range *cache {
|
||||
if count >= maxSize/2 {
|
||||
break
|
||||
}
|
||||
newCache[k] = v
|
||||
count++
|
||||
}
|
||||
*cache = newCache
|
||||
}
|
||||
|
||||
// invalidateCache clears both caches when the registry is modified.
|
||||
func (r *FileTypeRegistry) invalidateCache() {
|
||||
r.cacheMutex.Lock()
|
||||
defer r.cacheMutex.Unlock()
|
||||
|
||||
r.extCache = make(map[string]string, r.maxCacheSize)
|
||||
r.resultCache = make(map[string]FileTypeResult, r.maxCacheSize)
|
||||
r.stats.CacheEvictions++
|
||||
}
|
||||
|
||||
// updateStats safely updates statistics.
|
||||
func (r *FileTypeRegistry) updateStats(fn func()) {
|
||||
r.cacheMutex.Lock()
|
||||
fn()
|
||||
r.cacheMutex.Unlock()
|
||||
}
|
||||
@@ -4,6 +4,6 @@ package fileproc
|
||||
// CollectFiles scans the given root directory using the default walker (ProdWalker)
|
||||
// and returns a slice of file paths.
|
||||
func CollectFiles(root string) ([]string, error) {
|
||||
var w Walker = ProdWalker{}
|
||||
w := NewProdWalker()
|
||||
return w.Walk(root)
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
fileproc "github.com/ivuorinen/gibidify/fileproc"
|
||||
"github.com/ivuorinen/gibidify/fileproc"
|
||||
)
|
||||
|
||||
func TestCollectFilesWithFakeWalker(t *testing.T) {
|
||||
|
||||
40
fileproc/config.go
Normal file
40
fileproc/config.go
Normal file
@@ -0,0 +1,40 @@
|
||||
package fileproc
|
||||
|
||||
import "strings"
|
||||
|
||||
// ApplyCustomExtensions applies custom extensions from configuration.
|
||||
func (r *FileTypeRegistry) ApplyCustomExtensions(customImages, customBinary []string, customLanguages map[string]string) {
|
||||
// Add custom image extensions
|
||||
r.addExtensions(customImages, r.AddImageExtension)
|
||||
|
||||
// Add custom binary extensions
|
||||
r.addExtensions(customBinary, r.AddBinaryExtension)
|
||||
|
||||
// Add custom language mappings
|
||||
for ext, lang := range customLanguages {
|
||||
if ext != "" && lang != "" {
|
||||
r.AddLanguageMapping(strings.ToLower(ext), lang)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// addExtensions is a helper to add multiple extensions.
|
||||
func (r *FileTypeRegistry) addExtensions(extensions []string, adder func(string)) {
|
||||
for _, ext := range extensions {
|
||||
if ext != "" {
|
||||
adder(strings.ToLower(ext))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ConfigureFromSettings applies configuration settings to the registry.
|
||||
// This function is called from main.go after config is loaded to avoid circular imports.
|
||||
func ConfigureFromSettings(
|
||||
customImages, customBinary []string,
|
||||
customLanguages map[string]string,
|
||||
disabledImages, disabledBinary, disabledLanguages []string,
|
||||
) {
|
||||
registry := GetDefaultRegistry()
|
||||
registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
|
||||
registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages)
|
||||
}
|
||||
99
fileproc/detection.go
Normal file
99
fileproc/detection.go
Normal file
@@ -0,0 +1,99 @@
|
||||
package fileproc
|
||||
|
||||
import "strings"
|
||||
|
||||
// Package-level detection functions
|
||||
|
||||
// IsImage checks if the file extension indicates an image file.
|
||||
func IsImage(filename string) bool {
|
||||
return getRegistry().IsImage(filename)
|
||||
}
|
||||
|
||||
// IsBinary checks if the file extension indicates a binary file.
|
||||
func IsBinary(filename string) bool {
|
||||
return getRegistry().IsBinary(filename)
|
||||
}
|
||||
|
||||
// GetLanguage returns the language identifier for the given filename based on its extension.
|
||||
func GetLanguage(filename string) string {
|
||||
return getRegistry().GetLanguage(filename)
|
||||
}
|
||||
|
||||
// Registry methods for detection
|
||||
|
||||
// IsImage checks if the file extension indicates an image file.
|
||||
func (r *FileTypeRegistry) IsImage(filename string) bool {
|
||||
result := r.getFileTypeResult(filename)
|
||||
return result.IsImage
|
||||
}
|
||||
|
||||
// IsBinary checks if the file extension indicates a binary file.
|
||||
func (r *FileTypeRegistry) IsBinary(filename string) bool {
|
||||
result := r.getFileTypeResult(filename)
|
||||
return result.IsBinary
|
||||
}
|
||||
|
||||
// GetLanguage returns the language identifier for the given filename based on its extension.
|
||||
func (r *FileTypeRegistry) GetLanguage(filename string) string {
|
||||
if len(filename) < minExtensionLength {
|
||||
return ""
|
||||
}
|
||||
result := r.getFileTypeResult(filename)
|
||||
return result.Language
|
||||
}
|
||||
|
||||
// Extension management methods
|
||||
|
||||
// AddImageExtension adds a new image extension to the registry.
|
||||
func (r *FileTypeRegistry) AddImageExtension(ext string) {
|
||||
r.addExtension(ext, r.imageExts)
|
||||
}
|
||||
|
||||
// AddBinaryExtension adds a new binary extension to the registry.
|
||||
func (r *FileTypeRegistry) AddBinaryExtension(ext string) {
|
||||
r.addExtension(ext, r.binaryExts)
|
||||
}
|
||||
|
||||
// AddLanguageMapping adds a new language mapping to the registry.
|
||||
func (r *FileTypeRegistry) AddLanguageMapping(ext, language string) {
|
||||
r.languageMap[strings.ToLower(ext)] = language
|
||||
r.invalidateCache()
|
||||
}
|
||||
|
||||
// addExtension is a helper to add extensions to a map.
|
||||
func (r *FileTypeRegistry) addExtension(ext string, target map[string]bool) {
|
||||
target[strings.ToLower(ext)] = true
|
||||
r.invalidateCache()
|
||||
}
|
||||
|
||||
// removeExtension is a helper to remove extensions from a map.
|
||||
func (r *FileTypeRegistry) removeExtension(ext string, target map[string]bool) {
|
||||
delete(target, strings.ToLower(ext))
|
||||
}
|
||||
|
||||
// DisableExtensions removes specified extensions from the registry.
|
||||
func (r *FileTypeRegistry) DisableExtensions(disabledImages, disabledBinary, disabledLanguages []string) {
|
||||
// Disable image extensions
|
||||
for _, ext := range disabledImages {
|
||||
if ext != "" {
|
||||
r.removeExtension(ext, r.imageExts)
|
||||
}
|
||||
}
|
||||
|
||||
// Disable binary extensions
|
||||
for _, ext := range disabledBinary {
|
||||
if ext != "" {
|
||||
r.removeExtension(ext, r.binaryExts)
|
||||
}
|
||||
}
|
||||
|
||||
// Disable language extensions
|
||||
for _, ext := range disabledLanguages {
|
||||
if ext != "" {
|
||||
delete(r.languageMap, strings.ToLower(ext))
|
||||
}
|
||||
}
|
||||
|
||||
// Invalidate cache after all modifications
|
||||
r.invalidateCache()
|
||||
}
|
||||
161
fileproc/extensions.go
Normal file
161
fileproc/extensions.go
Normal file
@@ -0,0 +1,161 @@
|
||||
package fileproc
|
||||
|
||||
// getImageExtensions returns a fresh set of the default image file extensions.
func getImageExtensions() map[string]bool {
	defaults := []string{
		".png", ".jpg", ".jpeg", ".gif", ".bmp",
		".tiff", ".tif", ".svg", ".webp", ".ico",
	}

	set := make(map[string]bool, len(defaults))
	for _, ext := range defaults {
		set[ext] = true
	}
	return set
}
|
||||
|
||||
// getBinaryExtensions returns a fresh set of the default binary file extensions.
func getBinaryExtensions() map[string]bool {
	defaults := []string{
		// Executables and libraries
		".exe", ".dll", ".so", ".dylib", ".bin", ".o", ".a", ".lib",
		// Compiled bytecode
		".jar", ".class", ".pyc", ".pyo",
		// Data files
		".dat", ".db", ".sqlite", ".ds_store",
		// Documents
		".pdf",
		// Archives
		".zip", ".tar", ".gz", ".bz2", ".xz", ".7z", ".rar",
		// Fonts
		".ttf", ".otf", ".woff", ".woff2",
		// Media files
		".mp3", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm", ".ogg", ".wav", ".flac",
	}

	set := make(map[string]bool, len(defaults))
	for _, ext := range defaults {
		set[ext] = true
	}
	return set
}
|
||||
|
||||
// getLanguageMap returns a fresh map from file extension to the default
// language identifier.
func getLanguageMap() map[string]string {
	// Extensions are listed per language and inverted into the lookup map below.
	defaults := []struct {
		language   string
		extensions []string
	}{
		// Systems programming
		{"go", []string{".go"}},
		{"c", []string{".c", ".h"}},
		{"cpp", []string{".cpp", ".hpp"}},
		{"rust", []string{".rs"}},

		// Scripting languages
		{"python", []string{".py"}},
		{"ruby", []string{".rb"}},
		{"perl", []string{".pl"}},
		{"lua", []string{".lua"}},
		{"php", []string{".php"}},

		// Web technologies
		{"javascript", []string{".js", ".jsx"}},
		{"typescript", []string{".ts", ".tsx"}},
		{"html", []string{".html", ".htm"}},
		{"css", []string{".css"}},
		{"scss", []string{".scss"}},
		{"sass", []string{".sass"}},
		{"less", []string{".less"}},
		{"vue", []string{".vue"}},

		// JVM languages
		{"java", []string{".java"}},
		{"scala", []string{".scala"}},
		{"kotlin", []string{".kt"}},
		{"clojure", []string{".clj"}},

		// .NET languages
		{"csharp", []string{".cs"}},
		{"vbnet", []string{".vb"}},
		{"fsharp", []string{".fs"}},

		// Apple platforms
		{"swift", []string{".swift"}},
		{"objc", []string{".m"}},
		{"objcpp", []string{".mm"}},

		// Shell scripts
		{"bash", []string{".sh", ".bash"}},
		{"zsh", []string{".zsh"}},
		{"fish", []string{".fish"}},
		{"powershell", []string{".ps1"}},
		{"batch", []string{".bat", ".cmd"}},

		// Data formats
		{"json", []string{".json"}},
		{"yaml", []string{".yaml", ".yml"}},
		{"toml", []string{".toml"}},
		{"xml", []string{".xml"}},
		{"sql", []string{".sql"}},

		// Documentation
		{"markdown", []string{".md"}},
		{"rst", []string{".rst"}},
		{"latex", []string{".tex"}},

		// Functional languages
		{"haskell", []string{".hs"}},
		{"ocaml", []string{".ml", ".mli"}},
		{"elm", []string{".elm"}},
		{"elixir", []string{".ex", ".exs"}},
		{"erlang", []string{".erl", ".hrl"}},

		// Other languages
		{"r", []string{".r"}},
		{"dart", []string{".dart"}},
		{"nim", []string{".nim", ".nims"}},
	}

	byExt := make(map[string]string)
	for _, entry := range defaults {
		for _, ext := range entry.extensions {
			byExt[ext] = entry.language
		}
	}
	return byExt
}
|
||||
@@ -3,8 +3,8 @@ package fileproc
|
||||
|
||||
// FakeWalker implements Walker for testing purposes.
//
// The diff view listed the Files field twice (its removed and its re-added
// position); it is declared once here, in the post-change order.
type FakeWalker struct {
	Err   error    // error configured for the fake walk
	Files []string // file paths configured for the fake walk
}
|
||||
|
||||
// Walk returns predetermined file paths or an error, depending on FakeWalker's configuration.
|
||||
|
||||
55
fileproc/file_filters.go
Normal file
55
fileproc/file_filters.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package fileproc
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/ivuorinen/gibidify/config"
|
||||
)
|
||||
|
||||
// FileFilter holds the criteria used to decide which directory entries are
// skipped during processing.
type FileFilter struct {
	// ignoredDirs lists directory names that are skipped on an exact name match.
	ignoredDirs []string
	// sizeLimit is the file size above which a file is skipped.
	sizeLimit int64
}
|
||||
|
||||
// NewFileFilter creates a new file filter with current configuration.
|
||||
func NewFileFilter() *FileFilter {
|
||||
return &FileFilter{
|
||||
ignoredDirs: config.GetIgnoredDirectories(),
|
||||
sizeLimit: config.GetFileSizeLimit(),
|
||||
}
|
||||
}
|
||||
|
||||
// shouldSkipEntry determines if an entry should be skipped based on ignore rules and filters.
|
||||
func (f *FileFilter) shouldSkipEntry(entry os.DirEntry, fullPath string, rules []ignoreRule) bool {
|
||||
if entry.IsDir() {
|
||||
return f.shouldSkipDirectory(entry)
|
||||
}
|
||||
|
||||
if f.shouldSkipFile(entry, fullPath) {
|
||||
return true
|
||||
}
|
||||
|
||||
return matchesIgnoreRules(fullPath, rules)
|
||||
}
|
||||
|
||||
// shouldSkipDirectory checks if a directory should be skipped based on the ignored directories list.
|
||||
func (f *FileFilter) shouldSkipDirectory(entry os.DirEntry) bool {
|
||||
for _, d := range f.ignoredDirs {
|
||||
if entry.Name() == d {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// shouldSkipFile checks if a file should be skipped based on size limit and file type.
|
||||
func (f *FileFilter) shouldSkipFile(entry os.DirEntry, fullPath string) bool {
|
||||
// Check if file exceeds the configured size limit.
|
||||
if info, err := entry.Info(); err == nil && info.Size() > f.sizeLimit {
|
||||
return true
|
||||
}
|
||||
|
||||
// Apply the default filter to ignore binary and image files.
|
||||
return IsBinary(fullPath) || IsImage(fullPath)
|
||||
}
|
||||
827
fileproc/filetypes_test.go
Normal file
827
fileproc/filetypes_test.go
Normal file
@@ -0,0 +1,827 @@
|
||||
package fileproc
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestFileTypeRegistry_ModificationMethods tests the modification methods of FileTypeRegistry.
|
||||
func TestFileTypeRegistry_ModificationMethods(t *testing.T) {
|
||||
// Create a new registry instance for testing
|
||||
registry := &FileTypeRegistry{
|
||||
imageExts: make(map[string]bool),
|
||||
binaryExts: make(map[string]bool),
|
||||
languageMap: make(map[string]string),
|
||||
}
|
||||
|
||||
// Test AddImageExtension
|
||||
t.Run("AddImageExtension", func(t *testing.T) {
|
||||
// Add a new image extension
|
||||
registry.AddImageExtension(".webp")
|
||||
if !registry.IsImage("test.webp") {
|
||||
t.Errorf("Expected .webp to be recognized as image after adding")
|
||||
}
|
||||
|
||||
// Test case insensitive addition
|
||||
registry.AddImageExtension(".AVIF")
|
||||
if !registry.IsImage("test.avif") {
|
||||
t.Errorf("Expected .avif to be recognized as image after adding .AVIF")
|
||||
}
|
||||
if !registry.IsImage("test.AVIF") {
|
||||
t.Errorf("Expected .AVIF to be recognized as image")
|
||||
}
|
||||
|
||||
// Test with dot prefix
|
||||
registry.AddImageExtension("heic")
|
||||
if registry.IsImage("test.heic") {
|
||||
t.Errorf("Expected extension without dot to not work")
|
||||
}
|
||||
|
||||
// Test with proper dot prefix
|
||||
registry.AddImageExtension(".heic")
|
||||
if !registry.IsImage("test.heic") {
|
||||
t.Errorf("Expected .heic to be recognized as image")
|
||||
}
|
||||
})
|
||||
|
||||
// Test AddBinaryExtension
|
||||
t.Run("AddBinaryExtension", func(t *testing.T) {
|
||||
// Add a new binary extension
|
||||
registry.AddBinaryExtension(".custom")
|
||||
if !registry.IsBinary("test.custom") {
|
||||
t.Errorf("Expected .custom to be recognized as binary after adding")
|
||||
}
|
||||
|
||||
// Test case insensitive addition
|
||||
registry.AddBinaryExtension(".NEWBIN")
|
||||
if !registry.IsBinary("test.newbin") {
|
||||
t.Errorf("Expected .newbin to be recognized as binary after adding .NEWBIN")
|
||||
}
|
||||
if !registry.IsBinary("test.NEWBIN") {
|
||||
t.Errorf("Expected .NEWBIN to be recognized as binary")
|
||||
}
|
||||
|
||||
// Test overwriting existing extension
|
||||
registry.AddBinaryExtension(".custom")
|
||||
if !registry.IsBinary("test.custom") {
|
||||
t.Errorf("Expected .custom to still be recognized as binary after re-adding")
|
||||
}
|
||||
})
|
||||
|
||||
// Test AddLanguageMapping
|
||||
t.Run("AddLanguageMapping", func(t *testing.T) {
|
||||
// Add a new language mapping
|
||||
registry.AddLanguageMapping(".zig", "zig")
|
||||
if registry.GetLanguage("test.zig") != "zig" {
|
||||
t.Errorf("Expected .zig to map to 'zig', got '%s'", registry.GetLanguage("test.zig"))
|
||||
}
|
||||
|
||||
// Test case insensitive addition
|
||||
registry.AddLanguageMapping(".V", "vlang")
|
||||
if registry.GetLanguage("test.v") != "vlang" {
|
||||
t.Errorf("Expected .v to map to 'vlang' after adding .V, got '%s'", registry.GetLanguage("test.v"))
|
||||
}
|
||||
if registry.GetLanguage("test.V") != "vlang" {
|
||||
t.Errorf("Expected .V to map to 'vlang', got '%s'", registry.GetLanguage("test.V"))
|
||||
}
|
||||
|
||||
// Test overwriting existing mapping
|
||||
registry.AddLanguageMapping(".zig", "ziglang")
|
||||
if registry.GetLanguage("test.zig") != "ziglang" {
|
||||
t.Errorf("Expected .zig to map to 'ziglang' after update, got '%s'", registry.GetLanguage("test.zig"))
|
||||
}
|
||||
|
||||
// Test empty language
|
||||
registry.AddLanguageMapping(".empty", "")
|
||||
if registry.GetLanguage("test.empty") != "" {
|
||||
t.Errorf("Expected .empty to map to empty string, got '%s'", registry.GetLanguage("test.empty"))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestFileTypeRegistry_LanguageDetection tests the language detection functionality.
|
||||
func TestFileTypeRegistry_LanguageDetection(t *testing.T) {
|
||||
registry := GetDefaultRegistry()
|
||||
|
||||
tests := []struct {
|
||||
filename string
|
||||
expected string
|
||||
}{
|
||||
// Programming languages
|
||||
{"main.go", "go"},
|
||||
{"script.py", "python"},
|
||||
{"app.js", "javascript"},
|
||||
{"component.tsx", "typescript"},
|
||||
{"service.ts", "typescript"},
|
||||
{"App.java", "java"},
|
||||
{"program.c", "c"},
|
||||
{"program.cpp", "cpp"},
|
||||
{"header.h", "c"},
|
||||
{"header.hpp", "cpp"},
|
||||
{"main.rs", "rust"},
|
||||
{"script.rb", "ruby"},
|
||||
{"index.php", "php"},
|
||||
{"app.swift", "swift"},
|
||||
{"MainActivity.kt", "kotlin"},
|
||||
{"Main.scala", "scala"},
|
||||
{"analysis.r", "r"},
|
||||
{"ViewController.m", "objc"},
|
||||
{"ViewController.mm", "objcpp"},
|
||||
{"Program.cs", "csharp"},
|
||||
{"Module.vb", "vbnet"},
|
||||
{"program.fs", "fsharp"},
|
||||
{"script.lua", "lua"},
|
||||
{"script.pl", "perl"},
|
||||
|
||||
// Shell scripts
|
||||
{"script.sh", "bash"},
|
||||
{"script.bash", "bash"},
|
||||
{"script.zsh", "zsh"},
|
||||
{"script.fish", "fish"},
|
||||
{"script.ps1", "powershell"},
|
||||
{"script.bat", "batch"},
|
||||
{"script.cmd", "batch"},
|
||||
|
||||
// Data and markup
|
||||
{"query.sql", "sql"},
|
||||
{"index.html", "html"},
|
||||
{"page.htm", "html"},
|
||||
{"data.xml", "xml"},
|
||||
{"style.css", "css"},
|
||||
{"style.scss", "scss"},
|
||||
{"style.sass", "sass"},
|
||||
{"style.less", "less"},
|
||||
{"data.json", "json"},
|
||||
{"config.yaml", "yaml"},
|
||||
{"config.yml", "yaml"},
|
||||
{"config.toml", "toml"},
|
||||
{"README.md", "markdown"},
|
||||
{"doc.rst", "rst"},
|
||||
{"paper.tex", "latex"},
|
||||
|
||||
// Modern languages
|
||||
{"main.dart", "dart"},
|
||||
{"Main.elm", "elm"},
|
||||
{"core.clj", "clojure"},
|
||||
{"server.ex", "elixir"},
|
||||
{"test.exs", "elixir"},
|
||||
{"server.erl", "erlang"},
|
||||
{"header.hrl", "erlang"},
|
||||
{"main.hs", "haskell"},
|
||||
{"module.ml", "ocaml"},
|
||||
{"interface.mli", "ocaml"},
|
||||
{"main.nim", "nim"},
|
||||
{"config.nims", "nim"},
|
||||
|
||||
// Web frameworks
|
||||
{"Component.vue", "vue"},
|
||||
{"Component.jsx", "javascript"},
|
||||
|
||||
// Case sensitivity tests
|
||||
{"MAIN.GO", "go"},
|
||||
{"Script.PY", "python"},
|
||||
{"APP.JS", "javascript"},
|
||||
|
||||
// Edge cases
|
||||
{"", ""}, // Empty filename
|
||||
{"a", ""}, // Too short (less than minExtensionLength)
|
||||
{"noext", ""}, // No extension
|
||||
{".hidden", ""}, // Hidden file with no name
|
||||
{"file.", ""}, // Extension is just a dot
|
||||
{"file.unknown", ""}, // Unknown extension
|
||||
{"file.123", ""}, // Numeric extension
|
||||
{"a.b", ""}, // Very short filename and extension
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.filename, func(t *testing.T) {
|
||||
result := registry.GetLanguage(tt.filename)
|
||||
if result != tt.expected {
|
||||
t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestFileTypeRegistry_ImageDetection tests the image detection functionality.
|
||||
func TestFileTypeRegistry_ImageDetection(t *testing.T) {
|
||||
registry := GetDefaultRegistry()
|
||||
|
||||
tests := []struct {
|
||||
filename string
|
||||
expected bool
|
||||
}{
|
||||
// Common image formats
|
||||
{"photo.png", true},
|
||||
{"image.jpg", true},
|
||||
{"picture.jpeg", true},
|
||||
{"animation.gif", true},
|
||||
{"bitmap.bmp", true},
|
||||
{"image.tiff", true},
|
||||
{"scan.tif", true},
|
||||
{"vector.svg", true},
|
||||
{"modern.webp", true},
|
||||
{"favicon.ico", true},
|
||||
|
||||
// Case sensitivity tests
|
||||
{"PHOTO.PNG", true},
|
||||
{"IMAGE.JPG", true},
|
||||
{"PICTURE.JPEG", true},
|
||||
|
||||
// Non-image files
|
||||
{"document.txt", false},
|
||||
{"script.js", false},
|
||||
{"data.json", false},
|
||||
{"archive.zip", false},
|
||||
{"executable.exe", false},
|
||||
|
||||
// Edge cases
|
||||
{"", false}, // Empty filename
|
||||
{"image", false}, // No extension
|
||||
{".png", true}, // Just extension
|
||||
{"file.png.bak", false}, // Multiple extensions
|
||||
{"image.unknown", false}, // Unknown extension
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.filename, func(t *testing.T) {
|
||||
result := registry.IsImage(tt.filename)
|
||||
if result != tt.expected {
|
||||
t.Errorf("IsImage(%q) = %t, expected %t", tt.filename, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestFileTypeRegistry_BinaryDetection tests the binary detection functionality.
|
||||
func TestFileTypeRegistry_BinaryDetection(t *testing.T) {
|
||||
registry := GetDefaultRegistry()
|
||||
|
||||
tests := []struct {
|
||||
filename string
|
||||
expected bool
|
||||
}{
|
||||
// Executable files
|
||||
{"program.exe", true},
|
||||
{"library.dll", true},
|
||||
{"libfoo.so", true},
|
||||
{"framework.dylib", true},
|
||||
{"data.bin", true},
|
||||
|
||||
// Object and library files
|
||||
{"object.o", true},
|
||||
{"archive.a", true},
|
||||
{"library.lib", true},
|
||||
{"application.jar", true},
|
||||
{"bytecode.class", true},
|
||||
{"compiled.pyc", true},
|
||||
{"optimized.pyo", true},
|
||||
|
||||
// System files
|
||||
{".DS_Store", true},
|
||||
|
||||
// Document files (treated as binary)
|
||||
{"document.pdf", true},
|
||||
|
||||
// Archive files
|
||||
{"archive.zip", true},
|
||||
{"backup.tar", true},
|
||||
{"compressed.gz", true},
|
||||
{"data.bz2", true},
|
||||
{"package.xz", true},
|
||||
{"archive.7z", true},
|
||||
{"backup.rar", true},
|
||||
|
||||
// Font files
|
||||
{"font.ttf", true},
|
||||
{"font.otf", true},
|
||||
{"font.woff", true},
|
||||
{"font.woff2", true},
|
||||
|
||||
// Media files
|
||||
{"song.mp3", true},
|
||||
{"video.mp4", true},
|
||||
{"movie.avi", true},
|
||||
{"clip.mov", true},
|
||||
{"video.wmv", true},
|
||||
{"animation.flv", true},
|
||||
{"modern.webm", true},
|
||||
{"audio.ogg", true},
|
||||
{"sound.wav", true},
|
||||
{"music.flac", true},
|
||||
|
||||
// Database files
|
||||
{"data.dat", true},
|
||||
{"database.db", true},
|
||||
{"app.sqlite", true},
|
||||
|
||||
// Case sensitivity tests
|
||||
{"PROGRAM.EXE", true},
|
||||
{"LIBRARY.DLL", true},
|
||||
|
||||
// Non-binary files
|
||||
{"document.txt", false},
|
||||
{"script.js", false},
|
||||
{"data.json", false},
|
||||
{"style.css", false},
|
||||
{"page.html", false},
|
||||
|
||||
// Edge cases
|
||||
{"", false}, // Empty filename
|
||||
{"binary", false}, // No extension
|
||||
{".exe", true}, // Just extension
|
||||
{"file.exe.bak", false}, // Multiple extensions
|
||||
{"file.unknown", false}, // Unknown extension
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.filename, func(t *testing.T) {
|
||||
result := registry.IsBinary(tt.filename)
|
||||
if result != tt.expected {
|
||||
t.Errorf("IsBinary(%q) = %t, expected %t", tt.filename, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestFileTypeRegistry_DefaultRegistryConsistency tests that the default registry is consistent.
|
||||
func TestFileTypeRegistry_DefaultRegistryConsistency(t *testing.T) {
|
||||
// Get registry multiple times and ensure it's the same instance
|
||||
registry1 := GetDefaultRegistry()
|
||||
registry2 := GetDefaultRegistry()
|
||||
registry3 := getRegistry()
|
||||
|
||||
if registry1 != registry2 {
|
||||
t.Error("GetDefaultRegistry() should return the same instance")
|
||||
}
|
||||
if registry1 != registry3 {
|
||||
t.Error("getRegistry() should return the same instance as GetDefaultRegistry()")
|
||||
}
|
||||
|
||||
// Test that global functions use the same registry
|
||||
filename := "test.go"
|
||||
if IsImage(filename) != registry1.IsImage(filename) {
|
||||
t.Error("IsImage() global function should match registry method")
|
||||
}
|
||||
if IsBinary(filename) != registry1.IsBinary(filename) {
|
||||
t.Error("IsBinary() global function should match registry method")
|
||||
}
|
||||
if GetLanguage(filename) != registry1.GetLanguage(filename) {
|
||||
t.Error("GetLanguage() global function should match registry method")
|
||||
}
|
||||
}
|
||||
|
||||
// TestFileTypeRegistry_ThreadSafety tests the thread safety of the FileTypeRegistry.
|
||||
func TestFileTypeRegistry_ThreadSafety(t *testing.T) {
|
||||
const numGoroutines = 100
|
||||
const numOperationsPerGoroutine = 100
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
// Test concurrent read operations
|
||||
t.Run("ConcurrentReads", func(t *testing.T) {
|
||||
for i := 0; i < numGoroutines; i++ {
|
||||
wg.Add(1)
|
||||
go func(id int) {
|
||||
defer wg.Done()
|
||||
registry := GetDefaultRegistry()
|
||||
|
||||
for j := 0; j < numOperationsPerGoroutine; j++ {
|
||||
// Test various file detection operations
|
||||
_ = registry.IsImage("test.png")
|
||||
_ = registry.IsBinary("test.exe")
|
||||
_ = registry.GetLanguage("test.go")
|
||||
|
||||
// Test global functions too
|
||||
_ = IsImage("image.jpg")
|
||||
_ = IsBinary("binary.dll")
|
||||
_ = GetLanguage("script.py")
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
})
|
||||
|
||||
// Test concurrent registry access (singleton creation)
|
||||
t.Run("ConcurrentRegistryAccess", func(t *testing.T) {
|
||||
// Reset the registry to test concurrent initialization
|
||||
// Note: This is not safe in a real application, but needed for testing
|
||||
registryOnce = sync.Once{}
|
||||
registry = nil
|
||||
|
||||
registries := make([]*FileTypeRegistry, numGoroutines)
|
||||
|
||||
for i := 0; i < numGoroutines; i++ {
|
||||
wg.Add(1)
|
||||
go func(id int) {
|
||||
defer wg.Done()
|
||||
registries[id] = GetDefaultRegistry()
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
// Verify all goroutines got the same registry instance
|
||||
firstRegistry := registries[0]
|
||||
for i := 1; i < numGoroutines; i++ {
|
||||
if registries[i] != firstRegistry {
|
||||
t.Errorf("Registry %d is different from registry 0", i)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
// Test concurrent modifications on separate registry instances
|
||||
t.Run("ConcurrentModifications", func(t *testing.T) {
|
||||
// Create separate registry instances for each goroutine to test modification thread safety
|
||||
for i := 0; i < numGoroutines; i++ {
|
||||
wg.Add(1)
|
||||
go func(id int) {
|
||||
defer wg.Done()
|
||||
|
||||
// Create a new registry instance for this goroutine
|
||||
registry := &FileTypeRegistry{
|
||||
imageExts: make(map[string]bool),
|
||||
binaryExts: make(map[string]bool),
|
||||
languageMap: make(map[string]string),
|
||||
}
|
||||
|
||||
for j := 0; j < numOperationsPerGoroutine; j++ {
|
||||
// Add unique extensions for this goroutine
|
||||
extSuffix := fmt.Sprintf("_%d_%d", id, j)
|
||||
|
||||
registry.AddImageExtension(".img" + extSuffix)
|
||||
registry.AddBinaryExtension(".bin" + extSuffix)
|
||||
registry.AddLanguageMapping(".lang"+extSuffix, "lang"+extSuffix)
|
||||
|
||||
// Verify the additions worked
|
||||
if !registry.IsImage("test.img" + extSuffix) {
|
||||
t.Errorf("Failed to add image extension .img%s", extSuffix)
|
||||
}
|
||||
if !registry.IsBinary("test.bin" + extSuffix) {
|
||||
t.Errorf("Failed to add binary extension .bin%s", extSuffix)
|
||||
}
|
||||
if registry.GetLanguage("test.lang"+extSuffix) != "lang"+extSuffix {
|
||||
t.Errorf("Failed to add language mapping .lang%s", extSuffix)
|
||||
}
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
})
|
||||
}
|
||||
|
||||
// TestFileTypeRegistry_EdgeCases tests edge cases and boundary conditions.
|
||||
func TestFileTypeRegistry_EdgeCases(t *testing.T) {
|
||||
registry := GetDefaultRegistry()
|
||||
|
||||
// Test various edge cases for filename handling
|
||||
edgeCases := []struct {
|
||||
name string
|
||||
filename string
|
||||
desc string
|
||||
}{
|
||||
{"empty", "", "empty filename"},
|
||||
{"single_char", "a", "single character filename"},
|
||||
{"just_dot", ".", "just a dot"},
|
||||
{"double_dot", "..", "double dot"},
|
||||
{"hidden_file", ".hidden", "hidden file"},
|
||||
{"hidden_with_ext", ".hidden.txt", "hidden file with extension"},
|
||||
{"multiple_dots", "file.tar.gz", "multiple extensions"},
|
||||
{"trailing_dot", "file.", "trailing dot"},
|
||||
{"unicode", "файл.txt", "unicode filename"},
|
||||
{"spaces", "my file.txt", "filename with spaces"},
|
||||
{"special_chars", "file@#$.txt", "filename with special characters"},
|
||||
{"very_long", "very_long_filename_with_many_characters_in_it.extension", "very long filename"},
|
||||
{"no_basename", ".gitignore", "dotfile with no basename"},
|
||||
{"case_mixed", "FiLe.ExT", "mixed case"},
|
||||
}
|
||||
|
||||
for _, tc := range edgeCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// These should not panic
|
||||
_ = registry.IsImage(tc.filename)
|
||||
_ = registry.IsBinary(tc.filename)
|
||||
_ = registry.GetLanguage(tc.filename)
|
||||
|
||||
// Global functions should also not panic
|
||||
_ = IsImage(tc.filename)
|
||||
_ = IsBinary(tc.filename)
|
||||
_ = GetLanguage(tc.filename)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestFileTypeRegistry_MinimumExtensionLength tests the minimum extension length requirement.
|
||||
func TestFileTypeRegistry_MinimumExtensionLength(t *testing.T) {
|
||||
registry := GetDefaultRegistry()
|
||||
|
||||
tests := []struct {
|
||||
filename string
|
||||
expected string
|
||||
}{
|
||||
{"", ""}, // Empty filename
|
||||
{"a", ""}, // Single character (less than minExtensionLength)
|
||||
{"ab", ""}, // Two characters, no extension
|
||||
{"a.b", ""}, // Extension too short, but filename too short anyway
|
||||
{"ab.c", "c"}, // Valid: filename >= minExtensionLength and .c is valid extension
|
||||
{"a.go", "go"}, // Valid extension
|
||||
{"ab.py", "python"}, // Valid extension
|
||||
{"a.unknown", ""}, // Valid length but unknown extension
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.filename, func(t *testing.T) {
|
||||
result := registry.GetLanguage(tt.filename)
|
||||
if result != tt.expected {
|
||||
t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkFileTypeRegistry tests performance of the registry operations.
|
||||
func BenchmarkFileTypeRegistry_IsImage(b *testing.B) {
|
||||
registry := GetDefaultRegistry()
|
||||
filename := "test.png"
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = registry.IsImage(filename)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFileTypeRegistry_IsBinary(b *testing.B) {
|
||||
registry := GetDefaultRegistry()
|
||||
filename := "test.exe"
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = registry.IsBinary(filename)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFileTypeRegistry_GetLanguage(b *testing.B) {
|
||||
registry := GetDefaultRegistry()
|
||||
filename := "test.go"
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = registry.GetLanguage(filename)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFileTypeRegistry_GlobalFunctions(b *testing.B) {
|
||||
filename := "test.go"
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = IsImage(filename)
|
||||
_ = IsBinary(filename)
|
||||
_ = GetLanguage(filename)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFileTypeRegistry_ConcurrentAccess(b *testing.B) {
|
||||
filename := "test.go"
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_ = IsImage(filename)
|
||||
_ = IsBinary(filename)
|
||||
_ = GetLanguage(filename)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestFileTypeRegistry_Configuration tests the configuration functionality.
|
||||
func TestFileTypeRegistry_Configuration(t *testing.T) {
|
||||
// Create a new registry instance for testing
|
||||
registry := &FileTypeRegistry{
|
||||
imageExts: make(map[string]bool),
|
||||
binaryExts: make(map[string]bool),
|
||||
languageMap: make(map[string]string),
|
||||
}
|
||||
|
||||
// Test ApplyCustomExtensions
|
||||
t.Run("ApplyCustomExtensions", func(t *testing.T) {
|
||||
customImages := []string{".webp", ".avif", ".heic"}
|
||||
customBinary := []string{".custom", ".mybin"}
|
||||
customLanguages := map[string]string{
|
||||
".zig": "zig",
|
||||
".odin": "odin",
|
||||
".v": "vlang",
|
||||
}
|
||||
|
||||
registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
|
||||
|
||||
// Test custom image extensions
|
||||
for _, ext := range customImages {
|
||||
if !registry.IsImage("test" + ext) {
|
||||
t.Errorf("Expected %s to be recognized as image", ext)
|
||||
}
|
||||
}
|
||||
|
||||
// Test custom binary extensions
|
||||
for _, ext := range customBinary {
|
||||
if !registry.IsBinary("test" + ext) {
|
||||
t.Errorf("Expected %s to be recognized as binary", ext)
|
||||
}
|
||||
}
|
||||
|
||||
// Test custom language mappings
|
||||
for ext, expectedLang := range customLanguages {
|
||||
if lang := registry.GetLanguage("test" + ext); lang != expectedLang {
|
||||
t.Errorf("Expected %s to map to %s, got %s", ext, expectedLang, lang)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
// Test DisableExtensions
|
||||
t.Run("DisableExtensions", func(t *testing.T) {
|
||||
// Add some extensions first
|
||||
registry.AddImageExtension(".png")
|
||||
registry.AddImageExtension(".jpg")
|
||||
registry.AddBinaryExtension(".exe")
|
||||
registry.AddBinaryExtension(".dll")
|
||||
registry.AddLanguageMapping(".go", "go")
|
||||
registry.AddLanguageMapping(".py", "python")
|
||||
|
||||
// Verify they work
|
||||
if !registry.IsImage("test.png") {
|
||||
t.Error("Expected .png to be image before disabling")
|
||||
}
|
||||
if !registry.IsBinary("test.exe") {
|
||||
t.Error("Expected .exe to be binary before disabling")
|
||||
}
|
||||
if registry.GetLanguage("test.go") != "go" {
|
||||
t.Error("Expected .go to map to go before disabling")
|
||||
}
|
||||
|
||||
// Disable some extensions
|
||||
disabledImages := []string{".png"}
|
||||
disabledBinary := []string{".exe"}
|
||||
disabledLanguages := []string{".go"}
|
||||
|
||||
registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages)
|
||||
|
||||
// Test that disabled extensions no longer work
|
||||
if registry.IsImage("test.png") {
|
||||
t.Error("Expected .png to not be image after disabling")
|
||||
}
|
||||
if registry.IsBinary("test.exe") {
|
||||
t.Error("Expected .exe to not be binary after disabling")
|
||||
}
|
||||
if registry.GetLanguage("test.go") != "" {
|
||||
t.Error("Expected .go to not map to language after disabling")
|
||||
}
|
||||
|
||||
// Test that non-disabled extensions still work
|
||||
if !registry.IsImage("test.jpg") {
|
||||
t.Error("Expected .jpg to still be image after disabling .png")
|
||||
}
|
||||
if !registry.IsBinary("test.dll") {
|
||||
t.Error("Expected .dll to still be binary after disabling .exe")
|
||||
}
|
||||
if registry.GetLanguage("test.py") != "python" {
|
||||
t.Error("Expected .py to still map to python after disabling .go")
|
||||
}
|
||||
})
|
||||
|
||||
// Test empty values handling
|
||||
t.Run("EmptyValuesHandling", func(t *testing.T) {
|
||||
registry := &FileTypeRegistry{
|
||||
imageExts: make(map[string]bool),
|
||||
binaryExts: make(map[string]bool),
|
||||
languageMap: make(map[string]string),
|
||||
}
|
||||
|
||||
// Test with empty values
|
||||
customImages := []string{"", ".valid", ""}
|
||||
customBinary := []string{"", ".valid"}
|
||||
customLanguages := map[string]string{
|
||||
"": "invalid",
|
||||
".valid": "",
|
||||
".good": "good",
|
||||
}
|
||||
|
||||
registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
|
||||
|
||||
// Only valid entries should be added
|
||||
if registry.IsImage("test.") {
|
||||
t.Error("Expected empty extension to not be added as image")
|
||||
}
|
||||
if !registry.IsImage("test.valid") {
|
||||
t.Error("Expected .valid to be added as image")
|
||||
}
|
||||
if registry.IsBinary("test.") {
|
||||
t.Error("Expected empty extension to not be added as binary")
|
||||
}
|
||||
if !registry.IsBinary("test.valid") {
|
||||
t.Error("Expected .valid to be added as binary")
|
||||
}
|
||||
if registry.GetLanguage("test.") != "" {
|
||||
t.Error("Expected empty extension to not be added as language")
|
||||
}
|
||||
if registry.GetLanguage("test.valid") != "" {
|
||||
t.Error("Expected .valid with empty language to not be added")
|
||||
}
|
||||
if registry.GetLanguage("test.good") != "good" {
|
||||
t.Error("Expected .good to map to good")
|
||||
}
|
||||
})
|
||||
|
||||
// Test case insensitive handling
|
||||
t.Run("CaseInsensitiveHandling", func(t *testing.T) {
|
||||
registry := &FileTypeRegistry{
|
||||
imageExts: make(map[string]bool),
|
||||
binaryExts: make(map[string]bool),
|
||||
languageMap: make(map[string]string),
|
||||
}
|
||||
|
||||
customImages := []string{".WEBP", ".Avif"}
|
||||
customBinary := []string{".CUSTOM", ".MyBin"}
|
||||
customLanguages := map[string]string{
|
||||
".ZIG": "zig",
|
||||
".Odin": "odin",
|
||||
}
|
||||
|
||||
registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
|
||||
|
||||
// Test that both upper and lower case work
|
||||
if !registry.IsImage("test.webp") {
|
||||
t.Error("Expected .webp (lowercase) to work after adding .WEBP")
|
||||
}
|
||||
if !registry.IsImage("test.WEBP") {
|
||||
t.Error("Expected .WEBP (uppercase) to work")
|
||||
}
|
||||
if !registry.IsBinary("test.custom") {
|
||||
t.Error("Expected .custom (lowercase) to work after adding .CUSTOM")
|
||||
}
|
||||
if !registry.IsBinary("test.CUSTOM") {
|
||||
t.Error("Expected .CUSTOM (uppercase) to work")
|
||||
}
|
||||
if registry.GetLanguage("test.zig") != "zig" {
|
||||
t.Error("Expected .zig (lowercase) to work after adding .ZIG")
|
||||
}
|
||||
if registry.GetLanguage("test.ZIG") != "zig" {
|
||||
t.Error("Expected .ZIG (uppercase) to work")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestConfigureFromSettings tests the global configuration function.
|
||||
func TestConfigureFromSettings(t *testing.T) {
|
||||
// Reset registry to ensure clean state
|
||||
registryOnce = sync.Once{}
|
||||
registry = nil
|
||||
|
||||
// Test configuration application
|
||||
customImages := []string{".webp", ".avif"}
|
||||
customBinary := []string{".custom"}
|
||||
customLanguages := map[string]string{".zig": "zig"}
|
||||
disabledImages := []string{".gif"} // Disable default extension
|
||||
disabledBinary := []string{".exe"} // Disable default extension
|
||||
disabledLanguages := []string{".rb"} // Disable default extension
|
||||
|
||||
ConfigureFromSettings(
|
||||
customImages,
|
||||
customBinary,
|
||||
customLanguages,
|
||||
disabledImages,
|
||||
disabledBinary,
|
||||
disabledLanguages,
|
||||
)
|
||||
|
||||
// Test that custom extensions work
|
||||
if !IsImage("test.webp") {
|
||||
t.Error("Expected custom image extension .webp to work")
|
||||
}
|
||||
if !IsBinary("test.custom") {
|
||||
t.Error("Expected custom binary extension .custom to work")
|
||||
}
|
||||
if GetLanguage("test.zig") != "zig" {
|
||||
t.Error("Expected custom language .zig to work")
|
||||
}
|
||||
|
||||
// Test that disabled extensions don't work
|
||||
if IsImage("test.gif") {
|
||||
t.Error("Expected disabled image extension .gif to not work")
|
||||
}
|
||||
if IsBinary("test.exe") {
|
||||
t.Error("Expected disabled binary extension .exe to not work")
|
||||
}
|
||||
if GetLanguage("test.rb") != "" {
|
||||
t.Error("Expected disabled language extension .rb to not work")
|
||||
}
|
||||
|
||||
// Test that non-disabled defaults still work
|
||||
if !IsImage("test.png") {
|
||||
t.Error("Expected non-disabled image extension .png to still work")
|
||||
}
|
||||
if !IsBinary("test.dll") {
|
||||
t.Error("Expected non-disabled binary extension .dll to still work")
|
||||
}
|
||||
if GetLanguage("test.go") != "go" {
|
||||
t.Error("Expected non-disabled language extension .go to still work")
|
||||
}
|
||||
}
|
||||
28
fileproc/formats.go
Normal file
28
fileproc/formats.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package fileproc
|
||||
|
||||
// FileData is the serialized form of one processed file. The struct tags give
// the lower-case key names used in JSON and YAML output.
type FileData struct {
	// Path is the file's path as it appears in the output.
	Path string `json:"path" yaml:"path"`
	// Content is the complete file content.
	Content string `json:"content" yaml:"content"`
	// Language is the language identifier detected for the file.
	Language string `json:"language" yaml:"language"`
}
|
||||
|
||||
// OutputData represents the full output structure.
|
||||
type OutputData struct {
|
||||
Prefix string `json:"prefix,omitempty" yaml:"prefix,omitempty"`
|
||||
Suffix string `json:"suffix,omitempty" yaml:"suffix,omitempty"`
|
||||
Files []FileData `json:"files" yaml:"files"`
|
||||
}
|
||||
|
||||
// FormatWriter defines the interface for format-specific writers.
|
||||
type FormatWriter interface {
|
||||
Start(prefix, suffix string) error
|
||||
WriteFile(req WriteRequest) error
|
||||
Close() error
|
||||
}
|
||||
|
||||
// detectLanguage tries to infer the code block language from the file extension.
|
||||
func detectLanguage(filePath string) string {
|
||||
registry := GetDefaultRegistry()
|
||||
return registry.GetLanguage(filePath)
|
||||
}
|
||||
66
fileproc/ignore_rules.go
Normal file
66
fileproc/ignore_rules.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package fileproc
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
ignore "github.com/sabhiram/go-gitignore"
|
||||
)
|
||||
|
||||
// ignoreRule holds an ignore matcher along with the base directory where it was loaded.
|
||||
type ignoreRule struct {
|
||||
gi *ignore.GitIgnore
|
||||
base string
|
||||
}
|
||||
|
||||
// loadIgnoreRules loads ignore rules from the current directory and combines them with parent rules.
|
||||
func loadIgnoreRules(currentDir string, parentRules []ignoreRule) []ignoreRule {
|
||||
// Pre-allocate for parent rules plus possible .gitignore and .ignore
|
||||
const expectedIgnoreFiles = 2
|
||||
rules := make([]ignoreRule, 0, len(parentRules)+expectedIgnoreFiles)
|
||||
rules = append(rules, parentRules...)
|
||||
|
||||
// Check for .gitignore and .ignore files in the current directory.
|
||||
for _, fileName := range []string{".gitignore", ".ignore"} {
|
||||
if rule := tryLoadIgnoreFile(currentDir, fileName); rule != nil {
|
||||
rules = append(rules, *rule)
|
||||
}
|
||||
}
|
||||
|
||||
return rules
|
||||
}
|
||||
|
||||
// tryLoadIgnoreFile attempts to load an ignore file from the given directory.
|
||||
func tryLoadIgnoreFile(dir, fileName string) *ignoreRule {
|
||||
ignorePath := filepath.Join(dir, fileName)
|
||||
if info, err := os.Stat(ignorePath); err == nil && !info.IsDir() {
|
||||
if gi, err := ignore.CompileIgnoreFile(ignorePath); err == nil {
|
||||
return &ignoreRule{
|
||||
base: dir,
|
||||
gi: gi,
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// matchesIgnoreRules checks if a path matches any of the ignore rules.
|
||||
func matchesIgnoreRules(fullPath string, rules []ignoreRule) bool {
|
||||
for _, rule := range rules {
|
||||
if matchesRule(fullPath, rule) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// matchesRule checks if a path matches a specific ignore rule.
|
||||
func matchesRule(fullPath string, rule ignoreRule) bool {
|
||||
// Compute the path relative to the base where the ignore rule was defined.
|
||||
rel, err := filepath.Rel(rule.base, fullPath)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
// If the rule matches, skip this entry.
|
||||
return rule.gi.MatchesPath(rel)
|
||||
}
|
||||
188
fileproc/json_writer.go
Normal file
188
fileproc/json_writer.go
Normal file
@@ -0,0 +1,188 @@
|
||||
package fileproc
|
||||
|
||||
import (
	"encoding/json"
	"fmt"
	"io"
	"os"
	"unicode/utf8"

	"github.com/ivuorinen/gibidify/utils"
)
|
||||
|
||||
// JSONWriter writes the output as a single JSON object, emitting file entries
// one at a time as they arrive instead of building the document in memory.
type JSONWriter struct {
	// outFile is the destination of all output.
	outFile *os.File
	// firstFile is true until the first file entry has been started; it
	// decides whether a separating comma is needed.
	firstFile bool
}
|
||||
|
||||
// NewJSONWriter creates a new JSON writer.
|
||||
func NewJSONWriter(outFile *os.File) *JSONWriter {
|
||||
return &JSONWriter{
|
||||
outFile: outFile,
|
||||
firstFile: true,
|
||||
}
|
||||
}
|
||||
|
||||
// Start writes the JSON header.
|
||||
func (w *JSONWriter) Start(prefix, suffix string) error {
|
||||
// Start JSON structure
|
||||
if _, err := w.outFile.WriteString(`{"prefix":"`); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON start")
|
||||
}
|
||||
|
||||
// Write escaped prefix
|
||||
escapedPrefix := escapeJSONString(prefix)
|
||||
if _, err := w.outFile.WriteString(escapedPrefix); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON prefix")
|
||||
}
|
||||
|
||||
if _, err := w.outFile.WriteString(`","suffix":"`); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON middle")
|
||||
}
|
||||
|
||||
// Write escaped suffix
|
||||
escapedSuffix := escapeJSONString(suffix)
|
||||
if _, err := w.outFile.WriteString(escapedSuffix); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON suffix")
|
||||
}
|
||||
|
||||
if _, err := w.outFile.WriteString(`","files":[`); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON files start")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteFile writes a file entry in JSON format.
|
||||
func (w *JSONWriter) WriteFile(req WriteRequest) error {
|
||||
if !w.firstFile {
|
||||
if _, err := w.outFile.WriteString(","); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON separator")
|
||||
}
|
||||
}
|
||||
w.firstFile = false
|
||||
|
||||
if req.IsStream {
|
||||
return w.writeStreaming(req)
|
||||
}
|
||||
return w.writeInline(req)
|
||||
}
|
||||
|
||||
// Close writes the JSON footer.
|
||||
func (w *JSONWriter) Close() error {
|
||||
// Close JSON structure
|
||||
if _, err := w.outFile.WriteString("]}"); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON end")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeStreaming writes a large file as JSON in streaming chunks.
|
||||
func (w *JSONWriter) writeStreaming(req WriteRequest) error {
|
||||
defer w.closeReader(req.Reader, req.Path)
|
||||
|
||||
language := detectLanguage(req.Path)
|
||||
|
||||
// Write file start
|
||||
escapedPath := escapeJSONString(req.Path)
|
||||
if _, err := fmt.Fprintf(w.outFile, `{"path":"%s","language":"%s","content":"`, escapedPath, language); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file start").WithFilePath(req.Path)
|
||||
}
|
||||
|
||||
// Stream content with JSON escaping
|
||||
if err := w.streamJSONContent(req.Reader, req.Path); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write file end
|
||||
if _, err := w.outFile.WriteString(`"}`); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file end").WithFilePath(req.Path)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeInline writes a small file directly as JSON.
|
||||
func (w *JSONWriter) writeInline(req WriteRequest) error {
|
||||
language := detectLanguage(req.Path)
|
||||
fileData := FileData{
|
||||
Path: req.Path,
|
||||
Content: req.Content,
|
||||
Language: language,
|
||||
}
|
||||
|
||||
encoded, err := json.Marshal(fileData)
|
||||
if err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingEncode, "failed to marshal JSON").WithFilePath(req.Path)
|
||||
}
|
||||
|
||||
if _, err := w.outFile.Write(encoded); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file").WithFilePath(req.Path)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// streamJSONContent streams content with JSON escaping.
|
||||
func (w *JSONWriter) streamJSONContent(reader io.Reader, path string) error {
|
||||
buf := make([]byte, StreamChunkSize)
|
||||
for {
|
||||
n, err := reader.Read(buf)
|
||||
if n > 0 {
|
||||
escaped := escapeJSONString(string(buf[:n]))
|
||||
if _, writeErr := w.outFile.WriteString(escaped); writeErr != nil {
|
||||
return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON chunk").WithFilePath(path)
|
||||
}
|
||||
}
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to read JSON chunk").WithFilePath(path)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// closeReader safely closes a reader if it implements io.Closer.
|
||||
func (w *JSONWriter) closeReader(reader io.Reader, path string) {
|
||||
if closer, ok := reader.(io.Closer); ok {
|
||||
if err := closer.Close(); err != nil {
|
||||
utils.LogError(
|
||||
"Failed to close file reader",
|
||||
utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// escapeJSONString returns s escaped for use inside a JSON string literal,
// without the surrounding double quotes.
func escapeJSONString(s string) string {
	// json.Marshal performs the escaping and always wraps a string in one pair
	// of quotes. Its error is ignored because the input is a plain string.
	quoted, _ := json.Marshal(s)

	inner := quoted[1:]           // drop the opening quote
	inner = inner[:len(inner)-1] // drop the closing quote
	return string(inner)
}
|
||||
|
||||
// startJSONWriter handles JSON format output with streaming support.
|
||||
func startJSONWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
|
||||
defer close(done)
|
||||
|
||||
writer := NewJSONWriter(outFile)
|
||||
|
||||
// Start writing
|
||||
if err := writer.Start(prefix, suffix); err != nil {
|
||||
utils.LogError("Failed to write JSON start", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Process files
|
||||
for req := range writeCh {
|
||||
if err := writer.WriteFile(req); err != nil {
|
||||
utils.LogError("Failed to write JSON file", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Close writer
|
||||
if err := writer.Close(); err != nil {
|
||||
utils.LogError("Failed to write JSON end", err)
|
||||
}
|
||||
}
|
||||
139
fileproc/markdown_writer.go
Normal file
139
fileproc/markdown_writer.go
Normal file
@@ -0,0 +1,139 @@
|
||||
package fileproc
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/ivuorinen/gibidify/utils"
|
||||
)
|
||||
|
||||
// MarkdownWriter writes the output as Markdown: one heading and one fenced
// code block per file, written as files arrive.
type MarkdownWriter struct {
	// outFile is the destination of all output.
	outFile *os.File
}
|
||||
|
||||
// NewMarkdownWriter creates a new markdown writer.
|
||||
func NewMarkdownWriter(outFile *os.File) *MarkdownWriter {
|
||||
return &MarkdownWriter{outFile: outFile}
|
||||
}
|
||||
|
||||
// Start writes the markdown header.
|
||||
func (w *MarkdownWriter) Start(prefix, suffix string) error {
|
||||
if prefix != "" {
|
||||
if _, err := fmt.Fprintf(w.outFile, "# %s\n\n", prefix); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write prefix")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteFile writes a file entry in markdown format.
|
||||
func (w *MarkdownWriter) WriteFile(req WriteRequest) error {
|
||||
if req.IsStream {
|
||||
return w.writeStreaming(req)
|
||||
}
|
||||
return w.writeInline(req)
|
||||
}
|
||||
|
||||
// Close writes the markdown footer.
|
||||
func (w *MarkdownWriter) Close(suffix string) error {
|
||||
if suffix != "" {
|
||||
if _, err := fmt.Fprintf(w.outFile, "\n# %s\n", suffix); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write suffix")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeStreaming writes a large file in streaming chunks.
|
||||
func (w *MarkdownWriter) writeStreaming(req WriteRequest) error {
|
||||
defer w.closeReader(req.Reader, req.Path)
|
||||
|
||||
language := detectLanguage(req.Path)
|
||||
|
||||
// Write file header
|
||||
if _, err := fmt.Fprintf(w.outFile, "## File: `%s`\n```%s\n", req.Path, language); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file header").WithFilePath(req.Path)
|
||||
}
|
||||
|
||||
// Stream file content in chunks
|
||||
if err := w.streamContent(req.Reader, req.Path); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write file footer
|
||||
if _, err := w.outFile.WriteString("\n```\n\n"); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file footer").WithFilePath(req.Path)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeInline writes a small file directly from content.
|
||||
func (w *MarkdownWriter) writeInline(req WriteRequest) error {
|
||||
language := detectLanguage(req.Path)
|
||||
formatted := fmt.Sprintf("## File: `%s`\n```%s\n%s\n```\n\n", req.Path, language, req.Content)
|
||||
|
||||
if _, err := w.outFile.WriteString(formatted); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write inline content").WithFilePath(req.Path)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// streamContent streams file content in chunks.
|
||||
func (w *MarkdownWriter) streamContent(reader io.Reader, path string) error {
|
||||
buf := make([]byte, StreamChunkSize)
|
||||
for {
|
||||
n, err := reader.Read(buf)
|
||||
if n > 0 {
|
||||
if _, writeErr := w.outFile.Write(buf[:n]); writeErr != nil {
|
||||
return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write chunk").WithFilePath(path)
|
||||
}
|
||||
}
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to read chunk").WithFilePath(path)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// closeReader safely closes a reader if it implements io.Closer.
|
||||
func (w *MarkdownWriter) closeReader(reader io.Reader, path string) {
|
||||
if closer, ok := reader.(io.Closer); ok {
|
||||
if err := closer.Close(); err != nil {
|
||||
utils.LogError(
|
||||
"Failed to close file reader",
|
||||
utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// startMarkdownWriter handles markdown format output with streaming support.
|
||||
func startMarkdownWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
|
||||
defer close(done)
|
||||
|
||||
writer := NewMarkdownWriter(outFile)
|
||||
|
||||
// Start writing
|
||||
if err := writer.Start(prefix, suffix); err != nil {
|
||||
utils.LogError("Failed to write markdown prefix", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Process files
|
||||
for req := range writeCh {
|
||||
if err := writer.WriteFile(req); err != nil {
|
||||
utils.LogError("Failed to write markdown file", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Close writer
|
||||
if err := writer.Close(suffix); err != nil {
|
||||
utils.LogError("Failed to write markdown suffix", err)
|
||||
}
|
||||
}
|
||||
@@ -3,34 +3,157 @@ package fileproc
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/ivuorinen/gibidify/config"
|
||||
"github.com/ivuorinen/gibidify/utils"
|
||||
)
|
||||
|
||||
// Size limits, in bytes, used when reading and buffering file content.
const (
	// StreamChunkSize is the buffer size used when copying a streamed file (64 KiB).
	StreamChunkSize = 64 << 10
	// StreamThreshold is the file size above which a file is streamed rather
	// than read into memory in one piece (1 MiB).
	StreamThreshold = 1 << 20
	// MaxMemoryBuffer is the maximum memory to use for buffering content (10 MiB).
	MaxMemoryBuffer = 10 << 20
)
|
||||
|
||||
// WriteRequest represents the content to be written.
|
||||
type WriteRequest struct {
|
||||
Path string
|
||||
Content string
|
||||
Path string
|
||||
Content string
|
||||
IsStream bool
|
||||
Reader io.Reader
|
||||
}
|
||||
|
||||
// FileProcessor turns files on disk into WriteRequests.
type FileProcessor struct {
	// rootPath is the base against which output paths are made relative.
	rootPath string
	// sizeLimit is the largest file size, in bytes, that validateFile accepts.
	sizeLimit int64
}
|
||||
|
||||
// NewFileProcessor creates a new file processor.
|
||||
func NewFileProcessor(rootPath string) *FileProcessor {
|
||||
return &FileProcessor{
|
||||
rootPath: rootPath,
|
||||
sizeLimit: config.GetFileSizeLimit(),
|
||||
}
|
||||
}
|
||||
|
||||
// ProcessFile reads the file at filePath and sends a formatted output to outCh.
|
||||
// It automatically chooses between loading the entire file or streaming based on file size.
|
||||
func ProcessFile(filePath string, outCh chan<- WriteRequest, rootPath string) {
|
||||
content, err := os.ReadFile(filePath)
|
||||
processor := NewFileProcessor(rootPath)
|
||||
processor.Process(filePath, outCh)
|
||||
}
|
||||
|
||||
// Process handles file processing with the configured settings.
|
||||
func (p *FileProcessor) Process(filePath string, outCh chan<- WriteRequest) {
|
||||
// Validate file
|
||||
fileInfo, err := p.validateFile(filePath)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to read file %s: %v", filePath, err)
|
||||
return // Error already logged
|
||||
}
|
||||
|
||||
// Get relative path
|
||||
relPath := p.getRelativePath(filePath)
|
||||
|
||||
// Choose processing strategy based on file size
|
||||
if fileInfo.Size() <= StreamThreshold {
|
||||
p.processInMemory(filePath, relPath, outCh)
|
||||
} else {
|
||||
p.processStreaming(filePath, relPath, outCh)
|
||||
}
|
||||
}
|
||||
|
||||
// validateFile checks if the file can be processed.
|
||||
func (p *FileProcessor) validateFile(filePath string) (os.FileInfo, error) {
|
||||
fileInfo, err := os.Stat(filePath)
|
||||
if err != nil {
|
||||
structErr := utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to stat file").WithFilePath(filePath)
|
||||
utils.LogErrorf(structErr, "Failed to stat file %s", filePath)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Check size limit
|
||||
if fileInfo.Size() > p.sizeLimit {
|
||||
utils.LogErrorf(
|
||||
utils.NewStructuredError(
|
||||
utils.ErrorTypeValidation,
|
||||
utils.CodeValidationSize,
|
||||
fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", fileInfo.Size(), p.sizeLimit),
|
||||
).WithFilePath(filePath).WithContext("file_size", fileInfo.Size()).WithContext("size_limit", p.sizeLimit),
|
||||
"Skipping large file %s", filePath,
|
||||
)
|
||||
return nil, fmt.Errorf("file too large")
|
||||
}
|
||||
|
||||
return fileInfo, nil
|
||||
}
|
||||
|
||||
// getRelativePath computes the path relative to rootPath.
|
||||
func (p *FileProcessor) getRelativePath(filePath string) string {
|
||||
relPath, err := filepath.Rel(p.rootPath, filePath)
|
||||
if err != nil {
|
||||
return filePath // Fallback
|
||||
}
|
||||
return relPath
|
||||
}
|
||||
|
||||
// processInMemory loads the entire file into memory (for small files).
|
||||
func (p *FileProcessor) processInMemory(filePath, relPath string, outCh chan<- WriteRequest) {
|
||||
content, err := os.ReadFile(filePath) // #nosec G304 - filePath is validated by walker
|
||||
if err != nil {
|
||||
structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to read file").WithFilePath(filePath)
|
||||
utils.LogErrorf(structErr, "Failed to read file %s", filePath)
|
||||
return
|
||||
}
|
||||
|
||||
// Compute path relative to rootPath, so /a/b/c/d.c becomes c/d.c
|
||||
relPath, err := filepath.Rel(rootPath, filePath)
|
||||
if err != nil {
|
||||
// Fallback if something unexpected happens
|
||||
relPath = filePath
|
||||
outCh <- WriteRequest{
|
||||
Path: relPath,
|
||||
Content: p.formatContent(relPath, string(content)),
|
||||
IsStream: false,
|
||||
}
|
||||
}
|
||||
|
||||
// processStreaming creates a streaming reader for large files.
|
||||
func (p *FileProcessor) processStreaming(filePath, relPath string, outCh chan<- WriteRequest) {
|
||||
reader := p.createStreamReader(filePath, relPath)
|
||||
if reader == nil {
|
||||
return // Error already logged
|
||||
}
|
||||
|
||||
// Format: separator, then relative path, then content
|
||||
formatted := fmt.Sprintf("\n---\n%s\n%s\n", relPath, string(content))
|
||||
outCh <- WriteRequest{Path: relPath, Content: formatted}
|
||||
outCh <- WriteRequest{
|
||||
Path: relPath,
|
||||
Content: "", // Empty since content is in Reader
|
||||
IsStream: true,
|
||||
Reader: reader,
|
||||
}
|
||||
}
|
||||
|
||||
// createStreamReader creates a reader that combines header and file content.
|
||||
func (p *FileProcessor) createStreamReader(filePath, relPath string) io.Reader {
|
||||
file, err := os.Open(filePath) // #nosec G304 - filePath is validated by walker
|
||||
if err != nil {
|
||||
structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to open file for streaming").WithFilePath(filePath)
|
||||
utils.LogErrorf(structErr, "Failed to open file for streaming %s", filePath)
|
||||
return nil
|
||||
}
|
||||
// Note: file will be closed by the writer
|
||||
|
||||
header := p.formatHeader(relPath)
|
||||
return io.MultiReader(header, file)
|
||||
}
|
||||
|
||||
// formatContent formats the file content with header.
|
||||
func (p *FileProcessor) formatContent(relPath, content string) string {
|
||||
return fmt.Sprintf("\n---\n%s\n%s\n", relPath, content)
|
||||
}
|
||||
|
||||
// formatHeader creates a reader for the file header.
|
||||
func (p *FileProcessor) formatHeader(relPath string) io.Reader {
|
||||
return strings.NewReader(fmt.Sprintf("\n---\n%s\n", relPath))
|
||||
}
|
||||
|
||||
@@ -6,12 +6,15 @@ import (
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
fileproc "github.com/ivuorinen/gibidify/fileproc"
|
||||
"github.com/ivuorinen/gibidify/fileproc"
|
||||
"github.com/ivuorinen/gibidify/testutil"
|
||||
)
|
||||
|
||||
func TestProcessFile(t *testing.T) {
|
||||
// Reset and load default config to ensure proper file size limits
|
||||
testutil.ResetViperConfig(t, "")
|
||||
// Create a temporary file with known content.
|
||||
tmpFile, err := os.CreateTemp("", "testfile")
|
||||
tmpFile, err := os.CreateTemp(t.TempDir(), "testfile")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
107
fileproc/registry.go
Normal file
107
fileproc/registry.go
Normal file
@@ -0,0 +1,107 @@
|
||||
// Package fileproc provides file processing utilities.
|
||||
package fileproc
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
const minExtensionLength = 2
|
||||
|
||||
var (
|
||||
registry *FileTypeRegistry
|
||||
registryOnce sync.Once
|
||||
)
|
||||
|
||||
// FileTypeRegistry manages file type detection and classification.
|
||||
type FileTypeRegistry struct {
|
||||
imageExts map[string]bool
|
||||
binaryExts map[string]bool
|
||||
languageMap map[string]string
|
||||
|
||||
// Cache for frequent lookups to avoid repeated string operations
|
||||
extCache map[string]string // filename -> normalized extension
|
||||
resultCache map[string]FileTypeResult // extension -> cached result
|
||||
cacheMutex sync.RWMutex
|
||||
maxCacheSize int
|
||||
|
||||
// Performance statistics
|
||||
stats RegistryStats
|
||||
}
|
||||
|
||||
// RegistryStats is a set of counters describing registry lookups and cache
// behavior.
type RegistryStats struct {
	TotalLookups, CacheHits, CacheMisses, CacheEvictions uint64
}
|
||||
|
||||
// FileTypeResult is the cached outcome of classifying one extension.
type FileTypeResult struct {
	IsImage, IsBinary   bool
	Language, Extension string
}
|
||||
|
||||
// initRegistry initializes the default file type registry with common extensions.
|
||||
func initRegistry() *FileTypeRegistry {
|
||||
return &FileTypeRegistry{
|
||||
imageExts: getImageExtensions(),
|
||||
binaryExts: getBinaryExtensions(),
|
||||
languageMap: getLanguageMap(),
|
||||
extCache: make(map[string]string, 1000), // Cache for extension normalization
|
||||
resultCache: make(map[string]FileTypeResult, 500), // Cache for type results
|
||||
maxCacheSize: 500,
|
||||
}
|
||||
}
|
||||
|
||||
// getRegistry returns the singleton file type registry, creating it if necessary.
|
||||
func getRegistry() *FileTypeRegistry {
|
||||
registryOnce.Do(func() {
|
||||
registry = initRegistry()
|
||||
})
|
||||
return registry
|
||||
}
|
||||
|
||||
// GetDefaultRegistry returns the default file type registry.
|
||||
func GetDefaultRegistry() *FileTypeRegistry {
|
||||
return getRegistry()
|
||||
}
|
||||
|
||||
// GetStats returns a copy of the current registry statistics.
|
||||
func (r *FileTypeRegistry) GetStats() RegistryStats {
|
||||
r.cacheMutex.RLock()
|
||||
defer r.cacheMutex.RUnlock()
|
||||
return r.stats
|
||||
}
|
||||
|
||||
// GetCacheInfo returns current cache size information.
|
||||
func (r *FileTypeRegistry) GetCacheInfo() (extCacheSize, resultCacheSize, maxCacheSize int) {
|
||||
r.cacheMutex.RLock()
|
||||
defer r.cacheMutex.RUnlock()
|
||||
return len(r.extCache), len(r.resultCache), r.maxCacheSize
|
||||
}
|
||||
|
||||
// ResetRegistryForTesting resets the registry to its initial state.
|
||||
// This function should only be used in tests.
|
||||
func ResetRegistryForTesting() {
|
||||
registryOnce = sync.Once{}
|
||||
registry = nil
|
||||
}
|
||||
|
||||
// normalizeExtension returns the lower-cased extension of filename, including
// the leading dot, or "" when the name has no extension.
func normalizeExtension(filename string) string {
	ext := filepath.Ext(filename)
	return strings.ToLower(ext)
}
|
||||
|
||||
// isSpecialFile reports whether a file that has no extension is listed in
// extensions under its lower-cased base name. Any name for which filepath.Ext
// is non-empty yields false; that includes dotfiles such as ".DS_Store",
// whose whole name counts as the extension.
func isSpecialFile(filename string, extensions map[string]bool) bool {
	if filepath.Ext(filename) != "" {
		return false
	}
	key := strings.ToLower(filepath.Base(filename))
	return extensions[key]
}
|
||||
@@ -4,10 +4,8 @@ package fileproc
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/ivuorinen/gibidify/config"
|
||||
ignore "github.com/sabhiram/go-gitignore"
|
||||
"github.com/ivuorinen/gibidify/utils"
|
||||
)
|
||||
|
||||
// Walker defines an interface for scanning directories.
|
||||
@@ -18,22 +16,25 @@ type Walker interface {
|
||||
// ProdWalker implements Walker using a custom directory walker that
|
||||
// respects .gitignore and .ignore files, configuration-defined ignore directories,
|
||||
// and ignores binary and image files by default.
|
||||
type ProdWalker struct{}
|
||||
type ProdWalker struct {
|
||||
filter *FileFilter
|
||||
}
|
||||
|
||||
// ignoreRule holds an ignore matcher along with the base directory where it was loaded.
|
||||
type ignoreRule struct {
|
||||
base string
|
||||
gi *ignore.GitIgnore
|
||||
// NewProdWalker creates a new production walker with current configuration.
|
||||
func NewProdWalker() *ProdWalker {
|
||||
return &ProdWalker{
|
||||
filter: NewFileFilter(),
|
||||
}
|
||||
}
|
||||
|
||||
// Walk scans the given root directory recursively and returns a slice of file paths
|
||||
// that are not ignored based on .gitignore/.ignore files, the configuration, or the default binary/image filter.
|
||||
func (pw ProdWalker) Walk(root string) ([]string, error) {
|
||||
absRoot, err := filepath.Abs(root)
|
||||
func (w *ProdWalker) Walk(root string) ([]string, error) {
|
||||
absRoot, err := utils.GetAbsolutePath(root)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSPathResolution, "failed to resolve root path").WithFilePath(root)
|
||||
}
|
||||
return walkDir(absRoot, absRoot, []ignoreRule{})
|
||||
return w.walkDir(absRoot, []ignoreRule{})
|
||||
}
|
||||
|
||||
// walkDir recursively walks the directory tree starting at currentDir.
|
||||
@@ -41,122 +42,34 @@ func (pw ProdWalker) Walk(root string) ([]string, error) {
|
||||
// appends the corresponding rules to the inherited list. Each file/directory is
|
||||
// then checked against the accumulated ignore rules, the configuration's list of ignored directories,
|
||||
// and a default filter that ignores binary and image files.
|
||||
func walkDir(root string, currentDir string, parentRules []ignoreRule) ([]string, error) {
|
||||
func (w *ProdWalker) walkDir(currentDir string, parentRules []ignoreRule) ([]string, error) {
|
||||
var results []string
|
||||
|
||||
entries, err := os.ReadDir(currentDir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to read directory").WithFilePath(currentDir)
|
||||
}
|
||||
|
||||
// Start with the parent's ignore rules.
|
||||
rules := make([]ignoreRule, len(parentRules))
|
||||
copy(rules, parentRules)
|
||||
|
||||
// Check for .gitignore and .ignore files in the current directory.
|
||||
for _, fileName := range []string{".gitignore", ".ignore"} {
|
||||
ignorePath := filepath.Join(currentDir, fileName)
|
||||
if info, err := os.Stat(ignorePath); err == nil && !info.IsDir() {
|
||||
gi, err := ignore.CompileIgnoreFile(ignorePath)
|
||||
if err == nil {
|
||||
rules = append(rules, ignoreRule{
|
||||
base: currentDir,
|
||||
gi: gi,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get the list of directories to ignore from configuration.
|
||||
ignoredDirs := config.GetIgnoredDirectories()
|
||||
sizeLimit := config.GetFileSizeLimit() // e.g., 5242880 for 5 MB
|
||||
rules := loadIgnoreRules(currentDir, parentRules)
|
||||
|
||||
for _, entry := range entries {
|
||||
fullPath := filepath.Join(currentDir, entry.Name())
|
||||
|
||||
// For directories, check if its name is in the config ignore list.
|
||||
if entry.IsDir() {
|
||||
for _, d := range ignoredDirs {
|
||||
if entry.Name() == d {
|
||||
// Skip this directory entirely.
|
||||
goto SkipEntry
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Check if file exceeds the configured size limit.
|
||||
info, err := entry.Info()
|
||||
if err == nil && info.Size() > sizeLimit {
|
||||
goto SkipEntry
|
||||
}
|
||||
|
||||
// For files, apply the default filter to ignore binary and image files.
|
||||
if isBinaryOrImage(fullPath) {
|
||||
goto SkipEntry
|
||||
}
|
||||
if w.filter.shouldSkipEntry(entry, fullPath, rules) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check accumulated ignore rules.
|
||||
for _, rule := range rules {
|
||||
// Compute the path relative to the base where the ignore rule was defined.
|
||||
rel, err := filepath.Rel(rule.base, fullPath)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
// If the rule matches, skip this entry.
|
||||
if rule.gi.MatchesPath(rel) {
|
||||
goto SkipEntry
|
||||
}
|
||||
}
|
||||
|
||||
// If not ignored, then process the entry.
|
||||
// Process entry
|
||||
if entry.IsDir() {
|
||||
subFiles, err := walkDir(root, fullPath, rules)
|
||||
subFiles, err := w.walkDir(fullPath, rules)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingTraversal, "failed to traverse subdirectory").WithFilePath(fullPath)
|
||||
}
|
||||
results = append(results, subFiles...)
|
||||
} else {
|
||||
results = append(results, fullPath)
|
||||
}
|
||||
SkipEntry:
|
||||
continue
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// isBinaryOrImage reports whether filePath should be treated as an image or a
// binary artifact, judged solely by its extension. The comparison is
// case-insensitive: the extension is lower-cased once and matched against
// lower-case labels.
func isBinaryOrImage(filePath string) bool {
	switch strings.ToLower(filepath.Ext(filePath)) {
	case ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".ico", ".svg", ".webp":
		// Common image formats.
		return true
	case ".exe", ".dll", ".so", ".bin", ".dat", ".zip", ".tar", ".gz", ".7z", ".rar":
		// Common executables, libraries, data blobs and archives.
		return true
	case ".ds_store":
		// filepath.Ext treats the whole name ".DS_Store" as the extension.
		// Because the extension is lower-cased before matching, the label
		// must be lower case too; a mixed-case ".DS_Store" entry can never
		// match.
		return true
	default:
		return false
	}
}
|
||||
|
||||
@@ -1,64 +1,42 @@
|
||||
package fileproc_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/ivuorinen/gibidify/config"
|
||||
fileproc "github.com/ivuorinen/gibidify/fileproc"
|
||||
"github.com/spf13/viper"
|
||||
|
||||
"github.com/ivuorinen/gibidify/fileproc"
|
||||
"github.com/ivuorinen/gibidify/testutil"
|
||||
)
|
||||
|
||||
func TestProdWalkerWithIgnore(t *testing.T) {
|
||||
// Create a temporary directory structure.
|
||||
rootDir, err := os.MkdirTemp("", "walker_test_root")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp root directory: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := os.RemoveAll(rootDir); err != nil {
|
||||
t.Fatalf("cleanup failed: %v", err)
|
||||
}
|
||||
}()
|
||||
rootDir := t.TempDir()
|
||||
|
||||
subDir := filepath.Join(rootDir, "vendor")
|
||||
if err := os.Mkdir(subDir, 0755); err != nil {
|
||||
t.Fatalf("Failed to create subDir: %v", err)
|
||||
}
|
||||
subDir := testutil.CreateTestDirectory(t, rootDir, "vendor")
|
||||
|
||||
// Write sample files
|
||||
filePaths := []string{
|
||||
filepath.Join(rootDir, "file1.go"),
|
||||
filepath.Join(rootDir, "file2.txt"),
|
||||
filepath.Join(subDir, "file_in_vendor.txt"), // should be ignored
|
||||
}
|
||||
for _, fp := range filePaths {
|
||||
if err := os.WriteFile(fp, []byte("content"), 0644); err != nil {
|
||||
t.Fatalf("Failed to write file %s: %v", fp, err)
|
||||
}
|
||||
}
|
||||
testutil.CreateTestFiles(t, rootDir, []testutil.FileSpec{
|
||||
{Name: "file1.go", Content: "content"},
|
||||
{Name: "file2.txt", Content: "content"},
|
||||
})
|
||||
testutil.CreateTestFile(t, subDir, "file_in_vendor.txt", []byte("content")) // should be ignored
|
||||
|
||||
// .gitignore that ignores *.txt and itself
|
||||
gitignoreContent := `*.txt
|
||||
.gitignore
|
||||
`
|
||||
gitignorePath := filepath.Join(rootDir, ".gitignore")
|
||||
if err := os.WriteFile(gitignorePath, []byte(gitignoreContent), 0644); err != nil {
|
||||
t.Fatalf("Failed to write .gitignore: %v", err)
|
||||
}
|
||||
testutil.CreateTestFile(t, rootDir, ".gitignore", []byte(gitignoreContent))
|
||||
|
||||
// Initialize config to ignore "vendor" directory
|
||||
viper.Reset()
|
||||
config.LoadConfig()
|
||||
testutil.ResetViperConfig(t, "")
|
||||
viper.Set("ignoreDirectories", []string{"vendor"})
|
||||
|
||||
// Run walker
|
||||
var w fileproc.Walker = fileproc.ProdWalker{}
|
||||
w := fileproc.NewProdWalker()
|
||||
found, err := w.Walk(rootDir)
|
||||
if err != nil {
|
||||
t.Fatalf("Walk returned error: %v", err)
|
||||
}
|
||||
testutil.MustSucceed(t, err, "walking directory")
|
||||
|
||||
// We expect only file1.go to appear
|
||||
if len(found) != 1 {
|
||||
@@ -70,38 +48,24 @@ func TestProdWalkerWithIgnore(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestProdWalkerBinaryCheck(t *testing.T) {
|
||||
rootDir, err := os.MkdirTemp("", "walker_test_bincheck")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp root directory: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := os.RemoveAll(rootDir); err != nil {
|
||||
t.Fatalf("cleanup failed: %v", err)
|
||||
}
|
||||
}()
|
||||
rootDir := t.TempDir()
|
||||
|
||||
// Create a mock binary file
|
||||
binFile := filepath.Join(rootDir, "somefile.exe")
|
||||
if err := os.WriteFile(binFile, []byte("fake-binary-content"), 0644); err != nil {
|
||||
t.Fatalf("Failed to write file %s: %v", binFile, err)
|
||||
}
|
||||
|
||||
// Create a normal file
|
||||
normalFile := filepath.Join(rootDir, "keep.go")
|
||||
if err := os.WriteFile(normalFile, []byte("package main"), 0644); err != nil {
|
||||
t.Fatalf("Failed to write file %s: %v", normalFile, err)
|
||||
}
|
||||
// Create test files
|
||||
testutil.CreateTestFiles(t, rootDir, []testutil.FileSpec{
|
||||
{Name: "somefile.exe", Content: "fake-binary-content"},
|
||||
{Name: "keep.go", Content: "package main"},
|
||||
})
|
||||
|
||||
// Reset and load default config
|
||||
viper.Reset()
|
||||
config.LoadConfig()
|
||||
testutil.ResetViperConfig(t, "")
|
||||
|
||||
// Reset FileTypeRegistry to ensure clean state
|
||||
fileproc.ResetRegistryForTesting()
|
||||
|
||||
// Run walker
|
||||
var w fileproc.Walker = fileproc.ProdWalker{}
|
||||
w := fileproc.NewProdWalker()
|
||||
found, err := w.Walk(rootDir)
|
||||
if err != nil {
|
||||
t.Fatalf("Walk returned error: %v", err)
|
||||
}
|
||||
testutil.MustSucceed(t, err, "walking directory")
|
||||
|
||||
// Only "keep.go" should be returned
|
||||
if len(found) != 1 {
|
||||
@@ -113,34 +77,17 @@ func TestProdWalkerBinaryCheck(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestProdWalkerSizeLimit(t *testing.T) {
|
||||
rootDir, err := os.MkdirTemp("", "walker_test_sizelimit")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp root directory: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := os.RemoveAll(rootDir); err != nil {
|
||||
t.Fatalf("cleanup failed: %v", err)
|
||||
}
|
||||
}()
|
||||
rootDir := t.TempDir()
|
||||
|
||||
// Create a file exceeding the size limit
|
||||
largeFilePath := filepath.Join(rootDir, "largefile.txt")
|
||||
// Create test files
|
||||
largeFileData := make([]byte, 6*1024*1024) // 6 MB
|
||||
if err := os.WriteFile(largeFilePath, largeFileData, 0644); err != nil {
|
||||
t.Fatalf("Failed to write large file: %v", err)
|
||||
}
|
||||
|
||||
// Create a small file
|
||||
smallFilePath := filepath.Join(rootDir, "smallfile.go")
|
||||
if err := os.WriteFile(smallFilePath, []byte("package main"), 0644); err != nil {
|
||||
t.Fatalf("Failed to write small file: %v", err)
|
||||
}
|
||||
testutil.CreateTestFile(t, rootDir, "largefile.txt", largeFileData)
|
||||
testutil.CreateTestFile(t, rootDir, "smallfile.go", []byte("package main"))
|
||||
|
||||
// Reset and load default config, which sets size limit to 5 MB
|
||||
viper.Reset()
|
||||
config.LoadConfig()
|
||||
testutil.ResetViperConfig(t, "")
|
||||
|
||||
var w fileproc.Walker = fileproc.ProdWalker{}
|
||||
w := fileproc.NewProdWalker()
|
||||
found, err := w.Walk(rootDir)
|
||||
if err != nil {
|
||||
t.Fatalf("Walk returned error: %v", err)
|
||||
|
||||
@@ -1,101 +1,29 @@
|
||||
// Package fileproc provides a writer for the output of the file processor.
|
||||
//
|
||||
// The StartWriter function writes the output in the specified format.
|
||||
// The formatMarkdown function formats the output in Markdown format.
|
||||
// The detectLanguage function tries to infer the code block language from the file extension.
|
||||
// The OutputData struct represents the full output structure.
|
||||
// The FileData struct represents a single file's path and content.
|
||||
package fileproc
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"gopkg.in/yaml.v3"
|
||||
"github.com/ivuorinen/gibidify/utils"
|
||||
)
|
||||
|
||||
// FileData pairs a file's path with its content. The struct tags fix the key
// names used in JSON and YAML output.
type FileData struct {
	// Path is serialized under the "path" key.
	Path string `json:"path" yaml:"path"`
	// Content is serialized under the "content" key.
	Content string `json:"content" yaml:"content"`
}
|
||||
|
||||
// OutputData represents the full output structure.
|
||||
type OutputData struct {
|
||||
Prefix string `json:"prefix,omitempty" yaml:"prefix,omitempty"`
|
||||
Files []FileData `json:"files" yaml:"files"`
|
||||
Suffix string `json:"suffix,omitempty" yaml:"suffix,omitempty"`
|
||||
}
|
||||
|
||||
// StartWriter writes the output in the specified format.
|
||||
func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format string, prefix, suffix string) {
|
||||
var files []FileData
|
||||
|
||||
// Read from channel until closed
|
||||
for req := range writeCh {
|
||||
files = append(files, FileData(req))
|
||||
}
|
||||
|
||||
// Create output struct
|
||||
output := OutputData{Prefix: prefix, Files: files, Suffix: suffix}
|
||||
|
||||
// Serialize based on format
|
||||
var outputData []byte
|
||||
var err error
|
||||
|
||||
// StartWriter writes the output in the specified format with memory optimization.
|
||||
func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format, prefix, suffix string) {
|
||||
switch format {
|
||||
case "json":
|
||||
outputData, err = json.MarshalIndent(output, "", " ")
|
||||
case "yaml":
|
||||
outputData, err = yaml.Marshal(output)
|
||||
case "markdown":
|
||||
outputData = []byte(formatMarkdown(output))
|
||||
startMarkdownWriter(outFile, writeCh, done, prefix, suffix)
|
||||
case "json":
|
||||
startJSONWriter(outFile, writeCh, done, prefix, suffix)
|
||||
case "yaml":
|
||||
startYAMLWriter(outFile, writeCh, done, prefix, suffix)
|
||||
default:
|
||||
err = fmt.Errorf("unsupported format: %s", format)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logrus.Errorf("Error encoding output: %v", err)
|
||||
err := utils.NewStructuredError(
|
||||
utils.ErrorTypeValidation,
|
||||
utils.CodeValidationFormat,
|
||||
fmt.Sprintf("unsupported format: %s", format),
|
||||
).WithContext("format", format)
|
||||
utils.LogError("Failed to encode output", err)
|
||||
close(done)
|
||||
return
|
||||
}
|
||||
|
||||
// Write to file
|
||||
if _, err := outFile.Write(outputData); err != nil {
|
||||
logrus.Errorf("Error writing to file: %v", err)
|
||||
}
|
||||
|
||||
close(done)
|
||||
}
|
||||
|
||||
func formatMarkdown(output OutputData) string {
|
||||
markdown := "# " + output.Prefix + "\n\n"
|
||||
|
||||
for _, file := range output.Files {
|
||||
markdown += fmt.Sprintf("## File: `%s`\n```%s\n%s\n```\n\n", file.Path, detectLanguage(file.Path), file.Content)
|
||||
}
|
||||
|
||||
markdown += "# " + output.Suffix
|
||||
return markdown
|
||||
}
|
||||
|
||||
// detectLanguage maps a filename to a markdown code-fence language tag based
// on its suffix. Only ".go", ".py", ".c" and ".js" are recognized, and the
// match is case-sensitive. Names shorter than three bytes and all other names
// yield "".
func detectLanguage(filename string) string {
	if len(filename) < 3 {
		return ""
	}

	known := [...]struct{ suffix, lang string }{
		{".go", "go"},
		{".py", "python"},
		{".c", "c"},
		{".js", "javascript"},
	}
	for _, k := range known {
		// Compare the tail of the name with the suffix; start is never
		// negative for names that are at least three bytes long.
		start := len(filename) - len(k.suffix)
		if start >= 0 && filename[start:] == k.suffix {
			return k.lang
		}
	}
	return ""
}
|
||||
|
||||
@@ -7,8 +7,9 @@ import (
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
fileproc "github.com/ivuorinen/gibidify/fileproc"
|
||||
"gopkg.in/yaml.v3"
|
||||
|
||||
"github.com/ivuorinen/gibidify/fileproc"
|
||||
)
|
||||
|
||||
func TestStartWriter_Formats(t *testing.T) {
|
||||
@@ -18,107 +19,109 @@ func TestStartWriter_Formats(t *testing.T) {
|
||||
format string
|
||||
expectError bool
|
||||
}{
|
||||
{
|
||||
name: "JSON format",
|
||||
format: "json",
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "YAML format",
|
||||
format: "yaml",
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "Markdown format",
|
||||
format: "markdown",
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "Invalid format",
|
||||
format: "invalid",
|
||||
expectError: true,
|
||||
},
|
||||
{"JSON format", "json", false},
|
||||
{"YAML format", "yaml", false},
|
||||
{"Markdown format", "markdown", false},
|
||||
{"Invalid format", "invalid", true},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
outFile, err := os.CreateTemp("", "gibidify_test_output")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp file: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := outFile.Close(); err != nil {
|
||||
t.Errorf("close temp file: %v", err)
|
||||
}
|
||||
if err := os.Remove(outFile.Name()); err != nil {
|
||||
t.Errorf("remove temp file: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Prepare channels
|
||||
writeCh := make(chan fileproc.WriteRequest, 2)
|
||||
doneCh := make(chan struct{})
|
||||
|
||||
// Write a couple of sample requests
|
||||
writeCh <- fileproc.WriteRequest{Path: "sample.go", Content: "package main"}
|
||||
writeCh <- fileproc.WriteRequest{Path: "example.py", Content: "def foo(): pass"}
|
||||
close(writeCh)
|
||||
|
||||
// Start the writer
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
fileproc.StartWriter(outFile, writeCh, doneCh, tc.format, "PREFIX", "SUFFIX")
|
||||
}()
|
||||
|
||||
// Wait until writer signals completion
|
||||
wg.Wait()
|
||||
<-doneCh // make sure all writes finished
|
||||
|
||||
// Read output
|
||||
data, err := os.ReadFile(outFile.Name())
|
||||
if err != nil {
|
||||
t.Fatalf("Error reading output file: %v", err)
|
||||
}
|
||||
|
||||
data := runWriterTest(t, tc.format)
|
||||
if tc.expectError {
|
||||
// For an invalid format, we expect StartWriter to log an error
|
||||
// and produce no content or minimal content. There's no official
|
||||
// error returned, so check if it's empty or obviously incorrect.
|
||||
if len(data) != 0 {
|
||||
t.Errorf("Expected no output for invalid format, got:\n%s", data)
|
||||
}
|
||||
verifyErrorOutput(t, data)
|
||||
} else {
|
||||
// Valid format: check basic properties in the output
|
||||
content := string(data)
|
||||
switch tc.format {
|
||||
case "json":
|
||||
// Quick parse check
|
||||
var outStruct fileproc.OutputData
|
||||
if err := json.Unmarshal(data, &outStruct); err != nil {
|
||||
t.Errorf("JSON unmarshal failed: %v", err)
|
||||
}
|
||||
case "yaml":
|
||||
var outStruct fileproc.OutputData
|
||||
if err := yaml.Unmarshal(data, &outStruct); err != nil {
|
||||
t.Errorf("YAML unmarshal failed: %v", err)
|
||||
}
|
||||
case "markdown":
|
||||
// Check presence of code fences or "## File: ..."
|
||||
if !strings.Contains(content, "```") {
|
||||
t.Error("Expected markdown code fences not found")
|
||||
}
|
||||
}
|
||||
|
||||
// Prefix and suffix checks (common to JSON, YAML, markdown)
|
||||
if !strings.Contains(string(data), "PREFIX") {
|
||||
t.Errorf("Missing prefix in output: %s", data)
|
||||
}
|
||||
if !strings.Contains(string(data), "SUFFIX") {
|
||||
t.Errorf("Missing suffix in output: %s", data)
|
||||
}
|
||||
verifyValidOutput(t, data, tc.format)
|
||||
verifyPrefixSuffix(t, data)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// runWriterTest executes the writer with the given format and returns the output data.
|
||||
func runWriterTest(t *testing.T, format string) []byte {
|
||||
t.Helper()
|
||||
outFile, err := os.CreateTemp(t.TempDir(), "gibidify_test_output")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp file: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if closeErr := outFile.Close(); closeErr != nil {
|
||||
t.Errorf("close temp file: %v", closeErr)
|
||||
}
|
||||
if removeErr := os.Remove(outFile.Name()); removeErr != nil {
|
||||
t.Errorf("remove temp file: %v", removeErr)
|
||||
}
|
||||
}()
|
||||
|
||||
// Prepare channels
|
||||
writeCh := make(chan fileproc.WriteRequest, 2)
|
||||
doneCh := make(chan struct{})
|
||||
|
||||
// Write a couple of sample requests
|
||||
writeCh <- fileproc.WriteRequest{Path: "sample.go", Content: "package main"}
|
||||
writeCh <- fileproc.WriteRequest{Path: "example.py", Content: "def foo(): pass"}
|
||||
close(writeCh)
|
||||
|
||||
// Start the writer
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
fileproc.StartWriter(outFile, writeCh, doneCh, format, "PREFIX", "SUFFIX")
|
||||
}()
|
||||
|
||||
// Wait until writer signals completion
|
||||
wg.Wait()
|
||||
<-doneCh // make sure all writes finished
|
||||
|
||||
// Read output
|
||||
data, err := os.ReadFile(outFile.Name())
|
||||
if err != nil {
|
||||
t.Fatalf("Error reading output file: %v", err)
|
||||
}
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
// verifyErrorOutput reports a test error unless data is empty, which is the
// expected result when the writer was given an invalid format.
func verifyErrorOutput(t *testing.T, data []byte) {
	t.Helper()

	if len(data) == 0 {
		return
	}
	t.Errorf("Expected no output for invalid format, got:\n%s", data)
}
|
||||
|
||||
// verifyValidOutput checks format-specific output validity.
|
||||
func verifyValidOutput(t *testing.T, data []byte, format string) {
|
||||
t.Helper()
|
||||
content := string(data)
|
||||
switch format {
|
||||
case "json":
|
||||
var outStruct fileproc.OutputData
|
||||
if err := json.Unmarshal(data, &outStruct); err != nil {
|
||||
t.Errorf("JSON unmarshal failed: %v", err)
|
||||
}
|
||||
case "yaml":
|
||||
var outStruct fileproc.OutputData
|
||||
if err := yaml.Unmarshal(data, &outStruct); err != nil {
|
||||
t.Errorf("YAML unmarshal failed: %v", err)
|
||||
}
|
||||
case "markdown":
|
||||
if !strings.Contains(content, "```") {
|
||||
t.Error("Expected markdown code fences not found")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// verifyPrefixSuffix reports a test error for each of the markers "PREFIX" and
// "SUFFIX" that does not occur somewhere in data.
func verifyPrefixSuffix(t *testing.T, data []byte) {
	t.Helper()

	out := string(data)
	hasPrefix := strings.Contains(out, "PREFIX")
	hasSuffix := strings.Contains(out, "SUFFIX")

	if !hasPrefix {
		t.Errorf("Missing prefix in output: %s", data)
	}
	if !hasSuffix {
		t.Errorf("Missing suffix in output: %s", data)
	}
}
|
||||
|
||||
148
fileproc/yaml_writer.go
Normal file
148
fileproc/yaml_writer.go
Normal file
@@ -0,0 +1,148 @@
|
||||
package fileproc
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/ivuorinen/gibidify/utils"
|
||||
)
|
||||
|
||||
// YAMLWriter emits gibidify output as a YAML document, supporting both inline
// and streamed file content.
type YAMLWriter struct {
	// outFile is the destination every part of the document is written to.
	outFile *os.File
}
|
||||
|
||||
// NewYAMLWriter creates a new YAML writer.
|
||||
func NewYAMLWriter(outFile *os.File) *YAMLWriter {
|
||||
return &YAMLWriter{outFile: outFile}
|
||||
}
|
||||
|
||||
// Start writes the YAML header.
|
||||
func (w *YAMLWriter) Start(prefix, suffix string) error {
|
||||
// Write YAML header
|
||||
if _, err := fmt.Fprintf(w.outFile, "prefix: %s\nsuffix: %s\nfiles:\n", yamlQuoteString(prefix), yamlQuoteString(suffix)); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML header")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteFile writes a file entry in YAML format.
|
||||
func (w *YAMLWriter) WriteFile(req WriteRequest) error {
|
||||
if req.IsStream {
|
||||
return w.writeStreaming(req)
|
||||
}
|
||||
return w.writeInline(req)
|
||||
}
|
||||
|
||||
// Close writes the YAML footer (no footer needed for YAML).
|
||||
func (w *YAMLWriter) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeStreaming writes a large file as YAML in streaming chunks.
|
||||
func (w *YAMLWriter) writeStreaming(req WriteRequest) error {
|
||||
defer w.closeReader(req.Reader, req.Path)
|
||||
|
||||
language := detectLanguage(req.Path)
|
||||
|
||||
// Write YAML file entry start
|
||||
if _, err := fmt.Fprintf(w.outFile, " - path: %s\n language: %s\n content: |\n", yamlQuoteString(req.Path), language); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML file start").WithFilePath(req.Path)
|
||||
}
|
||||
|
||||
// Stream content with YAML indentation
|
||||
return w.streamYAMLContent(req.Reader, req.Path)
|
||||
}
|
||||
|
||||
// writeInline writes a small file directly as YAML.
|
||||
func (w *YAMLWriter) writeInline(req WriteRequest) error {
|
||||
language := detectLanguage(req.Path)
|
||||
fileData := FileData{
|
||||
Path: req.Path,
|
||||
Content: req.Content,
|
||||
Language: language,
|
||||
}
|
||||
|
||||
// Write YAML entry
|
||||
if _, err := fmt.Fprintf(w.outFile, " - path: %s\n language: %s\n content: |\n", yamlQuoteString(fileData.Path), fileData.Language); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML entry start").WithFilePath(req.Path)
|
||||
}
|
||||
|
||||
// Write indented content
|
||||
lines := strings.Split(fileData.Content, "\n")
|
||||
for _, line := range lines {
|
||||
if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML content line").WithFilePath(req.Path)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// streamYAMLContent streams content with YAML indentation.
|
||||
func (w *YAMLWriter) streamYAMLContent(reader io.Reader, path string) error {
|
||||
scanner := bufio.NewScanner(reader)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML line").WithFilePath(path)
|
||||
}
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to scan YAML content").WithFilePath(path)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// closeReader safely closes a reader if it implements io.Closer.
|
||||
func (w *YAMLWriter) closeReader(reader io.Reader, path string) {
|
||||
if closer, ok := reader.(io.Closer); ok {
|
||||
if err := closer.Close(); err != nil {
|
||||
utils.LogError(
|
||||
"Failed to close file reader",
|
||||
utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// yamlEscaper rewrites the characters that have special meaning inside a YAML
// double-quoted scalar into their escape sequences. The backslash is listed
// first so an already-present backslash is escaped rather than read as the
// start of an escape sequence.
var yamlEscaper = strings.NewReplacer(
	`\`, `\\`,
	`"`, `\"`,
	"\n", `\n`,
	"\r", `\r`,
	"\t", `\t`,
)

// yamlQuoteString returns s in a form that is safe to emit as a YAML scalar
// value.
//
// The empty string becomes `""`. A string is wrapped in double quotes, with
// backslashes, double quotes, newlines, carriage returns and tabs escaped,
// when it contains a character that a plain scalar cannot carry safely, has
// leading or trailing whitespace, or starts with a YAML indicator character.
// Every other string is returned unchanged.
func yamlQuoteString(s string) string {
	if s == "" {
		return `""`
	}

	needsQuotes := strings.ContainsAny(s, "\n\r\t:\"'\\#") ||
		strings.TrimSpace(s) != s ||
		strings.IndexByte("-?[]{},&*!|>%@`", s[0]) >= 0
	if !needsQuotes {
		return s
	}

	return fmt.Sprintf(`"%s"`, yamlEscaper.Replace(s))
}
|
||||
|
||||
// startYAMLWriter handles YAML format output with streaming support.
|
||||
func startYAMLWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
|
||||
defer close(done)
|
||||
|
||||
writer := NewYAMLWriter(outFile)
|
||||
|
||||
// Start writing
|
||||
if err := writer.Start(prefix, suffix); err != nil {
|
||||
utils.LogError("Failed to write YAML header", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Process files
|
||||
for req := range writeCh {
|
||||
if err := writer.WriteFile(req); err != nil {
|
||||
utils.LogError("Failed to write YAML file", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Close writer
|
||||
if err := writer.Close(); err != nil {
|
||||
utils.LogError("Failed to write YAML end", err)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user