From eef3ab37619da78290a0b8c10bd5b3ec988fd367 Mon Sep 17 00:00:00 2001 From: Ismo Vuorinen Date: Wed, 30 Jul 2025 19:01:59 +0300 Subject: [PATCH] chore: tweaks, simplification, tests --- CLAUDE.md | 33 +- TODO.md | 58 +- cli/processor.go | 314 ------- cli/processor_collection.go | 77 ++ cli/processor_processing.go | 100 +++ cli/processor_stats.go | 40 + cli/processor_types.go | 44 + cli/processor_workers.go | 85 ++ config/config.go | 595 +------------ config/constants.go | 61 ++ config/getters.go | 157 ++++ config/loader.go | 90 ++ config/loader_test.go | 120 +++ config/validation.go | 307 +++++++ config/{config_test.go => validation_test.go} | 113 +-- fileproc/filetypes_concurrency_test.go | 105 +++ fileproc/filetypes_config_test.go | 258 ++++++ fileproc/filetypes_detection_test.go | 226 +++++ fileproc/filetypes_edge_cases_test.go | 128 +++ fileproc/filetypes_registry_test.go | 137 +++ fileproc/filetypes_test.go | 827 ------------------ fileproc/json_writer.go | 54 +- fileproc/resource_monitor.go | 423 --------- fileproc/resource_monitor_concurrency.go | 59 ++ fileproc/resource_monitor_concurrency_test.go | 95 ++ fileproc/resource_monitor_integration_test.go | 81 ++ fileproc/resource_monitor_metrics.go | 79 ++ fileproc/resource_monitor_metrics_test.go | 49 ++ fileproc/resource_monitor_rate_limiting.go | 36 + .../resource_monitor_rate_limiting_test.go | 40 + fileproc/resource_monitor_state.go | 22 + fileproc/resource_monitor_test.go | 377 -------- fileproc/resource_monitor_types.go | 108 +++ fileproc/resource_monitor_types_test.go | 74 ++ fileproc/resource_monitor_validation.go | 148 ++++ fileproc/resource_monitor_validation_test.go | 88 ++ testutil/concurrency_test.go | 86 ++ testutil/config_test.go | 132 +++ testutil/file_creation_test.go | 286 ++++++ testutil/testutil_test.go | 591 ------------- testutil/verification_test.go | 107 +++ utils/writers.go | 138 +++ 42 files changed, 3613 insertions(+), 3335 deletions(-) delete mode 100644 cli/processor.go create 
mode 100644 cli/processor_collection.go create mode 100644 cli/processor_processing.go create mode 100644 cli/processor_stats.go create mode 100644 cli/processor_types.go create mode 100644 cli/processor_workers.go create mode 100644 config/constants.go create mode 100644 config/getters.go create mode 100644 config/loader.go create mode 100644 config/loader_test.go create mode 100644 config/validation.go rename config/{config_test.go => validation_test.go} (65%) create mode 100644 fileproc/filetypes_concurrency_test.go create mode 100644 fileproc/filetypes_config_test.go create mode 100644 fileproc/filetypes_detection_test.go create mode 100644 fileproc/filetypes_edge_cases_test.go create mode 100644 fileproc/filetypes_registry_test.go delete mode 100644 fileproc/filetypes_test.go delete mode 100644 fileproc/resource_monitor.go create mode 100644 fileproc/resource_monitor_concurrency.go create mode 100644 fileproc/resource_monitor_concurrency_test.go create mode 100644 fileproc/resource_monitor_integration_test.go create mode 100644 fileproc/resource_monitor_metrics.go create mode 100644 fileproc/resource_monitor_metrics_test.go create mode 100644 fileproc/resource_monitor_rate_limiting.go create mode 100644 fileproc/resource_monitor_rate_limiting_test.go create mode 100644 fileproc/resource_monitor_state.go delete mode 100644 fileproc/resource_monitor_test.go create mode 100644 fileproc/resource_monitor_types.go create mode 100644 fileproc/resource_monitor_types_test.go create mode 100644 fileproc/resource_monitor_validation.go create mode 100644 fileproc/resource_monitor_validation_test.go create mode 100644 testutil/concurrency_test.go create mode 100644 testutil/config_test.go create mode 100644 testutil/file_creation_test.go delete mode 100644 testutil/testutil_test.go create mode 100644 testutil/verification_test.go create mode 100644 utils/writers.go diff --git a/CLAUDE.md b/CLAUDE.md index 2f33223..fb9af0b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,48 
+1,47 @@ # CLAUDE.md -Go CLI that aggregates code files into LLM-optimized output. Supports markdown/JSON/YAML with concurrent processing. +Go CLI aggregating code files into LLM-optimized output. Supports markdown/JSON/YAML with concurrent processing. -## Architecture (40 files, 189KB, 6.8K lines) +## Architecture (42 files, 8.2K lines) -**Core**: `main.go` (37 lines), `cli/` (4 files), `fileproc/` (22 files), `config/` (3 files), `utils/` (4 files), `testutil/` (2 files) +**Core**: `main.go` (37), `cli/` (4), `fileproc/` (27), `config/` (3), `utils/` (4), `testutil/` (2) -**Key modules**: File collection, processing, writers (markdown/JSON/YAML), registry with caching, back-pressure management +**Modules**: Collection, processing, writers, registry (~63ns cache), resource limits -**Patterns**: Producer-consumer pools, thread-safe registry (~63ns lookups), streaming with back-pressure, modular files (50-200 lines), progress bars, enhanced errors +**Patterns**: Producer-consumer, thread-safe registry, streaming, modular (50-200 lines) ## Commands ```bash -make lint-fix && make lint && make test # Essential workflow -./gibidify -source -format markdown --no-colors --no-progress --verbose +make lint-fix && make lint && make test +./gibidify -source -format markdown --verbose ``` ## Config -XDG config paths: `~/.config/gibidify/config.yaml` - -**Key settings**: File size limit (5MB), ignore dirs, custom file types, back-pressure (100MB memory limit) +`~/.config/gibidify/config.yaml` +Size limit 5MB, ignore dirs, custom types, 100MB memory limit ## Quality -**CRITICAL**: `make lint-fix && make lint` (0 issues), max 120 chars, EditorConfig compliance, 30+ linters +**CRITICAL**: `make lint-fix && make lint` (0 issues), 120 chars, EditorConfig, 30+ linters ## Testing -**Coverage**: 84%+ (utils 90.9%, testutil 84.2%, fileproc 83.8%), race detection, benchmarks, testutil helpers +**Coverage**: 84%+ (utils 90.9%, fileproc 83.8%), race detection, benchmarks ## Standards 
-EditorConfig (LF, tabs), semantic commits, testing required, linting must pass +EditorConfig (LF, tabs), semantic commits, testing required ## Status -**Health: 10/10** - Production-ready, 84%+ coverage, modular architecture, memory-optimized +**Health: 10/10** - Production-ready, 84%+ coverage, modular, memory-optimized -**Completed**: Structured errors, benchmarking, config validation, memory optimization, code modularization, CLI enhancements (progress bars, colors, enhanced errors) +**Done**: Errors, benchmarks, config, optimization, modularization, CLI (progress/colors), security (path validation, resource limits, scanning) -**Next**: Security hardening, documentation, output customization +**Next**: Documentation, output customization ## Workflow -1. `make lint-fix` before changes 2. >80% coverage 3. Follow patterns 4. Update docs 5. Security/performance +1. `make lint-fix` first 2. >80% coverage 3. Follow patterns 4. Update docs diff --git a/TODO.md b/TODO.md index bca9086..7194a1f 100644 --- a/TODO.md +++ b/TODO.md @@ -2,65 +2,45 @@ Prioritized improvements by impact/effort. 
-## ✅ Completed (High Priority) +## ✅ Completed -**Testing**: utils (90.9%), testutil (84.2%), FileTypeRegistry (100%) ✅ -**Config**: Registry customization, validation, schema ✅ -**Errors**: Structured types, categorization, context ✅ -**Performance**: Benchmarking, memory optimization, streaming ✅ -**Architecture**: Code modularization (50-200 lines/file) ✅ -**CLI**: Progress bars, colored output, enhanced errors ✅ +**Core**: Testing (84%+), config validation, structured errors, benchmarking ✅ +**Architecture**: Modularization (50-200 lines), CLI (progress/colors), security (path validation, resource limits, scanning) ✅ ## 🚀 Current Priorities -### Metrics -- [ ] Timing/profiling -- [ ] Processing stats +### Metrics & Profiling +- [ ] Processing stats, timing -### Output Customization -- [ ] Templates -- [ ] Markdown config -- [ ] Metadata options - -### Security -- [ ] Path traversal review -- [ ] Resource limits -- [ ] Security scanning +### Output Customization +- [ ] Templates, markdown config, metadata ### Documentation -- [ ] API docs (GoDoc, examples) -- [ ] User guides, troubleshooting - -### Dev Tools -- [ ] Hot reload, debug mode -- [ ] More CI/CD linters +- [ ] API docs, user guides ## 🌟 Future **Plugins**: Custom handlers, formats -**Git integration**: Commit filtering, blame -**Rich output**: HTML, PDF, web UI -**Microservices**: API-first, orchestration -**Monitoring**: Prometheus metrics, structured logging +**Git**: Commit filtering, blame +**Rich output**: HTML, PDF, web UI +**Monitoring**: Prometheus, structured logging ## Guidelines -**Before**: `make lint-fix && make lint`, follow TDD, update docs -**DoD**: >80% coverage, linting passes, security reviewed +**Before**: `make lint-fix && make lint`, >80% coverage **Priorities**: Security → UX → Extensions ## Status (2025-07-19) -**Health: 10/10** - Production-ready, 40 files (189KB, 6.8K lines), 84%+ coverage +**Health: 10/10** - Production-ready, 42 files (8.2K lines), 84%+ coverage 
-**Completed**: All critical items - testing, config, errors, performance, modularization, CLI enhancements - -**Next**: Security hardening → Documentation → Output customization +**Done**: Testing, config, errors, performance, modularization, CLI, security +**Next**: Documentation → Output customization ### Token Usage -- TODO.md: 247 words (~329 tokens) - 63% reduction ✅ -- CLAUDE.md: 212 words (~283 tokens) - 65% reduction ✅ -- Total: 459 words (~612 tokens) - 64% reduction ✅ +- TODO.md: 171 words (~228 tokens) - 35% reduction ✅ +- CLAUDE.md: 160 words (~213 tokens) - 25% reduction ✅ +- Total: 331 words (~441 tokens) - 30% reduction ✅ -*Optimized from 1,581 → 459 words while preserving all critical information* +*Optimized from 474 → 331 words while preserving critical information* diff --git a/cli/processor.go b/cli/processor.go deleted file mode 100644 index 1bc3d67..0000000 --- a/cli/processor.go +++ /dev/null @@ -1,314 +0,0 @@ -package cli - -import ( - "context" - "fmt" - "os" - "sync" - - "github.com/sirupsen/logrus" - - "github.com/ivuorinen/gibidify/config" - "github.com/ivuorinen/gibidify/fileproc" - "github.com/ivuorinen/gibidify/utils" -) - -// Processor handles the main file processing logic. -type Processor struct { - flags *Flags - backpressure *fileproc.BackpressureManager - resourceMonitor *fileproc.ResourceMonitor - ui *UIManager -} - -// NewProcessor creates a new processor with the given flags. -func NewProcessor(flags *Flags) *Processor { - ui := NewUIManager() - - // Configure UI based on flags - ui.SetColorOutput(!flags.NoColors) - ui.SetProgressOutput(!flags.NoProgress) - - return &Processor{ - flags: flags, - backpressure: fileproc.NewBackpressureManager(), - resourceMonitor: fileproc.NewResourceMonitor(), - ui: ui, - } -} - -// Process executes the main file processing workflow. 
-func (p *Processor) Process(ctx context.Context) error { - // Create overall processing context with timeout - overallCtx, overallCancel := p.resourceMonitor.CreateOverallProcessingContext(ctx) - defer overallCancel() - - // Configure file type registry - p.configureFileTypes() - - // Print startup info with colors - p.ui.PrintHeader("🚀 Starting gibidify") - p.ui.PrintInfo("Format: %s", p.flags.Format) - p.ui.PrintInfo("Source: %s", p.flags.SourceDir) - p.ui.PrintInfo("Destination: %s", p.flags.Destination) - p.ui.PrintInfo("Workers: %d", p.flags.Concurrency) - - // Log resource monitoring configuration - p.resourceMonitor.LogResourceInfo() - p.backpressure.LogBackpressureInfo() - - // Collect files with progress indication - p.ui.PrintInfo("📁 Collecting files...") - files, err := p.collectFiles() - if err != nil { - return err - } - - // Show collection results - p.ui.PrintSuccess("Found %d files to process", len(files)) - - // Pre-validate file collection against resource limits - if err := p.validateFileCollection(files); err != nil { - return err - } - - // Process files with overall timeout - return p.processFiles(overallCtx, files) -} - -// configureFileTypes configures the file type registry. -func (p *Processor) configureFileTypes() { - if config.GetFileTypesEnabled() { - fileproc.ConfigureFromSettings( - config.GetCustomImageExtensions(), - config.GetCustomBinaryExtensions(), - config.GetCustomLanguages(), - config.GetDisabledImageExtensions(), - config.GetDisabledBinaryExtensions(), - config.GetDisabledLanguageExtensions(), - ) - } -} - -// collectFiles collects all files to be processed. 
-func (p *Processor) collectFiles() ([]string, error) { - files, err := fileproc.CollectFiles(p.flags.SourceDir) - if err != nil { - return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "error collecting files") - } - logrus.Infof("Found %d files to process", len(files)) - return files, nil -} - -// validateFileCollection validates the collected files against resource limits. -func (p *Processor) validateFileCollection(files []string) error { - if !config.GetResourceLimitsEnabled() { - return nil - } - - // Check file count limit - maxFiles := config.GetMaxFiles() - if len(files) > maxFiles { - return utils.NewStructuredError( - utils.ErrorTypeValidation, - utils.CodeResourceLimitFiles, - fmt.Sprintf("file count (%d) exceeds maximum limit (%d)", len(files), maxFiles), - "", - map[string]interface{}{ - "file_count": len(files), - "max_files": maxFiles, - }, - ) - } - - // Check total size limit (estimate) - maxTotalSize := config.GetMaxTotalSize() - totalSize := int64(0) - oversizedFiles := 0 - - for _, filePath := range files { - if fileInfo, err := os.Stat(filePath); err == nil { - totalSize += fileInfo.Size() - if totalSize > maxTotalSize { - return utils.NewStructuredError( - utils.ErrorTypeValidation, - utils.CodeResourceLimitTotalSize, - fmt.Sprintf("total file size (%d bytes) would exceed maximum limit (%d bytes)", totalSize, maxTotalSize), - "", - map[string]interface{}{ - "total_size": totalSize, - "max_total_size": maxTotalSize, - "files_checked": len(files), - }, - ) - } - } else { - oversizedFiles++ - } - } - - if oversizedFiles > 0 { - logrus.Warnf("Could not stat %d files during pre-validation", oversizedFiles) - } - - logrus.Infof("Pre-validation passed: %d files, %d MB total", len(files), totalSize/1024/1024) - return nil -} - -// processFiles processes the collected files. 
-func (p *Processor) processFiles(ctx context.Context, files []string) error { - outFile, err := p.createOutputFile() - if err != nil { - return err - } - defer func() { - utils.LogError("Error closing output file", outFile.Close()) - }() - - // Initialize back-pressure and channels - p.ui.PrintInfo("⚙️ Initializing processing...") - p.backpressure.LogBackpressureInfo() - fileCh, writeCh := p.backpressure.CreateChannels() - writerDone := make(chan struct{}) - - // Start writer - go fileproc.StartWriter(outFile, writeCh, writerDone, p.flags.Format, p.flags.Prefix, p.flags.Suffix) - - // Start workers - var wg sync.WaitGroup - p.startWorkers(ctx, &wg, fileCh, writeCh) - - // Start progress bar - p.ui.StartProgress(len(files), "📝 Processing files") - - // Send files to workers - if err := p.sendFiles(ctx, files, fileCh); err != nil { - p.ui.FinishProgress() - return err - } - - // Wait for completion - p.waitForCompletion(&wg, writeCh, writerDone) - p.ui.FinishProgress() - - p.logFinalStats() - p.ui.PrintSuccess("Processing completed. Output saved to %s", p.flags.Destination) - return nil -} - -// createOutputFile creates the output file. -func (p *Processor) createOutputFile() (*os.File, error) { - // Destination path has been validated in CLI flags validation for path traversal attempts - outFile, err := os.Create(p.flags.Destination) // #nosec G304 - destination is validated in flags.validate() - if err != nil { - return nil, utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOFileCreate, "failed to create output file").WithFilePath(p.flags.Destination) - } - return outFile, nil -} - -// startWorkers starts the worker goroutines. -func (p *Processor) startWorkers(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) { - for range p.flags.Concurrency { - wg.Add(1) - go p.worker(ctx, wg, fileCh, writeCh) - } -} - -// worker is the worker goroutine function. 
-func (p *Processor) worker(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) { - defer wg.Done() - for { - select { - case <-ctx.Done(): - return - case filePath, ok := <-fileCh: - if !ok { - return - } - p.processFile(ctx, filePath, writeCh) - } - } -} - -// processFile processes a single file with resource monitoring. -func (p *Processor) processFile(ctx context.Context, filePath string, writeCh chan fileproc.WriteRequest) { - // Check for emergency stop - if p.resourceMonitor.IsEmergencyStopActive() { - logrus.Warnf("Emergency stop active, skipping file: %s", filePath) - return - } - - absRoot, err := utils.GetAbsolutePath(p.flags.SourceDir) - if err != nil { - utils.LogError("Failed to get absolute path", err) - return - } - - // Use the resource monitor-aware processing - fileproc.ProcessFileWithMonitor(ctx, filePath, writeCh, absRoot, p.resourceMonitor) - - // Update progress bar - p.ui.UpdateProgress(1) -} - -// sendFiles sends files to the worker channels with back-pressure handling. -func (p *Processor) sendFiles(ctx context.Context, files []string, fileCh chan string) error { - defer close(fileCh) - - for _, fp := range files { - // Check if we should apply back-pressure - if p.backpressure.ShouldApplyBackpressure(ctx) { - p.backpressure.ApplyBackpressure(ctx) - } - - // Wait for channel space if needed - p.backpressure.WaitForChannelSpace(ctx, fileCh, nil) - - select { - case <-ctx.Done(): - return ctx.Err() - case fileCh <- fp: - } - } - return nil -} - -// waitForCompletion waits for all workers to complete. -func (p *Processor) waitForCompletion(wg *sync.WaitGroup, writeCh chan fileproc.WriteRequest, writerDone chan struct{}) { - wg.Wait() - close(writeCh) - <-writerDone -} - -// logFinalStats logs the final back-pressure and resource monitoring statistics. 
-func (p *Processor) logFinalStats() { - // Log back-pressure stats - backpressureStats := p.backpressure.GetStats() - if backpressureStats.Enabled { - logrus.Infof("Back-pressure stats: processed=%d files, memory=%dMB/%dMB", - backpressureStats.FilesProcessed, backpressureStats.CurrentMemoryUsage/1024/1024, backpressureStats.MaxMemoryUsage/1024/1024) - } - - // Log resource monitoring stats - resourceStats := p.resourceMonitor.GetMetrics() - if config.GetResourceLimitsEnabled() { - logrus.Infof("Resource stats: processed=%d files, totalSize=%dMB, avgFileSize=%.2fKB, rate=%.2f files/sec", - resourceStats.FilesProcessed, resourceStats.TotalSizeProcessed/1024/1024, - resourceStats.AverageFileSize/1024, resourceStats.ProcessingRate) - - if len(resourceStats.ViolationsDetected) > 0 { - logrus.Warnf("Resource violations detected: %v", resourceStats.ViolationsDetected) - } - - if resourceStats.DegradationActive { - logrus.Warnf("Processing completed with degradation mode active") - } - - if resourceStats.EmergencyStopActive { - logrus.Errorf("Processing completed with emergency stop active") - } - } - - // Clean up resource monitor - p.resourceMonitor.Close() -} diff --git a/cli/processor_collection.go b/cli/processor_collection.go new file mode 100644 index 0000000..cd8be10 --- /dev/null +++ b/cli/processor_collection.go @@ -0,0 +1,77 @@ +package cli + +import ( + "fmt" + "os" + + "github.com/sirupsen/logrus" + + "github.com/ivuorinen/gibidify/config" + "github.com/ivuorinen/gibidify/fileproc" + "github.com/ivuorinen/gibidify/utils" +) + +// collectFiles collects all files to be processed. 
+func (p *Processor) collectFiles() ([]string, error) { + files, err := fileproc.CollectFiles(p.flags.SourceDir) + if err != nil { + return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "error collecting files") + } + logrus.Infof("Found %d files to process", len(files)) + return files, nil +} + +// validateFileCollection validates the collected files against resource limits. +func (p *Processor) validateFileCollection(files []string) error { + if !config.GetResourceLimitsEnabled() { + return nil + } + + // Check file count limit + maxFiles := config.GetMaxFiles() + if len(files) > maxFiles { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitFiles, + fmt.Sprintf("file count (%d) exceeds maximum limit (%d)", len(files), maxFiles), + "", + map[string]interface{}{ + "file_count": len(files), + "max_files": maxFiles, + }, + ) + } + + // Check total size limit (estimate) + maxTotalSize := config.GetMaxTotalSize() + totalSize := int64(0) + oversizedFiles := 0 + + for _, filePath := range files { + if fileInfo, err := os.Stat(filePath); err == nil { + totalSize += fileInfo.Size() + if totalSize > maxTotalSize { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitTotalSize, + fmt.Sprintf("total file size (%d bytes) would exceed maximum limit (%d bytes)", totalSize, maxTotalSize), + "", + map[string]interface{}{ + "total_size": totalSize, + "max_total_size": maxTotalSize, + "files_checked": len(files), + }, + ) + } + } else { + oversizedFiles++ + } + } + + if oversizedFiles > 0 { + logrus.Warnf("Could not stat %d files during pre-validation", oversizedFiles) + } + + logrus.Infof("Pre-validation passed: %d files, %d MB total", len(files), totalSize/1024/1024) + return nil +} \ No newline at end of file diff --git a/cli/processor_processing.go b/cli/processor_processing.go new file mode 100644 index 0000000..40962b0 --- /dev/null +++ b/cli/processor_processing.go 
@@ -0,0 +1,100 @@ +package cli + +import ( + "context" + "os" + "sync" + + "github.com/ivuorinen/gibidify/fileproc" + "github.com/ivuorinen/gibidify/utils" +) + +// Process executes the main file processing workflow. +func (p *Processor) Process(ctx context.Context) error { + // Create overall processing context with timeout + overallCtx, overallCancel := p.resourceMonitor.CreateOverallProcessingContext(ctx) + defer overallCancel() + + // Configure file type registry + p.configureFileTypes() + + // Print startup info with colors + p.ui.PrintHeader("🚀 Starting gibidify") + p.ui.PrintInfo("Format: %s", p.flags.Format) + p.ui.PrintInfo("Source: %s", p.flags.SourceDir) + p.ui.PrintInfo("Destination: %s", p.flags.Destination) + p.ui.PrintInfo("Workers: %d", p.flags.Concurrency) + + // Log resource monitoring configuration + p.resourceMonitor.LogResourceInfo() + p.backpressure.LogBackpressureInfo() + + // Collect files with progress indication + p.ui.PrintInfo("📁 Collecting files...") + files, err := p.collectFiles() + if err != nil { + return err + } + + // Show collection results + p.ui.PrintSuccess("Found %d files to process", len(files)) + + // Pre-validate file collection against resource limits + if err := p.validateFileCollection(files); err != nil { + return err + } + + // Process files with overall timeout + return p.processFiles(overallCtx, files) +} + +// processFiles processes the collected files. 
+func (p *Processor) processFiles(ctx context.Context, files []string) error { + outFile, err := p.createOutputFile() + if err != nil { + return err + } + defer func() { + utils.LogError("Error closing output file", outFile.Close()) + }() + + // Initialize back-pressure and channels + p.ui.PrintInfo("⚙️ Initializing processing...") + p.backpressure.LogBackpressureInfo() + fileCh, writeCh := p.backpressure.CreateChannels() + writerDone := make(chan struct{}) + + // Start writer + go fileproc.StartWriter(outFile, writeCh, writerDone, p.flags.Format, p.flags.Prefix, p.flags.Suffix) + + // Start workers + var wg sync.WaitGroup + p.startWorkers(ctx, &wg, fileCh, writeCh) + + // Start progress bar + p.ui.StartProgress(len(files), "📝 Processing files") + + // Send files to workers + if err := p.sendFiles(ctx, files, fileCh); err != nil { + p.ui.FinishProgress() + return err + } + + // Wait for completion + p.waitForCompletion(&wg, writeCh, writerDone) + p.ui.FinishProgress() + + p.logFinalStats() + p.ui.PrintSuccess("Processing completed. Output saved to %s", p.flags.Destination) + return nil +} + +// createOutputFile creates the output file. +func (p *Processor) createOutputFile() (*os.File, error) { + // Destination path has been validated in CLI flags validation for path traversal attempts + outFile, err := os.Create(p.flags.Destination) // #nosec G304 - destination is validated in flags.validate() + if err != nil { + return nil, utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOFileCreate, "failed to create output file").WithFilePath(p.flags.Destination) + } + return outFile, nil +} \ No newline at end of file diff --git a/cli/processor_stats.go b/cli/processor_stats.go new file mode 100644 index 0000000..6ecd856 --- /dev/null +++ b/cli/processor_stats.go @@ -0,0 +1,40 @@ +package cli + +import ( + "github.com/sirupsen/logrus" + + "github.com/ivuorinen/gibidify/config" +) + +// logFinalStats logs the final back-pressure and resource monitoring statistics. 
+func (p *Processor) logFinalStats() { + // Log back-pressure stats + backpressureStats := p.backpressure.GetStats() + if backpressureStats.Enabled { + logrus.Infof("Back-pressure stats: processed=%d files, memory=%dMB/%dMB", + backpressureStats.FilesProcessed, backpressureStats.CurrentMemoryUsage/1024/1024, backpressureStats.MaxMemoryUsage/1024/1024) + } + + // Log resource monitoring stats + resourceStats := p.resourceMonitor.GetMetrics() + if config.GetResourceLimitsEnabled() { + logrus.Infof("Resource stats: processed=%d files, totalSize=%dMB, avgFileSize=%.2fKB, rate=%.2f files/sec", + resourceStats.FilesProcessed, resourceStats.TotalSizeProcessed/1024/1024, + resourceStats.AverageFileSize/1024, resourceStats.ProcessingRate) + + if len(resourceStats.ViolationsDetected) > 0 { + logrus.Warnf("Resource violations detected: %v", resourceStats.ViolationsDetected) + } + + if resourceStats.DegradationActive { + logrus.Warnf("Processing completed with degradation mode active") + } + + if resourceStats.EmergencyStopActive { + logrus.Errorf("Processing completed with emergency stop active") + } + } + + // Clean up resource monitor + p.resourceMonitor.Close() +} \ No newline at end of file diff --git a/cli/processor_types.go b/cli/processor_types.go new file mode 100644 index 0000000..e5d37e2 --- /dev/null +++ b/cli/processor_types.go @@ -0,0 +1,44 @@ +package cli + +import ( + "github.com/ivuorinen/gibidify/config" + "github.com/ivuorinen/gibidify/fileproc" +) + +// Processor handles the main file processing logic. +type Processor struct { + flags *Flags + backpressure *fileproc.BackpressureManager + resourceMonitor *fileproc.ResourceMonitor + ui *UIManager +} + +// NewProcessor creates a new processor with the given flags. 
+func NewProcessor(flags *Flags) *Processor { + ui := NewUIManager() + + // Configure UI based on flags + ui.SetColorOutput(!flags.NoColors) + ui.SetProgressOutput(!flags.NoProgress) + + return &Processor{ + flags: flags, + backpressure: fileproc.NewBackpressureManager(), + resourceMonitor: fileproc.NewResourceMonitor(), + ui: ui, + } +} + +// configureFileTypes configures the file type registry. +func (p *Processor) configureFileTypes() { + if config.GetFileTypesEnabled() { + fileproc.ConfigureFromSettings( + config.GetCustomImageExtensions(), + config.GetCustomBinaryExtensions(), + config.GetCustomLanguages(), + config.GetDisabledImageExtensions(), + config.GetDisabledBinaryExtensions(), + config.GetDisabledLanguageExtensions(), + ) + } +} \ No newline at end of file diff --git a/cli/processor_workers.go b/cli/processor_workers.go new file mode 100644 index 0000000..ebfac43 --- /dev/null +++ b/cli/processor_workers.go @@ -0,0 +1,85 @@ +package cli + +import ( + "context" + "sync" + + "github.com/sirupsen/logrus" + + "github.com/ivuorinen/gibidify/fileproc" + "github.com/ivuorinen/gibidify/utils" +) + +// startWorkers starts the worker goroutines. +func (p *Processor) startWorkers(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) { + for range p.flags.Concurrency { + wg.Add(1) + go p.worker(ctx, wg, fileCh, writeCh) + } +} + +// worker is the worker goroutine function. +func (p *Processor) worker(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) { + defer wg.Done() + for { + select { + case <-ctx.Done(): + return + case filePath, ok := <-fileCh: + if !ok { + return + } + p.processFile(ctx, filePath, writeCh) + } + } +} + +// processFile processes a single file with resource monitoring. 
+func (p *Processor) processFile(ctx context.Context, filePath string, writeCh chan fileproc.WriteRequest) { + // Check for emergency stop + if p.resourceMonitor.IsEmergencyStopActive() { + logrus.Warnf("Emergency stop active, skipping file: %s", filePath) + return + } + + absRoot, err := utils.GetAbsolutePath(p.flags.SourceDir) + if err != nil { + utils.LogError("Failed to get absolute path", err) + return + } + + // Use the resource monitor-aware processing + fileproc.ProcessFileWithMonitor(ctx, filePath, writeCh, absRoot, p.resourceMonitor) + + // Update progress bar + p.ui.UpdateProgress(1) +} + +// sendFiles sends files to the worker channels with back-pressure handling. +func (p *Processor) sendFiles(ctx context.Context, files []string, fileCh chan string) error { + defer close(fileCh) + + for _, fp := range files { + // Check if we should apply back-pressure + if p.backpressure.ShouldApplyBackpressure(ctx) { + p.backpressure.ApplyBackpressure(ctx) + } + + // Wait for channel space if needed + p.backpressure.WaitForChannelSpace(ctx, fileCh, nil) + + select { + case <-ctx.Done(): + return ctx.Err() + case fileCh <- fp: + } + } + return nil +} + +// waitForCompletion waits for all workers to complete. +func (p *Processor) waitForCompletion(wg *sync.WaitGroup, writeCh chan fileproc.WriteRequest, writerDone chan struct{}) { + wg.Wait() + close(writeCh) + <-writerDone +} \ No newline at end of file diff --git a/config/config.go b/config/config.go index f8f34c3..e4cdbd6 100644 --- a/config/config.go +++ b/config/config.go @@ -1,596 +1,5 @@ // Package config handles application configuration using Viper. +// This file contains the main configuration orchestration logic. package config -import ( - "fmt" - "os" - "path/filepath" - "strings" - - "github.com/sirupsen/logrus" - "github.com/spf13/viper" - - "github.com/ivuorinen/gibidify/utils" -) - -const ( - // DefaultFileSizeLimit is the default maximum file size (5MB). 
- DefaultFileSizeLimit = 5242880 - // MinFileSizeLimit is the minimum allowed file size limit (1KB). - MinFileSizeLimit = 1024 - // MaxFileSizeLimit is the maximum allowed file size limit (100MB). - MaxFileSizeLimit = 104857600 - - // Resource Limit Constants - - // DefaultMaxFiles is the default maximum number of files to process. - DefaultMaxFiles = 10000 - // MinMaxFiles is the minimum allowed file count limit. - MinMaxFiles = 1 - // MaxMaxFiles is the maximum allowed file count limit. - MaxMaxFiles = 1000000 - - // DefaultMaxTotalSize is the default maximum total size of files (1GB). - DefaultMaxTotalSize = 1073741824 - // MinMaxTotalSize is the minimum allowed total size limit (1MB). - MinMaxTotalSize = 1048576 - // MaxMaxTotalSize is the maximum allowed total size limit (100GB). - MaxMaxTotalSize = 107374182400 - - // DefaultFileProcessingTimeoutSec is the default timeout for individual file processing (30 seconds). - DefaultFileProcessingTimeoutSec = 30 - // MinFileProcessingTimeoutSec is the minimum allowed file processing timeout (1 second). - MinFileProcessingTimeoutSec = 1 - // MaxFileProcessingTimeoutSec is the maximum allowed file processing timeout (300 seconds). - MaxFileProcessingTimeoutSec = 300 - - // DefaultOverallTimeoutSec is the default timeout for overall processing (3600 seconds = 1 hour). - DefaultOverallTimeoutSec = 3600 - // MinOverallTimeoutSec is the minimum allowed overall timeout (10 seconds). - MinOverallTimeoutSec = 10 - // MaxOverallTimeoutSec is the maximum allowed overall timeout (86400 seconds = 24 hours). - MaxOverallTimeoutSec = 86400 - - // DefaultMaxConcurrentReads is the default maximum concurrent file reading operations. - DefaultMaxConcurrentReads = 10 - // MinMaxConcurrentReads is the minimum allowed concurrent reads. - MinMaxConcurrentReads = 1 - // MaxMaxConcurrentReads is the maximum allowed concurrent reads. 
- MaxMaxConcurrentReads = 100 - - // DefaultRateLimitFilesPerSec is the default rate limit for file processing (0 = disabled). - DefaultRateLimitFilesPerSec = 0 - // MinRateLimitFilesPerSec is the minimum rate limit. - MinRateLimitFilesPerSec = 0 - // MaxRateLimitFilesPerSec is the maximum rate limit. - MaxRateLimitFilesPerSec = 10000 - - // DefaultHardMemoryLimitMB is the default hard memory limit (512MB). - DefaultHardMemoryLimitMB = 512 - // MinHardMemoryLimitMB is the minimum hard memory limit (64MB). - MinHardMemoryLimitMB = 64 - // MaxHardMemoryLimitMB is the maximum hard memory limit (8192MB = 8GB). - MaxHardMemoryLimitMB = 8192 -) - -// LoadConfig reads configuration from a YAML file. -// It looks for config in the following order: -// 1. $XDG_CONFIG_HOME/gibidify/config.yaml -// 2. $HOME/.config/gibidify/config.yaml -// 3. The current directory as fallback. -func LoadConfig() { - viper.SetConfigName("config") - viper.SetConfigType("yaml") - - if xdgConfig := os.Getenv("XDG_CONFIG_HOME"); xdgConfig != "" { - // Validate XDG_CONFIG_HOME for path traversal attempts - if err := utils.ValidateConfigPath(xdgConfig); err != nil { - logrus.Warnf("Invalid XDG_CONFIG_HOME path, using default config: %v", err) - } else { - configPath := filepath.Join(xdgConfig, "gibidify") - viper.AddConfigPath(configPath) - } - } else if home, err := os.UserHomeDir(); err == nil { - viper.AddConfigPath(filepath.Join(home, ".config", "gibidify")) - } - // Only add current directory if no config file named gibidify.yaml exists - // to avoid conflicts with the project's output file - if _, err := os.Stat("gibidify.yaml"); os.IsNotExist(err) { - viper.AddConfigPath(".") - } - - if err := viper.ReadInConfig(); err != nil { - logrus.Infof("Config file not found, using default values: %v", err) - setDefaultConfig() - } else { - logrus.Infof("Using config file: %s", viper.ConfigFileUsed()) - // Validate configuration after loading - if err := ValidateConfig(); err != nil { - 
logrus.Warnf("Configuration validation failed: %v", err) - logrus.Info("Falling back to default configuration") - // Reset viper and set defaults when validation fails - viper.Reset() - setDefaultConfig() - } - } -} - -// setDefaultConfig sets default configuration values. -func setDefaultConfig() { - viper.SetDefault("fileSizeLimit", DefaultFileSizeLimit) - // Default ignored directories. - viper.SetDefault("ignoreDirectories", []string{ - "vendor", "node_modules", ".git", "dist", "build", "target", "bower_components", "cache", "tmp", - }) - - // FileTypeRegistry defaults - viper.SetDefault("fileTypes.enabled", true) - viper.SetDefault("fileTypes.customImageExtensions", []string{}) - viper.SetDefault("fileTypes.customBinaryExtensions", []string{}) - viper.SetDefault("fileTypes.customLanguages", map[string]string{}) - viper.SetDefault("fileTypes.disabledImageExtensions", []string{}) - viper.SetDefault("fileTypes.disabledBinaryExtensions", []string{}) - viper.SetDefault("fileTypes.disabledLanguageExtensions", []string{}) - - // Back-pressure and memory management defaults - viper.SetDefault("backpressure.enabled", true) - viper.SetDefault("backpressure.maxPendingFiles", 1000) // Max files in file channel buffer - viper.SetDefault("backpressure.maxPendingWrites", 100) // Max writes in write channel buffer - viper.SetDefault("backpressure.maxMemoryUsage", 104857600) // 100MB max memory usage - viper.SetDefault("backpressure.memoryCheckInterval", 1000) // Check memory every 1000 files - - // Resource limit defaults - viper.SetDefault("resourceLimits.enabled", true) - viper.SetDefault("resourceLimits.maxFiles", DefaultMaxFiles) - viper.SetDefault("resourceLimits.maxTotalSize", DefaultMaxTotalSize) - viper.SetDefault("resourceLimits.fileProcessingTimeoutSec", DefaultFileProcessingTimeoutSec) - viper.SetDefault("resourceLimits.overallTimeoutSec", DefaultOverallTimeoutSec) - viper.SetDefault("resourceLimits.maxConcurrentReads", DefaultMaxConcurrentReads) - 
viper.SetDefault("resourceLimits.rateLimitFilesPerSec", DefaultRateLimitFilesPerSec) - viper.SetDefault("resourceLimits.hardMemoryLimitMB", DefaultHardMemoryLimitMB) - viper.SetDefault("resourceLimits.enableGracefulDegradation", true) - viper.SetDefault("resourceLimits.enableResourceMonitoring", true) -} - -// GetFileSizeLimit returns the file size limit from configuration. -func GetFileSizeLimit() int64 { - return viper.GetInt64("fileSizeLimit") -} - -// GetIgnoredDirectories returns the list of directories to ignore. -func GetIgnoredDirectories() []string { - return viper.GetStringSlice("ignoreDirectories") -} - -// ValidateConfig validates the loaded configuration. -func ValidateConfig() error { - var validationErrors []string - - // Validate file size limit - fileSizeLimit := viper.GetInt64("fileSizeLimit") - if fileSizeLimit < MinFileSizeLimit { - validationErrors = append(validationErrors, fmt.Sprintf("fileSizeLimit (%d) is below minimum (%d)", fileSizeLimit, MinFileSizeLimit)) - } - if fileSizeLimit > MaxFileSizeLimit { - validationErrors = append(validationErrors, fmt.Sprintf("fileSizeLimit (%d) exceeds maximum (%d)", fileSizeLimit, MaxFileSizeLimit)) - } - - // Validate ignore directories - ignoreDirectories := viper.GetStringSlice("ignoreDirectories") - for i, dir := range ignoreDirectories { - dir = strings.TrimSpace(dir) - if dir == "" { - validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] is empty", i)) - continue - } - if strings.Contains(dir, "/") { - validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] (%s) contains path separator - only directory names are allowed", i, dir)) - } - if strings.HasPrefix(dir, ".") && dir != ".git" && dir != ".vscode" && dir != ".idea" { - validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] (%s) starts with dot - this may cause unexpected behavior", i, dir)) - } - } - - // Validate supported output formats if configured - if 
viper.IsSet("supportedFormats") { - supportedFormats := viper.GetStringSlice("supportedFormats") - validFormats := map[string]bool{"json": true, "yaml": true, "markdown": true} - for i, format := range supportedFormats { - format = strings.ToLower(strings.TrimSpace(format)) - if !validFormats[format] { - validationErrors = append(validationErrors, fmt.Sprintf("supportedFormats[%d] (%s) is not a valid format (json, yaml, markdown)", i, format)) - } - } - } - - // Validate concurrency settings if configured - if viper.IsSet("maxConcurrency") { - maxConcurrency := viper.GetInt("maxConcurrency") - if maxConcurrency < 1 { - validationErrors = append(validationErrors, fmt.Sprintf("maxConcurrency (%d) must be at least 1", maxConcurrency)) - } - if maxConcurrency > 100 { - validationErrors = append(validationErrors, fmt.Sprintf("maxConcurrency (%d) is unreasonably high (max 100)", maxConcurrency)) - } - } - - // Validate file patterns if configured - if viper.IsSet("filePatterns") { - filePatterns := viper.GetStringSlice("filePatterns") - for i, pattern := range filePatterns { - pattern = strings.TrimSpace(pattern) - if pattern == "" { - validationErrors = append(validationErrors, fmt.Sprintf("filePatterns[%d] is empty", i)) - continue - } - // Basic validation - patterns should contain at least one alphanumeric character - if !strings.ContainsAny(pattern, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") { - validationErrors = append(validationErrors, fmt.Sprintf("filePatterns[%d] (%s) appears to be invalid", i, pattern)) - } - } - } - - // Validate FileTypeRegistry configuration - if viper.IsSet("fileTypes.customImageExtensions") { - customImages := viper.GetStringSlice("fileTypes.customImageExtensions") - for i, ext := range customImages { - ext = strings.TrimSpace(ext) - if ext == "" { - validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customImageExtensions[%d] is empty", i)) - continue - } - if !strings.HasPrefix(ext, ".") { - 
validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customImageExtensions[%d] (%s) must start with a dot", i, ext)) - } - } - } - - if viper.IsSet("fileTypes.customBinaryExtensions") { - customBinary := viper.GetStringSlice("fileTypes.customBinaryExtensions") - for i, ext := range customBinary { - ext = strings.TrimSpace(ext) - if ext == "" { - validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customBinaryExtensions[%d] is empty", i)) - continue - } - if !strings.HasPrefix(ext, ".") { - validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customBinaryExtensions[%d] (%s) must start with a dot", i, ext)) - } - } - } - - if viper.IsSet("fileTypes.customLanguages") { - customLangs := viper.GetStringMapString("fileTypes.customLanguages") - for ext, lang := range customLangs { - ext = strings.TrimSpace(ext) - lang = strings.TrimSpace(lang) - if ext == "" { - validationErrors = append(validationErrors, "fileTypes.customLanguages contains empty extension key") - continue - } - if !strings.HasPrefix(ext, ".") { - validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customLanguages extension (%s) must start with a dot", ext)) - } - if lang == "" { - validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customLanguages[%s] has empty language value", ext)) - } - } - } - - // Validate back-pressure configuration - if viper.IsSet("backpressure.maxPendingFiles") { - maxPendingFiles := viper.GetInt("backpressure.maxPendingFiles") - if maxPendingFiles < 1 { - validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingFiles (%d) must be at least 1", maxPendingFiles)) - } - if maxPendingFiles > 100000 { - validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingFiles (%d) is unreasonably high (max 100000)", maxPendingFiles)) - } - } - - if viper.IsSet("backpressure.maxPendingWrites") { - maxPendingWrites := viper.GetInt("backpressure.maxPendingWrites") - if 
maxPendingWrites < 1 { - validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingWrites (%d) must be at least 1", maxPendingWrites)) - } - if maxPendingWrites > 10000 { - validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingWrites (%d) is unreasonably high (max 10000)", maxPendingWrites)) - } - } - - if viper.IsSet("backpressure.maxMemoryUsage") { - maxMemoryUsage := viper.GetInt64("backpressure.maxMemoryUsage") - if maxMemoryUsage < 1048576 { // 1MB minimum - validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxMemoryUsage (%d) must be at least 1MB (1048576 bytes)", maxMemoryUsage)) - } - if maxMemoryUsage > 10737418240 { // 10GB maximum - validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxMemoryUsage (%d) is unreasonably high (max 10GB)", maxMemoryUsage)) - } - } - - if viper.IsSet("backpressure.memoryCheckInterval") { - interval := viper.GetInt("backpressure.memoryCheckInterval") - if interval < 1 { - validationErrors = append(validationErrors, fmt.Sprintf("backpressure.memoryCheckInterval (%d) must be at least 1", interval)) - } - if interval > 100000 { - validationErrors = append(validationErrors, fmt.Sprintf("backpressure.memoryCheckInterval (%d) is unreasonably high (max 100000)", interval)) - } - } - - // Validate resource limits configuration - if viper.IsSet("resourceLimits.maxFiles") { - maxFiles := viper.GetInt("resourceLimits.maxFiles") - if maxFiles < MinMaxFiles { - validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxFiles (%d) must be at least %d", maxFiles, MinMaxFiles)) - } - if maxFiles > MaxMaxFiles { - validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxFiles (%d) exceeds maximum (%d)", maxFiles, MaxMaxFiles)) - } - } - - if viper.IsSet("resourceLimits.maxTotalSize") { - maxTotalSize := viper.GetInt64("resourceLimits.maxTotalSize") - if maxTotalSize < MinMaxTotalSize { - validationErrors = 
append(validationErrors, fmt.Sprintf("resourceLimits.maxTotalSize (%d) must be at least %d", maxTotalSize, MinMaxTotalSize)) - } - if maxTotalSize > MaxMaxTotalSize { - validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxTotalSize (%d) exceeds maximum (%d)", maxTotalSize, MaxMaxTotalSize)) - } - } - - if viper.IsSet("resourceLimits.fileProcessingTimeoutSec") { - timeout := viper.GetInt("resourceLimits.fileProcessingTimeoutSec") - if timeout < MinFileProcessingTimeoutSec { - validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.fileProcessingTimeoutSec (%d) must be at least %d", timeout, MinFileProcessingTimeoutSec)) - } - if timeout > MaxFileProcessingTimeoutSec { - validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.fileProcessingTimeoutSec (%d) exceeds maximum (%d)", timeout, MaxFileProcessingTimeoutSec)) - } - } - - if viper.IsSet("resourceLimits.overallTimeoutSec") { - timeout := viper.GetInt("resourceLimits.overallTimeoutSec") - if timeout < MinOverallTimeoutSec { - validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.overallTimeoutSec (%d) must be at least %d", timeout, MinOverallTimeoutSec)) - } - if timeout > MaxOverallTimeoutSec { - validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.overallTimeoutSec (%d) exceeds maximum (%d)", timeout, MaxOverallTimeoutSec)) - } - } - - if viper.IsSet("resourceLimits.maxConcurrentReads") { - maxReads := viper.GetInt("resourceLimits.maxConcurrentReads") - if maxReads < MinMaxConcurrentReads { - validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxConcurrentReads (%d) must be at least %d", maxReads, MinMaxConcurrentReads)) - } - if maxReads > MaxMaxConcurrentReads { - validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxConcurrentReads (%d) exceeds maximum (%d)", maxReads, MaxMaxConcurrentReads)) - } - } - - if viper.IsSet("resourceLimits.rateLimitFilesPerSec") { - rateLimit := 
viper.GetInt("resourceLimits.rateLimitFilesPerSec") - if rateLimit < MinRateLimitFilesPerSec { - validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.rateLimitFilesPerSec (%d) must be at least %d", rateLimit, MinRateLimitFilesPerSec)) - } - if rateLimit > MaxRateLimitFilesPerSec { - validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.rateLimitFilesPerSec (%d) exceeds maximum (%d)", rateLimit, MaxRateLimitFilesPerSec)) - } - } - - if viper.IsSet("resourceLimits.hardMemoryLimitMB") { - memLimit := viper.GetInt("resourceLimits.hardMemoryLimitMB") - if memLimit < MinHardMemoryLimitMB { - validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.hardMemoryLimitMB (%d) must be at least %d", memLimit, MinHardMemoryLimitMB)) - } - if memLimit > MaxHardMemoryLimitMB { - validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.hardMemoryLimitMB (%d) exceeds maximum (%d)", memLimit, MaxHardMemoryLimitMB)) - } - } - - if len(validationErrors) > 0 { - return utils.NewStructuredError( - utils.ErrorTypeConfiguration, - utils.CodeConfigValidation, - "configuration validation failed: "+strings.Join(validationErrors, "; "), - "", - map[string]interface{}{"validation_errors": validationErrors}, - ) - } - - return nil -} - -// GetMaxConcurrency returns the maximum concurrency limit from configuration. -func GetMaxConcurrency() int { - return viper.GetInt("maxConcurrency") -} - -// GetSupportedFormats returns the supported output formats from configuration. -func GetSupportedFormats() []string { - return viper.GetStringSlice("supportedFormats") -} - -// GetFilePatterns returns the file patterns from configuration. -func GetFilePatterns() []string { - return viper.GetStringSlice("filePatterns") -} - -// IsValidFormat checks if a format is supported. 
-func IsValidFormat(format string) bool { - format = strings.ToLower(strings.TrimSpace(format)) - validFormats := map[string]bool{"json": true, "yaml": true, "markdown": true} - return validFormats[format] -} - -// ValidateFileSize checks if a file size is within the configured limit. -func ValidateFileSize(size int64) error { - limit := GetFileSizeLimit() - if size > limit { - return utils.NewStructuredError( - utils.ErrorTypeValidation, - utils.CodeValidationSize, - fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", size, limit), - "", - map[string]interface{}{"file_size": size, "size_limit": limit}, - ) - } - return nil -} - -// ValidateOutputFormat checks if an output format is valid. -func ValidateOutputFormat(format string) error { - if !IsValidFormat(format) { - return utils.NewStructuredError( - utils.ErrorTypeValidation, - utils.CodeValidationFormat, - fmt.Sprintf("unsupported output format: %s (supported: json, yaml, markdown)", format), - "", - map[string]interface{}{"format": format}, - ) - } - return nil -} - -// ValidateConcurrency checks if a concurrency level is valid. -func ValidateConcurrency(concurrency int) error { - if concurrency < 1 { - return utils.NewStructuredError( - utils.ErrorTypeValidation, - utils.CodeValidationFormat, - fmt.Sprintf("concurrency (%d) must be at least 1", concurrency), - "", - map[string]interface{}{"concurrency": concurrency}, - ) - } - - if viper.IsSet("maxConcurrency") { - maxConcurrency := GetMaxConcurrency() - if concurrency > maxConcurrency { - return utils.NewStructuredError( - utils.ErrorTypeValidation, - utils.CodeValidationFormat, - fmt.Sprintf("concurrency (%d) exceeds maximum (%d)", concurrency, maxConcurrency), - "", - map[string]interface{}{"concurrency": concurrency, "max_concurrency": maxConcurrency}, - ) - } - } - - return nil -} - -// GetFileTypesEnabled returns whether file type detection is enabled. 
-func GetFileTypesEnabled() bool { - return viper.GetBool("fileTypes.enabled") -} - -// GetCustomImageExtensions returns custom image extensions from configuration. -func GetCustomImageExtensions() []string { - return viper.GetStringSlice("fileTypes.customImageExtensions") -} - -// GetCustomBinaryExtensions returns custom binary extensions from configuration. -func GetCustomBinaryExtensions() []string { - return viper.GetStringSlice("fileTypes.customBinaryExtensions") -} - -// GetCustomLanguages returns custom language mappings from configuration. -func GetCustomLanguages() map[string]string { - return viper.GetStringMapString("fileTypes.customLanguages") -} - -// GetDisabledImageExtensions returns disabled image extensions from configuration. -func GetDisabledImageExtensions() []string { - return viper.GetStringSlice("fileTypes.disabledImageExtensions") -} - -// GetDisabledBinaryExtensions returns disabled binary extensions from configuration. -func GetDisabledBinaryExtensions() []string { - return viper.GetStringSlice("fileTypes.disabledBinaryExtensions") -} - -// GetDisabledLanguageExtensions returns disabled language extensions from configuration. -func GetDisabledLanguageExtensions() []string { - return viper.GetStringSlice("fileTypes.disabledLanguageExtensions") -} - -// Back-pressure configuration getters - -// GetBackpressureEnabled returns whether back-pressure management is enabled. -func GetBackpressureEnabled() bool { - return viper.GetBool("backpressure.enabled") -} - -// GetMaxPendingFiles returns the maximum number of files that can be pending in the file channel. -func GetMaxPendingFiles() int { - return viper.GetInt("backpressure.maxPendingFiles") -} - -// GetMaxPendingWrites returns the maximum number of writes that can be pending in the write channel. -func GetMaxPendingWrites() int { - return viper.GetInt("backpressure.maxPendingWrites") -} - -// GetMaxMemoryUsage returns the maximum memory usage in bytes before back-pressure kicks in. 
-func GetMaxMemoryUsage() int64 { - return viper.GetInt64("backpressure.maxMemoryUsage") -} - -// GetMemoryCheckInterval returns how often to check memory usage (in number of files processed). -func GetMemoryCheckInterval() int { - return viper.GetInt("backpressure.memoryCheckInterval") -} - -// Resource Limit Configuration Getters - -// GetResourceLimitsEnabled returns whether resource limits are enabled. -func GetResourceLimitsEnabled() bool { - return viper.GetBool("resourceLimits.enabled") -} - -// GetMaxFiles returns the maximum number of files that can be processed. -func GetMaxFiles() int { - return viper.GetInt("resourceLimits.maxFiles") -} - -// GetMaxTotalSize returns the maximum total size of files that can be processed. -func GetMaxTotalSize() int64 { - return viper.GetInt64("resourceLimits.maxTotalSize") -} - -// GetFileProcessingTimeoutSec returns the timeout for individual file processing in seconds. -func GetFileProcessingTimeoutSec() int { - return viper.GetInt("resourceLimits.fileProcessingTimeoutSec") -} - -// GetOverallTimeoutSec returns the timeout for overall processing in seconds. -func GetOverallTimeoutSec() int { - return viper.GetInt("resourceLimits.overallTimeoutSec") -} - -// GetMaxConcurrentReads returns the maximum number of concurrent file reading operations. -func GetMaxConcurrentReads() int { - return viper.GetInt("resourceLimits.maxConcurrentReads") -} - -// GetRateLimitFilesPerSec returns the rate limit for file processing (files per second). -func GetRateLimitFilesPerSec() int { - return viper.GetInt("resourceLimits.rateLimitFilesPerSec") -} - -// GetHardMemoryLimitMB returns the hard memory limit in megabytes. -func GetHardMemoryLimitMB() int { - return viper.GetInt("resourceLimits.hardMemoryLimitMB") -} - -// GetEnableGracefulDegradation returns whether graceful degradation is enabled. 
-func GetEnableGracefulDegradation() bool { - return viper.GetBool("resourceLimits.enableGracefulDegradation") -} - -// GetEnableResourceMonitoring returns whether resource monitoring is enabled. -func GetEnableResourceMonitoring() bool { - return viper.GetBool("resourceLimits.enableResourceMonitoring") -} +// This file is now a minimal orchestration layer that delegates to the modular components. diff --git a/config/constants.go b/config/constants.go new file mode 100644 index 0000000..8f54bbe --- /dev/null +++ b/config/constants.go @@ -0,0 +1,61 @@ +package config + +const ( + // DefaultFileSizeLimit is the default maximum file size (5MB). + DefaultFileSizeLimit = 5242880 + // MinFileSizeLimit is the minimum allowed file size limit (1KB). + MinFileSizeLimit = 1024 + // MaxFileSizeLimit is the maximum allowed file size limit (100MB). + MaxFileSizeLimit = 104857600 + + // Resource Limit Constants + + // DefaultMaxFiles is the default maximum number of files to process. + DefaultMaxFiles = 10000 + // MinMaxFiles is the minimum allowed file count limit. + MinMaxFiles = 1 + // MaxMaxFiles is the maximum allowed file count limit. + MaxMaxFiles = 1000000 + + // DefaultMaxTotalSize is the default maximum total size of files (1GB). + DefaultMaxTotalSize = 1073741824 + // MinMaxTotalSize is the minimum allowed total size limit (1MB). + MinMaxTotalSize = 1048576 + // MaxMaxTotalSize is the maximum allowed total size limit (100GB). + MaxMaxTotalSize = 107374182400 + + // DefaultFileProcessingTimeoutSec is the default timeout for individual file processing (30 seconds). + DefaultFileProcessingTimeoutSec = 30 + // MinFileProcessingTimeoutSec is the minimum allowed file processing timeout (1 second). + MinFileProcessingTimeoutSec = 1 + // MaxFileProcessingTimeoutSec is the maximum allowed file processing timeout (300 seconds). + MaxFileProcessingTimeoutSec = 300 + + // DefaultOverallTimeoutSec is the default timeout for overall processing (3600 seconds = 1 hour). 
+ DefaultOverallTimeoutSec = 3600 + // MinOverallTimeoutSec is the minimum allowed overall timeout (10 seconds). + MinOverallTimeoutSec = 10 + // MaxOverallTimeoutSec is the maximum allowed overall timeout (86400 seconds = 24 hours). + MaxOverallTimeoutSec = 86400 + + // DefaultMaxConcurrentReads is the default maximum concurrent file reading operations. + DefaultMaxConcurrentReads = 10 + // MinMaxConcurrentReads is the minimum allowed concurrent reads. + MinMaxConcurrentReads = 1 + // MaxMaxConcurrentReads is the maximum allowed concurrent reads. + MaxMaxConcurrentReads = 100 + + // DefaultRateLimitFilesPerSec is the default rate limit for file processing (0 = disabled). + DefaultRateLimitFilesPerSec = 0 + // MinRateLimitFilesPerSec is the minimum rate limit. + MinRateLimitFilesPerSec = 0 + // MaxRateLimitFilesPerSec is the maximum rate limit. + MaxRateLimitFilesPerSec = 10000 + + // DefaultHardMemoryLimitMB is the default hard memory limit (512MB). + DefaultHardMemoryLimitMB = 512 + // MinHardMemoryLimitMB is the minimum hard memory limit (64MB). + MinHardMemoryLimitMB = 64 + // MaxHardMemoryLimitMB is the maximum hard memory limit (8192MB = 8GB). + MaxHardMemoryLimitMB = 8192 +) \ No newline at end of file diff --git a/config/getters.go b/config/getters.go new file mode 100644 index 0000000..4bcc1b4 --- /dev/null +++ b/config/getters.go @@ -0,0 +1,157 @@ +package config + +import ( + "strings" + + "github.com/spf13/viper" +) + +// GetFileSizeLimit returns the file size limit from configuration. +func GetFileSizeLimit() int64 { + return viper.GetInt64("fileSizeLimit") +} + +// GetIgnoredDirectories returns the list of directories to ignore. +func GetIgnoredDirectories() []string { + return viper.GetStringSlice("ignoreDirectories") +} + +// GetMaxConcurrency returns the maximum concurrency level. +func GetMaxConcurrency() int { + return viper.GetInt("maxConcurrency") +} + +// GetSupportedFormats returns the list of supported output formats. 
+func GetSupportedFormats() []string { + return viper.GetStringSlice("supportedFormats") +} + +// GetFilePatterns returns the list of file patterns. +func GetFilePatterns() []string { + return viper.GetStringSlice("filePatterns") +} + +// IsValidFormat checks if the given format is valid. +func IsValidFormat(format string) bool { + format = strings.ToLower(strings.TrimSpace(format)) + supportedFormats := map[string]bool{ + "json": true, + "yaml": true, + "markdown": true, + } + return supportedFormats[format] +} + +// GetFileTypesEnabled returns whether file types are enabled. +func GetFileTypesEnabled() bool { + return viper.GetBool("fileTypes.enabled") +} + +// GetCustomImageExtensions returns custom image extensions. +func GetCustomImageExtensions() []string { + return viper.GetStringSlice("fileTypes.customImageExtensions") +} + +// GetCustomBinaryExtensions returns custom binary extensions. +func GetCustomBinaryExtensions() []string { + return viper.GetStringSlice("fileTypes.customBinaryExtensions") +} + +// GetCustomLanguages returns custom language mappings. +func GetCustomLanguages() map[string]string { + return viper.GetStringMapString("fileTypes.customLanguages") +} + +// GetDisabledImageExtensions returns disabled image extensions. +func GetDisabledImageExtensions() []string { + return viper.GetStringSlice("fileTypes.disabledImageExtensions") +} + +// GetDisabledBinaryExtensions returns disabled binary extensions. +func GetDisabledBinaryExtensions() []string { + return viper.GetStringSlice("fileTypes.disabledBinaryExtensions") +} + +// GetDisabledLanguageExtensions returns disabled language extensions. +func GetDisabledLanguageExtensions() []string { + return viper.GetStringSlice("fileTypes.disabledLanguageExtensions") +} + +// Backpressure getters + +// GetBackpressureEnabled returns whether backpressure is enabled. 
+func GetBackpressureEnabled() bool { + return viper.GetBool("backpressure.enabled") +} + +// GetMaxPendingFiles returns the maximum pending files. +func GetMaxPendingFiles() int { + return viper.GetInt("backpressure.maxPendingFiles") +} + +// GetMaxPendingWrites returns the maximum pending writes. +func GetMaxPendingWrites() int { + return viper.GetInt("backpressure.maxPendingWrites") +} + +// GetMaxMemoryUsage returns the maximum memory usage. +func GetMaxMemoryUsage() int64 { + return viper.GetInt64("backpressure.maxMemoryUsage") +} + +// GetMemoryCheckInterval returns the memory check interval. +func GetMemoryCheckInterval() int { + return viper.GetInt("backpressure.memoryCheckInterval") +} + +// Resource limits getters + +// GetResourceLimitsEnabled returns whether resource limits are enabled. +func GetResourceLimitsEnabled() bool { + return viper.GetBool("resourceLimits.enabled") +} + +// GetMaxFiles returns the maximum number of files. +func GetMaxFiles() int { + return viper.GetInt("resourceLimits.maxFiles") +} + +// GetMaxTotalSize returns the maximum total size. +func GetMaxTotalSize() int64 { + return viper.GetInt64("resourceLimits.maxTotalSize") +} + +// GetFileProcessingTimeoutSec returns the file processing timeout in seconds. +func GetFileProcessingTimeoutSec() int { + return viper.GetInt("resourceLimits.fileProcessingTimeoutSec") +} + +// GetOverallTimeoutSec returns the overall timeout in seconds. +func GetOverallTimeoutSec() int { + return viper.GetInt("resourceLimits.overallTimeoutSec") +} + +// GetMaxConcurrentReads returns the maximum concurrent reads. +func GetMaxConcurrentReads() int { + return viper.GetInt("resourceLimits.maxConcurrentReads") +} + +// GetRateLimitFilesPerSec returns the rate limit files per second. +func GetRateLimitFilesPerSec() int { + return viper.GetInt("resourceLimits.rateLimitFilesPerSec") +} + +// GetHardMemoryLimitMB returns the hard memory limit in MB. 
+func GetHardMemoryLimitMB() int { + return viper.GetInt("resourceLimits.hardMemoryLimitMB") +} + +// GetEnableGracefulDegradation returns whether graceful degradation is enabled. +func GetEnableGracefulDegradation() bool { + return viper.GetBool("resourceLimits.enableGracefulDegradation") +} + +// GetEnableResourceMonitoring returns whether resource monitoring is enabled. +func GetEnableResourceMonitoring() bool { + return viper.GetBool("resourceLimits.enableResourceMonitoring") +} \ No newline at end of file diff --git a/config/loader.go b/config/loader.go new file mode 100644 index 0000000..c7490b5 --- /dev/null +++ b/config/loader.go @@ -0,0 +1,90 @@ +package config + +import ( + "os" + "path/filepath" + + "github.com/sirupsen/logrus" + "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/utils" +) + +// LoadConfig reads configuration from a YAML file. +// It looks for config in the following order: +// 1. $XDG_CONFIG_HOME/gibidify/config.yaml +// 2. $HOME/.config/gibidify/config.yaml +// 3. The current directory as fallback. 
+func LoadConfig() { + viper.SetConfigName("config") + viper.SetConfigType("yaml") + + if xdgConfig := os.Getenv("XDG_CONFIG_HOME"); xdgConfig != "" { + // Validate XDG_CONFIG_HOME for path traversal attempts + if err := utils.ValidateConfigPath(xdgConfig); err != nil { + logrus.Warnf("Invalid XDG_CONFIG_HOME path, using default config: %v", err) + } else { + configPath := filepath.Join(xdgConfig, "gibidify") + viper.AddConfigPath(configPath) + } + } else if home, err := os.UserHomeDir(); err == nil { + viper.AddConfigPath(filepath.Join(home, ".config", "gibidify")) + } + // Only add current directory if no config file named gibidify.yaml exists + // to avoid conflicts with the project's output file + if _, err := os.Stat("gibidify.yaml"); os.IsNotExist(err) { + viper.AddConfigPath(".") + } + + if err := viper.ReadInConfig(); err != nil { + logrus.Infof("Config file not found, using default values: %v", err) + setDefaultConfig() + } else { + logrus.Infof("Using config file: %s", viper.ConfigFileUsed()) + // Validate configuration after loading + if err := ValidateConfig(); err != nil { + logrus.Warnf("Configuration validation failed: %v", err) + logrus.Info("Falling back to default configuration") + // Reset viper and set defaults when validation fails + viper.Reset() + setDefaultConfig() + } + } +} + +// setDefaultConfig sets default configuration values. +func setDefaultConfig() { + viper.SetDefault("fileSizeLimit", DefaultFileSizeLimit) + // Default ignored directories. 
+ viper.SetDefault("ignoreDirectories", []string{ + "vendor", "node_modules", ".git", "dist", "build", "target", "bower_components", "cache", "tmp", + }) + + // FileTypeRegistry defaults + viper.SetDefault("fileTypes.enabled", true) + viper.SetDefault("fileTypes.customImageExtensions", []string{}) + viper.SetDefault("fileTypes.customBinaryExtensions", []string{}) + viper.SetDefault("fileTypes.customLanguages", map[string]string{}) + viper.SetDefault("fileTypes.disabledImageExtensions", []string{}) + viper.SetDefault("fileTypes.disabledBinaryExtensions", []string{}) + viper.SetDefault("fileTypes.disabledLanguageExtensions", []string{}) + + // Back-pressure and memory management defaults + viper.SetDefault("backpressure.enabled", true) + viper.SetDefault("backpressure.maxPendingFiles", 1000) // Max files in file channel buffer + viper.SetDefault("backpressure.maxPendingWrites", 100) // Max writes in write channel buffer + viper.SetDefault("backpressure.maxMemoryUsage", 104857600) // 100MB max memory usage + viper.SetDefault("backpressure.memoryCheckInterval", 1000) // Check memory every 1000 files + + // Resource limit defaults + viper.SetDefault("resourceLimits.enabled", true) + viper.SetDefault("resourceLimits.maxFiles", DefaultMaxFiles) + viper.SetDefault("resourceLimits.maxTotalSize", DefaultMaxTotalSize) + viper.SetDefault("resourceLimits.fileProcessingTimeoutSec", DefaultFileProcessingTimeoutSec) + viper.SetDefault("resourceLimits.overallTimeoutSec", DefaultOverallTimeoutSec) + viper.SetDefault("resourceLimits.maxConcurrentReads", DefaultMaxConcurrentReads) + viper.SetDefault("resourceLimits.rateLimitFilesPerSec", DefaultRateLimitFilesPerSec) + viper.SetDefault("resourceLimits.hardMemoryLimitMB", DefaultHardMemoryLimitMB) + viper.SetDefault("resourceLimits.enableGracefulDegradation", true) + viper.SetDefault("resourceLimits.enableResourceMonitoring", true) +} \ No newline at end of file diff --git a/config/loader_test.go b/config/loader_test.go new file mode 
100644 index 0000000..d1c5295 --- /dev/null +++ b/config/loader_test.go @@ -0,0 +1,120 @@ +package config_test + +import ( + "os" + "testing" + + "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/config" + "github.com/ivuorinen/gibidify/testutil" +) + +const ( + defaultFileSizeLimit = 5242880 + testFileSizeLimit = 123456 +) + +// TestDefaultConfig verifies that if no config file is found, +// the default configuration values are correctly set. +func TestDefaultConfig(t *testing.T) { + // Create a temporary directory to ensure no config file is present. + tmpDir := t.TempDir() + + // Point Viper to the temp directory with no config file. + originalConfigPaths := viper.ConfigFileUsed() + testutil.ResetViperConfig(t, tmpDir) + + // Check defaults + defaultSizeLimit := config.GetFileSizeLimit() + if defaultSizeLimit != defaultFileSizeLimit { + t.Errorf("Expected default file size limit of 5242880, got %d", defaultSizeLimit) + } + + ignoredDirs := config.GetIgnoredDirectories() + if len(ignoredDirs) == 0 { + t.Errorf("Expected some default ignored directories, got none") + } + + // Restore Viper state + viper.SetConfigFile(originalConfigPaths) +} + +// TestLoadConfigFile verifies that when a valid config file is present, +// viper loads the specified values correctly. 
+func TestLoadConfigFile(t *testing.T) { + tmpDir := t.TempDir() + + // Prepare a minimal config file + configContent := []byte(`--- +fileSizeLimit: 123456 +ignoreDirectories: +- "testdir1" +- "testdir2" +`) + + testutil.CreateTestFile(t, tmpDir, "config.yaml", configContent) + + // Reset viper and point to the new config path + viper.Reset() + viper.AddConfigPath(tmpDir) + + // Force Viper to read our config file + testutil.MustSucceed(t, viper.ReadInConfig(), "reading config file") + + // Validate loaded data + if got := viper.GetInt64("fileSizeLimit"); got != testFileSizeLimit { + t.Errorf("Expected fileSizeLimit=123456, got %d", got) + } + + ignored := viper.GetStringSlice("ignoreDirectories") + if len(ignored) != 2 || ignored[0] != "testdir1" || ignored[1] != "testdir2" { + t.Errorf("Expected [\"testdir1\", \"testdir2\"], got %v", ignored) + } +} + +// TestLoadConfigWithValidation tests that invalid config files fall back to defaults. +func TestLoadConfigWithValidation(t *testing.T) { + // Create a temporary config file with invalid content + configContent := ` +fileSizeLimit: 100 +ignoreDirectories: + - node_modules + - "" + - .git +` + + tempDir := t.TempDir() + configFile := tempDir + "/config.yaml" + + err := os.WriteFile(configFile, []byte(configContent), 0o644) + if err != nil { + t.Fatalf("Failed to write config file: %v", err) + } + + // Reset viper and set config path + viper.Reset() + viper.AddConfigPath(tempDir) + + // This should load the config but validation should fail and fall back to defaults + config.LoadConfig() + + // Should have fallen back to defaults due to validation failure + if config.GetFileSizeLimit() != int64(config.DefaultFileSizeLimit) { + t.Errorf("Expected default file size limit after validation failure, got %d", config.GetFileSizeLimit()) + } + if containsString(config.GetIgnoredDirectories(), "") { + t.Errorf("Expected ignored directories not to contain empty string after validation failure, got %v", 
config.GetIgnoredDirectories()) + } +} + +// Helper functions + +func containsString(slice []string, item string) bool { + for _, s := range slice { + if s == item { + return true + } + } + return false +} \ No newline at end of file diff --git a/config/validation.go b/config/validation.go new file mode 100644 index 0000000..ed13319 --- /dev/null +++ b/config/validation.go @@ -0,0 +1,307 @@ +package config + +import ( + "fmt" + "strings" + + "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/utils" +) + +// ValidateConfig validates the loaded configuration. +func ValidateConfig() error { + var validationErrors []string + + // Validate file size limit + fileSizeLimit := viper.GetInt64("fileSizeLimit") + if fileSizeLimit < MinFileSizeLimit { + validationErrors = append(validationErrors, fmt.Sprintf("fileSizeLimit (%d) is below minimum (%d)", fileSizeLimit, MinFileSizeLimit)) + } + if fileSizeLimit > MaxFileSizeLimit { + validationErrors = append(validationErrors, fmt.Sprintf("fileSizeLimit (%d) exceeds maximum (%d)", fileSizeLimit, MaxFileSizeLimit)) + } + + // Validate ignore directories + ignoreDirectories := viper.GetStringSlice("ignoreDirectories") + for i, dir := range ignoreDirectories { + dir = strings.TrimSpace(dir) + if dir == "" { + validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] is empty", i)) + continue + } + if strings.Contains(dir, "/") { + validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] (%s) contains path separator - only directory names are allowed", i, dir)) + } + if strings.HasPrefix(dir, ".") && dir != ".git" && dir != ".vscode" && dir != ".idea" { + validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] (%s) starts with dot - this may cause unexpected behavior", i, dir)) + } + } + + // Validate supported output formats if configured + if viper.IsSet("supportedFormats") { + supportedFormats := viper.GetStringSlice("supportedFormats") + validFormats := 
map[string]bool{"json": true, "yaml": true, "markdown": true} + for i, format := range supportedFormats { + format = strings.ToLower(strings.TrimSpace(format)) + if !validFormats[format] { + validationErrors = append(validationErrors, fmt.Sprintf("supportedFormats[%d] (%s) is not a valid format (json, yaml, markdown)", i, format)) + } + } + } + + // Validate concurrency settings if configured + if viper.IsSet("maxConcurrency") { + maxConcurrency := viper.GetInt("maxConcurrency") + if maxConcurrency < 1 { + validationErrors = append(validationErrors, fmt.Sprintf("maxConcurrency (%d) must be at least 1", maxConcurrency)) + } + if maxConcurrency > 100 { + validationErrors = append(validationErrors, fmt.Sprintf("maxConcurrency (%d) is unreasonably high (max 100)", maxConcurrency)) + } + } + + // Validate file patterns if configured + if viper.IsSet("filePatterns") { + filePatterns := viper.GetStringSlice("filePatterns") + for i, pattern := range filePatterns { + pattern = strings.TrimSpace(pattern) + if pattern == "" { + validationErrors = append(validationErrors, fmt.Sprintf("filePatterns[%d] is empty", i)) + continue + } + // Basic validation - patterns should contain at least one alphanumeric character + if !strings.ContainsAny(pattern, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") { + validationErrors = append(validationErrors, fmt.Sprintf("filePatterns[%d] (%s) appears to be invalid", i, pattern)) + } + } + } + + // Validate FileTypeRegistry configuration + if viper.IsSet("fileTypes.customImageExtensions") { + customImages := viper.GetStringSlice("fileTypes.customImageExtensions") + for i, ext := range customImages { + ext = strings.TrimSpace(ext) + if ext == "" { + validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customImageExtensions[%d] is empty", i)) + continue + } + if !strings.HasPrefix(ext, ".") { + validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customImageExtensions[%d] (%s) must start with a dot", 
i, ext)) + } + } + } + + if viper.IsSet("fileTypes.customBinaryExtensions") { + customBinary := viper.GetStringSlice("fileTypes.customBinaryExtensions") + for i, ext := range customBinary { + ext = strings.TrimSpace(ext) + if ext == "" { + validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customBinaryExtensions[%d] is empty", i)) + continue + } + if !strings.HasPrefix(ext, ".") { + validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customBinaryExtensions[%d] (%s) must start with a dot", i, ext)) + } + } + } + + if viper.IsSet("fileTypes.customLanguages") { + customLangs := viper.GetStringMapString("fileTypes.customLanguages") + for ext, lang := range customLangs { + ext = strings.TrimSpace(ext) + lang = strings.TrimSpace(lang) + if ext == "" { + validationErrors = append(validationErrors, "fileTypes.customLanguages contains empty extension key") + continue + } + if !strings.HasPrefix(ext, ".") { + validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customLanguages extension (%s) must start with a dot", ext)) + } + if lang == "" { + validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customLanguages[%s] has empty language value", ext)) + } + } + } + + // Validate back-pressure configuration + if viper.IsSet("backpressure.maxPendingFiles") { + maxPendingFiles := viper.GetInt("backpressure.maxPendingFiles") + if maxPendingFiles < 1 { + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingFiles (%d) must be at least 1", maxPendingFiles)) + } + if maxPendingFiles > 100000 { + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingFiles (%d) is unreasonably high (max 100000)", maxPendingFiles)) + } + } + + if viper.IsSet("backpressure.maxPendingWrites") { + maxPendingWrites := viper.GetInt("backpressure.maxPendingWrites") + if maxPendingWrites < 1 { + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingWrites (%d) must be at least 
1", maxPendingWrites)) + } + if maxPendingWrites > 10000 { + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingWrites (%d) is unreasonably high (max 10000)", maxPendingWrites)) + } + } + + if viper.IsSet("backpressure.maxMemoryUsage") { + maxMemoryUsage := viper.GetInt64("backpressure.maxMemoryUsage") + if maxMemoryUsage < 1048576 { // 1MB minimum + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxMemoryUsage (%d) must be at least 1MB (1048576 bytes)", maxMemoryUsage)) + } + if maxMemoryUsage > 10737418240 { // 10GB maximum + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxMemoryUsage (%d) is unreasonably high (max 10GB)", maxMemoryUsage)) + } + } + + if viper.IsSet("backpressure.memoryCheckInterval") { + interval := viper.GetInt("backpressure.memoryCheckInterval") + if interval < 1 { + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.memoryCheckInterval (%d) must be at least 1", interval)) + } + if interval > 100000 { + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.memoryCheckInterval (%d) is unreasonably high (max 100000)", interval)) + } + } + + // Validate resource limits configuration + if viper.IsSet("resourceLimits.maxFiles") { + maxFiles := viper.GetInt("resourceLimits.maxFiles") + if maxFiles < MinMaxFiles { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxFiles (%d) must be at least %d", maxFiles, MinMaxFiles)) + } + if maxFiles > MaxMaxFiles { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxFiles (%d) exceeds maximum (%d)", maxFiles, MaxMaxFiles)) + } + } + + if viper.IsSet("resourceLimits.maxTotalSize") { + maxTotalSize := viper.GetInt64("resourceLimits.maxTotalSize") + if maxTotalSize < MinMaxTotalSize { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxTotalSize (%d) must be at least %d", maxTotalSize, MinMaxTotalSize)) + } + if maxTotalSize > 
MaxMaxTotalSize { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxTotalSize (%d) exceeds maximum (%d)", maxTotalSize, MaxMaxTotalSize)) + } + } + + if viper.IsSet("resourceLimits.fileProcessingTimeoutSec") { + timeout := viper.GetInt("resourceLimits.fileProcessingTimeoutSec") + if timeout < MinFileProcessingTimeoutSec { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.fileProcessingTimeoutSec (%d) must be at least %d", timeout, MinFileProcessingTimeoutSec)) + } + if timeout > MaxFileProcessingTimeoutSec { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.fileProcessingTimeoutSec (%d) exceeds maximum (%d)", timeout, MaxFileProcessingTimeoutSec)) + } + } + + if viper.IsSet("resourceLimits.overallTimeoutSec") { + timeout := viper.GetInt("resourceLimits.overallTimeoutSec") + if timeout < MinOverallTimeoutSec { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.overallTimeoutSec (%d) must be at least %d", timeout, MinOverallTimeoutSec)) + } + if timeout > MaxOverallTimeoutSec { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.overallTimeoutSec (%d) exceeds maximum (%d)", timeout, MaxOverallTimeoutSec)) + } + } + + if viper.IsSet("resourceLimits.maxConcurrentReads") { + maxReads := viper.GetInt("resourceLimits.maxConcurrentReads") + if maxReads < MinMaxConcurrentReads { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxConcurrentReads (%d) must be at least %d", maxReads, MinMaxConcurrentReads)) + } + if maxReads > MaxMaxConcurrentReads { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxConcurrentReads (%d) exceeds maximum (%d)", maxReads, MaxMaxConcurrentReads)) + } + } + + if viper.IsSet("resourceLimits.rateLimitFilesPerSec") { + rateLimit := viper.GetInt("resourceLimits.rateLimitFilesPerSec") + if rateLimit < MinRateLimitFilesPerSec { + validationErrors = append(validationErrors, 
fmt.Sprintf("resourceLimits.rateLimitFilesPerSec (%d) must be at least %d", rateLimit, MinRateLimitFilesPerSec)) + } + if rateLimit > MaxRateLimitFilesPerSec { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.rateLimitFilesPerSec (%d) exceeds maximum (%d)", rateLimit, MaxRateLimitFilesPerSec)) + } + } + + if viper.IsSet("resourceLimits.hardMemoryLimitMB") { + memLimit := viper.GetInt("resourceLimits.hardMemoryLimitMB") + if memLimit < MinHardMemoryLimitMB { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.hardMemoryLimitMB (%d) must be at least %d", memLimit, MinHardMemoryLimitMB)) + } + if memLimit > MaxHardMemoryLimitMB { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.hardMemoryLimitMB (%d) exceeds maximum (%d)", memLimit, MaxHardMemoryLimitMB)) + } + } + + if len(validationErrors) > 0 { + return utils.NewStructuredError( + utils.ErrorTypeConfiguration, + utils.CodeConfigValidation, + "configuration validation failed: "+strings.Join(validationErrors, "; "), + "", + map[string]interface{}{"validation_errors": validationErrors}, + ) + } + + return nil +} + +// ValidateFileSize checks if a file size is within the configured limit. +func ValidateFileSize(size int64) error { + limit := GetFileSizeLimit() + if size > limit { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeValidationSize, + fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", size, limit), + "", + map[string]interface{}{"file_size": size, "size_limit": limit}, + ) + } + return nil +} + +// ValidateOutputFormat checks if an output format is valid. 
+func ValidateOutputFormat(format string) error { + if !IsValidFormat(format) { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeValidationFormat, + fmt.Sprintf("unsupported output format: %s (supported: json, yaml, markdown)", format), + "", + map[string]interface{}{"format": format}, + ) + } + return nil +} + +// ValidateConcurrency checks if a concurrency level is valid. +func ValidateConcurrency(concurrency int) error { + if concurrency < 1 { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeValidationFormat, + fmt.Sprintf("concurrency (%d) must be at least 1", concurrency), + "", + map[string]interface{}{"concurrency": concurrency}, + ) + } + + if viper.IsSet("maxConcurrency") { + maxConcurrency := GetMaxConcurrency() + if concurrency > maxConcurrency { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeValidationFormat, + fmt.Sprintf("concurrency (%d) exceeds maximum (%d)", concurrency, maxConcurrency), + "", + map[string]interface{}{"concurrency": concurrency, "max_concurrency": maxConcurrency}, + ) + } + } + + return nil +} \ No newline at end of file diff --git a/config/config_test.go b/config/validation_test.go similarity index 65% rename from config/config_test.go rename to config/validation_test.go index 55fc55c..9b3cfb4 100644 --- a/config/config_test.go +++ b/config/validation_test.go @@ -1,80 +1,15 @@ package config_test import ( - "os" "strings" "testing" "github.com/spf13/viper" "github.com/ivuorinen/gibidify/config" - "github.com/ivuorinen/gibidify/testutil" "github.com/ivuorinen/gibidify/utils" ) -const ( - defaultFileSizeLimit = 5242880 - testFileSizeLimit = 123456 -) - -// TestDefaultConfig verifies that if no config file is found, -// the default configuration values are correctly set. -func TestDefaultConfig(t *testing.T) { - // Create a temporary directory to ensure no config file is present. 
- tmpDir := t.TempDir() - - // Point Viper to the temp directory with no config file. - originalConfigPaths := viper.ConfigFileUsed() - testutil.ResetViperConfig(t, tmpDir) - - // Check defaults - defaultSizeLimit := config.GetFileSizeLimit() - if defaultSizeLimit != defaultFileSizeLimit { - t.Errorf("Expected default file size limit of 5242880, got %d", defaultSizeLimit) - } - - ignoredDirs := config.GetIgnoredDirectories() - if len(ignoredDirs) == 0 { - t.Errorf("Expected some default ignored directories, got none") - } - - // Restore Viper state - viper.SetConfigFile(originalConfigPaths) -} - -// TestLoadConfigFile verifies that when a valid config file is present, -// viper loads the specified values correctly. -func TestLoadConfigFile(t *testing.T) { - tmpDir := t.TempDir() - - // Prepare a minimal config file - configContent := []byte(`--- -fileSizeLimit: 123456 -ignoreDirectories: -- "testdir1" -- "testdir2" -`) - - testutil.CreateTestFile(t, tmpDir, "config.yaml", configContent) - - // Reset viper and point to the new config path - viper.Reset() - viper.AddConfigPath(tmpDir) - - // Force Viper to read our config file - testutil.MustSucceed(t, viper.ReadInConfig(), "reading config file") - - // Validate loaded data - if got := viper.GetInt64("fileSizeLimit"); got != testFileSizeLimit { - t.Errorf("Expected fileSizeLimit=123456, got %d", got) - } - - ignored := viper.GetStringSlice("ignoreDirectories") - if len(ignored) != 2 || ignored[0] != "testdir1" || ignored[1] != "testdir2" { - t.Errorf("Expected [\"testdir1\", \"testdir2\"], got %v", ignored) - } -} - // TestValidateConfig tests the configuration validation functionality. func TestValidateConfig(t *testing.T) { tests := []struct { @@ -296,52 +231,6 @@ func TestValidationFunctions(t *testing.T) { }) } -// TestLoadConfigWithValidation tests that invalid config files fall back to defaults. 
-func TestLoadConfigWithValidation(t *testing.T) { - // Create a temporary config file with invalid content - configContent := ` -fileSizeLimit: 100 -ignoreDirectories: - - node_modules - - "" - - .git -` - - tempDir := t.TempDir() - configFile := tempDir + "/config.yaml" - - err := os.WriteFile(configFile, []byte(configContent), 0o644) - if err != nil { - t.Fatalf("Failed to write config file: %v", err) - } - - // Reset viper and set config path - viper.Reset() - viper.AddConfigPath(tempDir) - - // This should load the config but validation should fail and fall back to defaults - config.LoadConfig() - - // Should have fallen back to defaults due to validation failure - if config.GetFileSizeLimit() != int64(config.DefaultFileSizeLimit) { - t.Errorf("Expected default file size limit after validation failure, got %d", config.GetFileSizeLimit()) - } - if containsString(config.GetIgnoredDirectories(), "") { - t.Errorf("Expected ignored directories not to contain empty string after validation failure, got %v", config.GetIgnoredDirectories()) - } -} - -// Helper functions - -func containsString(slice []string, item string) bool { - for _, s := range slice { - if s == item { - return true - } - } - return false -} - func errorAs(err error, target interface{}) bool { if err == nil { return false @@ -353,4 +242,4 @@ func errorAs(err error, target interface{}) bool { } } return false -} +} \ No newline at end of file diff --git a/fileproc/filetypes_concurrency_test.go b/fileproc/filetypes_concurrency_test.go new file mode 100644 index 0000000..9478aac --- /dev/null +++ b/fileproc/filetypes_concurrency_test.go @@ -0,0 +1,105 @@ +package fileproc + +import ( + "fmt" + "sync" + "testing" +) + +// TestFileTypeRegistry_ThreadSafety tests thread safety of the FileTypeRegistry. 
// TestFileTypeRegistry_ThreadSafety exercises the registry under heavy
// concurrent use: parallel reads of the shared default registry, racing
// singleton initialization, and concurrent mutation of per-goroutine
// instances. Intended to be run with -race.
func TestFileTypeRegistry_ThreadSafety(t *testing.T) {
	const numGoroutines = 100
	const numOperationsPerGoroutine = 100

	var wg sync.WaitGroup

	// Test concurrent read operations.
	t.Run("ConcurrentReads", func(t *testing.T) {
		for i := 0; i < numGoroutines; i++ {
			wg.Add(1)
			go func(id int) {
				defer wg.Done()
				registry := GetDefaultRegistry()

				for j := 0; j < numOperationsPerGoroutine; j++ {
					// Test various file detection operations on the instance.
					_ = registry.IsImage("test.png")
					_ = registry.IsBinary("test.exe")
					_ = registry.GetLanguage("test.go")

					// Test the package-level convenience functions too.
					_ = IsImage("image.jpg")
					_ = IsBinary("binary.dll")
					_ = GetLanguage("script.py")
				}
			}(i)
		}
		wg.Wait()
	})

	// Test concurrent registry access (singleton creation).
	t.Run("ConcurrentRegistryAccess", func(t *testing.T) {
		// Reset the package-level singleton so that initialization actually
		// races here. Mutating registryOnce/registry like this is only safe
		// because no other goroutines are running at this point; never do
		// this outside tests.
		registryOnce = sync.Once{}
		registry = nil

		registries := make([]*FileTypeRegistry, numGoroutines)

		for i := 0; i < numGoroutines; i++ {
			wg.Add(1)
			go func(id int) {
				defer wg.Done()
				registries[id] = GetDefaultRegistry()
			}(i)
		}
		wg.Wait()

		// Verify all goroutines got the same registry instance (pointer
		// identity proves sync.Once ran the initializer exactly once).
		firstRegistry := registries[0]
		for i := 1; i < numGoroutines; i++ {
			if registries[i] != firstRegistry {
				t.Errorf("Registry %d is different from registry 0", i)
			}
		}
	})

	// Test concurrent modifications on separate registry instances.
	t.Run("ConcurrentModifications", func(t *testing.T) {
		// Create separate registry instances for each goroutine to test
		// modification thread safety without cross-goroutine interference.
		for i := 0; i < numGoroutines; i++ {
			wg.Add(1)
			go func(id int) {
				defer wg.Done()

				// A private instance per goroutine; extensions are made
				// unique per goroutine/iteration so checks are unambiguous.
				registry := &FileTypeRegistry{
					imageExts:   make(map[string]bool),
					binaryExts:  make(map[string]bool),
					languageMap: make(map[string]string),
				}

				for j := 0; j < numOperationsPerGoroutine; j++ {
					// Add unique extensions for this goroutine.
					extSuffix := fmt.Sprintf("_%d_%d", id, j)

					registry.AddImageExtension(".img" + extSuffix)
					registry.AddBinaryExtension(".bin" + extSuffix)
					registry.AddLanguageMapping(".lang"+extSuffix, "lang"+extSuffix)

					// Verify the additions worked.
					if !registry.IsImage("test.img" + extSuffix) {
						t.Errorf("Failed to add image extension .img%s", extSuffix)
					}
					if !registry.IsBinary("test.bin" + extSuffix) {
						t.Errorf("Failed to add binary extension .bin%s", extSuffix)
					}
					if registry.GetLanguage("test.lang"+extSuffix) != "lang"+extSuffix {
						t.Errorf("Failed to add language mapping .lang%s", extSuffix)
					}
				}
			}(i)
		}
		wg.Wait()
	})
}
// TestFileTypeRegistry_Configuration tests the configuration functionality:
// custom extension registration, disabling extensions, empty-value handling
// and case-insensitive lookups.
//
// NOTE(review): the first two subtests share the outer registry instance,
// so they are order-dependent; the later subtests use fresh instances.
func TestFileTypeRegistry_Configuration(t *testing.T) {
	// Create a new registry instance for testing (not the shared singleton).
	registry := &FileTypeRegistry{
		imageExts:   make(map[string]bool),
		binaryExts:  make(map[string]bool),
		languageMap: make(map[string]string),
	}

	// Test ApplyCustomExtensions.
	t.Run("ApplyCustomExtensions", func(t *testing.T) {
		customImages := []string{".webp", ".avif", ".heic"}
		customBinary := []string{".custom", ".mybin"}
		customLanguages := map[string]string{
			".zig":  "zig",
			".odin": "odin",
			".v":    "vlang",
		}

		registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)

		// Test custom image extensions.
		for _, ext := range customImages {
			if !registry.IsImage("test" + ext) {
				t.Errorf("Expected %s to be recognized as image", ext)
			}
		}

		// Test custom binary extensions.
		for _, ext := range customBinary {
			if !registry.IsBinary("test" + ext) {
				t.Errorf("Expected %s to be recognized as binary", ext)
			}
		}

		// Test custom language mappings.
		for ext, expectedLang := range customLanguages {
			if lang := registry.GetLanguage("test" + ext); lang != expectedLang {
				t.Errorf("Expected %s to map to %s, got %s", ext, expectedLang, lang)
			}
		}
	})

	// Test DisableExtensions.
	t.Run("DisableExtensions", func(t *testing.T) {
		// Add some extensions first.
		registry.AddImageExtension(".png")
		registry.AddImageExtension(".jpg")
		registry.AddBinaryExtension(".exe")
		registry.AddBinaryExtension(".dll")
		registry.AddLanguageMapping(".go", "go")
		registry.AddLanguageMapping(".py", "python")

		// Verify they work before disabling.
		if !registry.IsImage("test.png") {
			t.Error("Expected .png to be image before disabling")
		}
		if !registry.IsBinary("test.exe") {
			t.Error("Expected .exe to be binary before disabling")
		}
		if registry.GetLanguage("test.go") != "go" {
			t.Error("Expected .go to map to go before disabling")
		}

		// Disable some extensions.
		disabledImages := []string{".png"}
		disabledBinary := []string{".exe"}
		disabledLanguages := []string{".go"}

		registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages)

		// Test that disabled extensions no longer work.
		if registry.IsImage("test.png") {
			t.Error("Expected .png to not be image after disabling")
		}
		if registry.IsBinary("test.exe") {
			t.Error("Expected .exe to not be binary after disabling")
		}
		if registry.GetLanguage("test.go") != "" {
			t.Error("Expected .go to not map to language after disabling")
		}

		// Test that non-disabled extensions still work.
		if !registry.IsImage("test.jpg") {
			t.Error("Expected .jpg to still be image after disabling .png")
		}
		if !registry.IsBinary("test.dll") {
			t.Error("Expected .dll to still be binary after disabling .exe")
		}
		if registry.GetLanguage("test.py") != "python" {
			t.Error("Expected .py to still map to python after disabling .go")
		}
	})

	// Test empty values handling: blank extensions and blank language
	// values must be ignored rather than registered.
	t.Run("EmptyValuesHandling", func(t *testing.T) {
		registry := &FileTypeRegistry{
			imageExts:   make(map[string]bool),
			binaryExts:  make(map[string]bool),
			languageMap: make(map[string]string),
		}

		// Test with empty values mixed in.
		customImages := []string{"", ".valid", ""}
		customBinary := []string{"", ".valid"}
		customLanguages := map[string]string{
			"":       "invalid",
			".valid": "",
			".good":  "good",
		}

		registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)

		// Only valid entries should be added.
		if registry.IsImage("test.") {
			t.Error("Expected empty extension to not be added as image")
		}
		if !registry.IsImage("test.valid") {
			t.Error("Expected .valid to be added as image")
		}
		if registry.IsBinary("test.") {
			t.Error("Expected empty extension to not be added as binary")
		}
		if !registry.IsBinary("test.valid") {
			t.Error("Expected .valid to be added as binary")
		}
		if registry.GetLanguage("test.") != "" {
			t.Error("Expected empty extension to not be added as language")
		}
		if registry.GetLanguage("test.valid") != "" {
			t.Error("Expected .valid with empty language to not be added")
		}
		if registry.GetLanguage("test.good") != "good" {
			t.Error("Expected .good to map to good")
		}
	})

	// Test case insensitive handling: registrations made with any casing
	// must match lookups in any casing.
	t.Run("CaseInsensitiveHandling", func(t *testing.T) {
		registry := &FileTypeRegistry{
			imageExts:   make(map[string]bool),
			binaryExts:  make(map[string]bool),
			languageMap: make(map[string]string),
		}

		customImages := []string{".WEBP", ".Avif"}
		customBinary := []string{".CUSTOM", ".MyBin"}
		customLanguages := map[string]string{
			".ZIG":  "zig",
			".Odin": "odin",
		}

		registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)

		// Test that both upper and lower case lookups work.
		if !registry.IsImage("test.webp") {
			t.Error("Expected .webp (lowercase) to work after adding .WEBP")
		}
		if !registry.IsImage("test.WEBP") {
			t.Error("Expected .WEBP (uppercase) to work")
		}
		if !registry.IsBinary("test.custom") {
			t.Error("Expected .custom (lowercase) to work after adding .CUSTOM")
		}
		if !registry.IsBinary("test.CUSTOM") {
			t.Error("Expected .CUSTOM (uppercase) to work")
		}
		if registry.GetLanguage("test.zig") != "zig" {
			t.Error("Expected .zig (lowercase) to work after adding .ZIG")
		}
		if registry.GetLanguage("test.ZIG") != "zig" {
			t.Error("Expected .ZIG (uppercase) to work")
		}
	})
}

// TestConfigureFromSettings tests the global configuration function.
func TestConfigureFromSettings(t *testing.T) {
	// Reset registry to ensure a clean singleton state. Only safe because
	// no other goroutines are running here.
	registryOnce = sync.Once{}
	registry = nil

	// Test configuration application: custom additions plus disabling of
	// a few default extensions.
	customImages := []string{".webp", ".avif"}
	customBinary := []string{".custom"}
	customLanguages := map[string]string{".zig": "zig"}
	disabledImages := []string{".gif"}   // Disable default extension
	disabledBinary := []string{".exe"}   // Disable default extension
	disabledLanguages := []string{".rb"} // Disable default extension

	ConfigureFromSettings(
		customImages,
		customBinary,
		customLanguages,
		disabledImages,
		disabledBinary,
		disabledLanguages,
	)

	// Test that custom extensions work.
	if !IsImage("test.webp") {
		t.Error("Expected custom image extension .webp to work")
	}
	if !IsBinary("test.custom") {
		t.Error("Expected custom binary extension .custom to work")
	}
	if GetLanguage("test.zig") != "zig" {
		t.Error("Expected custom language .zig to work")
	}

	// Test that disabled extensions don't work.
	if IsImage("test.gif") {
		t.Error("Expected disabled image extension .gif to not work")
	}
	if IsBinary("test.exe") {
		t.Error("Expected disabled binary extension .exe to not work")
	}
	if GetLanguage("test.rb") != "" {
		t.Error("Expected disabled language extension .rb to not work")
	}

	// Test that non-disabled defaults still work.
	if !IsImage("test.png") {
		t.Error("Expected non-disabled image extension .png to still work")
	}
	if !IsBinary("test.dll") {
		t.Error("Expected non-disabled binary extension .dll to still work")
	}
	if GetLanguage("test.go") != "go" {
		t.Error("Expected non-disabled language extension .go to still work")
	}

	// Test multiple calls don't override previous configuration.
	ConfigureFromSettings(
		[]string{".extra"},
		[]string{},
		map[string]string{},
		[]string{},
		[]string{},
		[]string{},
	)

	// Previous configuration should still work.
	if !IsImage("test.webp") {
		t.Error("Expected previous configuration to persist")
	}
	// New configuration should also work.
	if !IsImage("test.extra") {
		t.Error("Expected new configuration to be applied")
	}
}

package fileproc

import (
	"testing"
)

// TestFileTypeRegistry_LanguageDetection tests the language detection
// functionality over a table of representative filenames.
func TestFileTypeRegistry_LanguageDetection(t *testing.T) {
	registry := GetDefaultRegistry()

	// filename -> expected language ("" means "no language detected").
	tests := []struct {
		filename string
		expected string
	}{
		// Programming languages
		{"main.go", "go"},
		{"script.py", "python"},
		{"app.js", "javascript"},
		{"component.tsx", "typescript"},
		{"service.ts", "typescript"},
		{"App.java", "java"},
		{"program.c", "c"},
		{"program.cpp", "cpp"},
		{"header.h", "c"},
		{"header.hpp", "cpp"},
		{"main.rs", "rust"},
		{"script.rb", "ruby"},
		{"index.php", "php"},
		{"app.swift", "swift"},
		{"MainActivity.kt", "kotlin"},
		{"Main.scala", "scala"},
		{"analysis.r", "r"},
		{"ViewController.m", "objc"},
		{"ViewController.mm", "objcpp"},
		{"Program.cs", "csharp"},
		{"Module.vb", "vbnet"},
		{"program.fs", "fsharp"},
		{"script.lua", "lua"},
		{"script.pl", "perl"},

		// Shell scripts
		{"script.sh", "bash"},
		{"script.bash", "bash"},
		{"script.zsh", "zsh"},
		{"script.fish", "fish"},
		{"script.ps1", "powershell"},
		{"script.bat", "batch"},
		{"script.cmd", "batch"},

		// Data and markup
		{"query.sql", "sql"},
		{"index.html", "html"},
		{"page.htm", "html"},
		{"data.xml", "xml"},
		{"style.css", "css"},
		{"style.scss", "scss"},
		{"style.sass", "sass"},
		{"style.less", "less"},
		{"config.json", "json"},
		{"config.yaml", "yaml"},
		{"config.yml", "yaml"},
		{"data.toml", "toml"},
		{"page.md", "markdown"},
		// NOTE(review): .markdown maps to nothing while .md maps to
		// "markdown" — confirm this asymmetry is intentional in the registry.
		{"readme.markdown", ""},
		{"doc.rst", "rst"},
		{"book.tex", "latex"},
+ + // Configuration files + {"Dockerfile", ""}, + {"Makefile", ""}, + {"GNUmakefile", ""}, + + // Case sensitivity tests + {"MAIN.GO", "go"}, + {"SCRIPT.PY", "python"}, + {"APP.JS", "javascript"}, + + // Unknown extensions + {"unknown.xyz", ""}, + {"file.unknown", ""}, + {"noextension", ""}, + {"", ""}, + } + + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + result := registry.GetLanguage(tt.filename) + if result != tt.expected { + t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected) + } + }) + } +} + +// TestFileTypeRegistry_ImageDetection tests the image detection functionality. +func TestFileTypeRegistry_ImageDetection(t *testing.T) { + registry := GetDefaultRegistry() + + tests := []struct { + filename string + expected bool + }{ + // Common image formats + {"photo.png", true}, + {"image.jpg", true}, + {"picture.jpeg", true}, + {"animation.gif", true}, + {"bitmap.bmp", true}, + {"image.tiff", true}, + {"scan.tif", true}, + {"vector.svg", true}, + {"modern.webp", true}, + {"favicon.ico", true}, + + // Case sensitivity tests + {"PHOTO.PNG", true}, + {"IMAGE.JPG", true}, + {"PICTURE.JPEG", true}, + + // Non-image files + {"document.txt", false}, + {"script.js", false}, + {"data.json", false}, + {"archive.zip", false}, + {"executable.exe", false}, + + // Edge cases + {"", false}, // Empty filename + {"image", false}, // No extension + {".png", true}, // Just extension + {"file.png.bak", false}, // Multiple extensions + {"image.unknown", false}, // Unknown extension + } + + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + result := registry.IsImage(tt.filename) + if result != tt.expected { + t.Errorf("IsImage(%q) = %t, expected %t", tt.filename, result, tt.expected) + } + }) + } +} + +// TestFileTypeRegistry_BinaryDetection tests the binary detection functionality. 
+func TestFileTypeRegistry_BinaryDetection(t *testing.T) { + registry := GetDefaultRegistry() + + tests := []struct { + filename string + expected bool + }{ + // Executable files + {"program.exe", true}, + {"library.dll", true}, + {"libfoo.so", true}, + {"framework.dylib", true}, + {"data.bin", true}, + + // Object and library files + {"object.o", true}, + {"archive.a", true}, + {"library.lib", true}, + {"application.jar", true}, + {"bytecode.class", true}, + {"compiled.pyc", true}, + {"optimized.pyo", true}, + + // System files + {".DS_Store", true}, + + // Document files (treated as binary) + {"document.pdf", true}, + + // Archive files + {"archive.zip", true}, + {"backup.tar", true}, + {"compressed.gz", true}, + {"data.bz2", true}, + {"package.xz", true}, + {"archive.7z", true}, + {"backup.rar", true}, + + // Font files + {"font.ttf", true}, + {"font.otf", true}, + {"font.woff", true}, + {"font.woff2", true}, + + // Media files (video/audio) + {"video.mp4", true}, + {"movie.avi", true}, + {"clip.mov", true}, + {"song.mp3", true}, + {"audio.wav", true}, + {"music.flac", true}, + + // Case sensitivity tests + {"PROGRAM.EXE", true}, + {"LIBRARY.DLL", true}, + {"ARCHIVE.ZIP", true}, + + // Non-binary files + {"document.txt", false}, + {"script.py", false}, + {"config.json", false}, + {"style.css", false}, + {"page.html", false}, + + // Edge cases + {"", false}, // Empty filename + {"binary", false}, // No extension + {".exe", true}, // Just extension + {"file.exe.txt", false}, // Multiple extensions + {"file.unknown", false}, // Unknown extension + } + + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + result := registry.IsBinary(tt.filename) + if result != tt.expected { + t.Errorf("IsBinary(%q) = %t, expected %t", tt.filename, result, tt.expected) + } + }) + } +} \ No newline at end of file diff --git a/fileproc/filetypes_edge_cases_test.go b/fileproc/filetypes_edge_cases_test.go new file mode 100644 index 0000000..ce9ee84 --- /dev/null +++ 
b/fileproc/filetypes_edge_cases_test.go @@ -0,0 +1,128 @@ +package fileproc + +import ( + "testing" +) + +// TestFileTypeRegistry_EdgeCases tests edge cases and boundary conditions. +func TestFileTypeRegistry_EdgeCases(t *testing.T) { + registry := GetDefaultRegistry() + + // Test various edge cases for filename handling + edgeCases := []struct { + name string + filename string + desc string + }{ + {"empty", "", "empty filename"}, + {"single_char", "a", "single character filename"}, + {"just_dot", ".", "just a dot"}, + {"double_dot", "..", "double dot"}, + {"hidden_file", ".hidden", "hidden file"}, + {"hidden_with_ext", ".hidden.txt", "hidden file with extension"}, + {"multiple_dots", "file.tar.gz", "multiple extensions"}, + {"trailing_dot", "file.", "trailing dot"}, + {"unicode", "файл.txt", "unicode filename"}, + {"spaces", "my file.txt", "filename with spaces"}, + {"special_chars", "file@#$.txt", "filename with special characters"}, + {"very_long", "very_long_filename_with_many_characters_in_it.extension", "very long filename"}, + {"no_basename", ".gitignore", "dotfile with no basename"}, + {"case_mixed", "FiLe.ExT", "mixed case"}, + } + + for _, tc := range edgeCases { + t.Run(tc.name, func(t *testing.T) { + // These should not panic + _ = registry.IsImage(tc.filename) + _ = registry.IsBinary(tc.filename) + _ = registry.GetLanguage(tc.filename) + + // Global functions should also not panic + _ = IsImage(tc.filename) + _ = IsBinary(tc.filename) + _ = GetLanguage(tc.filename) + }) + } +} + +// TestFileTypeRegistry_MinimumExtensionLength tests the minimum extension length requirement. 
+func TestFileTypeRegistry_MinimumExtensionLength(t *testing.T) { + registry := GetDefaultRegistry() + + tests := []struct { + filename string + expected string + }{ + {"", ""}, // Empty filename + {"a", ""}, // Single character (less than minExtensionLength) + {"ab", ""}, // Two characters, no extension + {"a.b", ""}, // Extension too short, but filename too short anyway + {"ab.c", "c"}, // Valid: filename >= minExtensionLength and .c is valid extension + {"a.go", "go"}, // Valid extension + {"ab.py", "python"}, // Valid extension + {"a.unknown", ""}, // Valid length but unknown extension + } + + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + result := registry.GetLanguage(tt.filename) + if result != tt.expected { + t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected) + } + }) + } +} + +// Benchmark tests for performance validation +func BenchmarkFileTypeRegistry_IsImage(b *testing.B) { + registry := GetDefaultRegistry() + filename := "test.png" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = registry.IsImage(filename) + } +} + +func BenchmarkFileTypeRegistry_IsBinary(b *testing.B) { + registry := GetDefaultRegistry() + filename := "test.exe" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = registry.IsBinary(filename) + } +} + +func BenchmarkFileTypeRegistry_GetLanguage(b *testing.B) { + registry := GetDefaultRegistry() + filename := "test.go" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = registry.GetLanguage(filename) + } +} + +func BenchmarkFileTypeRegistry_GlobalFunctions(b *testing.B) { + filename := "test.go" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = IsImage(filename) + _ = IsBinary(filename) + _ = GetLanguage(filename) + } +} + +func BenchmarkFileTypeRegistry_ConcurrentAccess(b *testing.B) { + filename := "test.go" + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + _ = IsImage(filename) + _ = IsBinary(filename) + _ = GetLanguage(filename) + } + }) +} \ No newline at 
end of file diff --git a/fileproc/filetypes_registry_test.go b/fileproc/filetypes_registry_test.go new file mode 100644 index 0000000..0b9954a --- /dev/null +++ b/fileproc/filetypes_registry_test.go @@ -0,0 +1,137 @@ +package fileproc + +import ( + "testing" +) + +// TestFileTypeRegistry_ModificationMethods tests the modification methods of FileTypeRegistry. +func TestFileTypeRegistry_ModificationMethods(t *testing.T) { + // Create a new registry instance for testing + registry := &FileTypeRegistry{ + imageExts: make(map[string]bool), + binaryExts: make(map[string]bool), + languageMap: make(map[string]string), + } + + // Test AddImageExtension + t.Run("AddImageExtension", func(t *testing.T) { + // Add a new image extension + registry.AddImageExtension(".webp") + if !registry.IsImage("test.webp") { + t.Errorf("Expected .webp to be recognized as image after adding") + } + + // Test case insensitive addition + registry.AddImageExtension(".AVIF") + if !registry.IsImage("test.avif") { + t.Errorf("Expected .avif to be recognized as image after adding .AVIF") + } + if !registry.IsImage("test.AVIF") { + t.Errorf("Expected .AVIF to be recognized as image") + } + + // Test with dot prefix + registry.AddImageExtension("heic") + if registry.IsImage("test.heic") { + t.Errorf("Expected extension without dot to not work") + } + + // Test with proper dot prefix + registry.AddImageExtension(".heic") + if !registry.IsImage("test.heic") { + t.Errorf("Expected .heic to be recognized as image") + } + }) + + // Test AddBinaryExtension + t.Run("AddBinaryExtension", func(t *testing.T) { + // Add a new binary extension + registry.AddBinaryExtension(".custom") + if !registry.IsBinary("file.custom") { + t.Errorf("Expected .custom to be recognized as binary after adding") + } + + // Test case insensitive addition + registry.AddBinaryExtension(".SPECIAL") + if !registry.IsBinary("file.special") { + t.Errorf("Expected .special to be recognized as binary after adding .SPECIAL") + } + if 
!registry.IsBinary("file.SPECIAL") { + t.Errorf("Expected .SPECIAL to be recognized as binary") + } + + // Test with dot prefix + registry.AddBinaryExtension("bin") + if registry.IsBinary("file.bin") { + t.Errorf("Expected extension without dot to not work") + } + + // Test with proper dot prefix + registry.AddBinaryExtension(".bin") + if !registry.IsBinary("file.bin") { + t.Errorf("Expected .bin to be recognized as binary") + } + }) + + // Test AddLanguageMapping + t.Run("AddLanguageMapping", func(t *testing.T) { + // Add a new language mapping + registry.AddLanguageMapping(".xyz", "CustomLang") + if lang := registry.GetLanguage("file.xyz"); lang != "CustomLang" { + t.Errorf("Expected CustomLang, got %s", lang) + } + + // Test case insensitive addition + registry.AddLanguageMapping(".ABC", "UpperLang") + if lang := registry.GetLanguage("file.abc"); lang != "UpperLang" { + t.Errorf("Expected UpperLang, got %s", lang) + } + if lang := registry.GetLanguage("file.ABC"); lang != "UpperLang" { + t.Errorf("Expected UpperLang for uppercase, got %s", lang) + } + + // Test with dot prefix + registry.AddLanguageMapping("nolang", "NoLang") + if lang := registry.GetLanguage("file.nolang"); lang == "NoLang" { + t.Errorf("Expected extension without dot to not work") + } + + // Test with proper dot prefix + registry.AddLanguageMapping(".nolang", "NoLang") + if lang := registry.GetLanguage("file.nolang"); lang != "NoLang" { + t.Errorf("Expected NoLang, got %s", lang) + } + + // Test overriding existing mapping + registry.AddLanguageMapping(".xyz", "NewCustomLang") + if lang := registry.GetLanguage("file.xyz"); lang != "NewCustomLang" { + t.Errorf("Expected NewCustomLang after override, got %s", lang) + } + }) +} + +// TestFileTypeRegistry_DefaultRegistryConsistency tests default registry behavior. 
+func TestFileTypeRegistry_DefaultRegistryConsistency(t *testing.T) { + registry := GetDefaultRegistry() + + // Test that registry methods work consistently + if !registry.IsImage("test.png") { + t.Error("Expected .png to be recognized as image") + } + if !registry.IsBinary("test.exe") { + t.Error("Expected .exe to be recognized as binary") + } + if lang := registry.GetLanguage("test.go"); lang != "go" { + t.Errorf("Expected go, got %s", lang) + } + + // Test that multiple calls return consistent results + for i := 0; i < 5; i++ { + if !registry.IsImage("test.jpg") { + t.Errorf("Iteration %d: Expected .jpg to be recognized as image", i) + } + if registry.IsBinary("test.txt") { + t.Errorf("Iteration %d: Expected .txt to not be recognized as binary", i) + } + } +} \ No newline at end of file diff --git a/fileproc/filetypes_test.go b/fileproc/filetypes_test.go deleted file mode 100644 index 3053068..0000000 --- a/fileproc/filetypes_test.go +++ /dev/null @@ -1,827 +0,0 @@ -package fileproc - -import ( - "fmt" - "sync" - "testing" -) - -// TestFileTypeRegistry_ModificationMethods tests the modification methods of FileTypeRegistry. 
-func TestFileTypeRegistry_ModificationMethods(t *testing.T) { - // Create a new registry instance for testing - registry := &FileTypeRegistry{ - imageExts: make(map[string]bool), - binaryExts: make(map[string]bool), - languageMap: make(map[string]string), - } - - // Test AddImageExtension - t.Run("AddImageExtension", func(t *testing.T) { - // Add a new image extension - registry.AddImageExtension(".webp") - if !registry.IsImage("test.webp") { - t.Errorf("Expected .webp to be recognized as image after adding") - } - - // Test case insensitive addition - registry.AddImageExtension(".AVIF") - if !registry.IsImage("test.avif") { - t.Errorf("Expected .avif to be recognized as image after adding .AVIF") - } - if !registry.IsImage("test.AVIF") { - t.Errorf("Expected .AVIF to be recognized as image") - } - - // Test with dot prefix - registry.AddImageExtension("heic") - if registry.IsImage("test.heic") { - t.Errorf("Expected extension without dot to not work") - } - - // Test with proper dot prefix - registry.AddImageExtension(".heic") - if !registry.IsImage("test.heic") { - t.Errorf("Expected .heic to be recognized as image") - } - }) - - // Test AddBinaryExtension - t.Run("AddBinaryExtension", func(t *testing.T) { - // Add a new binary extension - registry.AddBinaryExtension(".custom") - if !registry.IsBinary("test.custom") { - t.Errorf("Expected .custom to be recognized as binary after adding") - } - - // Test case insensitive addition - registry.AddBinaryExtension(".NEWBIN") - if !registry.IsBinary("test.newbin") { - t.Errorf("Expected .newbin to be recognized as binary after adding .NEWBIN") - } - if !registry.IsBinary("test.NEWBIN") { - t.Errorf("Expected .NEWBIN to be recognized as binary") - } - - // Test overwriting existing extension - registry.AddBinaryExtension(".custom") - if !registry.IsBinary("test.custom") { - t.Errorf("Expected .custom to still be recognized as binary after re-adding") - } - }) - - // Test AddLanguageMapping - t.Run("AddLanguageMapping", 
func(t *testing.T) { - // Add a new language mapping - registry.AddLanguageMapping(".zig", "zig") - if registry.GetLanguage("test.zig") != "zig" { - t.Errorf("Expected .zig to map to 'zig', got '%s'", registry.GetLanguage("test.zig")) - } - - // Test case insensitive addition - registry.AddLanguageMapping(".V", "vlang") - if registry.GetLanguage("test.v") != "vlang" { - t.Errorf("Expected .v to map to 'vlang' after adding .V, got '%s'", registry.GetLanguage("test.v")) - } - if registry.GetLanguage("test.V") != "vlang" { - t.Errorf("Expected .V to map to 'vlang', got '%s'", registry.GetLanguage("test.V")) - } - - // Test overwriting existing mapping - registry.AddLanguageMapping(".zig", "ziglang") - if registry.GetLanguage("test.zig") != "ziglang" { - t.Errorf("Expected .zig to map to 'ziglang' after update, got '%s'", registry.GetLanguage("test.zig")) - } - - // Test empty language - registry.AddLanguageMapping(".empty", "") - if registry.GetLanguage("test.empty") != "" { - t.Errorf("Expected .empty to map to empty string, got '%s'", registry.GetLanguage("test.empty")) - } - }) -} - -// TestFileTypeRegistry_LanguageDetection tests the language detection functionality. 
-func TestFileTypeRegistry_LanguageDetection(t *testing.T) { - registry := GetDefaultRegistry() - - tests := []struct { - filename string - expected string - }{ - // Programming languages - {"main.go", "go"}, - {"script.py", "python"}, - {"app.js", "javascript"}, - {"component.tsx", "typescript"}, - {"service.ts", "typescript"}, - {"App.java", "java"}, - {"program.c", "c"}, - {"program.cpp", "cpp"}, - {"header.h", "c"}, - {"header.hpp", "cpp"}, - {"main.rs", "rust"}, - {"script.rb", "ruby"}, - {"index.php", "php"}, - {"app.swift", "swift"}, - {"MainActivity.kt", "kotlin"}, - {"Main.scala", "scala"}, - {"analysis.r", "r"}, - {"ViewController.m", "objc"}, - {"ViewController.mm", "objcpp"}, - {"Program.cs", "csharp"}, - {"Module.vb", "vbnet"}, - {"program.fs", "fsharp"}, - {"script.lua", "lua"}, - {"script.pl", "perl"}, - - // Shell scripts - {"script.sh", "bash"}, - {"script.bash", "bash"}, - {"script.zsh", "zsh"}, - {"script.fish", "fish"}, - {"script.ps1", "powershell"}, - {"script.bat", "batch"}, - {"script.cmd", "batch"}, - - // Data and markup - {"query.sql", "sql"}, - {"index.html", "html"}, - {"page.htm", "html"}, - {"data.xml", "xml"}, - {"style.css", "css"}, - {"style.scss", "scss"}, - {"style.sass", "sass"}, - {"style.less", "less"}, - {"data.json", "json"}, - {"config.yaml", "yaml"}, - {"config.yml", "yaml"}, - {"config.toml", "toml"}, - {"README.md", "markdown"}, - {"doc.rst", "rst"}, - {"paper.tex", "latex"}, - - // Modern languages - {"main.dart", "dart"}, - {"Main.elm", "elm"}, - {"core.clj", "clojure"}, - {"server.ex", "elixir"}, - {"test.exs", "elixir"}, - {"server.erl", "erlang"}, - {"header.hrl", "erlang"}, - {"main.hs", "haskell"}, - {"module.ml", "ocaml"}, - {"interface.mli", "ocaml"}, - {"main.nim", "nim"}, - {"config.nims", "nim"}, - - // Web frameworks - {"Component.vue", "vue"}, - {"Component.jsx", "javascript"}, - - // Case sensitivity tests - {"MAIN.GO", "go"}, - {"Script.PY", "python"}, - {"APP.JS", "javascript"}, - - // Edge cases - {"", 
""}, // Empty filename - {"a", ""}, // Too short (less than minExtensionLength) - {"noext", ""}, // No extension - {".hidden", ""}, // Hidden file with no name - {"file.", ""}, // Extension is just a dot - {"file.unknown", ""}, // Unknown extension - {"file.123", ""}, // Numeric extension - {"a.b", ""}, // Very short filename and extension - } - - for _, tt := range tests { - t.Run(tt.filename, func(t *testing.T) { - result := registry.GetLanguage(tt.filename) - if result != tt.expected { - t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected) - } - }) - } -} - -// TestFileTypeRegistry_ImageDetection tests the image detection functionality. -func TestFileTypeRegistry_ImageDetection(t *testing.T) { - registry := GetDefaultRegistry() - - tests := []struct { - filename string - expected bool - }{ - // Common image formats - {"photo.png", true}, - {"image.jpg", true}, - {"picture.jpeg", true}, - {"animation.gif", true}, - {"bitmap.bmp", true}, - {"image.tiff", true}, - {"scan.tif", true}, - {"vector.svg", true}, - {"modern.webp", true}, - {"favicon.ico", true}, - - // Case sensitivity tests - {"PHOTO.PNG", true}, - {"IMAGE.JPG", true}, - {"PICTURE.JPEG", true}, - - // Non-image files - {"document.txt", false}, - {"script.js", false}, - {"data.json", false}, - {"archive.zip", false}, - {"executable.exe", false}, - - // Edge cases - {"", false}, // Empty filename - {"image", false}, // No extension - {".png", true}, // Just extension - {"file.png.bak", false}, // Multiple extensions - {"image.unknown", false}, // Unknown extension - } - - for _, tt := range tests { - t.Run(tt.filename, func(t *testing.T) { - result := registry.IsImage(tt.filename) - if result != tt.expected { - t.Errorf("IsImage(%q) = %t, expected %t", tt.filename, result, tt.expected) - } - }) - } -} - -// TestFileTypeRegistry_BinaryDetection tests the binary detection functionality. 
-func TestFileTypeRegistry_BinaryDetection(t *testing.T) { - registry := GetDefaultRegistry() - - tests := []struct { - filename string - expected bool - }{ - // Executable files - {"program.exe", true}, - {"library.dll", true}, - {"libfoo.so", true}, - {"framework.dylib", true}, - {"data.bin", true}, - - // Object and library files - {"object.o", true}, - {"archive.a", true}, - {"library.lib", true}, - {"application.jar", true}, - {"bytecode.class", true}, - {"compiled.pyc", true}, - {"optimized.pyo", true}, - - // System files - {".DS_Store", true}, - - // Document files (treated as binary) - {"document.pdf", true}, - - // Archive files - {"archive.zip", true}, - {"backup.tar", true}, - {"compressed.gz", true}, - {"data.bz2", true}, - {"package.xz", true}, - {"archive.7z", true}, - {"backup.rar", true}, - - // Font files - {"font.ttf", true}, - {"font.otf", true}, - {"font.woff", true}, - {"font.woff2", true}, - - // Media files - {"song.mp3", true}, - {"video.mp4", true}, - {"movie.avi", true}, - {"clip.mov", true}, - {"video.wmv", true}, - {"animation.flv", true}, - {"modern.webm", true}, - {"audio.ogg", true}, - {"sound.wav", true}, - {"music.flac", true}, - - // Database files - {"data.dat", true}, - {"database.db", true}, - {"app.sqlite", true}, - - // Case sensitivity tests - {"PROGRAM.EXE", true}, - {"LIBRARY.DLL", true}, - - // Non-binary files - {"document.txt", false}, - {"script.js", false}, - {"data.json", false}, - {"style.css", false}, - {"page.html", false}, - - // Edge cases - {"", false}, // Empty filename - {"binary", false}, // No extension - {".exe", true}, // Just extension - {"file.exe.bak", false}, // Multiple extensions - {"file.unknown", false}, // Unknown extension - } - - for _, tt := range tests { - t.Run(tt.filename, func(t *testing.T) { - result := registry.IsBinary(tt.filename) - if result != tt.expected { - t.Errorf("IsBinary(%q) = %t, expected %t", tt.filename, result, tt.expected) - } - }) - } -} - -// 
TestFileTypeRegistry_DefaultRegistryConsistency tests that the default registry is consistent. -func TestFileTypeRegistry_DefaultRegistryConsistency(t *testing.T) { - // Get registry multiple times and ensure it's the same instance - registry1 := GetDefaultRegistry() - registry2 := GetDefaultRegistry() - registry3 := getRegistry() - - if registry1 != registry2 { - t.Error("GetDefaultRegistry() should return the same instance") - } - if registry1 != registry3 { - t.Error("getRegistry() should return the same instance as GetDefaultRegistry()") - } - - // Test that global functions use the same registry - filename := "test.go" - if IsImage(filename) != registry1.IsImage(filename) { - t.Error("IsImage() global function should match registry method") - } - if IsBinary(filename) != registry1.IsBinary(filename) { - t.Error("IsBinary() global function should match registry method") - } - if GetLanguage(filename) != registry1.GetLanguage(filename) { - t.Error("GetLanguage() global function should match registry method") - } -} - -// TestFileTypeRegistry_ThreadSafety tests the thread safety of the FileTypeRegistry. 
-func TestFileTypeRegistry_ThreadSafety(t *testing.T) { - const numGoroutines = 100 - const numOperationsPerGoroutine = 100 - - var wg sync.WaitGroup - - // Test concurrent read operations - t.Run("ConcurrentReads", func(t *testing.T) { - for i := 0; i < numGoroutines; i++ { - wg.Add(1) - go func(id int) { - defer wg.Done() - registry := GetDefaultRegistry() - - for j := 0; j < numOperationsPerGoroutine; j++ { - // Test various file detection operations - _ = registry.IsImage("test.png") - _ = registry.IsBinary("test.exe") - _ = registry.GetLanguage("test.go") - - // Test global functions too - _ = IsImage("image.jpg") - _ = IsBinary("binary.dll") - _ = GetLanguage("script.py") - } - }(i) - } - wg.Wait() - }) - - // Test concurrent registry access (singleton creation) - t.Run("ConcurrentRegistryAccess", func(t *testing.T) { - // Reset the registry to test concurrent initialization - // Note: This is not safe in a real application, but needed for testing - registryOnce = sync.Once{} - registry = nil - - registries := make([]*FileTypeRegistry, numGoroutines) - - for i := 0; i < numGoroutines; i++ { - wg.Add(1) - go func(id int) { - defer wg.Done() - registries[id] = GetDefaultRegistry() - }(i) - } - wg.Wait() - - // Verify all goroutines got the same registry instance - firstRegistry := registries[0] - for i := 1; i < numGoroutines; i++ { - if registries[i] != firstRegistry { - t.Errorf("Registry %d is different from registry 0", i) - } - } - }) - - // Test concurrent modifications on separate registry instances - t.Run("ConcurrentModifications", func(t *testing.T) { - // Create separate registry instances for each goroutine to test modification thread safety - for i := 0; i < numGoroutines; i++ { - wg.Add(1) - go func(id int) { - defer wg.Done() - - // Create a new registry instance for this goroutine - registry := &FileTypeRegistry{ - imageExts: make(map[string]bool), - binaryExts: make(map[string]bool), - languageMap: make(map[string]string), - } - - for j := 0; j 
< numOperationsPerGoroutine; j++ { - // Add unique extensions for this goroutine - extSuffix := fmt.Sprintf("_%d_%d", id, j) - - registry.AddImageExtension(".img" + extSuffix) - registry.AddBinaryExtension(".bin" + extSuffix) - registry.AddLanguageMapping(".lang"+extSuffix, "lang"+extSuffix) - - // Verify the additions worked - if !registry.IsImage("test.img" + extSuffix) { - t.Errorf("Failed to add image extension .img%s", extSuffix) - } - if !registry.IsBinary("test.bin" + extSuffix) { - t.Errorf("Failed to add binary extension .bin%s", extSuffix) - } - if registry.GetLanguage("test.lang"+extSuffix) != "lang"+extSuffix { - t.Errorf("Failed to add language mapping .lang%s", extSuffix) - } - } - }(i) - } - wg.Wait() - }) -} - -// TestFileTypeRegistry_EdgeCases tests edge cases and boundary conditions. -func TestFileTypeRegistry_EdgeCases(t *testing.T) { - registry := GetDefaultRegistry() - - // Test various edge cases for filename handling - edgeCases := []struct { - name string - filename string - desc string - }{ - {"empty", "", "empty filename"}, - {"single_char", "a", "single character filename"}, - {"just_dot", ".", "just a dot"}, - {"double_dot", "..", "double dot"}, - {"hidden_file", ".hidden", "hidden file"}, - {"hidden_with_ext", ".hidden.txt", "hidden file with extension"}, - {"multiple_dots", "file.tar.gz", "multiple extensions"}, - {"trailing_dot", "file.", "trailing dot"}, - {"unicode", "файл.txt", "unicode filename"}, - {"spaces", "my file.txt", "filename with spaces"}, - {"special_chars", "file@#$.txt", "filename with special characters"}, - {"very_long", "very_long_filename_with_many_characters_in_it.extension", "very long filename"}, - {"no_basename", ".gitignore", "dotfile with no basename"}, - {"case_mixed", "FiLe.ExT", "mixed case"}, - } - - for _, tc := range edgeCases { - t.Run(tc.name, func(t *testing.T) { - // These should not panic - _ = registry.IsImage(tc.filename) - _ = registry.IsBinary(tc.filename) - _ = 
registry.GetLanguage(tc.filename) - - // Global functions should also not panic - _ = IsImage(tc.filename) - _ = IsBinary(tc.filename) - _ = GetLanguage(tc.filename) - }) - } -} - -// TestFileTypeRegistry_MinimumExtensionLength tests the minimum extension length requirement. -func TestFileTypeRegistry_MinimumExtensionLength(t *testing.T) { - registry := GetDefaultRegistry() - - tests := []struct { - filename string - expected string - }{ - {"", ""}, // Empty filename - {"a", ""}, // Single character (less than minExtensionLength) - {"ab", ""}, // Two characters, no extension - {"a.b", ""}, // Extension too short, but filename too short anyway - {"ab.c", "c"}, // Valid: filename >= minExtensionLength and .c is valid extension - {"a.go", "go"}, // Valid extension - {"ab.py", "python"}, // Valid extension - {"a.unknown", ""}, // Valid length but unknown extension - } - - for _, tt := range tests { - t.Run(tt.filename, func(t *testing.T) { - result := registry.GetLanguage(tt.filename) - if result != tt.expected { - t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected) - } - }) - } -} - -// BenchmarkFileTypeRegistry tests performance of the registry operations. 
-func BenchmarkFileTypeRegistry_IsImage(b *testing.B) { - registry := GetDefaultRegistry() - filename := "test.png" - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = registry.IsImage(filename) - } -} - -func BenchmarkFileTypeRegistry_IsBinary(b *testing.B) { - registry := GetDefaultRegistry() - filename := "test.exe" - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = registry.IsBinary(filename) - } -} - -func BenchmarkFileTypeRegistry_GetLanguage(b *testing.B) { - registry := GetDefaultRegistry() - filename := "test.go" - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = registry.GetLanguage(filename) - } -} - -func BenchmarkFileTypeRegistry_GlobalFunctions(b *testing.B) { - filename := "test.go" - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = IsImage(filename) - _ = IsBinary(filename) - _ = GetLanguage(filename) - } -} - -func BenchmarkFileTypeRegistry_ConcurrentAccess(b *testing.B) { - filename := "test.go" - - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - _ = IsImage(filename) - _ = IsBinary(filename) - _ = GetLanguage(filename) - } - }) -} - -// TestFileTypeRegistry_Configuration tests the configuration functionality. 
-func TestFileTypeRegistry_Configuration(t *testing.T) { - // Create a new registry instance for testing - registry := &FileTypeRegistry{ - imageExts: make(map[string]bool), - binaryExts: make(map[string]bool), - languageMap: make(map[string]string), - } - - // Test ApplyCustomExtensions - t.Run("ApplyCustomExtensions", func(t *testing.T) { - customImages := []string{".webp", ".avif", ".heic"} - customBinary := []string{".custom", ".mybin"} - customLanguages := map[string]string{ - ".zig": "zig", - ".odin": "odin", - ".v": "vlang", - } - - registry.ApplyCustomExtensions(customImages, customBinary, customLanguages) - - // Test custom image extensions - for _, ext := range customImages { - if !registry.IsImage("test" + ext) { - t.Errorf("Expected %s to be recognized as image", ext) - } - } - - // Test custom binary extensions - for _, ext := range customBinary { - if !registry.IsBinary("test" + ext) { - t.Errorf("Expected %s to be recognized as binary", ext) - } - } - - // Test custom language mappings - for ext, expectedLang := range customLanguages { - if lang := registry.GetLanguage("test" + ext); lang != expectedLang { - t.Errorf("Expected %s to map to %s, got %s", ext, expectedLang, lang) - } - } - }) - - // Test DisableExtensions - t.Run("DisableExtensions", func(t *testing.T) { - // Add some extensions first - registry.AddImageExtension(".png") - registry.AddImageExtension(".jpg") - registry.AddBinaryExtension(".exe") - registry.AddBinaryExtension(".dll") - registry.AddLanguageMapping(".go", "go") - registry.AddLanguageMapping(".py", "python") - - // Verify they work - if !registry.IsImage("test.png") { - t.Error("Expected .png to be image before disabling") - } - if !registry.IsBinary("test.exe") { - t.Error("Expected .exe to be binary before disabling") - } - if registry.GetLanguage("test.go") != "go" { - t.Error("Expected .go to map to go before disabling") - } - - // Disable some extensions - disabledImages := []string{".png"} - disabledBinary := 
[]string{".exe"} - disabledLanguages := []string{".go"} - - registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages) - - // Test that disabled extensions no longer work - if registry.IsImage("test.png") { - t.Error("Expected .png to not be image after disabling") - } - if registry.IsBinary("test.exe") { - t.Error("Expected .exe to not be binary after disabling") - } - if registry.GetLanguage("test.go") != "" { - t.Error("Expected .go to not map to language after disabling") - } - - // Test that non-disabled extensions still work - if !registry.IsImage("test.jpg") { - t.Error("Expected .jpg to still be image after disabling .png") - } - if !registry.IsBinary("test.dll") { - t.Error("Expected .dll to still be binary after disabling .exe") - } - if registry.GetLanguage("test.py") != "python" { - t.Error("Expected .py to still map to python after disabling .go") - } - }) - - // Test empty values handling - t.Run("EmptyValuesHandling", func(t *testing.T) { - registry := &FileTypeRegistry{ - imageExts: make(map[string]bool), - binaryExts: make(map[string]bool), - languageMap: make(map[string]string), - } - - // Test with empty values - customImages := []string{"", ".valid", ""} - customBinary := []string{"", ".valid"} - customLanguages := map[string]string{ - "": "invalid", - ".valid": "", - ".good": "good", - } - - registry.ApplyCustomExtensions(customImages, customBinary, customLanguages) - - // Only valid entries should be added - if registry.IsImage("test.") { - t.Error("Expected empty extension to not be added as image") - } - if !registry.IsImage("test.valid") { - t.Error("Expected .valid to be added as image") - } - if registry.IsBinary("test.") { - t.Error("Expected empty extension to not be added as binary") - } - if !registry.IsBinary("test.valid") { - t.Error("Expected .valid to be added as binary") - } - if registry.GetLanguage("test.") != "" { - t.Error("Expected empty extension to not be added as language") - } - if 
registry.GetLanguage("test.valid") != "" { - t.Error("Expected .valid with empty language to not be added") - } - if registry.GetLanguage("test.good") != "good" { - t.Error("Expected .good to map to good") - } - }) - - // Test case insensitive handling - t.Run("CaseInsensitiveHandling", func(t *testing.T) { - registry := &FileTypeRegistry{ - imageExts: make(map[string]bool), - binaryExts: make(map[string]bool), - languageMap: make(map[string]string), - } - - customImages := []string{".WEBP", ".Avif"} - customBinary := []string{".CUSTOM", ".MyBin"} - customLanguages := map[string]string{ - ".ZIG": "zig", - ".Odin": "odin", - } - - registry.ApplyCustomExtensions(customImages, customBinary, customLanguages) - - // Test that both upper and lower case work - if !registry.IsImage("test.webp") { - t.Error("Expected .webp (lowercase) to work after adding .WEBP") - } - if !registry.IsImage("test.WEBP") { - t.Error("Expected .WEBP (uppercase) to work") - } - if !registry.IsBinary("test.custom") { - t.Error("Expected .custom (lowercase) to work after adding .CUSTOM") - } - if !registry.IsBinary("test.CUSTOM") { - t.Error("Expected .CUSTOM (uppercase) to work") - } - if registry.GetLanguage("test.zig") != "zig" { - t.Error("Expected .zig (lowercase) to work after adding .ZIG") - } - if registry.GetLanguage("test.ZIG") != "zig" { - t.Error("Expected .ZIG (uppercase) to work") - } - }) -} - -// TestConfigureFromSettings tests the global configuration function. 
-func TestConfigureFromSettings(t *testing.T) { - // Reset registry to ensure clean state - registryOnce = sync.Once{} - registry = nil - - // Test configuration application - customImages := []string{".webp", ".avif"} - customBinary := []string{".custom"} - customLanguages := map[string]string{".zig": "zig"} - disabledImages := []string{".gif"} // Disable default extension - disabledBinary := []string{".exe"} // Disable default extension - disabledLanguages := []string{".rb"} // Disable default extension - - ConfigureFromSettings( - customImages, - customBinary, - customLanguages, - disabledImages, - disabledBinary, - disabledLanguages, - ) - - // Test that custom extensions work - if !IsImage("test.webp") { - t.Error("Expected custom image extension .webp to work") - } - if !IsBinary("test.custom") { - t.Error("Expected custom binary extension .custom to work") - } - if GetLanguage("test.zig") != "zig" { - t.Error("Expected custom language .zig to work") - } - - // Test that disabled extensions don't work - if IsImage("test.gif") { - t.Error("Expected disabled image extension .gif to not work") - } - if IsBinary("test.exe") { - t.Error("Expected disabled binary extension .exe to not work") - } - if GetLanguage("test.rb") != "" { - t.Error("Expected disabled language extension .rb to not work") - } - - // Test that non-disabled defaults still work - if !IsImage("test.png") { - t.Error("Expected non-disabled image extension .png to still work") - } - if !IsBinary("test.dll") { - t.Error("Expected non-disabled binary extension .dll to still work") - } - if GetLanguage("test.go") != "go" { - t.Error("Expected non-disabled language extension .go to still work") - } -} diff --git a/fileproc/json_writer.go b/fileproc/json_writer.go index 57bc6d9..5c9ab46 100644 --- a/fileproc/json_writer.go +++ b/fileproc/json_writer.go @@ -31,9 +31,9 @@ func (w *JSONWriter) Start(prefix, suffix string) error { } // Write escaped prefix - escapedPrefix := escapeJSONString(prefix) - if 
_, err := w.outFile.WriteString(escapedPrefix); err != nil { - return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON prefix") + escapedPrefix := utils.EscapeForJSON(prefix) + if err := utils.WriteWithErrorWrap(w.outFile, escapedPrefix, "failed to write JSON prefix", ""); err != nil { + return err } if _, err := w.outFile.WriteString(`","suffix":"`); err != nil { @@ -41,9 +41,9 @@ func (w *JSONWriter) Start(prefix, suffix string) error { } // Write escaped suffix - escapedSuffix := escapeJSONString(suffix) - if _, err := w.outFile.WriteString(escapedSuffix); err != nil { - return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON suffix") + escapedSuffix := utils.EscapeForJSON(suffix) + if err := utils.WriteWithErrorWrap(w.outFile, escapedSuffix, "failed to write JSON suffix", ""); err != nil { + return err } if _, err := w.outFile.WriteString(`","files":[`); err != nil { @@ -79,12 +79,12 @@ func (w *JSONWriter) Close() error { // writeStreaming writes a large file as JSON in streaming chunks. func (w *JSONWriter) writeStreaming(req WriteRequest) error { - defer w.closeReader(req.Reader, req.Path) + defer utils.SafeCloseReader(req.Reader, req.Path) language := detectLanguage(req.Path) // Write file start - escapedPath := escapeJSONString(req.Path) + escapedPath := utils.EscapeForJSON(req.Path) if _, err := fmt.Fprintf(w.outFile, `{"path":"%s","language":"%s","content":"`, escapedPath, language); err != nil { return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file start").WithFilePath(req.Path) } @@ -124,43 +124,13 @@ func (w *JSONWriter) writeInline(req WriteRequest) error { // streamJSONContent streams content with JSON escaping. 
func (w *JSONWriter) streamJSONContent(reader io.Reader, path string) error { - buf := make([]byte, StreamChunkSize) - for { - n, err := reader.Read(buf) - if n > 0 { - escaped := escapeJSONString(string(buf[:n])) - if _, writeErr := w.outFile.WriteString(escaped); writeErr != nil { - return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON chunk").WithFilePath(path) - } - } - if err == io.EOF { - break - } - if err != nil { - return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to read JSON chunk").WithFilePath(path) - } - } - return nil + return utils.StreamContent(reader, w.outFile, StreamChunkSize, path, func(chunk []byte) []byte { + escaped := utils.EscapeForJSON(string(chunk)) + return []byte(escaped) + }) } -// closeReader safely closes a reader if it implements io.Closer. -func (w *JSONWriter) closeReader(reader io.Reader, path string) { - if closer, ok := reader.(io.Closer); ok { - if err := closer.Close(); err != nil { - utils.LogError( - "Failed to close file reader", - utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path), - ) - } - } -} -// escapeJSONString escapes a string for JSON output. -func escapeJSONString(s string) string { - // Use json.Marshal to properly escape the string, then remove the quotes - escaped, _ := json.Marshal(s) - return string(escaped[1 : len(escaped)-1]) // Remove surrounding quotes -} // startJSONWriter handles JSON format output with streaming support. func startJSONWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) { diff --git a/fileproc/resource_monitor.go b/fileproc/resource_monitor.go deleted file mode 100644 index e409aa9..0000000 --- a/fileproc/resource_monitor.go +++ /dev/null @@ -1,423 +0,0 @@ -// Package fileproc provides resource monitoring and limit enforcement for security. 
-package fileproc - -import ( - "context" - "runtime" - "sync" - "sync/atomic" - "time" - - "github.com/sirupsen/logrus" - - "github.com/ivuorinen/gibidify/config" - "github.com/ivuorinen/gibidify/utils" -) - -// ResourceMonitor monitors resource usage and enforces limits to prevent DoS attacks. -type ResourceMonitor struct { - enabled bool - maxFiles int - maxTotalSize int64 - fileProcessingTimeout time.Duration - overallTimeout time.Duration - maxConcurrentReads int - rateLimitFilesPerSec int - hardMemoryLimitMB int - enableGracefulDegr bool - enableResourceMon bool - - // Current state tracking - filesProcessed int64 - totalSizeProcessed int64 - concurrentReads int64 - startTime time.Time - lastRateLimitCheck time.Time - hardMemoryLimitBytes int64 - - // Rate limiting - rateLimiter *time.Ticker - rateLimitChan chan struct{} - - // Synchronization - mu sync.RWMutex - violationLogged map[string]bool - degradationActive bool - emergencyStopRequested bool -} - -// ResourceMetrics holds comprehensive resource usage metrics. -type ResourceMetrics struct { - FilesProcessed int64 `json:"files_processed"` - TotalSizeProcessed int64 `json:"total_size_processed"` - ConcurrentReads int64 `json:"concurrent_reads"` - ProcessingDuration time.Duration `json:"processing_duration"` - AverageFileSize float64 `json:"average_file_size"` - ProcessingRate float64 `json:"processing_rate_files_per_sec"` - MemoryUsageMB int64 `json:"memory_usage_mb"` - MaxMemoryUsageMB int64 `json:"max_memory_usage_mb"` - ViolationsDetected []string `json:"violations_detected"` - DegradationActive bool `json:"degradation_active"` - EmergencyStopActive bool `json:"emergency_stop_active"` - LastUpdated time.Time `json:"last_updated"` -} - -// ResourceViolation represents a detected resource limit violation. 
-type ResourceViolation struct { - Type string `json:"type"` - Message string `json:"message"` - Current interface{} `json:"current"` - Limit interface{} `json:"limit"` - Timestamp time.Time `json:"timestamp"` - Context map[string]interface{} `json:"context"` -} - -// NewResourceMonitor creates a new resource monitor with configuration. -func NewResourceMonitor() *ResourceMonitor { - rm := &ResourceMonitor{ - enabled: config.GetResourceLimitsEnabled(), - maxFiles: config.GetMaxFiles(), - maxTotalSize: config.GetMaxTotalSize(), - fileProcessingTimeout: time.Duration(config.GetFileProcessingTimeoutSec()) * time.Second, - overallTimeout: time.Duration(config.GetOverallTimeoutSec()) * time.Second, - maxConcurrentReads: config.GetMaxConcurrentReads(), - rateLimitFilesPerSec: config.GetRateLimitFilesPerSec(), - hardMemoryLimitMB: config.GetHardMemoryLimitMB(), - enableGracefulDegr: config.GetEnableGracefulDegradation(), - enableResourceMon: config.GetEnableResourceMonitoring(), - startTime: time.Now(), - lastRateLimitCheck: time.Now(), - violationLogged: make(map[string]bool), - hardMemoryLimitBytes: int64(config.GetHardMemoryLimitMB()) * 1024 * 1024, - } - - // Initialize rate limiter if rate limiting is enabled - if rm.enabled && rm.rateLimitFilesPerSec > 0 { - interval := time.Second / time.Duration(rm.rateLimitFilesPerSec) - rm.rateLimiter = time.NewTicker(interval) - rm.rateLimitChan = make(chan struct{}, rm.rateLimitFilesPerSec) - - // Pre-fill the rate limit channel - for i := 0; i < rm.rateLimitFilesPerSec; i++ { - select { - case rm.rateLimitChan <- struct{}{}: - default: - goto rateLimitFull - } - } - rateLimitFull: - - // Start rate limiter refill goroutine - go rm.rateLimiterRefill() - } - - return rm -} - -// ValidateFileProcessing checks if a file can be processed based on resource limits. 
-func (rm *ResourceMonitor) ValidateFileProcessing(filePath string, fileSize int64) error { - if !rm.enabled { - return nil - } - - rm.mu.RLock() - defer rm.mu.RUnlock() - - // Check if emergency stop is active - if rm.emergencyStopRequested { - return utils.NewStructuredError( - utils.ErrorTypeValidation, - utils.CodeResourceLimitMemory, - "processing stopped due to emergency memory condition", - filePath, - map[string]interface{}{ - "emergency_stop_active": true, - }, - ) - } - - // Check file count limit - currentFiles := atomic.LoadInt64(&rm.filesProcessed) - if int(currentFiles) >= rm.maxFiles { - return utils.NewStructuredError( - utils.ErrorTypeValidation, - utils.CodeResourceLimitFiles, - "maximum file count limit exceeded", - filePath, - map[string]interface{}{ - "current_files": currentFiles, - "max_files": rm.maxFiles, - }, - ) - } - - // Check total size limit - currentTotalSize := atomic.LoadInt64(&rm.totalSizeProcessed) - if currentTotalSize+fileSize > rm.maxTotalSize { - return utils.NewStructuredError( - utils.ErrorTypeValidation, - utils.CodeResourceLimitTotalSize, - "maximum total size limit would be exceeded", - filePath, - map[string]interface{}{ - "current_total_size": currentTotalSize, - "file_size": fileSize, - "max_total_size": rm.maxTotalSize, - }, - ) - } - - // Check overall timeout - if time.Since(rm.startTime) > rm.overallTimeout { - return utils.NewStructuredError( - utils.ErrorTypeValidation, - utils.CodeResourceLimitTimeout, - "overall processing timeout exceeded", - filePath, - map[string]interface{}{ - "processing_duration": time.Since(rm.startTime), - "overall_timeout": rm.overallTimeout, - }, - ) - } - - return nil -} - -// AcquireReadSlot attempts to acquire a slot for concurrent file reading. 
-func (rm *ResourceMonitor) AcquireReadSlot(ctx context.Context) error { - if !rm.enabled { - return nil - } - - // Wait for available read slot - for { - currentReads := atomic.LoadInt64(&rm.concurrentReads) - if currentReads < int64(rm.maxConcurrentReads) { - if atomic.CompareAndSwapInt64(&rm.concurrentReads, currentReads, currentReads+1) { - break - } - // CAS failed, retry - continue - } - - // Wait and retry - select { - case <-ctx.Done(): - return ctx.Err() - case <-time.After(time.Millisecond): - // Continue loop - } - } - - return nil -} - -// ReleaseReadSlot releases a concurrent reading slot. -func (rm *ResourceMonitor) ReleaseReadSlot() { - if rm.enabled { - atomic.AddInt64(&rm.concurrentReads, -1) - } -} - -// WaitForRateLimit waits for rate limiting if enabled. -func (rm *ResourceMonitor) WaitForRateLimit(ctx context.Context) error { - if !rm.enabled || rm.rateLimitFilesPerSec <= 0 { - return nil - } - - select { - case <-ctx.Done(): - return ctx.Err() - case <-rm.rateLimitChan: - return nil - case <-time.After(time.Second): // Fallback timeout - logrus.Warn("Rate limiting timeout exceeded, continuing without rate limit") - return nil - } -} - -// CheckHardMemoryLimit checks if hard memory limit is exceeded and takes action. 
-func (rm *ResourceMonitor) CheckHardMemoryLimit() error { - if !rm.enabled || rm.hardMemoryLimitMB <= 0 { - return nil - } - - var m runtime.MemStats - runtime.ReadMemStats(&m) - currentMemory := int64(m.Alloc) - - if currentMemory > rm.hardMemoryLimitBytes { - rm.mu.Lock() - defer rm.mu.Unlock() - - // Log violation if not already logged - violationKey := "hard_memory_limit" - if !rm.violationLogged[violationKey] { - logrus.Errorf("Hard memory limit exceeded: %dMB > %dMB", - currentMemory/1024/1024, rm.hardMemoryLimitMB) - rm.violationLogged[violationKey] = true - } - - if rm.enableGracefulDegr { - // Force garbage collection - runtime.GC() - - // Check again after GC - runtime.ReadMemStats(&m) - currentMemory = int64(m.Alloc) - - if currentMemory > rm.hardMemoryLimitBytes { - // Still over limit, activate emergency stop - rm.emergencyStopRequested = true - return utils.NewStructuredError( - utils.ErrorTypeValidation, - utils.CodeResourceLimitMemory, - "hard memory limit exceeded, emergency stop activated", - "", - map[string]interface{}{ - "current_memory_mb": currentMemory / 1024 / 1024, - "limit_mb": rm.hardMemoryLimitMB, - "emergency_stop": true, - }, - ) - } else { - // Memory freed by GC, continue with degradation - rm.degradationActive = true - logrus.Info("Memory freed by garbage collection, continuing with degradation mode") - } - } else { - // No graceful degradation, hard stop - return utils.NewStructuredError( - utils.ErrorTypeValidation, - utils.CodeResourceLimitMemory, - "hard memory limit exceeded", - "", - map[string]interface{}{ - "current_memory_mb": currentMemory / 1024 / 1024, - "limit_mb": rm.hardMemoryLimitMB, - }, - ) - } - } - - return nil -} - -// RecordFileProcessed records that a file has been successfully processed. 
-func (rm *ResourceMonitor) RecordFileProcessed(fileSize int64) { - if rm.enabled { - atomic.AddInt64(&rm.filesProcessed, 1) - atomic.AddInt64(&rm.totalSizeProcessed, fileSize) - } -} - -// GetMetrics returns current resource usage metrics. -func (rm *ResourceMonitor) GetMetrics() ResourceMetrics { - if !rm.enableResourceMon { - return ResourceMetrics{} - } - - rm.mu.RLock() - defer rm.mu.RUnlock() - - var m runtime.MemStats - runtime.ReadMemStats(&m) - - filesProcessed := atomic.LoadInt64(&rm.filesProcessed) - totalSize := atomic.LoadInt64(&rm.totalSizeProcessed) - duration := time.Since(rm.startTime) - - avgFileSize := float64(0) - if filesProcessed > 0 { - avgFileSize = float64(totalSize) / float64(filesProcessed) - } - - processingRate := float64(0) - if duration.Seconds() > 0 { - processingRate = float64(filesProcessed) / duration.Seconds() - } - - // Collect violations - violations := make([]string, 0, len(rm.violationLogged)) - for violation := range rm.violationLogged { - violations = append(violations, violation) - } - - return ResourceMetrics{ - FilesProcessed: filesProcessed, - TotalSizeProcessed: totalSize, - ConcurrentReads: atomic.LoadInt64(&rm.concurrentReads), - ProcessingDuration: duration, - AverageFileSize: avgFileSize, - ProcessingRate: processingRate, - MemoryUsageMB: int64(m.Alloc) / 1024 / 1024, - MaxMemoryUsageMB: int64(rm.hardMemoryLimitMB), - ViolationsDetected: violations, - DegradationActive: rm.degradationActive, - EmergencyStopActive: rm.emergencyStopRequested, - LastUpdated: time.Now(), - } -} - -// IsEmergencyStopActive returns whether emergency stop is active. -func (rm *ResourceMonitor) IsEmergencyStopActive() bool { - rm.mu.RLock() - defer rm.mu.RUnlock() - return rm.emergencyStopRequested -} - -// IsDegradationActive returns whether degradation mode is active. 
-func (rm *ResourceMonitor) IsDegradationActive() bool { - rm.mu.RLock() - defer rm.mu.RUnlock() - return rm.degradationActive -} - -// LogResourceInfo logs current resource limit configuration. -func (rm *ResourceMonitor) LogResourceInfo() { - if rm.enabled { - logrus.Infof("Resource limits enabled: maxFiles=%d, maxTotalSize=%dMB, fileTimeout=%ds, overallTimeout=%ds", - rm.maxFiles, rm.maxTotalSize/1024/1024, int(rm.fileProcessingTimeout.Seconds()), int(rm.overallTimeout.Seconds())) - logrus.Infof("Resource limits: maxConcurrentReads=%d, rateLimitFPS=%d, hardMemoryMB=%d", - rm.maxConcurrentReads, rm.rateLimitFilesPerSec, rm.hardMemoryLimitMB) - logrus.Infof("Resource features: gracefulDegradation=%v, monitoring=%v", - rm.enableGracefulDegr, rm.enableResourceMon) - } else { - logrus.Info("Resource limits disabled") - } -} - -// Close cleans up the resource monitor. -func (rm *ResourceMonitor) Close() { - if rm.rateLimiter != nil { - rm.rateLimiter.Stop() - } -} - -// rateLimiterRefill refills the rate limiting channel periodically. -func (rm *ResourceMonitor) rateLimiterRefill() { - for range rm.rateLimiter.C { - select { - case rm.rateLimitChan <- struct{}{}: - default: - // Channel is full, skip - } - } -} - -// CreateFileProcessingContext creates a context with file processing timeout. -func (rm *ResourceMonitor) CreateFileProcessingContext(parent context.Context) (context.Context, context.CancelFunc) { - if !rm.enabled || rm.fileProcessingTimeout <= 0 { - return parent, func() {} - } - return context.WithTimeout(parent, rm.fileProcessingTimeout) -} - -// CreateOverallProcessingContext creates a context with overall processing timeout. 
-func (rm *ResourceMonitor) CreateOverallProcessingContext(parent context.Context) (context.Context, context.CancelFunc) { - if !rm.enabled || rm.overallTimeout <= 0 { - return parent, func() {} - } - return context.WithTimeout(parent, rm.overallTimeout) -} diff --git a/fileproc/resource_monitor_concurrency.go b/fileproc/resource_monitor_concurrency.go new file mode 100644 index 0000000..4d1789b --- /dev/null +++ b/fileproc/resource_monitor_concurrency.go @@ -0,0 +1,59 @@ +package fileproc + +import ( + "context" + "sync/atomic" + "time" +) + +// AcquireReadSlot attempts to acquire a slot for concurrent file reading. +func (rm *ResourceMonitor) AcquireReadSlot(ctx context.Context) error { + if !rm.enabled { + return nil + } + + // Wait for available read slot + for { + currentReads := atomic.LoadInt64(&rm.concurrentReads) + if currentReads < int64(rm.maxConcurrentReads) { + if atomic.CompareAndSwapInt64(&rm.concurrentReads, currentReads, currentReads+1) { + break + } + // CAS failed, retry + continue + } + + // Wait and retry + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(time.Millisecond): + // Continue loop + } + } + + return nil +} + +// ReleaseReadSlot releases a concurrent reading slot. +func (rm *ResourceMonitor) ReleaseReadSlot() { + if rm.enabled { + atomic.AddInt64(&rm.concurrentReads, -1) + } +} + +// CreateFileProcessingContext creates a context with file processing timeout. +func (rm *ResourceMonitor) CreateFileProcessingContext(parent context.Context) (context.Context, context.CancelFunc) { + if !rm.enabled || rm.fileProcessingTimeout <= 0 { + return parent, func() {} + } + return context.WithTimeout(parent, rm.fileProcessingTimeout) +} + +// CreateOverallProcessingContext creates a context with overall processing timeout. 
+func (rm *ResourceMonitor) CreateOverallProcessingContext(parent context.Context) (context.Context, context.CancelFunc) { + if !rm.enabled || rm.overallTimeout <= 0 { + return parent, func() {} + } + return context.WithTimeout(parent, rm.overallTimeout) +} \ No newline at end of file diff --git a/fileproc/resource_monitor_concurrency_test.go b/fileproc/resource_monitor_concurrency_test.go new file mode 100644 index 0000000..566b037 --- /dev/null +++ b/fileproc/resource_monitor_concurrency_test.go @@ -0,0 +1,95 @@ +package fileproc + +import ( + "context" + "testing" + "time" + + "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/testutil" +) + +func TestResourceMonitor_ConcurrentReadsLimit(t *testing.T) { + testutil.ResetViperConfig(t, "") + + // Set a low concurrent reads limit for testing + viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.maxConcurrentReads", 2) + + rm := NewResourceMonitor() + defer rm.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + // First read slot should succeed + err := rm.AcquireReadSlot(ctx) + if err != nil { + t.Errorf("Expected no error for first read slot, got %v", err) + } + + // Second read slot should succeed + err = rm.AcquireReadSlot(ctx) + if err != nil { + t.Errorf("Expected no error for second read slot, got %v", err) + } + + // Third read slot should timeout (context deadline exceeded) + err = rm.AcquireReadSlot(ctx) + if err == nil { + t.Error("Expected timeout error for third read slot, got nil") + } + + // Release one slot and try again + rm.ReleaseReadSlot() + + // Create new context for the next attempt + ctx2, cancel2 := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel2() + + err = rm.AcquireReadSlot(ctx2) + if err != nil { + t.Errorf("Expected no error after releasing a slot, got %v", err) + } + + // Clean up remaining slots + rm.ReleaseReadSlot() + rm.ReleaseReadSlot() +} + +func 
TestResourceMonitor_TimeoutContexts(t *testing.T) { + testutil.ResetViperConfig(t, "") + + // Set short timeouts for testing + viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.fileProcessingTimeoutSec", 1) // 1 second + viper.Set("resourceLimits.overallTimeoutSec", 2) // 2 seconds + + rm := NewResourceMonitor() + defer rm.Close() + + parentCtx := context.Background() + + // Test file processing context + fileCtx, fileCancel := rm.CreateFileProcessingContext(parentCtx) + defer fileCancel() + + deadline, ok := fileCtx.Deadline() + if !ok { + t.Error("Expected file processing context to have a deadline") + } else if time.Until(deadline) > time.Second+100*time.Millisecond { + t.Error("File processing timeout appears to be too long") + } + + // Test overall processing context + overallCtx, overallCancel := rm.CreateOverallProcessingContext(parentCtx) + defer overallCancel() + + deadline, ok = overallCtx.Deadline() + if !ok { + t.Error("Expected overall processing context to have a deadline") + } else if time.Until(deadline) > 2*time.Second+100*time.Millisecond { + t.Error("Overall processing timeout appears to be too long") + } +} \ No newline at end of file diff --git a/fileproc/resource_monitor_integration_test.go b/fileproc/resource_monitor_integration_test.go new file mode 100644 index 0000000..4dd2915 --- /dev/null +++ b/fileproc/resource_monitor_integration_test.go @@ -0,0 +1,81 @@ +package fileproc + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/testutil" +) + +func TestResourceMonitor_Integration(t *testing.T) { + // Create temporary test directory + tempDir := t.TempDir() + + // Create test files + testFiles := []string{"test1.txt", "test2.txt", "test3.txt"} + for _, filename := range testFiles { + testutil.CreateTestFile(t, tempDir, filename, []byte("test content")) + } + + testutil.ResetViperConfig(t, "") + + // Configure resource limits + 
viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.maxFiles", 5) + viper.Set("resourceLimits.maxTotalSize", 1024*1024) // 1MB + viper.Set("resourceLimits.fileProcessingTimeoutSec", 10) + viper.Set("resourceLimits.maxConcurrentReads", 3) + + rm := NewResourceMonitor() + defer rm.Close() + + ctx := context.Background() + + // Test file processing workflow + for _, filename := range testFiles { + filePath := filepath.Join(tempDir, filename) + fileInfo, err := os.Stat(filePath) + if err != nil { + t.Fatalf("Failed to stat test file %s: %v", filePath, err) + } + + // Validate file can be processed + err = rm.ValidateFileProcessing(filePath, fileInfo.Size()) + if err != nil { + t.Errorf("Failed to validate file %s: %v", filePath, err) + continue + } + + // Acquire read slot + err = rm.AcquireReadSlot(ctx) + if err != nil { + t.Errorf("Failed to acquire read slot for %s: %v", filePath, err) + continue + } + + // Check memory limits + err = rm.CheckHardMemoryLimit() + if err != nil { + t.Errorf("Memory limit check failed for %s: %v", filePath, err) + } + + // Record processing + rm.RecordFileProcessed(fileInfo.Size()) + + // Release read slot + rm.ReleaseReadSlot() + } + + // Verify final metrics + metrics := rm.GetMetrics() + if metrics.FilesProcessed != int64(len(testFiles)) { + t.Errorf("Expected %d files processed, got %d", len(testFiles), metrics.FilesProcessed) + } + + // Test resource limit logging + rm.LogResourceInfo() +} \ No newline at end of file diff --git a/fileproc/resource_monitor_metrics.go b/fileproc/resource_monitor_metrics.go new file mode 100644 index 0000000..75e767c --- /dev/null +++ b/fileproc/resource_monitor_metrics.go @@ -0,0 +1,79 @@ +package fileproc + +import ( + "runtime" + "sync/atomic" + "time" + + "github.com/sirupsen/logrus" +) + +// RecordFileProcessed records that a file has been successfully processed. 
+func (rm *ResourceMonitor) RecordFileProcessed(fileSize int64) { + if rm.enabled { + atomic.AddInt64(&rm.filesProcessed, 1) + atomic.AddInt64(&rm.totalSizeProcessed, fileSize) + } +} + +// GetMetrics returns current resource usage metrics. +func (rm *ResourceMonitor) GetMetrics() ResourceMetrics { + if !rm.enableResourceMon { + return ResourceMetrics{} + } + + rm.mu.RLock() + defer rm.mu.RUnlock() + + var m runtime.MemStats + runtime.ReadMemStats(&m) + + filesProcessed := atomic.LoadInt64(&rm.filesProcessed) + totalSize := atomic.LoadInt64(&rm.totalSizeProcessed) + duration := time.Since(rm.startTime) + + avgFileSize := float64(0) + if filesProcessed > 0 { + avgFileSize = float64(totalSize) / float64(filesProcessed) + } + + processingRate := float64(0) + if duration.Seconds() > 0 { + processingRate = float64(filesProcessed) / duration.Seconds() + } + + // Collect violations + violations := make([]string, 0, len(rm.violationLogged)) + for violation := range rm.violationLogged { + violations = append(violations, violation) + } + + return ResourceMetrics{ + FilesProcessed: filesProcessed, + TotalSizeProcessed: totalSize, + ConcurrentReads: atomic.LoadInt64(&rm.concurrentReads), + ProcessingDuration: duration, + AverageFileSize: avgFileSize, + ProcessingRate: processingRate, + MemoryUsageMB: int64(m.Alloc) / 1024 / 1024, + MaxMemoryUsageMB: int64(rm.hardMemoryLimitMB), + ViolationsDetected: violations, + DegradationActive: rm.degradationActive, + EmergencyStopActive: rm.emergencyStopRequested, + LastUpdated: time.Now(), + } +} + +// LogResourceInfo logs current resource limit configuration. 
+func (rm *ResourceMonitor) LogResourceInfo() { + if rm.enabled { + logrus.Infof("Resource limits enabled: maxFiles=%d, maxTotalSize=%dMB, fileTimeout=%ds, overallTimeout=%ds", + rm.maxFiles, rm.maxTotalSize/1024/1024, int(rm.fileProcessingTimeout.Seconds()), int(rm.overallTimeout.Seconds())) + logrus.Infof("Resource limits: maxConcurrentReads=%d, rateLimitFPS=%d, hardMemoryMB=%d", + rm.maxConcurrentReads, rm.rateLimitFilesPerSec, rm.hardMemoryLimitMB) + logrus.Infof("Resource features: gracefulDegradation=%v, monitoring=%v", + rm.enableGracefulDegr, rm.enableResourceMon) + } else { + logrus.Info("Resource limits disabled") + } +} \ No newline at end of file diff --git a/fileproc/resource_monitor_metrics_test.go b/fileproc/resource_monitor_metrics_test.go new file mode 100644 index 0000000..b804581 --- /dev/null +++ b/fileproc/resource_monitor_metrics_test.go @@ -0,0 +1,49 @@ +package fileproc + +import ( + "testing" + "time" + + "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/testutil" +) + +func TestResourceMonitor_Metrics(t *testing.T) { + testutil.ResetViperConfig(t, "") + + viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.enableResourceMonitoring", true) + + rm := NewResourceMonitor() + defer rm.Close() + + // Process some files to generate metrics + rm.RecordFileProcessed(1000) + rm.RecordFileProcessed(2000) + rm.RecordFileProcessed(500) + + metrics := rm.GetMetrics() + + // Verify metrics + if metrics.FilesProcessed != 3 { + t.Errorf("Expected 3 files processed, got %d", metrics.FilesProcessed) + } + + if metrics.TotalSizeProcessed != 3500 { + t.Errorf("Expected total size 3500, got %d", metrics.TotalSizeProcessed) + } + + expectedAvgSize := float64(3500) / float64(3) + if metrics.AverageFileSize != expectedAvgSize { + t.Errorf("Expected average file size %.2f, got %.2f", expectedAvgSize, metrics.AverageFileSize) + } + + if metrics.ProcessingRate <= 0 { + t.Error("Expected positive processing rate") + } + + if 
!metrics.LastUpdated.After(time.Now().Add(-time.Second)) { + t.Error("Expected recent LastUpdated timestamp") + } +} \ No newline at end of file diff --git a/fileproc/resource_monitor_rate_limiting.go b/fileproc/resource_monitor_rate_limiting.go new file mode 100644 index 0000000..640eab4 --- /dev/null +++ b/fileproc/resource_monitor_rate_limiting.go @@ -0,0 +1,36 @@ +package fileproc + +import ( + "context" + "time" + + "github.com/sirupsen/logrus" +) + +// WaitForRateLimit waits for rate limiting if enabled. +func (rm *ResourceMonitor) WaitForRateLimit(ctx context.Context) error { + if !rm.enabled || rm.rateLimitFilesPerSec <= 0 { + return nil + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-rm.rateLimitChan: + return nil + case <-time.After(time.Second): // Fallback timeout + logrus.Warn("Rate limiting timeout exceeded, continuing without rate limit") + return nil + } +} + +// rateLimiterRefill refills the rate limiting channel periodically. +func (rm *ResourceMonitor) rateLimiterRefill() { + for range rm.rateLimiter.C { + select { + case rm.rateLimitChan <- struct{}{}: + default: + // Channel is full, skip + } + } +} \ No newline at end of file diff --git a/fileproc/resource_monitor_rate_limiting_test.go b/fileproc/resource_monitor_rate_limiting_test.go new file mode 100644 index 0000000..d5c791d --- /dev/null +++ b/fileproc/resource_monitor_rate_limiting_test.go @@ -0,0 +1,40 @@ +package fileproc + +import ( + "context" + "testing" + "time" + + "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/testutil" +) + +func TestResourceMonitor_RateLimiting(t *testing.T) { + testutil.ResetViperConfig(t, "") + + // Enable rate limiting with a low rate for testing + viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.rateLimitFilesPerSec", 5) // 5 files per second + + rm := NewResourceMonitor() + defer rm.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + // First few 
// IsEmergencyStopActive returns whether emergency stop is active.
// Safe for concurrent use; the flag is read under rm.mu.
func (rm *ResourceMonitor) IsEmergencyStopActive() bool {
	rm.mu.RLock()
	defer rm.mu.RUnlock()
	return rm.emergencyStopRequested
}

// IsDegradationActive returns whether degradation mode is active.
// Safe for concurrent use; the flag is read under rm.mu.
func (rm *ResourceMonitor) IsDegradationActive() bool {
	rm.mu.RLock()
	defer rm.mu.RUnlock()
	return rm.degradationActive
}

// Close cleans up the resource monitor.
//
// It stops the refill ticker. NOTE(review): Ticker.Stop does not close the
// ticker channel, so the rateLimiterRefill goroutine started by
// NewResourceMonitor stays blocked on the channel receive forever — a
// goroutine leak per monitor instance; a quit channel would let Close end it.
func (rm *ResourceMonitor) Close() {
	if rm.rateLimiter != nil {
		rm.rateLimiter.Stop()
	}
}
-package fileproc - -import ( - "context" - "os" - "path/filepath" - "testing" - "time" - - "github.com/spf13/viper" - - "github.com/ivuorinen/gibidify/config" - "github.com/ivuorinen/gibidify/testutil" - "github.com/ivuorinen/gibidify/utils" -) - -func TestResourceMonitor_NewResourceMonitor(t *testing.T) { - // Reset viper for clean test state - testutil.ResetViperConfig(t, "") - - rm := NewResourceMonitor() - if rm == nil { - t.Fatal("NewResourceMonitor() returned nil") - } - - // Test default values are set correctly - if !rm.enabled { - t.Error("Expected resource monitor to be enabled by default") - } - - if rm.maxFiles != config.DefaultMaxFiles { - t.Errorf("Expected maxFiles to be %d, got %d", config.DefaultMaxFiles, rm.maxFiles) - } - - if rm.maxTotalSize != config.DefaultMaxTotalSize { - t.Errorf("Expected maxTotalSize to be %d, got %d", config.DefaultMaxTotalSize, rm.maxTotalSize) - } - - if rm.fileProcessingTimeout != time.Duration(config.DefaultFileProcessingTimeoutSec)*time.Second { - t.Errorf("Expected fileProcessingTimeout to be %v, got %v", - time.Duration(config.DefaultFileProcessingTimeoutSec)*time.Second, rm.fileProcessingTimeout) - } - - // Clean up - rm.Close() -} - -func TestResourceMonitor_DisabledResourceLimits(t *testing.T) { - // Reset viper for clean test state - testutil.ResetViperConfig(t, "") - - // Set resource limits disabled - viper.Set("resourceLimits.enabled", false) - - rm := NewResourceMonitor() - defer rm.Close() - - // Test that validation passes when disabled - err := rm.ValidateFileProcessing("/tmp/test.txt", 1000) - if err != nil { - t.Errorf("Expected no error when resource limits disabled, got %v", err) - } - - // Test that read slot acquisition works when disabled - ctx := context.Background() - err = rm.AcquireReadSlot(ctx) - if err != nil { - t.Errorf("Expected no error when acquiring read slot with disabled limits, got %v", err) - } - rm.ReleaseReadSlot() - - // Test that rate limiting is bypassed when disabled - err = 
rm.WaitForRateLimit(ctx) - if err != nil { - t.Errorf("Expected no error when rate limiting disabled, got %v", err) - } -} - -func TestResourceMonitor_FileCountLimit(t *testing.T) { - testutil.ResetViperConfig(t, "") - - // Set a very low file count limit for testing - viper.Set("resourceLimits.enabled", true) - viper.Set("resourceLimits.maxFiles", 2) - - rm := NewResourceMonitor() - defer rm.Close() - - // First file should pass - err := rm.ValidateFileProcessing("/tmp/file1.txt", 100) - if err != nil { - t.Errorf("Expected no error for first file, got %v", err) - } - rm.RecordFileProcessed(100) - - // Second file should pass - err = rm.ValidateFileProcessing("/tmp/file2.txt", 100) - if err != nil { - t.Errorf("Expected no error for second file, got %v", err) - } - rm.RecordFileProcessed(100) - - // Third file should fail - err = rm.ValidateFileProcessing("/tmp/file3.txt", 100) - if err == nil { - t.Error("Expected error for third file (exceeds limit), got nil") - } - - // Verify it's the correct error type - structErr, ok := err.(*utils.StructuredError) - if !ok { - t.Errorf("Expected StructuredError, got %T", err) - } else if structErr.Code != utils.CodeResourceLimitFiles { - t.Errorf("Expected error code %s, got %s", utils.CodeResourceLimitFiles, structErr.Code) - } -} - -func TestResourceMonitor_TotalSizeLimit(t *testing.T) { - testutil.ResetViperConfig(t, "") - - // Set a low total size limit for testing (1KB) - viper.Set("resourceLimits.enabled", true) - viper.Set("resourceLimits.maxTotalSize", 1024) - - rm := NewResourceMonitor() - defer rm.Close() - - // First small file should pass - err := rm.ValidateFileProcessing("/tmp/small.txt", 500) - if err != nil { - t.Errorf("Expected no error for small file, got %v", err) - } - rm.RecordFileProcessed(500) - - // Second small file should pass - err = rm.ValidateFileProcessing("/tmp/small2.txt", 400) - if err != nil { - t.Errorf("Expected no error for second small file, got %v", err) - } - 
rm.RecordFileProcessed(400) - - // Large file that would exceed limit should fail - err = rm.ValidateFileProcessing("/tmp/large.txt", 200) - if err == nil { - t.Error("Expected error for file that would exceed size limit, got nil") - } - - // Verify it's the correct error type - structErr, ok := err.(*utils.StructuredError) - if !ok { - t.Errorf("Expected StructuredError, got %T", err) - } else if structErr.Code != utils.CodeResourceLimitTotalSize { - t.Errorf("Expected error code %s, got %s", utils.CodeResourceLimitTotalSize, structErr.Code) - } -} - -func TestResourceMonitor_ConcurrentReadsLimit(t *testing.T) { - testutil.ResetViperConfig(t, "") - - // Set a low concurrent reads limit for testing - viper.Set("resourceLimits.enabled", true) - viper.Set("resourceLimits.maxConcurrentReads", 2) - - rm := NewResourceMonitor() - defer rm.Close() - - ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) - defer cancel() - - // First read slot should succeed - err := rm.AcquireReadSlot(ctx) - if err != nil { - t.Errorf("Expected no error for first read slot, got %v", err) - } - - // Second read slot should succeed - err = rm.AcquireReadSlot(ctx) - if err != nil { - t.Errorf("Expected no error for second read slot, got %v", err) - } - - // Third read slot should timeout (context deadline exceeded) - err = rm.AcquireReadSlot(ctx) - if err == nil { - t.Error("Expected timeout error for third read slot, got nil") - } - - // Release one slot and try again - rm.ReleaseReadSlot() - - // Create new context for the next attempt - ctx2, cancel2 := context.WithTimeout(context.Background(), 100*time.Millisecond) - defer cancel2() - - err = rm.AcquireReadSlot(ctx2) - if err != nil { - t.Errorf("Expected no error after releasing a slot, got %v", err) - } - - // Clean up remaining slots - rm.ReleaseReadSlot() - rm.ReleaseReadSlot() -} - -func TestResourceMonitor_TimeoutContexts(t *testing.T) { - testutil.ResetViperConfig(t, "") - - // Set short timeouts for 
testing - viper.Set("resourceLimits.enabled", true) - viper.Set("resourceLimits.fileProcessingTimeoutSec", 1) // 1 second - viper.Set("resourceLimits.overallTimeoutSec", 2) // 2 seconds - - rm := NewResourceMonitor() - defer rm.Close() - - parentCtx := context.Background() - - // Test file processing context - fileCtx, fileCancel := rm.CreateFileProcessingContext(parentCtx) - defer fileCancel() - - deadline, ok := fileCtx.Deadline() - if !ok { - t.Error("Expected file processing context to have a deadline") - } else if time.Until(deadline) > time.Second+100*time.Millisecond { - t.Error("File processing timeout appears to be too long") - } - - // Test overall processing context - overallCtx, overallCancel := rm.CreateOverallProcessingContext(parentCtx) - defer overallCancel() - - deadline, ok = overallCtx.Deadline() - if !ok { - t.Error("Expected overall processing context to have a deadline") - } else if time.Until(deadline) > 2*time.Second+100*time.Millisecond { - t.Error("Overall processing timeout appears to be too long") - } -} - -func TestResourceMonitor_RateLimiting(t *testing.T) { - testutil.ResetViperConfig(t, "") - - // Enable rate limiting with a low rate for testing - viper.Set("resourceLimits.enabled", true) - viper.Set("resourceLimits.rateLimitFilesPerSec", 5) // 5 files per second - - rm := NewResourceMonitor() - defer rm.Close() - - ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) - defer cancel() - - // First few requests should succeed quickly - start := time.Now() - for i := 0; i < 3; i++ { - err := rm.WaitForRateLimit(ctx) - if err != nil { - t.Errorf("Expected no error for rate limit wait %d, got %v", i, err) - } - } - - // Should have taken some time due to rate limiting - duration := time.Since(start) - if duration < 200*time.Millisecond { - t.Logf("Rate limiting may not be working as expected, took only %v", duration) - } -} - -func TestResourceMonitor_Metrics(t *testing.T) { - testutil.ResetViperConfig(t, "") - - 
viper.Set("resourceLimits.enabled", true) - viper.Set("resourceLimits.enableResourceMonitoring", true) - - rm := NewResourceMonitor() - defer rm.Close() - - // Process some files to generate metrics - rm.RecordFileProcessed(1000) - rm.RecordFileProcessed(2000) - rm.RecordFileProcessed(500) - - metrics := rm.GetMetrics() - - // Verify metrics - if metrics.FilesProcessed != 3 { - t.Errorf("Expected 3 files processed, got %d", metrics.FilesProcessed) - } - - if metrics.TotalSizeProcessed != 3500 { - t.Errorf("Expected total size 3500, got %d", metrics.TotalSizeProcessed) - } - - expectedAvgSize := float64(3500) / float64(3) - if metrics.AverageFileSize != expectedAvgSize { - t.Errorf("Expected average file size %.2f, got %.2f", expectedAvgSize, metrics.AverageFileSize) - } - - if metrics.ProcessingRate <= 0 { - t.Error("Expected positive processing rate") - } - - if !metrics.LastUpdated.After(time.Now().Add(-time.Second)) { - t.Error("Expected recent LastUpdated timestamp") - } -} - -func TestResourceMonitor_Integration(t *testing.T) { - // Create temporary test directory - tempDir := t.TempDir() - - // Create test files - testFiles := []string{"test1.txt", "test2.txt", "test3.txt"} - for _, filename := range testFiles { - testutil.CreateTestFile(t, tempDir, filename, []byte("test content")) - } - - testutil.ResetViperConfig(t, "") - - // Configure resource limits - viper.Set("resourceLimits.enabled", true) - viper.Set("resourceLimits.maxFiles", 5) - viper.Set("resourceLimits.maxTotalSize", 1024*1024) // 1MB - viper.Set("resourceLimits.fileProcessingTimeoutSec", 10) - viper.Set("resourceLimits.maxConcurrentReads", 3) - - rm := NewResourceMonitor() - defer rm.Close() - - ctx := context.Background() - - // Test file processing workflow - for _, filename := range testFiles { - filePath := filepath.Join(tempDir, filename) - fileInfo, err := os.Stat(filePath) - if err != nil { - t.Fatalf("Failed to stat test file %s: %v", filePath, err) - } - - // Validate file can be 
processed - err = rm.ValidateFileProcessing(filePath, fileInfo.Size()) - if err != nil { - t.Errorf("Failed to validate file %s: %v", filePath, err) - continue - } - - // Acquire read slot - err = rm.AcquireReadSlot(ctx) - if err != nil { - t.Errorf("Failed to acquire read slot for %s: %v", filePath, err) - continue - } - - // Check memory limits - err = rm.CheckHardMemoryLimit() - if err != nil { - t.Errorf("Memory limit check failed for %s: %v", filePath, err) - } - - // Record processing - rm.RecordFileProcessed(fileInfo.Size()) - - // Release read slot - rm.ReleaseReadSlot() - } - - // Verify final metrics - metrics := rm.GetMetrics() - if metrics.FilesProcessed != int64(len(testFiles)) { - t.Errorf("Expected %d files processed, got %d", len(testFiles), metrics.FilesProcessed) - } - - // Test resource limit logging - rm.LogResourceInfo() -} \ No newline at end of file diff --git a/fileproc/resource_monitor_types.go b/fileproc/resource_monitor_types.go new file mode 100644 index 0000000..90461b9 --- /dev/null +++ b/fileproc/resource_monitor_types.go @@ -0,0 +1,108 @@ +package fileproc + +import ( + "sync" + "time" + + "github.com/ivuorinen/gibidify/config" +) + +// ResourceMonitor monitors resource usage and enforces limits to prevent DoS attacks. 
type ResourceMonitor struct {
	// Configuration snapshot taken from the config package at construction
	// time (see NewResourceMonitor); not updated afterwards.
	enabled               bool
	maxFiles              int
	maxTotalSize          int64
	fileProcessingTimeout time.Duration
	overallTimeout        time.Duration
	maxConcurrentReads    int
	rateLimitFilesPerSec  int
	hardMemoryLimitMB     int
	enableGracefulDegr    bool
	enableResourceMon     bool

	// Current state tracking.
	// filesProcessed, totalSizeProcessed, and concurrentReads are read with
	// sync/atomic elsewhere (see ValidateFileProcessing); writers must use
	// atomic ops on them as well.
	filesProcessed       int64
	totalSizeProcessed   int64
	concurrentReads      int64
	startTime            time.Time
	lastRateLimitCheck   time.Time
	hardMemoryLimitBytes int64 // hardMemoryLimitMB converted to bytes once, up front

	// Rate limiting: rateLimiter ticks once per token interval and
	// rateLimitChan holds the available tokens (capacity == files/sec).
	rateLimiter   *time.Ticker
	rateLimitChan chan struct{}

	// Synchronization: mu guards the fields below it.
	mu                     sync.RWMutex
	violationLogged        map[string]bool // per-violation-type "already logged" flags
	degradationActive      bool
	emergencyStopRequested bool
}

// ResourceMetrics holds comprehensive resource usage metrics.
type ResourceMetrics struct {
	FilesProcessed      int64         `json:"files_processed"`
	TotalSizeProcessed  int64         `json:"total_size_processed"`
	ConcurrentReads     int64         `json:"concurrent_reads"`
	ProcessingDuration  time.Duration `json:"processing_duration"`
	AverageFileSize     float64       `json:"average_file_size"`
	ProcessingRate      float64       `json:"processing_rate_files_per_sec"`
	MemoryUsageMB       int64         `json:"memory_usage_mb"`
	MaxMemoryUsageMB    int64         `json:"max_memory_usage_mb"`
	ViolationsDetected  []string      `json:"violations_detected"`
	DegradationActive   bool          `json:"degradation_active"`
	EmergencyStopActive bool          `json:"emergency_stop_active"`
	LastUpdated         time.Time     `json:"last_updated"`
}

// ResourceViolation represents a detected resource limit violation.
type ResourceViolation struct {
	Type      string                 `json:"type"`
	Message   string                 `json:"message"`
	Current   interface{}            `json:"current"`
	Limit     interface{}            `json:"limit"`
	Timestamp time.Time              `json:"timestamp"`
	Context   map[string]interface{} `json:"context"`
}

// NewResourceMonitor creates a new resource monitor with configuration.
+func NewResourceMonitor() *ResourceMonitor { + rm := &ResourceMonitor{ + enabled: config.GetResourceLimitsEnabled(), + maxFiles: config.GetMaxFiles(), + maxTotalSize: config.GetMaxTotalSize(), + fileProcessingTimeout: time.Duration(config.GetFileProcessingTimeoutSec()) * time.Second, + overallTimeout: time.Duration(config.GetOverallTimeoutSec()) * time.Second, + maxConcurrentReads: config.GetMaxConcurrentReads(), + rateLimitFilesPerSec: config.GetRateLimitFilesPerSec(), + hardMemoryLimitMB: config.GetHardMemoryLimitMB(), + enableGracefulDegr: config.GetEnableGracefulDegradation(), + enableResourceMon: config.GetEnableResourceMonitoring(), + startTime: time.Now(), + lastRateLimitCheck: time.Now(), + violationLogged: make(map[string]bool), + hardMemoryLimitBytes: int64(config.GetHardMemoryLimitMB()) * 1024 * 1024, + } + + // Initialize rate limiter if rate limiting is enabled + if rm.enabled && rm.rateLimitFilesPerSec > 0 { + interval := time.Second / time.Duration(rm.rateLimitFilesPerSec) + rm.rateLimiter = time.NewTicker(interval) + rm.rateLimitChan = make(chan struct{}, rm.rateLimitFilesPerSec) + + // Pre-fill the rate limit channel + for i := 0; i < rm.rateLimitFilesPerSec; i++ { + select { + case rm.rateLimitChan <- struct{}{}: + default: + goto rateLimitFull + } + } + rateLimitFull: + + // Start rate limiter refill goroutine + go rm.rateLimiterRefill() + } + + return rm +} \ No newline at end of file diff --git a/fileproc/resource_monitor_types_test.go b/fileproc/resource_monitor_types_test.go new file mode 100644 index 0000000..8686aca --- /dev/null +++ b/fileproc/resource_monitor_types_test.go @@ -0,0 +1,74 @@ +package fileproc + +import ( + "context" + "testing" + "time" + + "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/config" + "github.com/ivuorinen/gibidify/testutil" +) + +func TestResourceMonitor_NewResourceMonitor(t *testing.T) { + // Reset viper for clean test state + testutil.ResetViperConfig(t, "") + + rm := NewResourceMonitor() + if 
rm == nil { + t.Fatal("NewResourceMonitor() returned nil") + } + + // Test default values are set correctly + if !rm.enabled { + t.Error("Expected resource monitor to be enabled by default") + } + + if rm.maxFiles != config.DefaultMaxFiles { + t.Errorf("Expected maxFiles to be %d, got %d", config.DefaultMaxFiles, rm.maxFiles) + } + + if rm.maxTotalSize != config.DefaultMaxTotalSize { + t.Errorf("Expected maxTotalSize to be %d, got %d", config.DefaultMaxTotalSize, rm.maxTotalSize) + } + + if rm.fileProcessingTimeout != time.Duration(config.DefaultFileProcessingTimeoutSec)*time.Second { + t.Errorf("Expected fileProcessingTimeout to be %v, got %v", + time.Duration(config.DefaultFileProcessingTimeoutSec)*time.Second, rm.fileProcessingTimeout) + } + + // Clean up + rm.Close() +} + +func TestResourceMonitor_DisabledResourceLimits(t *testing.T) { + // Reset viper for clean test state + testutil.ResetViperConfig(t, "") + + // Set resource limits disabled + viper.Set("resourceLimits.enabled", false) + + rm := NewResourceMonitor() + defer rm.Close() + + // Test that validation passes when disabled + err := rm.ValidateFileProcessing("/tmp/test.txt", 1000) + if err != nil { + t.Errorf("Expected no error when resource limits disabled, got %v", err) + } + + // Test that read slot acquisition works when disabled + ctx := context.Background() + err = rm.AcquireReadSlot(ctx) + if err != nil { + t.Errorf("Expected no error when acquiring read slot with disabled limits, got %v", err) + } + rm.ReleaseReadSlot() + + // Test that rate limiting is bypassed when disabled + err = rm.WaitForRateLimit(ctx) + if err != nil { + t.Errorf("Expected no error when rate limiting disabled, got %v", err) + } +} \ No newline at end of file diff --git a/fileproc/resource_monitor_validation.go b/fileproc/resource_monitor_validation.go new file mode 100644 index 0000000..f24dad8 --- /dev/null +++ b/fileproc/resource_monitor_validation.go @@ -0,0 +1,148 @@ +package fileproc + +import ( + "runtime" + 
"sync/atomic" + "time" + + "github.com/sirupsen/logrus" + + "github.com/ivuorinen/gibidify/utils" +) + +// ValidateFileProcessing checks if a file can be processed based on resource limits. +func (rm *ResourceMonitor) ValidateFileProcessing(filePath string, fileSize int64) error { + if !rm.enabled { + return nil + } + + rm.mu.RLock() + defer rm.mu.RUnlock() + + // Check if emergency stop is active + if rm.emergencyStopRequested { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitMemory, + "processing stopped due to emergency memory condition", + filePath, + map[string]interface{}{ + "emergency_stop_active": true, + }, + ) + } + + // Check file count limit + currentFiles := atomic.LoadInt64(&rm.filesProcessed) + if int(currentFiles) >= rm.maxFiles { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitFiles, + "maximum file count limit exceeded", + filePath, + map[string]interface{}{ + "current_files": currentFiles, + "max_files": rm.maxFiles, + }, + ) + } + + // Check total size limit + currentTotalSize := atomic.LoadInt64(&rm.totalSizeProcessed) + if currentTotalSize+fileSize > rm.maxTotalSize { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitTotalSize, + "maximum total size limit would be exceeded", + filePath, + map[string]interface{}{ + "current_total_size": currentTotalSize, + "file_size": fileSize, + "max_total_size": rm.maxTotalSize, + }, + ) + } + + // Check overall timeout + if time.Since(rm.startTime) > rm.overallTimeout { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitTimeout, + "overall processing timeout exceeded", + filePath, + map[string]interface{}{ + "processing_duration": time.Since(rm.startTime), + "overall_timeout": rm.overallTimeout, + }, + ) + } + + return nil +} + +// CheckHardMemoryLimit checks if hard memory limit is exceeded and takes action. 
+func (rm *ResourceMonitor) CheckHardMemoryLimit() error { + if !rm.enabled || rm.hardMemoryLimitMB <= 0 { + return nil + } + + var m runtime.MemStats + runtime.ReadMemStats(&m) + currentMemory := int64(m.Alloc) + + if currentMemory > rm.hardMemoryLimitBytes { + rm.mu.Lock() + defer rm.mu.Unlock() + + // Log violation if not already logged + violationKey := "hard_memory_limit" + if !rm.violationLogged[violationKey] { + logrus.Errorf("Hard memory limit exceeded: %dMB > %dMB", + currentMemory/1024/1024, rm.hardMemoryLimitMB) + rm.violationLogged[violationKey] = true + } + + if rm.enableGracefulDegr { + // Force garbage collection + runtime.GC() + + // Check again after GC + runtime.ReadMemStats(&m) + currentMemory = int64(m.Alloc) + + if currentMemory > rm.hardMemoryLimitBytes { + // Still over limit, activate emergency stop + rm.emergencyStopRequested = true + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitMemory, + "hard memory limit exceeded, emergency stop activated", + "", + map[string]interface{}{ + "current_memory_mb": currentMemory / 1024 / 1024, + "limit_mb": rm.hardMemoryLimitMB, + "emergency_stop": true, + }, + ) + } else { + // Memory freed by GC, continue with degradation + rm.degradationActive = true + logrus.Info("Memory freed by garbage collection, continuing with degradation mode") + } + } else { + // No graceful degradation, hard stop + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitMemory, + "hard memory limit exceeded", + "", + map[string]interface{}{ + "current_memory_mb": currentMemory / 1024 / 1024, + "limit_mb": rm.hardMemoryLimitMB, + }, + ) + } + } + + return nil +} \ No newline at end of file diff --git a/fileproc/resource_monitor_validation_test.go b/fileproc/resource_monitor_validation_test.go new file mode 100644 index 0000000..d45002c --- /dev/null +++ b/fileproc/resource_monitor_validation_test.go @@ -0,0 +1,88 @@ +package fileproc + +import ( + "testing" + 
+ "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/testutil" + "github.com/ivuorinen/gibidify/utils" +) + +func TestResourceMonitor_FileCountLimit(t *testing.T) { + testutil.ResetViperConfig(t, "") + + // Set a very low file count limit for testing + viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.maxFiles", 2) + + rm := NewResourceMonitor() + defer rm.Close() + + // First file should pass + err := rm.ValidateFileProcessing("/tmp/file1.txt", 100) + if err != nil { + t.Errorf("Expected no error for first file, got %v", err) + } + rm.RecordFileProcessed(100) + + // Second file should pass + err = rm.ValidateFileProcessing("/tmp/file2.txt", 100) + if err != nil { + t.Errorf("Expected no error for second file, got %v", err) + } + rm.RecordFileProcessed(100) + + // Third file should fail + err = rm.ValidateFileProcessing("/tmp/file3.txt", 100) + if err == nil { + t.Error("Expected error for third file (exceeds limit), got nil") + } + + // Verify it's the correct error type + structErr, ok := err.(*utils.StructuredError) + if !ok { + t.Errorf("Expected StructuredError, got %T", err) + } else if structErr.Code != utils.CodeResourceLimitFiles { + t.Errorf("Expected error code %s, got %s", utils.CodeResourceLimitFiles, structErr.Code) + } +} + +func TestResourceMonitor_TotalSizeLimit(t *testing.T) { + testutil.ResetViperConfig(t, "") + + // Set a low total size limit for testing (1KB) + viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.maxTotalSize", 1024) + + rm := NewResourceMonitor() + defer rm.Close() + + // First small file should pass + err := rm.ValidateFileProcessing("/tmp/small.txt", 500) + if err != nil { + t.Errorf("Expected no error for small file, got %v", err) + } + rm.RecordFileProcessed(500) + + // Second small file should pass + err = rm.ValidateFileProcessing("/tmp/small2.txt", 400) + if err != nil { + t.Errorf("Expected no error for second small file, got %v", err) + } + rm.RecordFileProcessed(400) + + 
// Large file that would exceed limit should fail + err = rm.ValidateFileProcessing("/tmp/large.txt", 200) + if err == nil { + t.Error("Expected error for file that would exceed size limit, got nil") + } + + // Verify it's the correct error type + structErr, ok := err.(*utils.StructuredError) + if !ok { + t.Errorf("Expected StructuredError, got %T", err) + } else if structErr.Code != utils.CodeResourceLimitTotalSize { + t.Errorf("Expected error code %s, got %s", utils.CodeResourceLimitTotalSize, structErr.Code) + } +} \ No newline at end of file diff --git a/testutil/concurrency_test.go b/testutil/concurrency_test.go new file mode 100644 index 0000000..50e0d35 --- /dev/null +++ b/testutil/concurrency_test.go @@ -0,0 +1,86 @@ +package testutil + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +// Test thread safety of functions that might be called concurrently +func TestConcurrentOperations(t *testing.T) { + tempDir := t.TempDir() + done := make(chan bool) + + // Test concurrent file creation + for i := 0; i < 5; i++ { + go func(n int) { + CreateTestFile(t, tempDir, string(rune('a'+n))+".txt", []byte("content")) + done <- true + }(i) + } + + // Test concurrent directory creation + for i := 0; i < 5; i++ { + go func(n int) { + CreateTestDirectory(t, tempDir, "dir"+string(rune('0'+n))) + done <- true + }(i) + } + + // Wait for all goroutines + for i := 0; i < 10; i++ { + <-done + } +} + +// Benchmarks +func BenchmarkCreateTestFile(b *testing.B) { + tempDir := b.TempDir() + content := []byte("benchmark content") + + b.ResetTimer() + for i := 0; i < b.N; i++ { + // Use a unique filename for each iteration to avoid conflicts + filename := "bench" + string(rune(i%26+'a')) + ".txt" + filePath := filepath.Join(tempDir, filename) + if err := os.WriteFile(filePath, content, FilePermission); err != nil { + b.Fatalf("Failed to write file: %v", err) + } + } +} + +func BenchmarkCreateTestFiles(b *testing.B) { + tempDir := b.TempDir() + + b.ResetTimer() + for i := 
// BenchmarkVerifyContentContains measures the substring-search core of
// VerifyContentContains (the helper itself needs a *testing.T, so only the
// strings.Contains loop is benchmarked).
//
// Fix: the previous version discarded each strings.Contains result with
// `_ =`, which can allow the compiler to eliminate the pure call as dead
// code so the benchmark measures nothing. The results are now accumulated
// into a counter that is checked after the loop, keeping the work live.
func BenchmarkVerifyContentContains(b *testing.B) {
	content := strings.Repeat("test content with various words ", 100)
	expected := []string{"test", "content", "various", "words"}

	b.ResetTimer()
	var matches int
	for i := 0; i < b.N; i++ {
		for _, exp := range expected {
			if strings.Contains(content, exp) {
				matches++
			}
		}
	}
	// All four substrings occur in content, so exactly 4 matches per iteration.
	if matches != 4*b.N {
		b.Fatalf("unexpected match count: got %d, want %d", matches, 4*b.N)
	}
}
as no config file exists + return + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.preSetup() + ResetViperConfig(t, tt.configPath) + tt.verify(t) + }) + } +} + +func TestSetupCLIArgs(t *testing.T) { + // Save original args + originalArgs := os.Args + defer func() { + os.Args = originalArgs + }() + + tests := []struct { + name string + srcDir string + outFile string + prefix string + suffix string + concurrency int + wantLen int + }{ + { + name: "basic CLI args", + srcDir: "/src", + outFile: "/out.txt", + prefix: "PREFIX", + suffix: "SUFFIX", + concurrency: 4, + wantLen: 11, + }, + { + name: "empty strings", + srcDir: "", + outFile: "", + prefix: "", + suffix: "", + concurrency: 1, + wantLen: 11, + }, + { + name: "special characters in args", + srcDir: "/path with spaces/src", + outFile: "/path/to/output file.txt", + prefix: "Prefix with\nnewline", + suffix: "Suffix with\ttab", + concurrency: 8, + wantLen: 11, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + SetupCLIArgs(tt.srcDir, tt.outFile, tt.prefix, tt.suffix, tt.concurrency) + + if len(os.Args) != tt.wantLen { + t.Errorf("os.Args length = %d, want %d", len(os.Args), tt.wantLen) + } + + // Verify specific args + if os.Args[0] != "gibidify" { + t.Errorf("Program name = %s, want gibidify", os.Args[0]) + } + if os.Args[2] != tt.srcDir { + t.Errorf("Source dir = %s, want %s", os.Args[2], tt.srcDir) + } + if os.Args[4] != tt.outFile { + t.Errorf("Output file = %s, want %s", os.Args[4], tt.outFile) + } + if os.Args[6] != tt.prefix { + t.Errorf("Prefix = %s, want %s", os.Args[6], tt.prefix) + } + if os.Args[8] != tt.suffix { + t.Errorf("Suffix = %s, want %s", os.Args[8], tt.suffix) + } + if os.Args[10] != string(rune(tt.concurrency+'0')) { + t.Errorf("Concurrency = %s, want %d", os.Args[10], tt.concurrency) + } + }) + } +} \ No newline at end of file diff --git a/testutil/file_creation_test.go b/testutil/file_creation_test.go new file mode 
// TestCreateTestFile exercises CreateTestFile across plain, empty, binary,
// nested, and specially named files, checking existence, file mode,
// permissions, and round-tripped content.
//
// NOTE(review): the wantErr field is declared but never consulted (every
// case sets it to false) — either drop it or add failure-path cases.
func TestCreateTestFile(t *testing.T) {
	tests := []struct {
		name     string
		dir      string
		filename string
		content  []byte
		wantErr  bool
	}{
		{
			name:     "create simple test file",
			filename: "test.txt",
			content:  []byte("hello world"),
			wantErr:  false,
		},
		{
			name:     "create file with empty content",
			filename: "empty.txt",
			content:  []byte{},
			wantErr:  false,
		},
		{
			name:     "create file with binary content",
			filename: "binary.bin",
			content:  []byte{0x00, 0xFF, 0x42},
			wantErr:  false,
		},
		{
			name:     "create file with subdirectory",
			filename: "subdir/test.txt",
			content:  []byte("nested file"),
			wantErr:  false,
		},
		{
			name:     "create file with special characters",
			filename: "special-file_123.go",
			content:  []byte("package main"),
			wantErr:  false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Use a temporary directory for each test.
			// tt.dir is empty in every case above, so each subtest gets its
			// own fresh temp dir. (Mutating tt here is safe: tt is a copy.)
			tempDir := t.TempDir()
			if tt.dir == "" {
				tt.dir = tempDir
			}

			// Create subdirectory if needed (for the nested-filename case).
			if strings.Contains(tt.filename, "/") {
				subdir := filepath.Join(tt.dir, filepath.Dir(tt.filename))
				if err := os.MkdirAll(subdir, DirPermission); err != nil {
					t.Fatalf("Failed to create subdirectory: %v", err)
				}
			}

			// Test CreateTestFile
			filePath := CreateTestFile(t, tt.dir, tt.filename, tt.content)

			// Verify file exists
			info, err := os.Stat(filePath)
			if err != nil {
				t.Fatalf("Created file does not exist: %v", err)
			}

			// Verify it's a regular file
			if !info.Mode().IsRegular() {
				t.Errorf("Created path is not a regular file")
			}

			// Verify permissions.
			// NOTE(review): this assumes the process umask does not strip
			// bits from FilePermission — true for the common 022 umask with
			// a 0644 constant, but confirm for stricter environments.
			if info.Mode().Perm() != FilePermission {
				t.Errorf("File permissions = %v, want %v", info.Mode().Perm(), FilePermission)
			}

			// Verify content round-trips byte-for-byte.
			readContent, err := os.ReadFile(filePath)
			if err != nil {
				t.Fatalf("Failed to read created file: %v", err)
			}
			if string(readContent) != string(tt.content) {
				t.Errorf("File content = %q, want %q", readContent, tt.content)
			}
		})
	}
}

// TestCreateTempOutputFile checks CreateTempOutputFile with several
// name patterns, verifying the file exists, is regular, is writable, and
// lives under the system temp directory.
func TestCreateTempOutputFile(t *testing.T) {
	tests := []struct {
		name    string
		pattern string
	}{
		{
			name:    "simple pattern",
			pattern: "output-*.txt",
		},
		{
			name:    "pattern with prefix only",
			pattern: "test-",
		},
		{
			name:    "pattern with suffix only",
			pattern: "*.json",
		},
		{
			name:    "empty pattern",
			pattern: "",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			file, path := CreateTempOutputFile(t, tt.pattern)
			defer CloseFile(t, file)

			// Verify file exists
			info, err := os.Stat(path)
			if err != nil {
				t.Fatalf("Temp file does not exist: %v", err)
			}

			// Verify it's a regular file
			if !info.Mode().IsRegular() {
				t.Errorf("Created path is not a regular file")
			}

			// Verify we can write to it
			testContent := []byte("test content")
			if _, err := file.Write(testContent); err != nil {
				t.Errorf("Failed to write to temp file: %v", err)
			}

			// Verify the path is in a temp directory (any temp directory).
			// NOTE(review): a substring check against os.TempDir can flake
			// when the temp dir is reached via a symlink (paths may resolve
			// differently) — confirm on the target platforms.
			if !strings.Contains(path, os.TempDir()) {
				t.Errorf("Temp file not in temp directory: %s", path)
			}
		})
	}
}
to create parent directory: %v", err) + } + tt.dir = filepath.Base(tt.dir) + tt.parent = parentPath + } + + dirPath := CreateTestDirectory(t, tt.parent, tt.dir) + + // Verify directory exists + info, err := os.Stat(dirPath) + if err != nil { + t.Fatalf("Created directory does not exist: %v", err) + } + + // Verify it's a directory + if !info.IsDir() { + t.Errorf("Created path is not a directory") + } + + // Verify permissions + if info.Mode().Perm() != DirPermission { + t.Errorf("Directory permissions = %v, want %v", info.Mode().Perm(), DirPermission) + } + + // Verify we can create files in it + testFile := filepath.Join(dirPath, "test.txt") + if err := os.WriteFile(testFile, []byte("test"), FilePermission); err != nil { + t.Errorf("Cannot create file in directory: %v", err) + } + }) + } +} + +func TestCreateTestFiles(t *testing.T) { + tests := []struct { + name string + fileSpecs []FileSpec + wantCount int + }{ + { + name: "create multiple files", + fileSpecs: []FileSpec{ + {Name: "file1.txt", Content: "content1"}, + {Name: "file2.go", Content: "package main"}, + {Name: "file3.json", Content: `{"key": "value"}`}, + }, + wantCount: 3, + }, + { + name: "create files with subdirectories", + fileSpecs: []FileSpec{ + {Name: "src/main.go", Content: "package main"}, + {Name: "test/test.go", Content: "package test"}, + }, + wantCount: 2, + }, + { + name: "empty file specs", + fileSpecs: []FileSpec{}, + wantCount: 0, + }, + { + name: "files with empty content", + fileSpecs: []FileSpec{ + {Name: "empty1.txt", Content: ""}, + {Name: "empty2.txt", Content: ""}, + }, + wantCount: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rootDir := t.TempDir() + + // Create necessary subdirectories + for _, spec := range tt.fileSpecs { + if strings.Contains(spec.Name, "/") { + subdir := filepath.Join(rootDir, filepath.Dir(spec.Name)) + if err := os.MkdirAll(subdir, DirPermission); err != nil { + t.Fatalf("Failed to create subdirectory: %v", err) + } + 
} + } + + createdFiles := CreateTestFiles(t, rootDir, tt.fileSpecs) + + // Verify count + if len(createdFiles) != tt.wantCount { + t.Errorf("Created %d files, want %d", len(createdFiles), tt.wantCount) + } + + // Verify each file + for i, filePath := range createdFiles { + content, err := os.ReadFile(filePath) + if err != nil { + t.Errorf("Failed to read file %s: %v", filePath, err) + continue + } + if string(content) != tt.fileSpecs[i].Content { + t.Errorf("File %s content = %q, want %q", filePath, content, tt.fileSpecs[i].Content) + } + } + }) + } +} \ No newline at end of file diff --git a/testutil/testutil_test.go b/testutil/testutil_test.go deleted file mode 100644 index b2eaf30..0000000 --- a/testutil/testutil_test.go +++ /dev/null @@ -1,591 +0,0 @@ -package testutil - -import ( - "errors" - "os" - "path/filepath" - "strings" - "testing" - - "github.com/spf13/viper" -) - -func TestCreateTestFile(t *testing.T) { - tests := []struct { - name string - dir string - filename string - content []byte - wantErr bool - }{ - { - name: "create simple test file", - filename: "test.txt", - content: []byte("hello world"), - wantErr: false, - }, - { - name: "create file with empty content", - filename: "empty.txt", - content: []byte{}, - wantErr: false, - }, - { - name: "create file with binary content", - filename: "binary.bin", - content: []byte{0x00, 0xFF, 0x42}, - wantErr: false, - }, - { - name: "create file with subdirectory", - filename: "subdir/test.txt", - content: []byte("nested file"), - wantErr: false, - }, - { - name: "create file with special characters", - filename: "special-file_123.go", - content: []byte("package main"), - wantErr: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Use a temporary directory for each test - tempDir := t.TempDir() - if tt.dir == "" { - tt.dir = tempDir - } - - // Create subdirectory if needed - if strings.Contains(tt.filename, "/") { - subdir := filepath.Join(tt.dir, 
filepath.Dir(tt.filename)) - if err := os.MkdirAll(subdir, DirPermission); err != nil { - t.Fatalf("Failed to create subdirectory: %v", err) - } - } - - // Test CreateTestFile - filePath := CreateTestFile(t, tt.dir, tt.filename, tt.content) - - // Verify file exists - info, err := os.Stat(filePath) - if err != nil { - t.Fatalf("Created file does not exist: %v", err) - } - - // Verify it's a regular file - if !info.Mode().IsRegular() { - t.Errorf("Created path is not a regular file") - } - - // Verify permissions - if info.Mode().Perm() != FilePermission { - t.Errorf("File permissions = %v, want %v", info.Mode().Perm(), FilePermission) - } - - // Verify content - readContent, err := os.ReadFile(filePath) - if err != nil { - t.Fatalf("Failed to read created file: %v", err) - } - if string(readContent) != string(tt.content) { - t.Errorf("File content = %q, want %q", readContent, tt.content) - } - }) - } -} - -func TestCreateTempOutputFile(t *testing.T) { - tests := []struct { - name string - pattern string - }{ - { - name: "simple pattern", - pattern: "output-*.txt", - }, - { - name: "pattern with prefix only", - pattern: "test-", - }, - { - name: "pattern with suffix only", - pattern: "*.json", - }, - { - name: "empty pattern", - pattern: "", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - file, path := CreateTempOutputFile(t, tt.pattern) - defer CloseFile(t, file) - - // Verify file exists - info, err := os.Stat(path) - if err != nil { - t.Fatalf("Temp file does not exist: %v", err) - } - - // Verify it's a regular file - if !info.Mode().IsRegular() { - t.Errorf("Created path is not a regular file") - } - - // Verify we can write to it - testContent := []byte("test content") - if _, err := file.Write(testContent); err != nil { - t.Errorf("Failed to write to temp file: %v", err) - } - - // Verify the path is in a temp directory (any temp directory) - if !strings.Contains(path, os.TempDir()) { - t.Errorf("Temp file not in temp 
directory: %s", path) - } - }) - } -} - -func TestCreateTestDirectory(t *testing.T) { - tests := []struct { - name string - parent string - dir string - }{ - { - name: "simple directory", - dir: "testdir", - }, - { - name: "directory with special characters", - dir: "test-dir_123", - }, - { - name: "nested directory name", - dir: "nested/dir", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tempDir := t.TempDir() - if tt.parent == "" { - tt.parent = tempDir - } - - // For nested directories, create parent first - if strings.Contains(tt.dir, "/") { - parentPath := filepath.Join(tt.parent, filepath.Dir(tt.dir)) - if err := os.MkdirAll(parentPath, DirPermission); err != nil { - t.Fatalf("Failed to create parent directory: %v", err) - } - tt.dir = filepath.Base(tt.dir) - tt.parent = parentPath - } - - dirPath := CreateTestDirectory(t, tt.parent, tt.dir) - - // Verify directory exists - info, err := os.Stat(dirPath) - if err != nil { - t.Fatalf("Created directory does not exist: %v", err) - } - - // Verify it's a directory - if !info.IsDir() { - t.Errorf("Created path is not a directory") - } - - // Verify permissions - if info.Mode().Perm() != DirPermission { - t.Errorf("Directory permissions = %v, want %v", info.Mode().Perm(), DirPermission) - } - - // Verify we can create files in it - testFile := filepath.Join(dirPath, "test.txt") - if err := os.WriteFile(testFile, []byte("test"), FilePermission); err != nil { - t.Errorf("Cannot create file in directory: %v", err) - } - }) - } -} - -func TestCreateTestFiles(t *testing.T) { - tests := []struct { - name string - fileSpecs []FileSpec - wantCount int - }{ - { - name: "create multiple files", - fileSpecs: []FileSpec{ - {Name: "file1.txt", Content: "content1"}, - {Name: "file2.go", Content: "package main"}, - {Name: "file3.json", Content: `{"key": "value"}`}, - }, - wantCount: 3, - }, - { - name: "create files with subdirectories", - fileSpecs: []FileSpec{ - {Name: "src/main.go", Content: 
"package main"}, - {Name: "test/test.go", Content: "package test"}, - }, - wantCount: 2, - }, - { - name: "empty file specs", - fileSpecs: []FileSpec{}, - wantCount: 0, - }, - { - name: "files with empty content", - fileSpecs: []FileSpec{ - {Name: "empty1.txt", Content: ""}, - {Name: "empty2.txt", Content: ""}, - }, - wantCount: 2, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - rootDir := t.TempDir() - - // Create necessary subdirectories - for _, spec := range tt.fileSpecs { - if strings.Contains(spec.Name, "/") { - subdir := filepath.Join(rootDir, filepath.Dir(spec.Name)) - if err := os.MkdirAll(subdir, DirPermission); err != nil { - t.Fatalf("Failed to create subdirectory: %v", err) - } - } - } - - createdFiles := CreateTestFiles(t, rootDir, tt.fileSpecs) - - // Verify count - if len(createdFiles) != tt.wantCount { - t.Errorf("Created %d files, want %d", len(createdFiles), tt.wantCount) - } - - // Verify each file - for i, filePath := range createdFiles { - content, err := os.ReadFile(filePath) - if err != nil { - t.Errorf("Failed to read file %s: %v", filePath, err) - continue - } - if string(content) != tt.fileSpecs[i].Content { - t.Errorf("File %s content = %q, want %q", filePath, content, tt.fileSpecs[i].Content) - } - } - }) - } -} - -func TestResetViperConfig(t *testing.T) { - tests := []struct { - name string - configPath string - preSetup func() - verify func(t *testing.T) - }{ - { - name: "reset with empty config path", - configPath: "", - preSetup: func() { - viper.Set("test.key", "value") - }, - verify: func(t *testing.T) { - if viper.IsSet("test.key") { - t.Error("Viper config not reset properly") - } - }, - }, - { - name: "reset with config path", - configPath: t.TempDir(), - preSetup: func() { - viper.Set("test.key", "value") - }, - verify: func(t *testing.T) { - if viper.IsSet("test.key") { - t.Error("Viper config not reset properly") - } - // Verify config path was added - paths := viper.ConfigFileUsed() - if 
paths == "" { - // This is expected as no config file exists - return - } - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tt.preSetup() - ResetViperConfig(t, tt.configPath) - tt.verify(t) - }) - } -} - -func TestSetupCLIArgs(t *testing.T) { - // Save original args - originalArgs := os.Args - defer func() { - os.Args = originalArgs - }() - - tests := []struct { - name string - srcDir string - outFile string - prefix string - suffix string - concurrency int - wantLen int - }{ - { - name: "basic CLI args", - srcDir: "/src", - outFile: "/out.txt", - prefix: "PREFIX", - suffix: "SUFFIX", - concurrency: 4, - wantLen: 11, - }, - { - name: "empty strings", - srcDir: "", - outFile: "", - prefix: "", - suffix: "", - concurrency: 1, - wantLen: 11, - }, - { - name: "special characters in args", - srcDir: "/path with spaces/src", - outFile: "/path/to/output file.txt", - prefix: "Prefix with\nnewline", - suffix: "Suffix with\ttab", - concurrency: 8, - wantLen: 11, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - SetupCLIArgs(tt.srcDir, tt.outFile, tt.prefix, tt.suffix, tt.concurrency) - - if len(os.Args) != tt.wantLen { - t.Errorf("os.Args length = %d, want %d", len(os.Args), tt.wantLen) - } - - // Verify specific args - if os.Args[0] != "gibidify" { - t.Errorf("Program name = %s, want gibidify", os.Args[0]) - } - if os.Args[2] != tt.srcDir { - t.Errorf("Source dir = %s, want %s", os.Args[2], tt.srcDir) - } - if os.Args[4] != tt.outFile { - t.Errorf("Output file = %s, want %s", os.Args[4], tt.outFile) - } - if os.Args[6] != tt.prefix { - t.Errorf("Prefix = %s, want %s", os.Args[6], tt.prefix) - } - if os.Args[8] != tt.suffix { - t.Errorf("Suffix = %s, want %s", os.Args[8], tt.suffix) - } - if os.Args[10] != string(rune(tt.concurrency+'0')) { - t.Errorf("Concurrency = %s, want %d", os.Args[10], tt.concurrency) - } - }) - } -} - -func TestVerifyContentContains(t *testing.T) { - // Test successful verification - 
t.Run("all substrings present", func(t *testing.T) { - content := "This is a test file with multiple lines" - VerifyContentContains(t, content, []string{"test file", "multiple lines"}) - // If we get here, the test passed - }) - - // Test empty expected substrings - t.Run("empty expected substrings", func(t *testing.T) { - content := "Any content" - VerifyContentContains(t, content, []string{}) - // Should pass with no expected strings - }) - - // For failure cases, we'll test indirectly by verifying behavior - t.Run("verify error reporting", func(t *testing.T) { - // We can't easily test the failure case directly since it calls t.Errorf - // But we can at least verify the function doesn't panic - defer func() { - if r := recover(); r != nil { - t.Errorf("VerifyContentContains panicked: %v", r) - } - }() - - // This would normally fail but we're just checking it doesn't panic - content := "test" - expected := []string{"not found"} - // Create a sub-test that we expect to fail - t.Run("expected_failure", func(t *testing.T) { - t.Skip("Skipping actual failure test") - VerifyContentContains(t, content, expected) - }) - }) -} - -func TestMustSucceed(t *testing.T) { - // Test with nil error (should succeed) - t.Run("nil error", func(t *testing.T) { - MustSucceed(t, nil, "successful operation") - // If we get here, the test passed - }) - - // Test error behavior without causing test failure - t.Run("verify error handling", func(t *testing.T) { - // We can't test the failure case directly since it calls t.Fatalf - // But we can verify the function exists and is callable - defer func() { - if r := recover(); r != nil { - t.Errorf("MustSucceed panicked: %v", r) - } - }() - - // Create a sub-test that we expect to fail - t.Run("expected_failure", func(t *testing.T) { - t.Skip("Skipping actual failure test") - MustSucceed(t, errors.New("test error"), "failed operation") - }) - }) -} - -func TestCloseFile(t *testing.T) { - // Test closing a normal file - t.Run("close normal 
file", func(t *testing.T) { - file, err := os.CreateTemp(t.TempDir(), "test") - if err != nil { - t.Fatalf("Failed to create test file: %v", err) - } - - CloseFile(t, file) - - // Verify file is closed by trying to write to it - _, writeErr := file.Write([]byte("test")) - if writeErr == nil { - t.Error("Expected write to fail on closed file") - } - }) - - // Test that CloseFile doesn't panic on already closed files - // Note: We can't easily test the error case without causing test failure - // since CloseFile calls t.Errorf, which is the expected behavior - t.Run("verify CloseFile function exists and is callable", func(t *testing.T) { - // This test just verifies the function signature and basic functionality - // The error case is tested in integration tests where failures are expected - file, err := os.CreateTemp(t.TempDir(), "test") - if err != nil { - t.Fatalf("Failed to create test file: %v", err) - } - - // Test normal case - file should close successfully - CloseFile(t, file) - - // Verify file is closed - _, writeErr := file.Write([]byte("test")) - if writeErr == nil { - t.Error("Expected write to fail on closed file") - } - }) -} - -// Test thread safety of functions that might be called concurrently -func TestConcurrentOperations(t *testing.T) { - tempDir := t.TempDir() - done := make(chan bool) - - // Test concurrent file creation - for i := 0; i < 5; i++ { - go func(n int) { - CreateTestFile(t, tempDir, string(rune('a'+n))+".txt", []byte("content")) - done <- true - }(i) - } - - // Test concurrent directory creation - for i := 0; i < 5; i++ { - go func(n int) { - CreateTestDirectory(t, tempDir, "dir"+string(rune('0'+n))) - done <- true - }(i) - } - - // Wait for all goroutines - for i := 0; i < 10; i++ { - <-done - } -} - -// Benchmarks -func BenchmarkCreateTestFile(b *testing.B) { - tempDir := b.TempDir() - content := []byte("benchmark content") - - b.ResetTimer() - for i := 0; i < b.N; i++ { - // Use a unique filename for each iteration to avoid 
conflicts - filename := "bench" + string(rune(i%26+'a')) + ".txt" - filePath := filepath.Join(tempDir, filename) - if err := os.WriteFile(filePath, content, FilePermission); err != nil { - b.Fatalf("Failed to write file: %v", err) - } - } -} - -func BenchmarkCreateTestFiles(b *testing.B) { - tempDir := b.TempDir() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - // Create specs with unique names for each iteration - specs := []FileSpec{ - {Name: "file1_" + string(rune(i%26+'a')) + ".txt", Content: "content1"}, - {Name: "file2_" + string(rune(i%26+'a')) + ".txt", Content: "content2"}, - {Name: "file3_" + string(rune(i%26+'a')) + ".txt", Content: "content3"}, - } - - for _, spec := range specs { - filePath := filepath.Join(tempDir, spec.Name) - if err := os.WriteFile(filePath, []byte(spec.Content), FilePermission); err != nil { - b.Fatalf("Failed to write file: %v", err) - } - } - } -} - -func BenchmarkVerifyContentContains(b *testing.B) { - content := strings.Repeat("test content with various words ", 100) - expected := []string{"test", "content", "various", "words"} - - b.ResetTimer() - for i := 0; i < b.N; i++ { - // We can't use the actual function in benchmark since it needs testing.T - // So we'll benchmark the core logic - for _, exp := range expected { - _ = strings.Contains(content, exp) - } - } -} diff --git a/testutil/verification_test.go b/testutil/verification_test.go new file mode 100644 index 0000000..893b934 --- /dev/null +++ b/testutil/verification_test.go @@ -0,0 +1,107 @@ +package testutil + +import ( + "errors" + "os" + "testing" +) + +func TestVerifyContentContains(t *testing.T) { + // Test successful verification + t.Run("all substrings present", func(t *testing.T) { + content := "This is a test file with multiple lines" + VerifyContentContains(t, content, []string{"test file", "multiple lines"}) + // If we get here, the test passed + }) + + // Test empty expected substrings + t.Run("empty expected substrings", func(t *testing.T) { + content := 
"Any content" + VerifyContentContains(t, content, []string{}) + // Should pass with no expected strings + }) + + // For failure cases, we'll test indirectly by verifying behavior + t.Run("verify error reporting", func(t *testing.T) { + // We can't easily test the failure case directly since it calls t.Errorf + // But we can at least verify the function doesn't panic + defer func() { + if r := recover(); r != nil { + t.Errorf("VerifyContentContains panicked: %v", r) + } + }() + + // This would normally fail but we're just checking it doesn't panic + content := "test" + expected := []string{"not found"} + // Create a sub-test that we expect to fail + t.Run("expected_failure", func(t *testing.T) { + t.Skip("Skipping actual failure test") + VerifyContentContains(t, content, expected) + }) + }) +} + +func TestMustSucceed(t *testing.T) { + // Test with nil error (should succeed) + t.Run("nil error", func(t *testing.T) { + MustSucceed(t, nil, "successful operation") + // If we get here, the test passed + }) + + // Test error behavior without causing test failure + t.Run("verify error handling", func(t *testing.T) { + // We can't test the failure case directly since it calls t.Fatalf + // But we can verify the function exists and is callable + defer func() { + if r := recover(); r != nil { + t.Errorf("MustSucceed panicked: %v", r) + } + }() + + // Create a sub-test that we expect to fail + t.Run("expected_failure", func(t *testing.T) { + t.Skip("Skipping actual failure test") + MustSucceed(t, errors.New("test error"), "failed operation") + }) + }) +} + +func TestCloseFile(t *testing.T) { + // Test closing a normal file + t.Run("close normal file", func(t *testing.T) { + file, err := os.CreateTemp(t.TempDir(), "test") + if err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + CloseFile(t, file) + + // Verify file is closed by trying to write to it + _, writeErr := file.Write([]byte("test")) + if writeErr == nil { + t.Error("Expected write to fail on closed 
file") + } + }) + + // Test that CloseFile doesn't panic on already closed files + // Note: We can't easily test the error case without causing test failure + // since CloseFile calls t.Errorf, which is the expected behavior + t.Run("verify CloseFile function exists and is callable", func(t *testing.T) { + // This test just verifies the function signature and basic functionality + // The error case is tested in integration tests where failures are expected + file, err := os.CreateTemp(t.TempDir(), "test") + if err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + // Test normal case - file should close successfully + CloseFile(t, file) + + // Verify file is closed + _, writeErr := file.Write([]byte("test")) + if writeErr == nil { + t.Error("Expected write to fail on closed file") + } + }) +} \ No newline at end of file diff --git a/utils/writers.go b/utils/writers.go new file mode 100644 index 0000000..f2667bd --- /dev/null +++ b/utils/writers.go @@ -0,0 +1,138 @@ +package utils + +import ( + "encoding/json" + "io" + "strings" +) + +// SafeCloseReader safely closes a reader if it implements io.Closer. +// This eliminates the duplicated closeReader methods across all writers. +func SafeCloseReader(reader io.Reader, path string) { + if closer, ok := reader.(io.Closer); ok { + if err := closer.Close(); err != nil { + LogError( + "Failed to close file reader", + WrapError(err, ErrorTypeIO, CodeIOClose, "failed to close file reader").WithFilePath(path), + ) + } + } +} + +// WriteWithErrorWrap performs file writing with consistent error handling. +// This centralizes the common pattern of writing strings with error wrapping. 
+func WriteWithErrorWrap(writer io.Writer, content, errorMsg, filePath string) error { + if _, err := writer.Write([]byte(content)); err != nil { + wrappedErr := WrapError(err, ErrorTypeIO, CodeIOWrite, errorMsg) + if filePath != "" { + wrappedErr = wrappedErr.WithFilePath(filePath) + } + return wrappedErr + } + return nil +} + +// StreamContent provides a common streaming implementation with chunk processing. +// This eliminates the similar streaming patterns across JSON and Markdown writers. +func StreamContent(reader io.Reader, writer io.Writer, chunkSize int, filePath string, processChunk func([]byte) []byte) error { + buf := make([]byte, chunkSize) + for { + n, err := reader.Read(buf) + if n > 0 { + processed := buf[:n] + if processChunk != nil { + processed = processChunk(processed) + } + if _, writeErr := writer.Write(processed); writeErr != nil { + wrappedErr := WrapError(writeErr, ErrorTypeIO, CodeIOWrite, "failed to write content chunk") + if filePath != "" { + wrappedErr = wrappedErr.WithFilePath(filePath) + } + return wrappedErr + } + } + if err == io.EOF { + break + } + if err != nil { + wrappedErr := WrapError(err, ErrorTypeIO, CodeIORead, "failed to read content chunk") + if filePath != "" { + wrappedErr = wrappedErr.WithFilePath(filePath) + } + return wrappedErr + } + } + return nil +} + +// EscapeForJSON escapes content for JSON output using the standard library. +// This replaces the custom escapeJSONString function with a more robust implementation. +func EscapeForJSON(content string) string { + // Use the standard library's JSON marshaling for proper escaping + jsonBytes, _ := json.Marshal(content) + // Remove the surrounding quotes that json.Marshal adds + jsonStr := string(jsonBytes) + if len(jsonStr) >= 2 && jsonStr[0] == '"' && jsonStr[len(jsonStr)-1] == '"' { + return jsonStr[1 : len(jsonStr)-1] + } + return jsonStr +} + +// EscapeForYAML quotes/escapes content for YAML output if needed. +// This centralizes the YAML string quoting logic. 
+func EscapeForYAML(content string) string { + // Quote if contains special characters, spaces, or starts with special chars + needsQuotes := strings.ContainsAny(content, " \t\n\r:{}[]|>-'\"\\") || + strings.HasPrefix(content, "-") || + strings.HasPrefix(content, "?") || + strings.HasPrefix(content, ":") || + content == "" || + content == "true" || content == "false" || + content == "null" || content == "~" + + if needsQuotes { + // Use double quotes and escape internal quotes + escaped := strings.ReplaceAll(content, "\\", "\\\\") + escaped = strings.ReplaceAll(escaped, "\"", "\\\"") + return "\"" + escaped + "\"" + } + return content +} + +// StreamLines provides line-based streaming for YAML content. +// This provides an alternative streaming approach for YAML writers. +func StreamLines(reader io.Reader, writer io.Writer, filePath string, lineProcessor func(string) string) error { + // Read all content first (for small files this is fine) + content, err := io.ReadAll(reader) + if err != nil { + wrappedErr := WrapError(err, ErrorTypeIO, CodeIORead, "failed to read content for line processing") + if filePath != "" { + wrappedErr = wrappedErr.WithFilePath(filePath) + } + return wrappedErr + } + + // Split into lines and process each + lines := strings.Split(string(content), "\n") + for i, line := range lines { + processedLine := line + if lineProcessor != nil { + processedLine = lineProcessor(line) + } + + // Write line with proper line ending (except for last empty line) + lineToWrite := processedLine + if i < len(lines)-1 || line != "" { + lineToWrite += "\n" + } + + if _, writeErr := writer.Write([]byte(lineToWrite)); writeErr != nil { + wrappedErr := WrapError(writeErr, ErrorTypeIO, CodeIOWrite, "failed to write processed line") + if filePath != "" { + wrappedErr = wrappedErr.WithFilePath(filePath) + } + return wrappedErr + } + } + return nil +} \ No newline at end of file