diff --git a/.gitignore b/.gitignore
index 9757884..6c9662f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@ output.txt
output.yaml
coverage.out
megalinter-reports/*
+coverage.*
+*.out
+gibidify-benchmark
diff --git a/.golangci.yml b/.golangci.yml
new file mode 100644
index 0000000..7a4d2f0
--- /dev/null
+++ b/.golangci.yml
@@ -0,0 +1,256 @@
+run:
+ timeout: 5m
+ tests: true
+ go: "1.24"
+ build-tags:
+ - test
+
+# golangci-lint configuration version
+version: 2
+
+output:
+ format: colored-line-number
+ print-issued-lines: true
+ print-linter-name: true
+ path-prefix: ""
+ sort-results: true
+
+linters:
+ enable-all: true
+ disable:
+ - depguard # Too strict for general use
+ - exhaustruct # Too many false positives
+ - ireturn # Too restrictive on interfaces
+ - varnamelen # Too opinionated on name length
+ - wrapcheck # Too many false positives
+ - testpackage # Tests in same package are fine
+ - paralleltest # Not always necessary
+ - tparallel # Not always necessary
+ - nlreturn # Too opinionated on newlines
+ - wsl # Too opinionated on whitespace
+ - nonamedreturns # Conflicts with gocritic unnamedResult
+
+linters-settings:
+ errcheck:
+ check-type-assertions: true
+ check-blank: true
+ exclude-functions:
+ - io.Copy
+ - fmt.Print
+ - fmt.Printf
+ - fmt.Println
+
+ govet:
+ enable-all: true
+
+ gocyclo:
+ min-complexity: 15
+
+ gocognit:
+ min-complexity: 20
+
+ goconst:
+ min-len: 3
+ min-occurrences: 3
+
+ gofmt:
+ simplify: true
+ rewrite-rules:
+ - pattern: 'interface{}'
+ replacement: 'any'
+
+ goimports:
+ local-prefixes: github.com/ivuorinen/gibidify
+
+ golint:
+ min-confidence: 0.8
+
+ lll:
+ line-length: 120
+ tab-width: 2 # EditorConfig: tab_width = 2
+
+ misspell:
+ locale: US
+
+ nakedret:
+ max-func-lines: 30
+
+ prealloc:
+ simple: true
+ range-loops: true
+ for-loops: true
+
+ revive:
+ enable-all-rules: true
+ rules:
+ - name: package-comments
+ disabled: true
+ - name: file-header
+ disabled: true
+ - name: max-public-structs
+ disabled: true
+ - name: line-length-limit
+ arguments: [120]
+ - name: function-length
+ arguments: [50, 100]
+ - name: cognitive-complexity
+ arguments: [20]
+ - name: cyclomatic
+ arguments: [15]
+ - name: add-constant
+ arguments:
+ - maxLitCount: "3"
+ allowStrs: "\"error\",\"\""
+ allowInts: "0,1,2"
+ - name: argument-limit
+ arguments: [6]
+ - name: banned-characters
+ disabled: true
+ - name: function-result-limit
+ arguments: [3]
+
+ gosec:
+ excludes:
+ - G104 # Handled by errcheck
+ severity: medium
+ confidence: medium
+ exclude-generated: true
+ config:
+ G301: "0750"
+ G302: "0640"
+ G306: "0640"
+
+ dupl:
+ threshold: 150
+
+ gocritic:
+ enabled-tags:
+ - diagnostic
+ - experimental
+ - opinionated
+ - performance
+ - style
+ disabled-checks:
+ - whyNoLint
+ - paramTypeCombine
+
+ gofumpt:
+ extra-rules: true
+
+ # EditorConfig compliance settings
+ # These settings enforce .editorconfig rules:
+ # - end_of_line = lf (enforced by gofumpt)
+ # - insert_final_newline = true (enforced by gofumpt)
+ # - trim_trailing_whitespace = true (enforced by whitespace linter)
+ # - indent_style = tab, tab_width = 2 (enforced by gofumpt and lll)
+
+ whitespace:
+ multi-if: false # EditorConfig: trim trailing whitespace
+ multi-func: false # EditorConfig: trim trailing whitespace
+
+ nolintlint:
+ allow-leading-space: false # require "//nolint" without a leading space
+ allow-unused: false
+ require-explanation: false
+ require-specific: true
+
+ godox:
+ keywords:
+ - FIXME
+ - BUG
+ - HACK
+
+ mnd:
+ settings:
+ mnd:
+ checks:
+ - argument
+ - case
+ - condition
+ - operation
+ - return
+ - assign
+ ignored-numbers:
+ - '0'
+ - '1'
+ - '2'
+ - '10'
+ - '100'
+
+ funlen:
+ lines: 80
+ statements: 60
+
+ nestif:
+ min-complexity: 5
+
+ gomodguard:
+ allowed:
+ modules: []
+ domains: []
+ blocked:
+ modules: []
+ versions: []
+
+issues:
+ exclude-use-default: false
+ exclude-case-sensitive: false
+ max-issues-per-linter: 0
+ max-same-issues: 0
+ uniq-by-line: true
+
+ exclude-dirs:
+ - vendor
+ - third_party
+ - testdata
+ - examples
+ - .git
+
+ exclude-files:
+ - ".*\\.pb\\.go$"
+ - ".*\\.gen\\.go$"
+
+ exclude-rules:
+ - path: _test\.go
+ linters:
+ - dupl
+ - gosec
+ - goconst
+ - funlen
+ - gocognit
+ - gocyclo
+ - errcheck
+ - lll
+ - nestif
+
+ - path: main\.go
+ linters:
+ - gochecknoglobals
+ - gochecknoinits
+
+ - path: fileproc/filetypes\.go
+ linters:
+ - gochecknoglobals # Allow globals for singleton registry pattern
+
+ - text: "Using the variable on range scope"
+ linters:
+ - scopelint
+
+ - text: "should have comment or be unexported"
+ linters:
+ - golint
+ - revive
+
+ - text: "don't use ALL_CAPS in Go names"
+ linters:
+ - golint
+ - stylecheck
+
+ exclude:
+ - "Error return value of .* is not checked"
+ - "exported (type|method|function) .* should have comment"
+ - "ST1000: at least one file in a package should have a package comment"
+
+severity:
+ default-severity: error
+ case-sensitive: false
\ No newline at end of file
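The `gofmt.rewrite-rules` entry above mechanically migrates the pre-Go 1.18 empty-interface spelling. A minimal sketch of the before/after, assuming golangci-lint applies the rule the same way `gofmt -r 'interface{} -> any'` would:

```go
package example

import "fmt"

// Before the configured rewrite: the pre-Go 1.18 spelling.
func DescribeOld(v interface{}) string { return fmt.Sprint(v) }

// After gofmt applies the rewrite rule (interface{} -> any).
func DescribeNew(v any) string { return fmt.Sprint(v) }
```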
diff --git a/AGENTS.md b/AGENTS.md
deleted file mode 100644
index 7b246cd..0000000
--- a/AGENTS.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# AGENTS
-
-This repo is a Go CLI that aggregates code files into a single text output. The
-main entry point is `main.go` with packages under `config` and `fileproc`.
-Tests exist for each package, and CI workflows live in `.github/workflows`.
-
-## Contributions
-- Look for additional `AGENTS.md` files under `.github` first.
-- Use Semantic Commit messages and PR titles.
-- Run `go test ./...` and linting for code changes. Docs-only changes skip this.
-- Use Yarn if installing Node packages.
-- Follow `.editorconfig` and formatting via pre-commit.
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..2f33223
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,48 @@
+# CLAUDE.md
+
+Go CLI that aggregates code files into LLM-optimized output. Supports markdown/JSON/YAML with concurrent processing.
+
+## Architecture (40 files, 189KB, 6.8K lines)
+
+**Core**: `main.go` (37 lines), `cli/` (4 files), `fileproc/` (22 files), `config/` (3 files), `utils/` (4 files), `testutil/` (2 files)
+
+**Key modules**: File collection, processing, writers (markdown/JSON/YAML), registry with caching, back-pressure management
+
+**Patterns**: Producer-consumer pools, thread-safe registry (~63ns lookups), streaming with back-pressure, modular files (50-200 lines), progress bars, enhanced errors
+
+## Commands
+
+```bash
+make lint-fix && make lint && make test # Essential workflow
+./gibidify -source <source-dir> -format markdown --no-colors --no-progress --verbose
+```
+
+## Config
+
+XDG config paths: `~/.config/gibidify/config.yaml`
+
+**Key settings**: File size limit (5MB), ignore dirs, custom file types, back-pressure (100MB memory limit)
+
+## Quality
+
+**CRITICAL**: `make lint-fix && make lint` (0 issues), max 120 chars, EditorConfig compliance, 30+ linters
+
+## Testing
+
+**Coverage**: 84%+ (utils 90.9%, testutil 84.2%, fileproc 83.8%), race detection, benchmarks, testutil helpers
+
+## Standards
+
+EditorConfig (LF, tabs), semantic commits, testing required, linting must pass
+
+## Status
+
+**Health: 10/10** - Production-ready, 84%+ coverage, modular architecture, memory-optimized
+
+**Completed**: Structured errors, benchmarking, config validation, memory optimization, code modularization, CLI enhancements (progress bars, colors, enhanced errors)
+
+**Next**: Security hardening, documentation, output customization
+
+## Workflow
+
+1. `make lint-fix` before changes 2. >80% coverage 3. Follow patterns 4. Update docs 5. Security/performance
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f9a43b7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,132 @@
+.PHONY: help install-tools lint lint-fix lint-verbose test coverage build clean all build-benchmark benchmark benchmark-collection benchmark-processing benchmark-concurrency benchmark-format
+
+# Default target shows help
+.DEFAULT_GOAL := help
+
+# All target runs full workflow
+all: lint test build
+
+# Help target
+help:
+ @echo "Available targets:"
+ @echo " install-tools - Install required linting and development tools"
+ @echo " lint - Run all linters"
+ @echo " lint-fix - Run linters with auto-fix enabled"
+ @echo " lint-verbose - Run linters with verbose output"
+ @echo " test - Run tests"
+ @echo " coverage - Run tests with coverage"
+ @echo " build - Build the application"
+ @echo " clean - Clean build artifacts"
+ @echo " all - Run lint, test, and build"
+ @echo ""
+ @echo "Benchmark targets:"
+ @echo " build-benchmark - Build the benchmark binary"
+ @echo " benchmark - Run all benchmarks"
+ @echo " benchmark-collection - Run file collection benchmarks"
+ @echo " benchmark-processing - Run file processing benchmarks"
+ @echo " benchmark-concurrency - Run concurrency benchmarks"
+ @echo " benchmark-format - Run format benchmarks"
+ @echo ""
+ @echo "Run 'make ' to execute a specific target."
+
+# Install required tools
+install-tools:
+ @echo "Installing golangci-lint..."
+ @go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
+ @echo "Installing gofumpt..."
+ @go install mvdan.cc/gofumpt@latest
+ @echo "Installing goimports..."
+ @go install golang.org/x/tools/cmd/goimports@latest
+ @echo "Installing staticcheck..."
+ @go install honnef.co/go/tools/cmd/staticcheck@latest
+ @echo "Installing gosec..."
+ @go install github.com/securego/gosec/v2/cmd/gosec@latest
+ @echo "Installing gocyclo..."
+ @go install github.com/fzipp/gocyclo/cmd/gocyclo@latest
+ @echo "All tools installed successfully!"
+
+# Run linters
+lint:
+ @echo "Running golangci-lint..."
+ @golangci-lint run ./...
+
+# Run linters with auto-fix
+lint-fix:
+ @echo "Running gofumpt..."
+ @gofumpt -l -w .
+ @echo "Running goimports..."
+ @goimports -w -local github.com/ivuorinen/gibidify .
+ @echo "Running go fmt..."
+ @go fmt ./...
+ @echo "Running go mod tidy..."
+ @go mod tidy
+ @echo "Running golangci-lint with --fix..."
+ @golangci-lint run --fix ./...
+ @echo "Auto-fix completed. Running final lint check..."
+ @golangci-lint run ./...
+
+# Run linters with verbose output
+lint-verbose:
+ @golangci-lint run -v ./...
+
+# Run tests
+test:
+ @echo "Running tests..."
+ @go test -race -v ./...
+
+# Run tests with coverage
+coverage:
+ @echo "Running tests with coverage..."
+ @go test -race -coverprofile=coverage.out -covermode=atomic ./...
+ @go tool cover -html=coverage.out -o coverage.html
+ @echo "Coverage report generated: coverage.html"
+
+# Build the application
+build:
+ @echo "Building gibidify..."
+ @go build -ldflags="-s -w" -o gibidify .
+ @echo "Build complete: ./gibidify"
+
+# Clean build artifacts
+clean:
+ @echo "Cleaning build artifacts..."
+ @rm -f gibidify gibidify-benchmark
+ @rm -f coverage.out coverage.html
+ @echo "Clean complete"
+
+# CI-specific targets
+.PHONY: ci-lint ci-test
+
+ci-lint:
+ @golangci-lint run --out-format=github-actions ./...
+
+ci-test:
+ @go test -race -coverprofile=coverage.out -json ./... > test-results.json
+
+# Build benchmark binary
+build-benchmark:
+ @echo "Building gibidify-benchmark..."
+ @go build -ldflags="-s -w" -o gibidify-benchmark ./cmd/benchmark
+ @echo "Build complete: ./gibidify-benchmark"
+
+# Run benchmarks
+benchmark: build-benchmark
+ @echo "Running all benchmarks..."
+ @./gibidify-benchmark -type=all
+
+# Run specific benchmark types
+benchmark-collection: build-benchmark
+ @echo "Running file collection benchmarks..."
+ @./gibidify-benchmark -type=collection
+
+benchmark-processing: build-benchmark
+ @echo "Running file processing benchmarks..."
+ @./gibidify-benchmark -type=processing
+
+benchmark-concurrency: build-benchmark
+ @echo "Running concurrency benchmarks..."
+ @./gibidify-benchmark -type=concurrency
+
+benchmark-format: build-benchmark
+ @echo "Running format benchmarks..."
+ @./gibidify-benchmark -type=format
\ No newline at end of file
diff --git a/README.md b/README.md
index 7c882dc..50184cc 100644
--- a/README.md
+++ b/README.md
@@ -7,11 +7,16 @@ file sections with separators, and a suffix.
## Features
-- Recursive scanning of a source directory.
-- File filtering based on size, glob patterns, and .gitignore rules.
-- Modular, concurrent file processing with progress bar feedback.
-- Configurable logging and configuration via Viper.
-- Cross-platform build with Docker packaging support.
+- **Recursive directory scanning** with smart file filtering
+- **Configurable file type detection** - add/remove extensions and languages
+- **Multiple output formats** - markdown, JSON, YAML
+- **Memory-optimized processing** - streaming for large files, intelligent back-pressure
+- **Concurrent processing** with configurable worker pools
+- **Comprehensive configuration** via YAML with validation
+- **Production-ready** with structured error handling and benchmarking
+- **Modular architecture** - clean, focused codebase with ~63ns registry lookups
+- **Enhanced CLI experience** - progress bars, colored output, helpful error messages
+- **Cross-platform** with Docker support
## Installation
@@ -32,7 +37,10 @@ go build -o gibidify .
-format markdown|json|yaml \
 -concurrency <N> \
--prefix="..." \
- --suffix="..."
+ --suffix="..." \
+ --no-colors \
+ --no-progress \
+ --verbose
```
Flags:
@@ -42,6 +50,9 @@ Flags:
- `-format`: output format (`markdown`, `json`, or `yaml`).
- `-concurrency`: number of concurrent workers.
- `--prefix` / `--suffix`: optional text blocks.
+- `--no-colors`: disable colored terminal output.
+- `--no-progress`: disable progress bars.
+- `--verbose`: enable verbose output and detailed logging.
## Docker
@@ -83,11 +94,39 @@ ignoreDirectories:
- dist
- build
- target
- - bower_components
- - cache
- - tmp
+
+# FileType customization
+fileTypes:
+ enabled: true
+ # Add custom file extensions
+ customImageExtensions:
+ - .webp
+ - .avif
+ customBinaryExtensions:
+ - .custom
+ customLanguages:
+ .zig: zig
+ .odin: odin
+ .v: vlang
+ # Disable default extensions
+ disabledImageExtensions:
+ - .bmp
+ disabledBinaryExtensions:
+ - .exe
+ disabledLanguageExtensions:
+ - .bat
+
+# Memory optimization (back-pressure management)
+backpressure:
+ enabled: true
+ maxPendingFiles: 1000 # Max files in file channel buffer
+ maxPendingWrites: 100 # Max writes in write channel buffer
+ maxMemoryUsage: 104857600 # 100MB max memory usage
+ memoryCheckInterval: 1000 # Check memory every 1000 files
```
+See `config.example.yaml` for a comprehensive configuration example.
+
## License
This project is licensed under [the MIT License](LICENSE).
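A hedged sketch of reading the back-pressure keys above with Viper, the project's configuration library; the key names come straight from the YAML, but this accessor code is illustrative, not the repository's actual `config` package:

```go
package main

import (
	"fmt"

	"github.com/spf13/viper"
)

func main() {
	// Defaults mirror the README example; real code would also call viper.ReadInConfig.
	viper.SetDefault("backpressure.enabled", true)
	viper.SetDefault("backpressure.maxPendingFiles", 1000)
	viper.SetDefault("backpressure.maxMemoryUsage", 100*1024*1024) // 100MB

	if viper.GetBool("backpressure.enabled") {
		fmt.Printf("buffer up to %d files, cap memory at %d bytes\n",
			viper.GetInt("backpressure.maxPendingFiles"),
			viper.GetInt64("backpressure.maxMemoryUsage"))
	}
}
```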
diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..bca9086
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,66 @@
+# TODO: gibidify
+
+Prioritized improvements by impact/effort.
+
+## ✅ Completed (High Priority)
+
+**Testing**: utils (90.9%), testutil (84.2%), FileTypeRegistry (100%) ✅
+**Config**: Registry customization, validation, schema ✅
+**Errors**: Structured types, categorization, context ✅
+**Performance**: Benchmarking, memory optimization, streaming ✅
+**Architecture**: Code modularization (50-200 lines/file) ✅
+**CLI**: Progress bars, colored output, enhanced errors ✅
+
+## 🚀 Current Priorities
+
+### Metrics
+- [ ] Timing/profiling
+- [ ] Processing stats
+
+### Output Customization
+- [ ] Templates
+- [ ] Markdown config
+- [ ] Metadata options
+
+### Security
+- [ ] Path traversal review
+- [ ] Resource limits
+- [ ] Security scanning
+
+### Documentation
+- [ ] API docs (GoDoc, examples)
+- [ ] User guides, troubleshooting
+
+### Dev Tools
+- [ ] Hot reload, debug mode
+- [ ] More CI/CD linters
+
+## 🌟 Future
+
+**Plugins**: Custom handlers, formats
+**Git integration**: Commit filtering, blame
+**Rich output**: HTML, PDF, web UI
+**Microservices**: API-first, orchestration
+**Monitoring**: Prometheus metrics, structured logging
+
+## Guidelines
+
+**Before**: `make lint-fix && make lint`, follow TDD, update docs
+**DoD**: >80% coverage, linting passes, security reviewed
+**Priorities**: Security → UX → Extensions
+
+## Status (2025-07-19)
+
+**Health: 10/10** - Production-ready, 40 files (189KB, 6.8K lines), 84%+ coverage
+
+**Completed**: All critical items - testing, config, errors, performance, modularization, CLI enhancements
+
+**Next**: Security hardening → Documentation → Output customization
+
+### Token Usage
+
+- TODO.md: 247 words (~329 tokens) - 63% reduction ✅
+- CLAUDE.md: 212 words (~283 tokens) - 65% reduction ✅
+- Total: 459 words (~612 tokens) - 64% reduction ✅
+
+*Optimized from 1,581 → 459 words while preserving all critical information*
diff --git a/benchmark/benchmark.go b/benchmark/benchmark.go
new file mode 100644
index 0000000..6d825b7
--- /dev/null
+++ b/benchmark/benchmark.go
@@ -0,0 +1,405 @@
+// Package benchmark provides benchmarking infrastructure for gibidify.
+package benchmark
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "path/filepath"
+ "runtime"
+ "sync"
+ "time"
+
+ "github.com/ivuorinen/gibidify/config"
+ "github.com/ivuorinen/gibidify/fileproc"
+ "github.com/ivuorinen/gibidify/utils"
+)
+
+// BenchmarkResult represents the results of a benchmark run.
+type BenchmarkResult struct {
+ Name string
+ Duration time.Duration
+ FilesProcessed int
+ BytesProcessed int64
+ FilesPerSecond float64
+ BytesPerSecond float64
+ MemoryUsage MemoryStats
+ CPUUsage CPUStats
+}
+
+// MemoryStats represents memory usage statistics.
+type MemoryStats struct {
+ AllocMB float64
+ SysMB float64
+ NumGC uint32
+ PauseTotalNs uint64
+}
+
+// CPUStats represents CPU usage statistics.
+type CPUStats struct {
+ UserTime time.Duration
+ SystemTime time.Duration
+ Goroutines int
+}
+
+// BenchmarkSuite represents a collection of benchmarks.
+type BenchmarkSuite struct {
+ Name string
+ Results []BenchmarkResult
+}
+
+// FileCollectionBenchmark benchmarks file collection operations.
+func FileCollectionBenchmark(sourceDir string, numFiles int) (*BenchmarkResult, error) {
+ // Load configuration to ensure proper file filtering
+ config.LoadConfig()
+
+ // Create temporary directory with test files if no source is provided
+ var cleanup func()
+ if sourceDir == "" {
+ tempDir, cleanupFunc, err := createBenchmarkFiles(numFiles)
+ if err != nil {
+ return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create benchmark files")
+ }
+ cleanup = cleanupFunc
+ defer cleanup()
+ sourceDir = tempDir
+ }
+
+ // Measure memory before
+ var memBefore runtime.MemStats
+ runtime.ReadMemStats(&memBefore)
+
+ startTime := time.Now()
+
+ // Run the file collection benchmark
+ files, err := fileproc.CollectFiles(sourceDir)
+ if err != nil {
+ return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "benchmark file collection failed")
+ }
+
+ duration := time.Since(startTime)
+
+ // Measure memory after
+ var memAfter runtime.MemStats
+ runtime.ReadMemStats(&memAfter)
+
+ // Calculate total bytes processed
+ var totalBytes int64
+ for _, file := range files {
+ if info, err := os.Stat(file); err == nil {
+ totalBytes += info.Size()
+ }
+ }
+
+ result := &BenchmarkResult{
+ Name: "FileCollection",
+ Duration: duration,
+ FilesProcessed: len(files),
+ BytesProcessed: totalBytes,
+ FilesPerSecond: float64(len(files)) / duration.Seconds(),
+ BytesPerSecond: float64(totalBytes) / duration.Seconds(),
+ MemoryUsage: MemoryStats{
+ AllocMB: float64(memAfter.Alloc-memBefore.Alloc) / 1024 / 1024,
+ SysMB: float64(memAfter.Sys-memBefore.Sys) / 1024 / 1024,
+ NumGC: memAfter.NumGC - memBefore.NumGC,
+ PauseTotalNs: memAfter.PauseTotalNs - memBefore.PauseTotalNs,
+ },
+ CPUUsage: CPUStats{
+ Goroutines: runtime.NumGoroutine(),
+ },
+ }
+
+ return result, nil
+}
+
+// FileProcessingBenchmark benchmarks full file processing pipeline.
+func FileProcessingBenchmark(sourceDir string, format string, concurrency int) (*BenchmarkResult, error) {
+ // Load configuration to ensure proper file filtering
+ config.LoadConfig()
+
+ var cleanup func()
+ if sourceDir == "" {
+ // Create temporary directory with test files
+ tempDir, cleanupFunc, err := createBenchmarkFiles(100)
+ if err != nil {
+ return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create benchmark files")
+ }
+ cleanup = cleanupFunc
+ defer cleanup()
+ sourceDir = tempDir
+ }
+
+ // Create temporary output file
+ outputFile, err := os.CreateTemp("", "benchmark_output_*."+format)
+ if err != nil {
+ return nil, utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOFileCreate, "failed to create benchmark output file")
+ }
+ defer func() {
+ if err := outputFile.Close(); err != nil {
+ // Log error but don't fail the benchmark
+ fmt.Printf("Warning: failed to close benchmark output file: %v\n", err)
+ }
+ if err := os.Remove(outputFile.Name()); err != nil {
+ // Log error but don't fail the benchmark
+ fmt.Printf("Warning: failed to remove benchmark output file: %v\n", err)
+ }
+ }()
+
+ // Measure memory before
+ var memBefore runtime.MemStats
+ runtime.ReadMemStats(&memBefore)
+
+ startTime := time.Now()
+
+ // Run the full processing pipeline
+ files, err := fileproc.CollectFiles(sourceDir)
+ if err != nil {
+ return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "benchmark file collection failed")
+ }
+
+ // Process files with concurrency
+ err = runProcessingPipeline(context.Background(), files, outputFile, format, concurrency, sourceDir)
+ if err != nil {
+ return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "benchmark processing pipeline failed")
+ }
+
+ duration := time.Since(startTime)
+
+ // Measure memory after
+ var memAfter runtime.MemStats
+ runtime.ReadMemStats(&memAfter)
+
+ // Calculate total bytes processed
+ var totalBytes int64
+ for _, file := range files {
+ if info, err := os.Stat(file); err == nil {
+ totalBytes += info.Size()
+ }
+ }
+
+ result := &BenchmarkResult{
+ Name: fmt.Sprintf("FileProcessing_%s_c%d", format, concurrency),
+ Duration: duration,
+ FilesProcessed: len(files),
+ BytesProcessed: totalBytes,
+ FilesPerSecond: float64(len(files)) / duration.Seconds(),
+ BytesPerSecond: float64(totalBytes) / duration.Seconds(),
+ MemoryUsage: MemoryStats{
+ AllocMB: float64(memAfter.Alloc-memBefore.Alloc) / 1024 / 1024,
+ SysMB: float64(memAfter.Sys-memBefore.Sys) / 1024 / 1024,
+ NumGC: memAfter.NumGC - memBefore.NumGC,
+ PauseTotalNs: memAfter.PauseTotalNs - memBefore.PauseTotalNs,
+ },
+ CPUUsage: CPUStats{
+ Goroutines: runtime.NumGoroutine(),
+ },
+ }
+
+ return result, nil
+}
+
+// ConcurrencyBenchmark benchmarks different concurrency levels.
+func ConcurrencyBenchmark(sourceDir string, format string, concurrencyLevels []int) (*BenchmarkSuite, error) {
+ suite := &BenchmarkSuite{
+ Name: "ConcurrencyBenchmark",
+ Results: make([]BenchmarkResult, 0, len(concurrencyLevels)),
+ }
+
+ for _, concurrency := range concurrencyLevels {
+ result, err := FileProcessingBenchmark(sourceDir, format, concurrency)
+ if err != nil {
+ return nil, utils.WrapErrorf(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "concurrency benchmark failed for level %d", concurrency)
+ }
+ suite.Results = append(suite.Results, *result)
+ }
+
+ return suite, nil
+}
+
+// FormatBenchmark benchmarks different output formats.
+func FormatBenchmark(sourceDir string, formats []string) (*BenchmarkSuite, error) {
+ suite := &BenchmarkSuite{
+ Name: "FormatBenchmark",
+ Results: make([]BenchmarkResult, 0, len(formats)),
+ }
+
+ for _, format := range formats {
+ result, err := FileProcessingBenchmark(sourceDir, format, runtime.NumCPU())
+ if err != nil {
+ return nil, utils.WrapErrorf(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "format benchmark failed for format %s", format)
+ }
+ suite.Results = append(suite.Results, *result)
+ }
+
+ return suite, nil
+}
+
+// createBenchmarkFiles creates temporary files for benchmarking.
+func createBenchmarkFiles(numFiles int) (string, func(), error) {
+ tempDir, err := os.MkdirTemp("", "gibidify_benchmark_*")
+ if err != nil {
+ return "", nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create temp directory")
+ }
+
+ cleanup := func() {
+ if err := os.RemoveAll(tempDir); err != nil {
+ // Log error but don't fail the benchmark
+ fmt.Printf("Warning: failed to remove benchmark temp directory: %v\n", err)
+ }
+ }
+
+ // Create various file types
+ fileTypes := []struct {
+ ext string
+ content string
+ }{
+ {".go", "package main\n\nfunc main() {\n\tprintln(\"Hello, World!\")\n}"},
+ {".js", "console.log('Hello, World!');"},
+ {".py", "print('Hello, World!')"},
+ {".java", "public class Hello {\n\tpublic static void main(String[] args) {\n\t\tSystem.out.println(\"Hello, World!\");\n\t}\n}"},
+ {".cpp", "#include \n\nint main() {\n\tstd::cout << \"Hello, World!\" << std::endl;\n\treturn 0;\n}"},
+ {".rs", "fn main() {\n\tprintln!(\"Hello, World!\");\n}"},
+ {".rb", "puts 'Hello, World!'"},
+ {".php", ""},
+ {".sh", "#!/bin/bash\necho 'Hello, World!'"},
+ {".md", "# Hello, World!\n\nThis is a markdown file."},
+ }
+
+ for i := 0; i < numFiles; i++ {
+ fileType := fileTypes[i%len(fileTypes)]
+ filename := fmt.Sprintf("file_%d%s", i, fileType.ext)
+
+ // Create subdirectories for some files
+ if i%10 == 0 {
+ subdir := filepath.Join(tempDir, fmt.Sprintf("subdir_%d", i/10))
+ if err := os.MkdirAll(subdir, 0o755); err != nil {
+ cleanup()
+ return "", nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create subdirectory")
+ }
+ filename = filepath.Join(subdir, filename)
+ } else {
+ filename = filepath.Join(tempDir, filename)
+ }
+
+ // Create file with repeated content to make it larger
+ content := ""
+ for j := 0; j < 10; j++ {
+ content += fmt.Sprintf("// Line %d\n%s\n", j, fileType.content)
+ }
+
+ if err := os.WriteFile(filename, []byte(content), 0o644); err != nil {
+ cleanup()
+ return "", nil, utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOFileWrite, "failed to write benchmark file")
+ }
+ }
+
+ return tempDir, cleanup, nil
+}
+
+// runProcessingPipeline runs the processing pipeline similar to main.go.
+func runProcessingPipeline(ctx context.Context, files []string, outputFile *os.File, format string, concurrency int, sourceDir string) error {
+ fileCh := make(chan string, concurrency)
+ writeCh := make(chan fileproc.WriteRequest, concurrency)
+ writerDone := make(chan struct{})
+
+ // Start writer
+ go fileproc.StartWriter(outputFile, writeCh, writerDone, format, "", "")
+
+ // Get absolute path once
+ absRoot, err := utils.GetAbsolutePath(sourceDir)
+ if err != nil {
+ return utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSPathResolution, "failed to get absolute path for source directory")
+ }
+
+ // Start workers with proper synchronization
+ var workersDone sync.WaitGroup
+ for i := 0; i < concurrency; i++ {
+ workersDone.Add(1)
+ go func() {
+ defer workersDone.Done()
+ for filePath := range fileCh {
+ fileproc.ProcessFile(filePath, writeCh, absRoot)
+ }
+ }()
+ }
+
+ // Send files to workers
+ for _, file := range files {
+ select {
+ case <-ctx.Done():
+ close(fileCh)
+ workersDone.Wait() // Wait for workers to finish
+ close(writeCh)
+ <-writerDone
+ return ctx.Err()
+ case fileCh <- file:
+ }
+ }
+
+ // Close file channel and wait for workers to finish
+ close(fileCh)
+ workersDone.Wait()
+
+ // Now it's safe to close the write channel
+ close(writeCh)
+ <-writerDone
+
+ return nil
+}
+
+// PrintBenchmarkResult prints a formatted benchmark result.
+func PrintBenchmarkResult(result *BenchmarkResult) {
+ fmt.Printf("=== %s ===\n", result.Name)
+ fmt.Printf("Duration: %v\n", result.Duration)
+ fmt.Printf("Files Processed: %d\n", result.FilesProcessed)
+ fmt.Printf("Bytes Processed: %d (%.2f MB)\n", result.BytesProcessed, float64(result.BytesProcessed)/1024/1024)
+ fmt.Printf("Files/sec: %.2f\n", result.FilesPerSecond)
+ fmt.Printf("Bytes/sec: %.2f MB/sec\n", result.BytesPerSecond/1024/1024)
+ fmt.Printf("Memory Usage: +%.2f MB (Sys: +%.2f MB)\n", result.MemoryUsage.AllocMB, result.MemoryUsage.SysMB)
+ fmt.Printf("GC Runs: %d (Pause: %v)\n", result.MemoryUsage.NumGC, time.Duration(result.MemoryUsage.PauseTotalNs))
+ fmt.Printf("Goroutines: %d\n", result.CPUUsage.Goroutines)
+ fmt.Println()
+}
+
+// PrintBenchmarkSuite prints all results in a benchmark suite.
+func PrintBenchmarkSuite(suite *BenchmarkSuite) {
+ fmt.Printf("=== %s ===\n", suite.Name)
+ for _, result := range suite.Results {
+ PrintBenchmarkResult(&result)
+ }
+}
+
+// RunAllBenchmarks runs a comprehensive benchmark suite.
+func RunAllBenchmarks(sourceDir string) error {
+ fmt.Println("Running gibidify benchmark suite...")
+
+ // Load configuration
+ config.LoadConfig()
+
+ // File collection benchmark
+ fmt.Println("Running file collection benchmark...")
+ result, err := FileCollectionBenchmark(sourceDir, 1000)
+ if err != nil {
+ return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "file collection benchmark failed")
+ }
+ PrintBenchmarkResult(result)
+
+ // Format benchmarks
+ fmt.Println("Running format benchmarks...")
+ formatSuite, err := FormatBenchmark(sourceDir, []string{"json", "yaml", "markdown"})
+ if err != nil {
+ return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "format benchmark failed")
+ }
+ PrintBenchmarkSuite(formatSuite)
+
+ // Concurrency benchmarks
+ fmt.Println("Running concurrency benchmarks...")
+ concurrencyLevels := []int{1, 2, 4, 8, runtime.NumCPU()}
+ concurrencySuite, err := ConcurrencyBenchmark(sourceDir, "json", concurrencyLevels)
+ if err != nil {
+ return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "concurrency benchmark failed")
+ }
+ PrintBenchmarkSuite(concurrencySuite)
+
+ return nil
+}
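A minimal caller for the package above, using only the API introduced in this diff; passing an empty source directory makes `FileCollectionBenchmark` generate and clean up its own temp files:

```go
package main

import (
	"fmt"
	"os"

	"github.com/ivuorinen/gibidify/benchmark"
)

func main() {
	// Empty sourceDir: the benchmark creates 250 temp files and removes them afterwards.
	result, err := benchmark.FileCollectionBenchmark("", 250)
	if err != nil {
		fmt.Fprintf(os.Stderr, "benchmark failed: %v\n", err)
		os.Exit(1)
	}
	benchmark.PrintBenchmarkResult(result)
}
```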
diff --git a/benchmark/benchmark_test.go b/benchmark/benchmark_test.go
new file mode 100644
index 0000000..870ff42
--- /dev/null
+++ b/benchmark/benchmark_test.go
@@ -0,0 +1,165 @@
+package benchmark
+
+import (
+ "runtime"
+ "testing"
+)
+
+// TestFileCollectionBenchmark tests the file collection benchmark.
+func TestFileCollectionBenchmark(t *testing.T) {
+ result, err := FileCollectionBenchmark("", 10)
+ if err != nil {
+ t.Fatalf("FileCollectionBenchmark failed: %v", err)
+ }
+
+ if result.Name != "FileCollection" {
+ t.Errorf("Expected name 'FileCollection', got %s", result.Name)
+ }
+
+ // Debug information
+ t.Logf("Files processed: %d", result.FilesProcessed)
+ t.Logf("Duration: %v", result.Duration)
+ t.Logf("Bytes processed: %d", result.BytesProcessed)
+
+ if result.FilesProcessed <= 0 {
+ t.Errorf("Expected files processed > 0, got %d", result.FilesProcessed)
+ }
+
+ if result.Duration <= 0 {
+ t.Errorf("Expected duration > 0, got %v", result.Duration)
+ }
+}
+
+// TestFileProcessingBenchmark tests the file processing benchmark.
+func TestFileProcessingBenchmark(t *testing.T) {
+ result, err := FileProcessingBenchmark("", "json", 2)
+ if err != nil {
+ t.Fatalf("FileProcessingBenchmark failed: %v", err)
+ }
+
+ if result.FilesProcessed <= 0 {
+ t.Errorf("Expected files processed > 0, got %d", result.FilesProcessed)
+ }
+
+ if result.Duration <= 0 {
+ t.Errorf("Expected duration > 0, got %v", result.Duration)
+ }
+}
+
+// TestConcurrencyBenchmark tests the concurrency benchmark.
+func TestConcurrencyBenchmark(t *testing.T) {
+ concurrencyLevels := []int{1, 2}
+ suite, err := ConcurrencyBenchmark("", "json", concurrencyLevels)
+ if err != nil {
+ t.Fatalf("ConcurrencyBenchmark failed: %v", err)
+ }
+
+ if suite.Name != "ConcurrencyBenchmark" {
+ t.Errorf("Expected name 'ConcurrencyBenchmark', got %s", suite.Name)
+ }
+
+ if len(suite.Results) != len(concurrencyLevels) {
+ t.Errorf("Expected %d results, got %d", len(concurrencyLevels), len(suite.Results))
+ }
+
+ for i, result := range suite.Results {
+ if result.FilesProcessed <= 0 {
+ t.Errorf("Result %d: Expected files processed > 0, got %d", i, result.FilesProcessed)
+ }
+ }
+}
+
+// TestFormatBenchmark tests the format benchmark.
+func TestFormatBenchmark(t *testing.T) {
+ formats := []string{"json", "yaml"}
+ suite, err := FormatBenchmark("", formats)
+ if err != nil {
+ t.Fatalf("FormatBenchmark failed: %v", err)
+ }
+
+ if suite.Name != "FormatBenchmark" {
+ t.Errorf("Expected name 'FormatBenchmark', got %s", suite.Name)
+ }
+
+ if len(suite.Results) != len(formats) {
+ t.Errorf("Expected %d results, got %d", len(formats), len(suite.Results))
+ }
+
+ for i, result := range suite.Results {
+ if result.FilesProcessed <= 0 {
+ t.Errorf("Result %d: Expected files processed > 0, got %d", i, result.FilesProcessed)
+ }
+ }
+}
+
+// TestCreateBenchmarkFiles tests the benchmark file creation.
+func TestCreateBenchmarkFiles(t *testing.T) {
+ tempDir, cleanup, err := createBenchmarkFiles(5)
+ if err != nil {
+ t.Fatalf("createBenchmarkFiles failed: %v", err)
+ }
+ defer cleanup()
+
+ if tempDir == "" {
+ t.Error("Expected non-empty temp directory")
+ }
+
+ // Verify files were created
+ // This is tested indirectly through the benchmark functions
+}
+
+// BenchmarkFileCollection benchmarks the file collection process.
+func BenchmarkFileCollection(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ result, err := FileCollectionBenchmark("", 50)
+ if err != nil {
+ b.Fatalf("FileCollectionBenchmark failed: %v", err)
+ }
+ if result.FilesProcessed <= 0 {
+ b.Errorf("Expected files processed > 0, got %d", result.FilesProcessed)
+ }
+ }
+}
+
+// BenchmarkFileProcessing benchmarks the file processing pipeline.
+func BenchmarkFileProcessing(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ result, err := FileProcessingBenchmark("", "json", runtime.NumCPU())
+ if err != nil {
+ b.Fatalf("FileProcessingBenchmark failed: %v", err)
+ }
+ if result.FilesProcessed <= 0 {
+ b.Errorf("Expected files processed > 0, got %d", result.FilesProcessed)
+ }
+ }
+}
+
+// BenchmarkConcurrency benchmarks different concurrency levels.
+func BenchmarkConcurrency(b *testing.B) {
+ concurrencyLevels := []int{1, 2, 4}
+
+ for i := 0; i < b.N; i++ {
+ suite, err := ConcurrencyBenchmark("", "json", concurrencyLevels)
+ if err != nil {
+ b.Fatalf("ConcurrencyBenchmark failed: %v", err)
+ }
+ if len(suite.Results) != len(concurrencyLevels) {
+ b.Errorf("Expected %d results, got %d", len(concurrencyLevels), len(suite.Results))
+ }
+ }
+}
+
+// BenchmarkFormats benchmarks different output formats.
+func BenchmarkFormats(b *testing.B) {
+ formats := []string{"json", "yaml", "markdown"}
+
+ for i := 0; i < b.N; i++ {
+ suite, err := FormatBenchmark("", formats)
+ if err != nil {
+ b.Fatalf("FormatBenchmark failed: %v", err)
+ }
+ if len(suite.Results) != len(formats) {
+ b.Errorf("Expected %d results, got %d", len(formats), len(suite.Results))
+ }
+ }
+}
diff --git a/cli/errors.go b/cli/errors.go
new file mode 100644
index 0000000..b1e6ed3
--- /dev/null
+++ b/cli/errors.go
@@ -0,0 +1,285 @@
+package cli
+
+import (
+ "errors"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/ivuorinen/gibidify/utils"
+)
+
+// ErrorFormatter handles CLI-friendly error formatting with suggestions.
+type ErrorFormatter struct {
+ ui *UIManager
+}
+
+// NewErrorFormatter creates a new error formatter.
+func NewErrorFormatter(ui *UIManager) *ErrorFormatter {
+ return &ErrorFormatter{ui: ui}
+}
+
+// FormatError formats an error with context and suggestions.
+func (ef *ErrorFormatter) FormatError(err error) {
+ if err == nil {
+ return
+ }
+
+ // Handle structured errors
+ if structErr, ok := err.(*utils.StructuredError); ok {
+ ef.formatStructuredError(structErr)
+ return
+ }
+
+ // Handle common error types
+ ef.formatGenericError(err)
+}
+
+// formatStructuredError formats a structured error with context and suggestions.
+func (ef *ErrorFormatter) formatStructuredError(err *utils.StructuredError) {
+ // Print main error
+ ef.ui.PrintError("Error: %s", err.Message)
+
+ // Print error type and code
+ if err.Type != utils.ErrorTypeUnknown || err.Code != "" {
+ ef.ui.PrintInfo("Type: %s, Code: %s", err.Type.String(), err.Code)
+ }
+
+ // Print file path if available
+ if err.FilePath != "" {
+ ef.ui.PrintInfo("File: %s", err.FilePath)
+ }
+
+ // Print context if available
+ if len(err.Context) > 0 {
+ ef.ui.PrintInfo("Context:")
+ for key, value := range err.Context {
+ ef.ui.printf(" %s: %v\n", key, value)
+ }
+ }
+
+ // Provide suggestions based on error type
+ ef.provideSuggestions(err)
+}
+
+// formatGenericError formats a generic error.
+func (ef *ErrorFormatter) formatGenericError(err error) {
+ ef.ui.PrintError("Error: %s", err.Error())
+ ef.provideGenericSuggestions(err)
+}
+
+// provideSuggestions provides helpful suggestions based on the error.
+func (ef *ErrorFormatter) provideSuggestions(err *utils.StructuredError) {
+ switch err.Type {
+ case utils.ErrorTypeFileSystem:
+ ef.provideFileSystemSuggestions(err)
+ case utils.ErrorTypeValidation:
+ ef.provideValidationSuggestions(err)
+ case utils.ErrorTypeProcessing:
+ ef.provideProcessingSuggestions(err)
+ case utils.ErrorTypeIO:
+ ef.provideIOSuggestions(err)
+ default:
+ ef.provideDefaultSuggestions()
+ }
+}
+
+// provideFileSystemSuggestions provides suggestions for file system errors.
+func (ef *ErrorFormatter) provideFileSystemSuggestions(err *utils.StructuredError) {
+ filePath := err.FilePath
+
+ ef.ui.PrintWarning("Suggestions:")
+
+ switch err.Code {
+ case utils.CodeFSAccess:
+ ef.suggestFileAccess(filePath)
+ case utils.CodeFSPathResolution:
+ ef.suggestPathResolution(filePath)
+ case utils.CodeFSNotFound:
+ ef.suggestFileNotFound(filePath)
+ default:
+ ef.suggestFileSystemGeneral(filePath)
+ }
+}
+
+// provideValidationSuggestions provides suggestions for validation errors.
+func (ef *ErrorFormatter) provideValidationSuggestions(err *utils.StructuredError) {
+ ef.ui.PrintWarning("Suggestions:")
+
+ switch err.Code {
+ case utils.CodeValidationFormat:
+ ef.ui.printf(" • Use a supported format: markdown, json, yaml\n")
+ ef.ui.printf(" • Example: -format markdown\n")
+ case utils.CodeValidationSize:
+ ef.ui.printf(" • Increase file size limit in config.yaml\n")
+ ef.ui.printf(" • Use smaller files or exclude large files\n")
+ default:
+ ef.ui.printf(" • Check your command line arguments\n")
+ ef.ui.printf(" • Run with --help for usage information\n")
+ }
+}
+
+// provideProcessingSuggestions provides suggestions for processing errors.
+func (ef *ErrorFormatter) provideProcessingSuggestions(err *utils.StructuredError) {
+ ef.ui.PrintWarning("Suggestions:")
+
+ switch err.Code {
+ case utils.CodeProcessingCollection:
+ ef.ui.printf(" • Check if the source directory exists and is readable\n")
+ ef.ui.printf(" • Verify directory permissions\n")
+ case utils.CodeProcessingFileRead:
+ ef.ui.printf(" • Check file permissions\n")
+ ef.ui.printf(" • Verify the file is not corrupted\n")
+ default:
+ ef.ui.printf(" • Try reducing concurrency: -concurrency 1\n")
+ ef.ui.printf(" • Check available system resources\n")
+ }
+}
+
+// provideIOSuggestions provides suggestions for I/O errors.
+func (ef *ErrorFormatter) provideIOSuggestions(err *utils.StructuredError) {
+ ef.ui.PrintWarning("Suggestions:")
+
+ switch err.Code {
+ case utils.CodeIOFileCreate:
+ ef.ui.printf(" • Check if the destination directory exists\n")
+ ef.ui.printf(" • Verify write permissions for the output file\n")
+ ef.ui.printf(" • Ensure sufficient disk space\n")
+ case utils.CodeIOWrite:
+ ef.ui.printf(" • Check available disk space\n")
+ ef.ui.printf(" • Verify write permissions\n")
+ default:
+ ef.ui.printf(" • Check file/directory permissions\n")
+ ef.ui.printf(" • Verify available disk space\n")
+ }
+}
+
+// Helper methods for specific suggestions
+func (ef *ErrorFormatter) suggestFileAccess(filePath string) {
+ ef.ui.printf(" • Check if the path exists: %s\n", filePath)
+ ef.ui.printf(" • Verify read permissions\n")
+ if filePath != "" {
+ if stat, err := os.Stat(filePath); err == nil {
+ ef.ui.printf(" • Path exists but may not be accessible\n")
+ ef.ui.printf(" • Mode: %s\n", stat.Mode())
+ }
+ }
+}
+
+func (ef *ErrorFormatter) suggestPathResolution(filePath string) {
+ ef.ui.printf(" • Use an absolute path instead of relative\n")
+ if filePath != "" {
+ if abs, err := filepath.Abs(filePath); err == nil {
+ ef.ui.printf(" • Try: %s\n", abs)
+ }
+ }
+}
+
+func (ef *ErrorFormatter) suggestFileNotFound(filePath string) {
+ ef.ui.printf(" • Check if the file/directory exists: %s\n", filePath)
+ if filePath != "" {
+ dir := filepath.Dir(filePath)
+ if entries, err := os.ReadDir(dir); err == nil {
+ ef.ui.printf(" • Similar files in %s:\n", dir)
+ count := 0
+ for _, entry := range entries {
+ if count >= 3 {
+ break
+ }
+ if strings.Contains(entry.Name(), filepath.Base(filePath)) {
+ ef.ui.printf(" - %s\n", entry.Name())
+ count++
+ }
+ }
+ }
+ }
+}
+
+func (ef *ErrorFormatter) suggestFileSystemGeneral(filePath string) {
+ ef.ui.printf(" • Check file/directory permissions\n")
+ ef.ui.printf(" • Verify the path is correct\n")
+ if filePath != "" {
+ ef.ui.printf(" • Path: %s\n", filePath)
+ }
+}
+
+// provideDefaultSuggestions provides general suggestions.
+func (ef *ErrorFormatter) provideDefaultSuggestions() {
+ ef.ui.printf(" • Check your command line arguments\n")
+ ef.ui.printf(" • Run with --help for usage information\n")
+ ef.ui.printf(" • Try with -concurrency 1 to reduce resource usage\n")
+}
+
+// provideGenericSuggestions provides suggestions for generic errors.
+func (ef *ErrorFormatter) provideGenericSuggestions(err error) {
+ errorMsg := err.Error()
+
+ ef.ui.PrintWarning("Suggestions:")
+
+ // Pattern matching for common errors
+ switch {
+ case strings.Contains(errorMsg, "permission denied"):
+ ef.ui.printf(" • Check file/directory permissions\n")
+ ef.ui.printf(" • Try running with appropriate privileges\n")
+ case strings.Contains(errorMsg, "no such file or directory"):
+ ef.ui.printf(" • Verify the file/directory path is correct\n")
+ ef.ui.printf(" • Check if the file exists\n")
+ case strings.Contains(errorMsg, "flag") && strings.Contains(errorMsg, "redefined"):
+ ef.ui.printf(" • This is likely a test environment issue\n")
+ ef.ui.printf(" • Try running the command directly instead of in tests\n")
+ default:
+ ef.provideDefaultSuggestions()
+ }
+}
+
+// CLI-specific error types
+
+// CLIMissingSourceError represents a missing source directory error.
+type CLIMissingSourceError struct{}
+
+func (e CLIMissingSourceError) Error() string {
+ return "source directory is required"
+}
+
+// NewCLIMissingSourceError creates a new CLI missing source error with suggestions.
+func NewCLIMissingSourceError() error {
+ return &CLIMissingSourceError{}
+}
+
+// IsUserError checks if an error is a user input error that should be handled gracefully.
+func IsUserError(err error) bool {
+ if err == nil {
+ return false
+ }
+
+ // Check for specific user error types
+ var cliErr *CLIMissingSourceError
+ if errors.As(err, &cliErr) {
+ return true
+ }
+
+ // Check for structured errors that are user-facing
+ if structErr, ok := err.(*utils.StructuredError); ok {
+ return structErr.Type == utils.ErrorTypeValidation ||
+ structErr.Code == utils.CodeValidationFormat ||
+ structErr.Code == utils.CodeValidationSize
+ }
+
+ // Check error message patterns
+ errMsg := err.Error()
+ userErrorPatterns := []string{
+ "flag",
+ "usage",
+ "invalid argument",
+ "file not found",
+ "permission denied",
+ }
+
+ for _, pattern := range userErrorPatterns {
+ if strings.Contains(strings.ToLower(errMsg), pattern) {
+ return true
+ }
+ }
+
+ return false
+}
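A sketch of how a caller outside the package might combine `FormatError` and `IsUserError`; the distinct exit code for user errors is an assumption, not something this diff establishes:

```go
package main

import (
	"errors"
	"os"

	"github.com/ivuorinen/gibidify/cli"
)

func main() {
	err := errors.New("permission denied: cannot read source") // example failure
	formatter := cli.NewErrorFormatter(cli.NewUIManager())
	formatter.FormatError(err) // prints the error plus pattern-matched suggestions

	if cli.IsUserError(err) { // "permission denied" matches a user-error pattern
		os.Exit(2) // assumption: user mistakes get a distinct exit code
	}
	os.Exit(1)
}
```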
diff --git a/cli/flags.go b/cli/flags.go
new file mode 100644
index 0000000..d18ab3e
--- /dev/null
+++ b/cli/flags.go
@@ -0,0 +1,93 @@
+package cli
+
+import (
+ "flag"
+ "runtime"
+
+ "github.com/ivuorinen/gibidify/config"
+ "github.com/ivuorinen/gibidify/utils"
+)
+
+// Flags holds CLI flags values.
+type Flags struct {
+ SourceDir string
+ Destination string
+ Prefix string
+ Suffix string
+ Concurrency int
+ Format string
+ NoColors bool
+ NoProgress bool
+ Verbose bool
+}
+
+var (
+ flagsParsed bool
+ globalFlags *Flags
+)
+
+// ParseFlags parses and validates CLI flags.
+func ParseFlags() (*Flags, error) {
+ if flagsParsed {
+ return globalFlags, nil
+ }
+
+ flags := &Flags{}
+
+ flag.StringVar(&flags.SourceDir, "source", "", "Source directory to scan recursively")
+ flag.StringVar(&flags.Destination, "destination", "", "Output file to write aggregated code")
+ flag.StringVar(&flags.Prefix, "prefix", "", "Text to add at the beginning of the output file")
+ flag.StringVar(&flags.Suffix, "suffix", "", "Text to add at the end of the output file")
+ flag.StringVar(&flags.Format, "format", "markdown", "Output format (json, markdown, yaml)")
+ flag.IntVar(&flags.Concurrency, "concurrency", runtime.NumCPU(),
+ "Number of concurrent workers (default: number of CPU cores)")
+ flag.BoolVar(&flags.NoColors, "no-colors", false, "Disable colored output")
+ flag.BoolVar(&flags.NoProgress, "no-progress", false, "Disable progress bars")
+ flag.BoolVar(&flags.Verbose, "verbose", false, "Enable verbose output")
+
+ flag.Parse()
+
+ if err := flags.validate(); err != nil {
+ return nil, err
+ }
+
+ if err := flags.setDefaultDestination(); err != nil {
+ return nil, err
+ }
+
+ flagsParsed = true
+ globalFlags = flags
+ return flags, nil
+}
+
+// validate validates the CLI flags.
+func (f *Flags) validate() error {
+ if f.SourceDir == "" {
+ return NewCLIMissingSourceError()
+ }
+
+ // Validate output format
+ if err := config.ValidateOutputFormat(f.Format); err != nil {
+ return err
+ }
+
+ // Validate concurrency
+ if err := config.ValidateConcurrency(f.Concurrency); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// setDefaultDestination sets the default destination if not provided.
+func (f *Flags) setDefaultDestination() error {
+ if f.Destination == "" {
+ absRoot, err := utils.GetAbsolutePath(f.SourceDir)
+ if err != nil {
+ return err
+ }
+ baseName := utils.GetBaseName(absRoot)
+ f.Destination = baseName + "." + f.Format
+ }
+ return nil
+}
diff --git a/cli/processor.go b/cli/processor.go
new file mode 100644
index 0000000..5c2cd1d
--- /dev/null
+++ b/cli/processor.go
@@ -0,0 +1,210 @@
+package cli
+
+import (
+ "context"
+ "os"
+ "sync"
+
+ "github.com/sirupsen/logrus"
+
+ "github.com/ivuorinen/gibidify/config"
+ "github.com/ivuorinen/gibidify/fileproc"
+ "github.com/ivuorinen/gibidify/utils"
+)
+
+// Processor handles the main file processing logic.
+type Processor struct {
+ flags *Flags
+ backpressure *fileproc.BackpressureManager
+ ui *UIManager
+}
+
+// NewProcessor creates a new processor with the given flags.
+func NewProcessor(flags *Flags) *Processor {
+ ui := NewUIManager()
+
+ // Configure UI based on flags
+ ui.SetColorOutput(!flags.NoColors)
+ ui.SetProgressOutput(!flags.NoProgress)
+
+ return &Processor{
+ flags: flags,
+ backpressure: fileproc.NewBackpressureManager(),
+ ui: ui,
+ }
+}
+
+// Process executes the main file processing workflow.
+func (p *Processor) Process(ctx context.Context) error {
+ // Configure file type registry
+ p.configureFileTypes()
+
+ // Print startup info with colors
+ p.ui.PrintHeader("🚀 Starting gibidify")
+ p.ui.PrintInfo("Format: %s", p.flags.Format)
+ p.ui.PrintInfo("Source: %s", p.flags.SourceDir)
+ p.ui.PrintInfo("Destination: %s", p.flags.Destination)
+ p.ui.PrintInfo("Workers: %d", p.flags.Concurrency)
+
+ // Collect files with progress indication
+ p.ui.PrintInfo("📁 Collecting files...")
+ files, err := p.collectFiles()
+ if err != nil {
+ return err
+ }
+
+ // Show collection results
+ p.ui.PrintSuccess("Found %d files to process", len(files))
+
+ // Process files
+ return p.processFiles(ctx, files)
+}
+
+// configureFileTypes configures the file type registry.
+func (p *Processor) configureFileTypes() {
+ if config.GetFileTypesEnabled() {
+ fileproc.ConfigureFromSettings(
+ config.GetCustomImageExtensions(),
+ config.GetCustomBinaryExtensions(),
+ config.GetCustomLanguages(),
+ config.GetDisabledImageExtensions(),
+ config.GetDisabledBinaryExtensions(),
+ config.GetDisabledLanguageExtensions(),
+ )
+ }
+}
+
+// collectFiles collects all files to be processed.
+func (p *Processor) collectFiles() ([]string, error) {
+ files, err := fileproc.CollectFiles(p.flags.SourceDir)
+ if err != nil {
+ return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "error collecting files")
+ }
+ logrus.Infof("Found %d files to process", len(files))
+ return files, nil
+}
+
+// processFiles processes the collected files.
+func (p *Processor) processFiles(ctx context.Context, files []string) error {
+ outFile, err := p.createOutputFile()
+ if err != nil {
+ return err
+ }
+ defer func() {
+ utils.LogError("Error closing output file", outFile.Close())
+ }()
+
+ // Initialize back-pressure and channels
+ p.ui.PrintInfo("⚙️ Initializing processing...")
+ p.backpressure.LogBackpressureInfo()
+ fileCh, writeCh := p.backpressure.CreateChannels()
+ writerDone := make(chan struct{})
+
+ // Start writer
+ go fileproc.StartWriter(outFile, writeCh, writerDone, p.flags.Format, p.flags.Prefix, p.flags.Suffix)
+
+ // Start workers
+ var wg sync.WaitGroup
+ p.startWorkers(ctx, &wg, fileCh, writeCh)
+
+ // Start progress bar
+ p.ui.StartProgress(len(files), "📝 Processing files")
+
+ // Send files to workers
+ if err := p.sendFiles(ctx, files, fileCh); err != nil {
+ p.ui.FinishProgress()
+ return err
+ }
+
+ // Wait for completion
+ p.waitForCompletion(&wg, writeCh, writerDone)
+ p.ui.FinishProgress()
+
+ p.logFinalStats()
+ p.ui.PrintSuccess("Processing completed. Output saved to %s", p.flags.Destination)
+ return nil
+}
+
+// createOutputFile creates the output file.
+func (p *Processor) createOutputFile() (*os.File, error) {
+ outFile, err := os.Create(p.flags.Destination) // #nosec G304 - destination is user-provided CLI arg
+ if err != nil {
+ return nil, utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOFileCreate, "failed to create output file").WithFilePath(p.flags.Destination)
+ }
+ return outFile, nil
+}
+
+// startWorkers starts the worker goroutines.
+func (p *Processor) startWorkers(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) {
+ for range p.flags.Concurrency {
+ wg.Add(1)
+ go p.worker(ctx, wg, fileCh, writeCh)
+ }
+}
+
+// worker is the worker goroutine function.
+func (p *Processor) worker(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) {
+ defer wg.Done()
+ for {
+ select {
+ case <-ctx.Done():
+ return
+ case filePath, ok := <-fileCh:
+ if !ok {
+ return
+ }
+ p.processFile(filePath, writeCh)
+ }
+ }
+}
+
+// processFile processes a single file.
+func (p *Processor) processFile(filePath string, writeCh chan fileproc.WriteRequest) {
+ absRoot, err := utils.GetAbsolutePath(p.flags.SourceDir)
+ if err != nil {
+ utils.LogError("Failed to get absolute path", err)
+ return
+ }
+ fileproc.ProcessFile(filePath, writeCh, absRoot)
+
+ // Update progress bar
+ p.ui.UpdateProgress(1)
+}
+
+// sendFiles sends files to the worker channels with back-pressure handling.
+func (p *Processor) sendFiles(ctx context.Context, files []string, fileCh chan string) error {
+ defer close(fileCh)
+
+ for _, fp := range files {
+ // Check if we should apply back-pressure
+ if p.backpressure.ShouldApplyBackpressure(ctx) {
+ p.backpressure.ApplyBackpressure(ctx)
+ }
+
+ // Wait for channel space if needed
+ p.backpressure.WaitForChannelSpace(ctx, fileCh, nil)
+
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case fileCh <- fp:
+ }
+ }
+ return nil
+}
+
+// waitForCompletion waits for all workers to complete.
+func (p *Processor) waitForCompletion(wg *sync.WaitGroup, writeCh chan fileproc.WriteRequest, writerDone chan struct{}) {
+ wg.Wait()
+ close(writeCh)
+ <-writerDone
+}
+
+// logFinalStats logs the final back-pressure statistics.
+func (p *Processor) logFinalStats() {
+ stats := p.backpressure.GetStats()
+ if stats.Enabled {
+ logrus.Infof("Back-pressure stats: processed=%d files, memory=%dMB/%dMB",
+ stats.FilesProcessed, stats.CurrentMemoryUsage/1024/1024, stats.MaxMemoryUsage/1024/1024)
+ }
+}
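The flags, UI, error-formatting, and processor pieces wire together roughly like this; a hedged sketch, not the repository's actual 37-line `main.go`:

```go
package main

import (
	"context"
	"os"

	"github.com/ivuorinen/gibidify/cli"
)

func main() {
	flags, err := cli.ParseFlags()
	if err != nil {
		cli.NewErrorFormatter(cli.NewUIManager()).FormatError(err)
		os.Exit(1)
	}

	processor := cli.NewProcessor(flags)
	if err := processor.Process(context.Background()); err != nil {
		cli.NewErrorFormatter(cli.NewUIManager()).FormatError(err)
		os.Exit(1)
	}
}
```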
diff --git a/cli/ui.go b/cli/ui.go
new file mode 100644
index 0000000..1c7bc27
--- /dev/null
+++ b/cli/ui.go
@@ -0,0 +1,173 @@
+package cli
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "time"
+
+ "github.com/fatih/color"
+ "github.com/schollz/progressbar/v3"
+)
+
+// UIManager handles CLI user interface elements.
+type UIManager struct {
+ enableColors bool
+ enableProgress bool
+ progressBar *progressbar.ProgressBar
+ output io.Writer
+}
+
+// NewUIManager creates a new UI manager.
+func NewUIManager() *UIManager {
+ return &UIManager{
+ enableColors: isColorTerminal(),
+ enableProgress: isInteractiveTerminal(),
+ output: os.Stderr, // Progress and colors go to stderr
+ }
+}
+
+// SetColorOutput enables or disables colored output.
+func (ui *UIManager) SetColorOutput(enabled bool) {
+ ui.enableColors = enabled
+ color.NoColor = !enabled
+}
+
+// SetProgressOutput enables or disables progress bars.
+func (ui *UIManager) SetProgressOutput(enabled bool) {
+ ui.enableProgress = enabled
+}
+
+// StartProgress initializes a progress bar for file processing.
+func (ui *UIManager) StartProgress(total int, description string) {
+ if !ui.enableProgress || total <= 0 {
+ return
+ }
+
+ ui.progressBar = progressbar.NewOptions(total,
+ progressbar.OptionSetWriter(ui.output),
+ progressbar.OptionSetDescription(description),
+ progressbar.OptionSetTheme(progressbar.Theme{
+ Saucer: color.GreenString("█"),
+ SaucerHead: color.GreenString("█"),
+ SaucerPadding: " ",
+ BarStart: "[",
+ BarEnd: "]",
+ }),
+ progressbar.OptionShowCount(),
+ progressbar.OptionShowIts(),
+ progressbar.OptionSetWidth(40),
+ progressbar.OptionThrottle(100*time.Millisecond),
+ progressbar.OptionOnCompletion(func() {
+ _, _ = fmt.Fprint(ui.output, "\n")
+ }),
+ progressbar.OptionSetRenderBlankState(true),
+ )
+}
+
+// UpdateProgress increments the progress bar.
+func (ui *UIManager) UpdateProgress(increment int) {
+ if ui.progressBar != nil {
+ _ = ui.progressBar.Add(increment)
+ }
+}
+
+// FinishProgress completes the progress bar.
+func (ui *UIManager) FinishProgress() {
+ if ui.progressBar != nil {
+ _ = ui.progressBar.Finish()
+ ui.progressBar = nil
+ }
+}
+
+// PrintSuccess prints a success message in green.
+func (ui *UIManager) PrintSuccess(format string, args ...interface{}) {
+ if ui.enableColors {
+ color.Green("✓ "+format, args...)
+ } else {
+ ui.printf("✓ "+format+"\n", args...)
+ }
+}
+
+// PrintError prints an error message in red.
+func (ui *UIManager) PrintError(format string, args ...interface{}) {
+ if ui.enableColors {
+ color.Red("✗ "+format, args...)
+ } else {
+ ui.printf("✗ "+format+"\n", args...)
+ }
+}
+
+// PrintWarning prints a warning message in yellow.
+func (ui *UIManager) PrintWarning(format string, args ...interface{}) {
+ if ui.enableColors {
+ color.Yellow("⚠ "+format, args...)
+ } else {
+ ui.printf("⚠ "+format+"\n", args...)
+ }
+}
+
+// PrintInfo prints an info message in blue.
+func (ui *UIManager) PrintInfo(format string, args ...interface{}) {
+ if ui.enableColors {
+ color.Blue("ℹ "+format, args...)
+ } else {
+ ui.printf("ℹ "+format+"\n", args...)
+ }
+}
+
+// PrintHeader prints a header message in bold.
+func (ui *UIManager) PrintHeader(format string, args ...interface{}) {
+ if ui.enableColors {
+ _, _ = color.New(color.Bold).Fprintf(ui.output, format+"\n", args...)
+ } else {
+ ui.printf(format+"\n", args...)
+ }
+}
+
+// isColorTerminal checks if the terminal supports colors.
+func isColorTerminal() bool {
+ // Check common environment variables
+ term := os.Getenv("TERM")
+ if term == "" || term == "dumb" {
+ return false
+ }
+
+ // Check for CI environments that typically don't support colors
+ if os.Getenv("CI") != "" {
+ // GitHub Actions supports colors
+ if os.Getenv("GITHUB_ACTIONS") == "true" {
+ return true
+ }
+ // Most other CI systems don't
+ return false
+ }
+
+ // Check if NO_COLOR is set (https://no-color.org/)
+ if os.Getenv("NO_COLOR") != "" {
+ return false
+ }
+
+ // Check if FORCE_COLOR is set
+ if os.Getenv("FORCE_COLOR") != "" {
+ return true
+ }
+
+ // Default to true for interactive terminals
+ return isInteractiveTerminal()
+}
+
+// isInteractiveTerminal checks if we're running in an interactive terminal.
+func isInteractiveTerminal() bool {
+ // Check if stderr is a terminal (where we output progress/colors)
+ fileInfo, err := os.Stderr.Stat()
+ if err != nil {
+ return false
+ }
+ return (fileInfo.Mode() & os.ModeCharDevice) != 0
+}
+
+// printf is a helper that ignores printf errors (for UI output).
+func (ui *UIManager) printf(format string, args ...interface{}) {
+ _, _ = fmt.Fprintf(ui.output, format, args...)
+}
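Driving the progress bar from caller code, as `Processor` does above; a minimal standalone sketch using only this file's API:

```go
package main

import (
	"time"

	"github.com/ivuorinen/gibidify/cli"
)

func main() {
	ui := cli.NewUIManager()
	ui.SetProgressOutput(true) // force-enable for the demo, even without a TTY

	ui.StartProgress(10, "processing")
	for i := 0; i < 10; i++ {
		time.Sleep(50 * time.Millisecond) // stand-in for per-file work
		ui.UpdateProgress(1)
	}
	ui.FinishProgress()
	ui.PrintSuccess("done: %d items", 10)
}
```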
diff --git a/cmd/benchmark/main.go b/cmd/benchmark/main.go
new file mode 100644
index 0000000..e5be0ea
--- /dev/null
+++ b/cmd/benchmark/main.go
@@ -0,0 +1,145 @@
+// Package main provides a CLI for running gibidify benchmarks.
+package main
+
+import (
+ "flag"
+ "fmt"
+ "os"
+ "runtime"
+ "strings"
+
+ "github.com/ivuorinen/gibidify/benchmark"
+ "github.com/ivuorinen/gibidify/utils"
+)
+
+var (
+ sourceDir = flag.String("source", "", "Source directory to benchmark (uses temp files if empty)")
+ benchmarkType = flag.String("type", "all", "Benchmark type: all, collection, processing, concurrency, format")
+ format = flag.String("format", "json", "Output format for processing benchmarks")
+ concurrency = flag.Int("concurrency", runtime.NumCPU(), "Concurrency level for processing benchmarks")
+ concurrencyList = flag.String("concurrency-list", "1,2,4,8", "Comma-separated list of concurrency levels")
+ formatList = flag.String("format-list", "json,yaml,markdown", "Comma-separated list of formats")
+ numFiles = flag.Int("files", 100, "Number of files to create for benchmarks")
+)
+
+func main() {
+ flag.Parse()
+
+ if err := runBenchmarks(); err != nil {
+ fmt.Fprintf(os.Stderr, "Benchmark failed: %v\n", err)
+ os.Exit(1)
+ }
+}
+
+func runBenchmarks() error {
+ fmt.Printf("Running gibidify benchmarks...\n")
+ fmt.Printf("Source: %s\n", getSourceDescription())
+ fmt.Printf("Type: %s\n", *benchmarkType)
+ fmt.Printf("CPU cores: %d\n", runtime.NumCPU())
+ fmt.Println()
+
+ switch *benchmarkType {
+ case "all":
+ return benchmark.RunAllBenchmarks(*sourceDir)
+ case "collection":
+ return runCollectionBenchmark()
+ case "processing":
+ return runProcessingBenchmark()
+ case "concurrency":
+ return runConcurrencyBenchmark()
+ case "format":
+ return runFormatBenchmark()
+ default:
+ return utils.NewValidationError(utils.CodeValidationFormat, "invalid benchmark type: "+*benchmarkType)
+ }
+}
+
+func runCollectionBenchmark() error {
+ fmt.Println("Running file collection benchmark...")
+ result, err := benchmark.FileCollectionBenchmark(*sourceDir, *numFiles)
+ if err != nil {
+ return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "file collection benchmark failed")
+ }
+ benchmark.PrintBenchmarkResult(result)
+ return nil
+}
+
+func runProcessingBenchmark() error {
+ fmt.Printf("Running file processing benchmark (format: %s, concurrency: %d)...\n", *format, *concurrency)
+ result, err := benchmark.FileProcessingBenchmark(*sourceDir, *format, *concurrency)
+ if err != nil {
+ return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "file processing benchmark failed")
+ }
+ benchmark.PrintBenchmarkResult(result)
+ return nil
+}
+
+func runConcurrencyBenchmark() error {
+ concurrencyLevels, err := parseConcurrencyList(*concurrencyList)
+ if err != nil {
+ return utils.WrapError(err, utils.ErrorTypeValidation, utils.CodeValidationFormat, "invalid concurrency list")
+ }
+
+ fmt.Printf("Running concurrency benchmark (format: %s, levels: %v)...\n", *format, concurrencyLevels)
+ suite, err := benchmark.ConcurrencyBenchmark(*sourceDir, *format, concurrencyLevels)
+ if err != nil {
+ return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "concurrency benchmark failed")
+ }
+ benchmark.PrintBenchmarkSuite(suite)
+ return nil
+}
+
+func runFormatBenchmark() error {
+ formats := parseFormatList(*formatList)
+ fmt.Printf("Running format benchmark (formats: %v)...\n", formats)
+ suite, err := benchmark.FormatBenchmark(*sourceDir, formats)
+ if err != nil {
+ return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "format benchmark failed")
+ }
+ benchmark.PrintBenchmarkSuite(suite)
+ return nil
+}
+
+func getSourceDescription() string {
+ if *sourceDir == "" {
+ return fmt.Sprintf("temporary files (%d files)", *numFiles)
+ }
+ return *sourceDir
+}
+
+func parseConcurrencyList(list string) ([]int, error) {
+ parts := strings.Split(list, ",")
+ levels := make([]int, 0, len(parts))
+
+ for _, part := range parts {
+ part = strings.TrimSpace(part)
+ // strconv.Atoi rejects trailing garbage such as "4x", which
+ // fmt.Sscanf("%d") would silently accept.
+ level, err := strconv.Atoi(part)
+ if err != nil {
+ return nil, utils.WrapErrorf(err, utils.ErrorTypeValidation, utils.CodeValidationFormat, "invalid concurrency level: %s", part)
+ }
+ if level <= 0 {
+ return nil, utils.NewValidationError(utils.CodeValidationFormat, "concurrency level must be positive: "+part)
+ }
+ levels = append(levels, level)
+ }
+
+ if len(levels) == 0 {
+ return nil, utils.NewValidationError(utils.CodeValidationFormat, "no valid concurrency levels found")
+ }
+
+ return levels, nil
+}
+
+func parseFormatList(list string) []string {
+ parts := strings.Split(list, ",")
+ formats := make([]string, 0, len(parts))
+
+ for _, part := range parts {
+ part = strings.TrimSpace(part)
+ if part != "" {
+ formats = append(formats, part)
+ }
+ }
+
+ return formats
+}
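The CLI is a thin wrapper over the benchmark package: flags such as -type=concurrency and -concurrency-list=1,2,4 end up in calls like the one below. A minimal standalone sketch using only signatures exercised above (an empty source string means generated temp files, per getSourceDescription):

```go
package main

import (
	"log"

	"github.com/ivuorinen/gibidify/benchmark"
)

func main() {
	// Mirrors runConcurrencyBenchmark for levels 1, 2 and 4.
	suite, err := benchmark.ConcurrencyBenchmark("", "json", []int{1, 2, 4})
	if err != nil {
		log.Fatalf("concurrency benchmark: %v", err)
	}
	benchmark.PrintBenchmarkSuite(suite)
}
```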
diff --git a/config.example.yaml b/config.example.yaml
new file mode 100644
index 0000000..fad9a43
--- /dev/null
+++ b/config.example.yaml
@@ -0,0 +1,84 @@
+# gibidify configuration example
+# Place this file in one of these locations:
+# - $XDG_CONFIG_HOME/gibidify/config.yaml
+# - $HOME/.config/gibidify/config.yaml
+# - Current directory (if no gibidify.yaml output file exists)
+
+# File size limit in bytes (default: 5MB)
+fileSizeLimit: 5242880
+
+# Directories to ignore during scanning
+ignoreDirectories:
+ - vendor
+ - node_modules
+ - .git
+ - dist
+ - build
+ - target
+ - bower_components
+ - cache
+ - tmp
+ - .next
+ - .nuxt
+
+# FileType registry configuration
+fileTypes:
+ # Enable/disable file type detection entirely (default: true)
+ enabled: true
+
+ # Add custom image extensions
+ customImageExtensions:
+ - .webp
+ - .avif
+ - .heic
+ - .jxl
+
+ # Add custom binary extensions
+ customBinaryExtensions:
+ - .custom
+ - .proprietary
+ - .blob
+
+ # Add custom language mappings
+ customLanguages:
+ .zig: zig
+ .odin: odin
+ .v: vlang
+ .grain: grain
+ .gleam: gleam
+ .roc: roc
+ .janet: janet
+ .fennel: fennel
+ .wast: wast
+ .wat: wat
+
+ # Disable specific default image extensions
+ disabledImageExtensions:
+ - .bmp # Disable bitmap support
+ - .tif # Disable TIFF support
+
+ # Disable specific default binary extensions
+ disabledBinaryExtensions:
+ - .exe # Don't treat executables as binary
+ - .dll # Don't treat DLL files as binary
+
+ # Disable specific default language extensions
+ disabledLanguageExtensions:
+ - .bat # Don't detect batch files
+ - .cmd # Don't detect command files
+
+# Maximum concurrency (optional)
+maxConcurrency: 16
+
+# Supported output formats (optional validation)
+supportedFormats:
+ - json
+ - yaml
+ - markdown
+
+# File patterns for filtering (optional)
+filePatterns:
+ - "*.go"
+ - "*.py"
+ - "*.js"
+ - "*.ts"
diff --git a/config/config.go b/config/config.go
index 13ec049..5e7fd80 100644
--- a/config/config.go
+++ b/config/config.go
@@ -2,11 +2,24 @@
package config
import (
+ "fmt"
"os"
"path/filepath"
+ "strings"
"github.com/sirupsen/logrus"
"github.com/spf13/viper"
+
+ "github.com/ivuorinen/gibidify/utils"
+)
+
+const (
+ // DefaultFileSizeLimit is the default maximum file size (5MB).
+ DefaultFileSizeLimit = 5242880
+ // MinFileSizeLimit is the minimum allowed file size limit (1KB).
+ MinFileSizeLimit = 1024
+ // MaxFileSizeLimit is the maximum allowed file size limit (100MB).
+ MaxFileSizeLimit = 104857600
)
// LoadConfig reads configuration from a YAML file.
@@ -23,23 +36,51 @@ func LoadConfig() {
} else if home, err := os.UserHomeDir(); err == nil {
viper.AddConfigPath(filepath.Join(home, ".config", "gibidify"))
}
- viper.AddConfigPath(".")
+ // Only add current directory if no config file named gibidify.yaml exists
+ // to avoid conflicts with the project's output file
+ if _, err := os.Stat("gibidify.yaml"); os.IsNotExist(err) {
+ viper.AddConfigPath(".")
+ }
if err := viper.ReadInConfig(); err != nil {
logrus.Infof("Config file not found, using default values: %v", err)
setDefaultConfig()
} else {
logrus.Infof("Using config file: %s", viper.ConfigFileUsed())
+ // Validate configuration after loading
+ if err := ValidateConfig(); err != nil {
+ logrus.Warnf("Configuration validation failed: %v", err)
+ logrus.Info("Falling back to default configuration")
+ // Reset viper and set defaults when validation fails
+ viper.Reset()
+ setDefaultConfig()
+ }
}
}
// setDefaultConfig sets default configuration values.
func setDefaultConfig() {
- viper.SetDefault("fileSizeLimit", 5242880) // 5 MB
+ viper.SetDefault("fileSizeLimit", DefaultFileSizeLimit)
// Default ignored directories.
viper.SetDefault("ignoreDirectories", []string{
"vendor", "node_modules", ".git", "dist", "build", "target", "bower_components", "cache", "tmp",
})
+
+ // FileTypeRegistry defaults
+ viper.SetDefault("fileTypes.enabled", true)
+ viper.SetDefault("fileTypes.customImageExtensions", []string{})
+ viper.SetDefault("fileTypes.customBinaryExtensions", []string{})
+ viper.SetDefault("fileTypes.customLanguages", map[string]string{})
+ viper.SetDefault("fileTypes.disabledImageExtensions", []string{})
+ viper.SetDefault("fileTypes.disabledBinaryExtensions", []string{})
+ viper.SetDefault("fileTypes.disabledLanguageExtensions", []string{})
+
+ // Back-pressure and memory management defaults
+ viper.SetDefault("backpressure.enabled", true)
+ viper.SetDefault("backpressure.maxPendingFiles", 1000) // Max files in file channel buffer
+ viper.SetDefault("backpressure.maxPendingWrites", 100) // Max writes in write channel buffer
+ viper.SetDefault("backpressure.maxMemoryUsage", 104857600) // 100MB max memory usage
+ viper.SetDefault("backpressure.memoryCheckInterval", 1000) // Check memory every 1000 files
}
// GetFileSizeLimit returns the file size limit from configuration.
@@ -51,3 +92,303 @@ func GetFileSizeLimit() int64 {
func GetIgnoredDirectories() []string {
return viper.GetStringSlice("ignoreDirectories")
}
+
+// ValidateConfig validates the loaded configuration.
+func ValidateConfig() error {
+ var validationErrors []string
+
+ // Validate file size limit
+ fileSizeLimit := viper.GetInt64("fileSizeLimit")
+ if fileSizeLimit < MinFileSizeLimit {
+ validationErrors = append(validationErrors, fmt.Sprintf("fileSizeLimit (%d) is below minimum (%d)", fileSizeLimit, MinFileSizeLimit))
+ }
+ if fileSizeLimit > MaxFileSizeLimit {
+ validationErrors = append(validationErrors, fmt.Sprintf("fileSizeLimit (%d) exceeds maximum (%d)", fileSizeLimit, MaxFileSizeLimit))
+ }
+
+ // Validate ignore directories
+ ignoreDirectories := viper.GetStringSlice("ignoreDirectories")
+ for i, dir := range ignoreDirectories {
+ dir = strings.TrimSpace(dir)
+ if dir == "" {
+ validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] is empty", i))
+ continue
+ }
+ if strings.Contains(dir, "/") {
+ validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] (%s) contains path separator - only directory names are allowed", i, dir))
+ }
+ if strings.HasPrefix(dir, ".") && dir != ".git" && dir != ".vscode" && dir != ".idea" {
+ validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] (%s) starts with dot - this may cause unexpected behavior", i, dir))
+ }
+ }
+
+ // Validate supported output formats if configured
+ if viper.IsSet("supportedFormats") {
+ supportedFormats := viper.GetStringSlice("supportedFormats")
+ validFormats := map[string]bool{"json": true, "yaml": true, "markdown": true}
+ for i, format := range supportedFormats {
+ format = strings.ToLower(strings.TrimSpace(format))
+ if !validFormats[format] {
+ validationErrors = append(validationErrors, fmt.Sprintf("supportedFormats[%d] (%s) is not a valid format (json, yaml, markdown)", i, format))
+ }
+ }
+ }
+
+ // Validate concurrency settings if configured
+ if viper.IsSet("maxConcurrency") {
+ maxConcurrency := viper.GetInt("maxConcurrency")
+ if maxConcurrency < 1 {
+ validationErrors = append(validationErrors, fmt.Sprintf("maxConcurrency (%d) must be at least 1", maxConcurrency))
+ }
+ if maxConcurrency > 100 {
+ validationErrors = append(validationErrors, fmt.Sprintf("maxConcurrency (%d) is unreasonably high (max 100)", maxConcurrency))
+ }
+ }
+
+ // Validate file patterns if configured
+ if viper.IsSet("filePatterns") {
+ filePatterns := viper.GetStringSlice("filePatterns")
+ for i, pattern := range filePatterns {
+ pattern = strings.TrimSpace(pattern)
+ if pattern == "" {
+ validationErrors = append(validationErrors, fmt.Sprintf("filePatterns[%d] is empty", i))
+ continue
+ }
+ // Basic validation - patterns should contain at least one alphanumeric character
+ if !strings.ContainsAny(pattern, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") {
+ validationErrors = append(validationErrors, fmt.Sprintf("filePatterns[%d] (%s) appears to be invalid", i, pattern))
+ }
+ }
+ }
+
+ // Validate FileTypeRegistry configuration
+ if viper.IsSet("fileTypes.customImageExtensions") {
+ customImages := viper.GetStringSlice("fileTypes.customImageExtensions")
+ for i, ext := range customImages {
+ ext = strings.TrimSpace(ext)
+ if ext == "" {
+ validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customImageExtensions[%d] is empty", i))
+ continue
+ }
+ if !strings.HasPrefix(ext, ".") {
+ validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customImageExtensions[%d] (%s) must start with a dot", i, ext))
+ }
+ }
+ }
+
+ if viper.IsSet("fileTypes.customBinaryExtensions") {
+ customBinary := viper.GetStringSlice("fileTypes.customBinaryExtensions")
+ for i, ext := range customBinary {
+ ext = strings.TrimSpace(ext)
+ if ext == "" {
+ validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customBinaryExtensions[%d] is empty", i))
+ continue
+ }
+ if !strings.HasPrefix(ext, ".") {
+ validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customBinaryExtensions[%d] (%s) must start with a dot", i, ext))
+ }
+ }
+ }
+
+ if viper.IsSet("fileTypes.customLanguages") {
+ customLangs := viper.GetStringMapString("fileTypes.customLanguages")
+ for ext, lang := range customLangs {
+ ext = strings.TrimSpace(ext)
+ lang = strings.TrimSpace(lang)
+ if ext == "" {
+ validationErrors = append(validationErrors, "fileTypes.customLanguages contains empty extension key")
+ continue
+ }
+ if !strings.HasPrefix(ext, ".") {
+ validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customLanguages extension (%s) must start with a dot", ext))
+ }
+ if lang == "" {
+ validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customLanguages[%s] has empty language value", ext))
+ }
+ }
+ }
+
+ // Validate back-pressure configuration
+ if viper.IsSet("backpressure.maxPendingFiles") {
+ maxPendingFiles := viper.GetInt("backpressure.maxPendingFiles")
+ if maxPendingFiles < 1 {
+ validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingFiles (%d) must be at least 1", maxPendingFiles))
+ }
+ if maxPendingFiles > 100000 {
+ validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingFiles (%d) is unreasonably high (max 100000)", maxPendingFiles))
+ }
+ }
+
+ if viper.IsSet("backpressure.maxPendingWrites") {
+ maxPendingWrites := viper.GetInt("backpressure.maxPendingWrites")
+ if maxPendingWrites < 1 {
+ validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingWrites (%d) must be at least 1", maxPendingWrites))
+ }
+ if maxPendingWrites > 10000 {
+ validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingWrites (%d) is unreasonably high (max 10000)", maxPendingWrites))
+ }
+ }
+
+ if viper.IsSet("backpressure.maxMemoryUsage") {
+ maxMemoryUsage := viper.GetInt64("backpressure.maxMemoryUsage")
+ if maxMemoryUsage < 1048576 { // 1MB minimum
+ validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxMemoryUsage (%d) must be at least 1MB (1048576 bytes)", maxMemoryUsage))
+ }
+ if maxMemoryUsage > 10737418240 { // 10GB maximum
+ validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxMemoryUsage (%d) is unreasonably high (max 10GB)", maxMemoryUsage))
+ }
+ }
+
+ if viper.IsSet("backpressure.memoryCheckInterval") {
+ interval := viper.GetInt("backpressure.memoryCheckInterval")
+ if interval < 1 {
+ validationErrors = append(validationErrors, fmt.Sprintf("backpressure.memoryCheckInterval (%d) must be at least 1", interval))
+ }
+ if interval > 100000 {
+ validationErrors = append(validationErrors, fmt.Sprintf("backpressure.memoryCheckInterval (%d) is unreasonably high (max 100000)", interval))
+ }
+ }
+
+ if len(validationErrors) > 0 {
+ return utils.NewStructuredError(
+ utils.ErrorTypeConfiguration,
+ utils.CodeConfigValidation,
+ "configuration validation failed: "+strings.Join(validationErrors, "; "),
+ ).WithContext("validation_errors", validationErrors)
+ }
+
+ return nil
+}
+
+// GetMaxConcurrency returns the maximum concurrency limit from configuration.
+func GetMaxConcurrency() int {
+ return viper.GetInt("maxConcurrency")
+}
+
+// GetSupportedFormats returns the supported output formats from configuration.
+func GetSupportedFormats() []string {
+ return viper.GetStringSlice("supportedFormats")
+}
+
+// GetFilePatterns returns the file patterns from configuration.
+func GetFilePatterns() []string {
+ return viper.GetStringSlice("filePatterns")
+}
+
+// IsValidFormat checks if a format is supported.
+func IsValidFormat(format string) bool {
+ format = strings.ToLower(strings.TrimSpace(format))
+ validFormats := map[string]bool{"json": true, "yaml": true, "markdown": true}
+ return validFormats[format]
+}
+
+// ValidateFileSize checks if a file size is within the configured limit.
+func ValidateFileSize(size int64) error {
+ limit := GetFileSizeLimit()
+ if size > limit {
+ return utils.NewStructuredError(
+ utils.ErrorTypeValidation,
+ utils.CodeValidationSize,
+ fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", size, limit),
+ ).WithContext("file_size", size).WithContext("size_limit", limit)
+ }
+ return nil
+}
+
+// ValidateOutputFormat checks if an output format is valid.
+func ValidateOutputFormat(format string) error {
+ if !IsValidFormat(format) {
+ return utils.NewStructuredError(
+ utils.ErrorTypeValidation,
+ utils.CodeValidationFormat,
+ fmt.Sprintf("unsupported output format: %s (supported: json, yaml, markdown)", format),
+ ).WithContext("format", format)
+ }
+ return nil
+}
+
+// ValidateConcurrency checks if a concurrency level is valid.
+func ValidateConcurrency(concurrency int) error {
+ if concurrency < 1 {
+ return utils.NewStructuredError(
+ utils.ErrorTypeValidation,
+ utils.CodeValidationFormat,
+ fmt.Sprintf("concurrency (%d) must be at least 1", concurrency),
+ ).WithContext("concurrency", concurrency)
+ }
+
+ if viper.IsSet("maxConcurrency") {
+ maxConcurrency := GetMaxConcurrency()
+ if concurrency > maxConcurrency {
+ return utils.NewStructuredError(
+ utils.ErrorTypeValidation,
+ utils.CodeValidationFormat,
+ fmt.Sprintf("concurrency (%d) exceeds maximum (%d)", concurrency, maxConcurrency),
+ ).WithContext("concurrency", concurrency).WithContext("max_concurrency", maxConcurrency)
+ }
+ }
+
+ return nil
+}
+
+// GetFileTypesEnabled returns whether file type detection is enabled.
+func GetFileTypesEnabled() bool {
+ return viper.GetBool("fileTypes.enabled")
+}
+
+// GetCustomImageExtensions returns custom image extensions from configuration.
+func GetCustomImageExtensions() []string {
+ return viper.GetStringSlice("fileTypes.customImageExtensions")
+}
+
+// GetCustomBinaryExtensions returns custom binary extensions from configuration.
+func GetCustomBinaryExtensions() []string {
+ return viper.GetStringSlice("fileTypes.customBinaryExtensions")
+}
+
+// GetCustomLanguages returns custom language mappings from configuration.
+func GetCustomLanguages() map[string]string {
+ return viper.GetStringMapString("fileTypes.customLanguages")
+}
+
+// GetDisabledImageExtensions returns disabled image extensions from configuration.
+func GetDisabledImageExtensions() []string {
+ return viper.GetStringSlice("fileTypes.disabledImageExtensions")
+}
+
+// GetDisabledBinaryExtensions returns disabled binary extensions from configuration.
+func GetDisabledBinaryExtensions() []string {
+ return viper.GetStringSlice("fileTypes.disabledBinaryExtensions")
+}
+
+// GetDisabledLanguageExtensions returns disabled language extensions from configuration.
+func GetDisabledLanguageExtensions() []string {
+ return viper.GetStringSlice("fileTypes.disabledLanguageExtensions")
+}
+
+// Back-pressure configuration getters
+
+// GetBackpressureEnabled returns whether back-pressure management is enabled.
+func GetBackpressureEnabled() bool {
+ return viper.GetBool("backpressure.enabled")
+}
+
+// GetMaxPendingFiles returns the maximum number of files that can be pending in the file channel.
+func GetMaxPendingFiles() int {
+ return viper.GetInt("backpressure.maxPendingFiles")
+}
+
+// GetMaxPendingWrites returns the maximum number of writes that can be pending in the write channel.
+func GetMaxPendingWrites() int {
+ return viper.GetInt("backpressure.maxPendingWrites")
+}
+
+// GetMaxMemoryUsage returns the maximum memory usage in bytes before back-pressure kicks in.
+func GetMaxMemoryUsage() int64 {
+ return viper.GetInt64("backpressure.maxMemoryUsage")
+}
+
+// GetMemoryCheckInterval returns how often to check memory usage (in number of files processed).
+func GetMemoryCheckInterval() int {
+ return viper.GetInt("backpressure.memoryCheckInterval")
+}
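Taken together, LoadConfig, ValidateConfig, and the getters give callers a guaranteed-sane view: an invalid file is rejected and replaced with defaults before any getter runs. A short consumer-side sketch:

```go
package main

import (
	"fmt"

	"github.com/ivuorinen/gibidify/config"
)

func main() {
	// LoadConfig validates internally and falls back to defaults on
	// failure, so the getters always return usable values.
	config.LoadConfig()
	fmt.Println("file size limit:", config.GetFileSizeLimit())

	if err := config.ValidateOutputFormat("xml"); err != nil {
		fmt.Println("rejected:", err) // xml is not json/yaml/markdown
	}
	if err := config.ValidateConcurrency(4); err == nil {
		fmt.Println("concurrency 4 accepted")
	}
}
```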
diff --git a/config/config_filetype_test.go b/config/config_filetype_test.go
new file mode 100644
index 0000000..0065bfa
--- /dev/null
+++ b/config/config_filetype_test.go
@@ -0,0 +1,174 @@
+package config
+
+import (
+ "testing"
+
+ "github.com/spf13/viper"
+)
+
+// TestFileTypeRegistryConfig tests the FileTypeRegistry configuration functionality.
+func TestFileTypeRegistryConfig(t *testing.T) {
+ // Test default values
+ t.Run("DefaultValues", func(t *testing.T) {
+ viper.Reset()
+ setDefaultConfig()
+
+ if !GetFileTypesEnabled() {
+ t.Error("Expected file types to be enabled by default")
+ }
+
+ if len(GetCustomImageExtensions()) != 0 {
+ t.Error("Expected custom image extensions to be empty by default")
+ }
+
+ if len(GetCustomBinaryExtensions()) != 0 {
+ t.Error("Expected custom binary extensions to be empty by default")
+ }
+
+ if len(GetCustomLanguages()) != 0 {
+ t.Error("Expected custom languages to be empty by default")
+ }
+
+ if len(GetDisabledImageExtensions()) != 0 {
+ t.Error("Expected disabled image extensions to be empty by default")
+ }
+
+ if len(GetDisabledBinaryExtensions()) != 0 {
+ t.Error("Expected disabled binary extensions to be empty by default")
+ }
+
+ if len(GetDisabledLanguageExtensions()) != 0 {
+ t.Error("Expected disabled language extensions to be empty by default")
+ }
+ })
+
+ // Test configuration setting and getting
+ t.Run("ConfigurationSetGet", func(t *testing.T) {
+ viper.Reset()
+
+ // Set test values
+ viper.Set("fileTypes.enabled", false)
+ viper.Set("fileTypes.customImageExtensions", []string{".webp", ".avif"})
+ viper.Set("fileTypes.customBinaryExtensions", []string{".custom", ".mybin"})
+ viper.Set("fileTypes.customLanguages", map[string]string{
+ ".zig": "zig",
+ ".v": "vlang",
+ })
+ viper.Set("fileTypes.disabledImageExtensions", []string{".gif", ".bmp"})
+ viper.Set("fileTypes.disabledBinaryExtensions", []string{".exe", ".dll"})
+ viper.Set("fileTypes.disabledLanguageExtensions", []string{".rb", ".pl"})
+
+ // Test getter functions
+ if GetFileTypesEnabled() {
+ t.Error("Expected file types to be disabled")
+ }
+
+ customImages := GetCustomImageExtensions()
+ expectedImages := []string{".webp", ".avif"}
+ if len(customImages) != len(expectedImages) {
+ t.Errorf("Expected %d custom image extensions, got %d", len(expectedImages), len(customImages))
+ }
+ for i, ext := range expectedImages {
+ if customImages[i] != ext {
+ t.Errorf("Expected custom image extension %s, got %s", ext, customImages[i])
+ }
+ }
+
+ customBinary := GetCustomBinaryExtensions()
+ expectedBinary := []string{".custom", ".mybin"}
+ if len(customBinary) != len(expectedBinary) {
+ t.Errorf("Expected %d custom binary extensions, got %d", len(expectedBinary), len(customBinary))
+ }
+ for i, ext := range expectedBinary {
+ if customBinary[i] != ext {
+ t.Errorf("Expected custom binary extension %s, got %s", ext, customBinary[i])
+ }
+ }
+
+ customLangs := GetCustomLanguages()
+ expectedLangs := map[string]string{
+ ".zig": "zig",
+ ".v": "vlang",
+ }
+ if len(customLangs) != len(expectedLangs) {
+ t.Errorf("Expected %d custom languages, got %d", len(expectedLangs), len(customLangs))
+ }
+ for ext, lang := range expectedLangs {
+ if customLangs[ext] != lang {
+ t.Errorf("Expected custom language %s -> %s, got %s", ext, lang, customLangs[ext])
+ }
+ }
+
+ disabledImages := GetDisabledImageExtensions()
+ expectedDisabledImages := []string{".gif", ".bmp"}
+ if len(disabledImages) != len(expectedDisabledImages) {
+ t.Errorf("Expected %d disabled image extensions, got %d", len(expectedDisabledImages), len(disabledImages))
+ }
+
+ disabledBinary := GetDisabledBinaryExtensions()
+ expectedDisabledBinary := []string{".exe", ".dll"}
+ if len(disabledBinary) != len(expectedDisabledBinary) {
+ t.Errorf("Expected %d disabled binary extensions, got %d", len(expectedDisabledBinary), len(disabledBinary))
+ }
+
+ disabledLangs := GetDisabledLanguageExtensions()
+ expectedDisabledLangs := []string{".rb", ".pl"}
+ if len(disabledLangs) != len(expectedDisabledLangs) {
+ t.Errorf("Expected %d disabled language extensions, got %d", len(expectedDisabledLangs), len(disabledLangs))
+ }
+ })
+
+ // Test validation
+ t.Run("ValidationSuccess", func(t *testing.T) {
+ viper.Reset()
+ setDefaultConfig()
+
+ // Set valid configuration
+ viper.Set("fileTypes.customImageExtensions", []string{".webp", ".avif"})
+ viper.Set("fileTypes.customBinaryExtensions", []string{".custom"})
+ viper.Set("fileTypes.customLanguages", map[string]string{
+ ".zig": "zig",
+ ".v": "vlang",
+ })
+
+ err := ValidateConfig()
+ if err != nil {
+ t.Errorf("Expected validation to pass with valid config, got error: %v", err)
+ }
+ })
+
+ t.Run("ValidationFailure", func(t *testing.T) {
+ // Test invalid custom image extensions
+ viper.Reset()
+ setDefaultConfig()
+ viper.Set("fileTypes.customImageExtensions", []string{"", "webp"}) // Empty and missing dot
+
+ err := ValidateConfig()
+ if err == nil {
+ t.Error("Expected validation to fail with invalid custom image extensions")
+ }
+
+ // Test invalid custom binary extensions
+ viper.Reset()
+ setDefaultConfig()
+ viper.Set("fileTypes.customBinaryExtensions", []string{"custom"}) // Missing dot
+
+ err = ValidateConfig()
+ if err == nil {
+ t.Error("Expected validation to fail with invalid custom binary extensions")
+ }
+
+ // Test invalid custom languages
+ viper.Reset()
+ setDefaultConfig()
+ viper.Set("fileTypes.customLanguages", map[string]string{
+ "zig": "zig", // Missing dot in extension
+ ".v": "", // Empty language
+ })
+
+ err = ValidateConfig()
+ if err == nil {
+ t.Error("Expected validation to fail with invalid custom languages")
+ }
+ })
+}
diff --git a/config/config_test.go b/config/config_test.go
index a0b7c8e..55fc55c 100644
--- a/config/config_test.go
+++ b/config/config_test.go
@@ -2,40 +2,38 @@ package config_test
import (
"os"
- "path/filepath"
+ "strings"
"testing"
- configpkg "github.com/ivuorinen/gibidify/config"
"github.com/spf13/viper"
+
+ "github.com/ivuorinen/gibidify/config"
+ "github.com/ivuorinen/gibidify/testutil"
+ "github.com/ivuorinen/gibidify/utils"
+)
+
+const (
+ defaultFileSizeLimit = 5242880
+ testFileSizeLimit = 123456
)
// TestDefaultConfig verifies that if no config file is found,
// the default configuration values are correctly set.
func TestDefaultConfig(t *testing.T) {
// Create a temporary directory to ensure no config file is present.
- tmpDir, err := os.MkdirTemp("", "gibidify_config_test_default")
- if err != nil {
- t.Fatalf("Failed to create temp directory: %v", err)
- }
- defer func() {
- if err := os.RemoveAll(tmpDir); err != nil {
- t.Fatalf("cleanup failed: %v", err)
- }
- }()
+ tmpDir := t.TempDir()
// Point Viper to the temp directory with no config file.
originalConfigPaths := viper.ConfigFileUsed()
- viper.Reset()
- viper.AddConfigPath(tmpDir)
- configpkg.LoadConfig()
+ testutil.ResetViperConfig(t, tmpDir)
// Check defaults
- defaultSizeLimit := configpkg.GetFileSizeLimit()
- if defaultSizeLimit != 5242880 {
+ defaultSizeLimit := config.GetFileSizeLimit()
+ if defaultSizeLimit != defaultFileSizeLimit {
t.Errorf("Expected default file size limit of 5242880, got %d", defaultSizeLimit)
}
- ignoredDirs := configpkg.GetIgnoredDirectories()
+ ignoredDirs := config.GetIgnoredDirectories()
if len(ignoredDirs) == 0 {
t.Errorf("Expected some default ignored directories, got none")
}
@@ -47,15 +45,7 @@ func TestDefaultConfig(t *testing.T) {
// TestLoadConfigFile verifies that when a valid config file is present,
// viper loads the specified values correctly.
func TestLoadConfigFile(t *testing.T) {
- tmpDir, err := os.MkdirTemp("", "gibidify_config_test_file")
- if err != nil {
- t.Fatalf("Failed to create temp directory: %v", err)
- }
- defer func() {
- if err := os.RemoveAll(tmpDir); err != nil {
- t.Fatalf("cleanup failed: %v", err)
- }
- }()
+ tmpDir := t.TempDir()
// Prepare a minimal config file
configContent := []byte(`---
@@ -65,22 +55,17 @@ ignoreDirectories:
- "testdir2"
`)
- configPath := filepath.Join(tmpDir, "config.yaml")
- if err := os.WriteFile(configPath, configContent, 0644); err != nil {
- t.Fatalf("Failed to write config file: %v", err)
- }
+ testutil.CreateTestFile(t, tmpDir, "config.yaml", configContent)
// Reset viper and point to the new config path
viper.Reset()
viper.AddConfigPath(tmpDir)
// Force Viper to read our config file
- if err := viper.ReadInConfig(); err != nil {
- t.Fatalf("Could not read config file: %v", err)
- }
+ testutil.MustSucceed(t, viper.ReadInConfig(), "reading config file")
// Validate loaded data
- if got := viper.GetInt64("fileSizeLimit"); got != 123456 {
+ if got := viper.GetInt64("fileSizeLimit"); got != testFileSizeLimit {
t.Errorf("Expected fileSizeLimit=123456, got %d", got)
}
@@ -89,3 +74,283 @@ ignoreDirectories:
t.Errorf("Expected [\"testdir1\", \"testdir2\"], got %v", ignored)
}
}
+
+// TestValidateConfig tests the configuration validation functionality.
+func TestValidateConfig(t *testing.T) {
+ tests := []struct {
+ name string
+ config map[string]interface{}
+ wantErr bool
+ errContains string
+ }{
+ {
+ name: "valid default config",
+ config: map[string]interface{}{
+ "fileSizeLimit": config.DefaultFileSizeLimit,
+ "ignoreDirectories": []string{"node_modules", ".git"},
+ },
+ wantErr: false,
+ },
+ {
+ name: "file size limit too small",
+ config: map[string]interface{}{
+ "fileSizeLimit": config.MinFileSizeLimit - 1,
+ },
+ wantErr: true,
+ errContains: "fileSizeLimit",
+ },
+ {
+ name: "file size limit too large",
+ config: map[string]interface{}{
+ "fileSizeLimit": config.MaxFileSizeLimit + 1,
+ },
+ wantErr: true,
+ errContains: "fileSizeLimit",
+ },
+ {
+ name: "empty ignore directory",
+ config: map[string]interface{}{
+ "ignoreDirectories": []string{"node_modules", "", ".git"},
+ },
+ wantErr: true,
+ errContains: "ignoreDirectories",
+ },
+ {
+ name: "ignore directory with path separator",
+ config: map[string]interface{}{
+ "ignoreDirectories": []string{"node_modules", "src/build", ".git"},
+ },
+ wantErr: true,
+ errContains: "path separator",
+ },
+ {
+ name: "invalid supported format",
+ config: map[string]interface{}{
+ "supportedFormats": []string{"json", "xml", "yaml"},
+ },
+ wantErr: true,
+ errContains: "not a valid format",
+ },
+ {
+ name: "invalid max concurrency",
+ config: map[string]interface{}{
+ "maxConcurrency": 0,
+ },
+ wantErr: true,
+ errContains: "maxConcurrency",
+ },
+ {
+ name: "valid comprehensive config",
+ config: map[string]interface{}{
+ "fileSizeLimit": config.DefaultFileSizeLimit,
+ "ignoreDirectories": []string{"node_modules", ".git", ".vscode"},
+ "supportedFormats": []string{"json", "yaml", "markdown"},
+ "maxConcurrency": 8,
+ "filePatterns": []string{"*.go", "*.js", "*.py"},
+ },
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Reset viper for each test
+ viper.Reset()
+
+ // Set test configuration
+ for key, value := range tt.config {
+ viper.Set(key, value)
+ }
+
+ // Load defaults for missing values
+ config.LoadConfig()
+
+ err := config.ValidateConfig()
+
+ if tt.wantErr {
+ if err == nil {
+ t.Errorf("Expected error but got none")
+ return
+ }
+ if tt.errContains != "" && !strings.Contains(err.Error(), tt.errContains) {
+ t.Errorf("Expected error to contain %q, got %q", tt.errContains, err.Error())
+ }
+
+ // Check that it's a structured error
+ var structErr *utils.StructuredError
+ if !errorAs(err, &structErr) {
+ t.Errorf("Expected structured error, got %T", err)
+ return
+ }
+ if structErr.Type != utils.ErrorTypeConfiguration {
+ t.Errorf("Expected error type %v, got %v", utils.ErrorTypeConfiguration, structErr.Type)
+ }
+ if structErr.Code != utils.CodeConfigValidation {
+ t.Errorf("Expected error code %v, got %v", utils.CodeConfigValidation, structErr.Code)
+ }
+ } else {
+ if err != nil {
+ t.Errorf("Expected no error but got: %v", err)
+ }
+ }
+ })
+ }
+}
+
+// TestValidationFunctions tests individual validation functions.
+func TestValidationFunctions(t *testing.T) {
+ t.Run("IsValidFormat", func(t *testing.T) {
+ tests := []struct {
+ format string
+ valid bool
+ }{
+ {"json", true},
+ {"yaml", true},
+ {"markdown", true},
+ {"JSON", true},
+ {"xml", false},
+ {"txt", false},
+ {"", false},
+ {" json ", true},
+ }
+
+ for _, tt := range tests {
+ result := config.IsValidFormat(tt.format)
+ if result != tt.valid {
+ t.Errorf("IsValidFormat(%q) = %v, want %v", tt.format, result, tt.valid)
+ }
+ }
+ })
+
+ t.Run("ValidateFileSize", func(t *testing.T) {
+ viper.Reset()
+ viper.Set("fileSizeLimit", config.DefaultFileSizeLimit)
+
+ tests := []struct {
+ name string
+ size int64
+ wantErr bool
+ }{
+ {"size within limit", config.DefaultFileSizeLimit - 1, false},
+ {"size at limit", config.DefaultFileSizeLimit, false},
+ {"size exceeds limit", config.DefaultFileSizeLimit + 1, true},
+ {"zero size", 0, false},
+ }
+
+ for _, tt := range tests {
+ err := config.ValidateFileSize(tt.size)
+ if (err != nil) != tt.wantErr {
+ t.Errorf("%s: ValidateFileSize(%d) error = %v, wantErr %v", tt.name, tt.size, err, tt.wantErr)
+ }
+ }
+ })
+
+ t.Run("ValidateOutputFormat", func(t *testing.T) {
+ tests := []struct {
+ format string
+ wantErr bool
+ }{
+ {"json", false},
+ {"yaml", false},
+ {"markdown", false},
+ {"xml", true},
+ {"txt", true},
+ {"", true},
+ }
+
+ for _, tt := range tests {
+ err := config.ValidateOutputFormat(tt.format)
+ if (err != nil) != tt.wantErr {
+ t.Errorf("ValidateOutputFormat(%q) error = %v, wantErr %v", tt.format, err, tt.wantErr)
+ }
+ }
+ })
+
+ t.Run("ValidateConcurrency", func(t *testing.T) {
+ tests := []struct {
+ name string
+ concurrency int
+ maxConcurrency int
+ setMax bool
+ wantErr bool
+ }{
+ {"valid concurrency", 4, 0, false, false},
+ {"minimum concurrency", 1, 0, false, false},
+ {"zero concurrency", 0, 0, false, true},
+ {"negative concurrency", -1, 0, false, true},
+ {"concurrency within max", 4, 8, true, false},
+ {"concurrency exceeds max", 16, 8, true, true},
+ }
+
+ for _, tt := range tests {
+ viper.Reset()
+ if tt.setMax {
+ viper.Set("maxConcurrency", tt.maxConcurrency)
+ }
+
+ err := config.ValidateConcurrency(tt.concurrency)
+ if (err != nil) != tt.wantErr {
+ t.Errorf("%s: ValidateConcurrency(%d) error = %v, wantErr %v", tt.name, tt.concurrency, err, tt.wantErr)
+ }
+ }
+ })
+}
+
+// TestLoadConfigWithValidation tests that invalid config files fall back to defaults.
+func TestLoadConfigWithValidation(t *testing.T) {
+ // Create a temporary config file with invalid content
+ configContent := `
+fileSizeLimit: 100
+ignoreDirectories:
+ - node_modules
+ - ""
+ - .git
+`
+
+ tempDir := t.TempDir()
+ configFile := tempDir + "/config.yaml"
+
+ err := os.WriteFile(configFile, []byte(configContent), 0o644)
+ if err != nil {
+ t.Fatalf("Failed to write config file: %v", err)
+ }
+
+ // Reset viper and set config path
+ viper.Reset()
+ viper.AddConfigPath(tempDir)
+
+ // This should load the config but validation should fail and fall back to defaults
+ config.LoadConfig()
+
+ // Should have fallen back to defaults due to validation failure
+ if config.GetFileSizeLimit() != int64(config.DefaultFileSizeLimit) {
+ t.Errorf("Expected default file size limit after validation failure, got %d", config.GetFileSizeLimit())
+ }
+ if containsString(config.GetIgnoredDirectories(), "") {
+ t.Errorf("Expected ignored directories not to contain empty string after validation failure, got %v", config.GetIgnoredDirectories())
+ }
+}
+
+// Helper functions
+
+func containsString(slice []string, item string) bool {
+ for _, s := range slice {
+ if s == item {
+ return true
+ }
+ }
+ return false
+}
+
+// errorAs is a minimal stand-in for errors.As that matches only the
+// top-level error; it does not unwrap wrapped error chains.
+func errorAs(err error, target interface{}) bool {
+ if err == nil {
+ return false
+ }
+ if structErr, ok := err.(*utils.StructuredError); ok {
+ if ptr, ok := target.(**utils.StructuredError); ok {
+ *ptr = structErr
+ return true
+ }
+ }
+ return false
+}
diff --git a/fileproc/backpressure.go b/fileproc/backpressure.go
new file mode 100644
index 0000000..733a271
--- /dev/null
+++ b/fileproc/backpressure.go
@@ -0,0 +1,196 @@
+// Package fileproc provides back-pressure management for memory optimization.
+package fileproc
+
+import (
+ "context"
+ "runtime"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "github.com/sirupsen/logrus"
+
+ "github.com/ivuorinen/gibidify/config"
+)
+
+// BackpressureManager manages memory usage and applies back-pressure when needed.
+type BackpressureManager struct {
+ enabled bool
+ maxMemoryUsage int64
+ memoryCheckInterval int
+ maxPendingFiles int
+ maxPendingWrites int
+ filesProcessed int64
+ mu sync.RWMutex
+ memoryWarningLogged bool
+ lastMemoryCheck time.Time
+}
+
+// NewBackpressureManager creates a new back-pressure manager with configuration.
+func NewBackpressureManager() *BackpressureManager {
+ return &BackpressureManager{
+ enabled: config.GetBackpressureEnabled(),
+ maxMemoryUsage: config.GetMaxMemoryUsage(),
+ memoryCheckInterval: config.GetMemoryCheckInterval(),
+ maxPendingFiles: config.GetMaxPendingFiles(),
+ maxPendingWrites: config.GetMaxPendingWrites(),
+ lastMemoryCheck: time.Now(),
+ }
+}
+
+// CreateChannels creates properly sized channels based on back-pressure configuration.
+func (bp *BackpressureManager) CreateChannels() (chan string, chan WriteRequest) {
+ var fileCh chan string
+ var writeCh chan WriteRequest
+
+ if bp.enabled {
+ // Use buffered channels with configured limits
+ fileCh = make(chan string, bp.maxPendingFiles)
+ writeCh = make(chan WriteRequest, bp.maxPendingWrites)
+ logrus.Debugf("Created buffered channels: files=%d, writes=%d", bp.maxPendingFiles, bp.maxPendingWrites)
+ } else {
+ // Use unbuffered channels (default behavior)
+ fileCh = make(chan string)
+ writeCh = make(chan WriteRequest)
+ logrus.Debug("Created unbuffered channels (back-pressure disabled)")
+ }
+
+ return fileCh, writeCh
+}
+
+// ShouldApplyBackpressure checks if back-pressure should be applied.
+func (bp *BackpressureManager) ShouldApplyBackpressure(ctx context.Context) bool {
+ if !bp.enabled {
+ return false
+ }
+
+ // Check if we should evaluate memory usage; guard against a zero or
+ // negative interval to avoid a modulo-by-zero panic when defaults
+ // were never loaded.
+ filesProcessed := atomic.AddInt64(&bp.filesProcessed, 1)
+ if bp.memoryCheckInterval <= 0 || int(filesProcessed)%bp.memoryCheckInterval != 0 {
+ return false
+ }
+
+ // Get current memory usage
+ var m runtime.MemStats
+ runtime.ReadMemStats(&m)
+ currentMemory := int64(m.Alloc)
+
+ bp.mu.Lock()
+ defer bp.mu.Unlock()
+
+ bp.lastMemoryCheck = time.Now()
+
+ // Check if we're over the memory limit
+ if currentMemory > bp.maxMemoryUsage {
+ if !bp.memoryWarningLogged {
+ logrus.Warnf("Memory usage (%d bytes) exceeds limit (%d bytes), applying back-pressure",
+ currentMemory, bp.maxMemoryUsage)
+ bp.memoryWarningLogged = true
+ }
+ return true
+ }
+
+ // Reset warning flag if we're back under the limit
+ if bp.memoryWarningLogged && currentMemory < bp.maxMemoryUsage*8/10 { // 80% of limit
+ logrus.Infof("Memory usage normalized (%d bytes), removing back-pressure", currentMemory)
+ bp.memoryWarningLogged = false
+ }
+
+ return false
+}
+
+// ApplyBackpressure applies back-pressure by triggering garbage collection and adding delay.
+func (bp *BackpressureManager) ApplyBackpressure(ctx context.Context) {
+ if !bp.enabled {
+ return
+ }
+
+ // Force garbage collection to free up memory
+ runtime.GC()
+
+ // Add a small delay to allow memory to be freed
+ select {
+ case <-ctx.Done():
+ return
+ case <-time.After(10 * time.Millisecond):
+ // Small delay to allow GC to complete
+ }
+
+ // Log memory usage after GC
+ var m runtime.MemStats
+ runtime.ReadMemStats(&m)
+ logrus.Debugf("Applied back-pressure: memory after GC = %d bytes", m.Alloc)
+}
+
+// GetStats returns current back-pressure statistics.
+func (bp *BackpressureManager) GetStats() BackpressureStats {
+ bp.mu.RLock()
+ defer bp.mu.RUnlock()
+
+ var m runtime.MemStats
+ runtime.ReadMemStats(&m)
+
+ return BackpressureStats{
+ Enabled: bp.enabled,
+ FilesProcessed: atomic.LoadInt64(&bp.filesProcessed),
+ CurrentMemoryUsage: int64(m.Alloc),
+ MaxMemoryUsage: bp.maxMemoryUsage,
+ MemoryWarningActive: bp.memoryWarningLogged,
+ LastMemoryCheck: bp.lastMemoryCheck,
+ MaxPendingFiles: bp.maxPendingFiles,
+ MaxPendingWrites: bp.maxPendingWrites,
+ }
+}
+
+// BackpressureStats represents back-pressure manager statistics.
+type BackpressureStats struct {
+ Enabled bool `json:"enabled"`
+ FilesProcessed int64 `json:"files_processed"`
+ CurrentMemoryUsage int64 `json:"current_memory_usage"`
+ MaxMemoryUsage int64 `json:"max_memory_usage"`
+ MemoryWarningActive bool `json:"memory_warning_active"`
+ LastMemoryCheck time.Time `json:"last_memory_check"`
+ MaxPendingFiles int `json:"max_pending_files"`
+ MaxPendingWrites int `json:"max_pending_writes"`
+}
+
+// WaitForChannelSpace waits for space in channels if they're getting full.
+func (bp *BackpressureManager) WaitForChannelSpace(ctx context.Context, fileCh chan string, writeCh chan WriteRequest) {
+ if !bp.enabled {
+ return
+ }
+
+ // Check if file channel is getting full (>90% capacity)
+ if len(fileCh) > bp.maxPendingFiles*9/10 {
+ logrus.Debugf("File channel is %d%% full, waiting for space", len(fileCh)*100/bp.maxPendingFiles)
+
+ // Wait a bit for the channel to drain
+ select {
+ case <-ctx.Done():
+ return
+ case <-time.After(5 * time.Millisecond):
+ }
+ }
+
+ // Check if write channel is getting full (>90% capacity)
+ if len(writeCh) > bp.maxPendingWrites*9/10 {
+ logrus.Debugf("Write channel is %d%% full, waiting for space", len(writeCh)*100/bp.maxPendingWrites)
+
+ // Wait a bit for the channel to drain
+ select {
+ case <-ctx.Done():
+ return
+ case <-time.After(5 * time.Millisecond):
+ }
+ }
+}
+
+// LogBackpressureInfo logs back-pressure configuration and status.
+func (bp *BackpressureManager) LogBackpressureInfo() {
+ if bp.enabled {
+ logrus.Infof("Back-pressure enabled: maxMemory=%dMB, fileBuffer=%d, writeBuffer=%d, checkInterval=%d",
+ bp.maxMemoryUsage/1024/1024, bp.maxPendingFiles, bp.maxPendingWrites, bp.memoryCheckInterval)
+ } else {
+ logrus.Info("Back-pressure disabled")
+ }
+}
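A sketch of how a producer loop might use the manager: create the channels through it, then yield whenever memory or the buffers fill up. The surrounding pipeline (path discovery, the WriteRequest consumer) is assumed and not part of this hunk:

```go
// Sketch only; the writeCh consumer and the paths slice come from the
// surrounding pipeline, which this change does not show.
func enqueue(ctx context.Context, paths []string) {
	bp := fileproc.NewBackpressureManager()
	bp.LogBackpressureInfo()
	fileCh, writeCh := bp.CreateChannels()

	go func() {
		defer close(fileCh)
		for _, p := range paths {
			if bp.ShouldApplyBackpressure(ctx) {
				bp.ApplyBackpressure(ctx) // GC plus a short pause
			}
			bp.WaitForChannelSpace(ctx, fileCh, writeCh)
			fileCh <- p
		}
	}()
}
```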
diff --git a/fileproc/cache.go b/fileproc/cache.go
new file mode 100644
index 0000000..ab3ad60
--- /dev/null
+++ b/fileproc/cache.go
@@ -0,0 +1,127 @@
+package fileproc
+
+// getNormalizedExtension efficiently extracts and normalizes the file extension with caching.
+func (r *FileTypeRegistry) getNormalizedExtension(filename string) string {
+ // Try cache first (read lock)
+ r.cacheMutex.RLock()
+ if ext, exists := r.extCache[filename]; exists {
+ r.cacheMutex.RUnlock()
+ return ext
+ }
+ r.cacheMutex.RUnlock()
+
+ // Compute normalized extension
+ ext := normalizeExtension(filename)
+
+ // Cache the result (write lock)
+ r.cacheMutex.Lock()
+ // Check cache size and clean if needed
+ if len(r.extCache) >= r.maxCacheSize*2 {
+ r.clearExtCache()
+ r.stats.CacheEvictions++
+ }
+ r.extCache[filename] = ext
+ r.cacheMutex.Unlock()
+
+ return ext
+}
+
+// getFileTypeResult gets cached file type detection result or computes it.
+func (r *FileTypeRegistry) getFileTypeResult(filename string) FileTypeResult {
+ ext := r.getNormalizedExtension(filename)
+
+ // Update statistics
+ r.updateStats(func() {
+ r.stats.TotalLookups++
+ })
+
+ // Try cache first (read lock)
+ r.cacheMutex.RLock()
+ if result, exists := r.resultCache[ext]; exists {
+ r.cacheMutex.RUnlock()
+ r.updateStats(func() {
+ r.stats.CacheHits++
+ })
+ return result
+ }
+ r.cacheMutex.RUnlock()
+
+ // Cache miss
+ r.updateStats(func() {
+ r.stats.CacheMisses++
+ })
+
+ // Compute result
+ result := FileTypeResult{
+ Extension: ext,
+ IsImage: r.imageExts[ext],
+ IsBinary: r.binaryExts[ext],
+ Language: r.languageMap[ext],
+ }
+
+ // Handle special cases for binary detection (like .DS_Store)
+ if !result.IsBinary && isSpecialFile(filename, r.binaryExts) {
+ result.IsBinary = true
+ }
+
+ // Cache the result (write lock)
+ r.cacheMutex.Lock()
+ if len(r.resultCache) >= r.maxCacheSize {
+ r.clearResultCache()
+ r.stats.CacheEvictions++
+ }
+ r.resultCache[ext] = result
+ r.cacheMutex.Unlock()
+
+ return result
+}
+
+// clearExtCache shrinks the extension cache, keeping at most maxCacheSize/2
+// entries; survivors follow map iteration order, so eviction is approximate
+// rather than true LRU.
+func (r *FileTypeRegistry) clearExtCache() {
+ r.clearCache(&r.extCache, r.maxCacheSize)
+}
+
+// clearResultCache shrinks the result cache, keeping at most maxCacheSize/2
+// entries (eviction order is arbitrary).
+func (r *FileTypeRegistry) clearResultCache() {
+ newCache := make(map[string]FileTypeResult, r.maxCacheSize)
+ count := 0
+ for k, v := range r.resultCache {
+ if count >= r.maxCacheSize/2 {
+ break
+ }
+ newCache[k] = v
+ count++
+ }
+ r.resultCache = newCache
+}
+
+// clearCache shrinks a string-keyed cache in place, keeping at most
+// maxSize/2 entries; which entries survive depends on map iteration order.
+func (r *FileTypeRegistry) clearCache(cache *map[string]string, maxSize int) {
+ newCache := make(map[string]string, maxSize)
+ count := 0
+ for k, v := range *cache {
+ if count >= maxSize/2 {
+ break
+ }
+ newCache[k] = v
+ count++
+ }
+ *cache = newCache
+}
+
+// invalidateCache clears both caches when the registry is modified.
+func (r *FileTypeRegistry) invalidateCache() {
+ r.cacheMutex.Lock()
+ defer r.cacheMutex.Unlock()
+
+ r.extCache = make(map[string]string, r.maxCacheSize)
+ r.resultCache = make(map[string]FileTypeResult, r.maxCacheSize)
+ r.stats.CacheEvictions++
+}
+
+// updateStats safely updates statistics.
+func (r *FileTypeRegistry) updateStats(fn func()) {
+ r.cacheMutex.Lock()
+ fn()
+ r.cacheMutex.Unlock()
+}
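All the lookups above share one shape: an optimistic read under RLock, computation outside any lock, then publication under Lock. Distilled into a standalone sketch (the race where two goroutines compute the same value is benign; the last write wins):

```go
// Generic form of the caching pattern used in this file.
type memo struct {
	mu sync.RWMutex
	m  map[string]string
}

func (c *memo) get(key string, compute func(string) string) string {
	c.mu.RLock()
	v, ok := c.m[key]
	c.mu.RUnlock()
	if ok {
		return v // cache hit under the read lock
	}
	v = compute(key) // may run twice under contention; harmless
	c.mu.Lock()
	c.m[key] = v
	c.mu.Unlock()
	return v
}
```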
diff --git a/fileproc/collector.go b/fileproc/collector.go
index 72b4d2d..6091c98 100644
--- a/fileproc/collector.go
+++ b/fileproc/collector.go
@@ -4,6 +4,6 @@ package fileproc
// CollectFiles scans the given root directory using the default walker (ProdWalker)
// and returns a slice of file paths.
func CollectFiles(root string) ([]string, error) {
- var w Walker = ProdWalker{}
+ w := NewProdWalker()
return w.Walk(root)
}
diff --git a/fileproc/collector_test.go b/fileproc/collector_test.go
index 2437403..55740c1 100644
--- a/fileproc/collector_test.go
+++ b/fileproc/collector_test.go
@@ -4,7 +4,7 @@ import (
"os"
"testing"
- fileproc "github.com/ivuorinen/gibidify/fileproc"
+ "github.com/ivuorinen/gibidify/fileproc"
)
func TestCollectFilesWithFakeWalker(t *testing.T) {
diff --git a/fileproc/config.go b/fileproc/config.go
new file mode 100644
index 0000000..24d59e0
--- /dev/null
+++ b/fileproc/config.go
@@ -0,0 +1,40 @@
+package fileproc
+
+import "strings"
+
+// ApplyCustomExtensions applies custom extensions from configuration.
+func (r *FileTypeRegistry) ApplyCustomExtensions(customImages, customBinary []string, customLanguages map[string]string) {
+ // Add custom image extensions
+ r.addExtensions(customImages, r.AddImageExtension)
+
+ // Add custom binary extensions
+ r.addExtensions(customBinary, r.AddBinaryExtension)
+
+ // Add custom language mappings
+ for ext, lang := range customLanguages {
+ if ext != "" && lang != "" {
+ r.AddLanguageMapping(strings.ToLower(ext), lang)
+ }
+ }
+}
+
+// addExtensions is a helper to add multiple extensions.
+func (r *FileTypeRegistry) addExtensions(extensions []string, adder func(string)) {
+ for _, ext := range extensions {
+ if ext != "" {
+ adder(strings.ToLower(ext))
+ }
+ }
+}
+
+// ConfigureFromSettings applies configuration settings to the registry.
+// This function is called from main.go after config is loaded to avoid circular imports.
+func ConfigureFromSettings(
+ customImages, customBinary []string,
+ customLanguages map[string]string,
+ disabledImages, disabledBinary, disabledLanguages []string,
+) {
+ registry := GetDefaultRegistry()
+ registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
+ registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages)
+}
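Per the comment above, main.go is expected to bridge the two packages once configuration is loaded; the getters are the ones added to config/config.go in this same change. A sketch of that call site (main.go itself is not in this hunk):

```go
config.LoadConfig()
fileproc.ConfigureFromSettings(
	config.GetCustomImageExtensions(),
	config.GetCustomBinaryExtensions(),
	config.GetCustomLanguages(),
	config.GetDisabledImageExtensions(),
	config.GetDisabledBinaryExtensions(),
	config.GetDisabledLanguageExtensions(),
)
```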
diff --git a/fileproc/detection.go b/fileproc/detection.go
new file mode 100644
index 0000000..f4e2929
--- /dev/null
+++ b/fileproc/detection.go
@@ -0,0 +1,99 @@
+package fileproc
+
+import "strings"
+
+// Package-level detection functions
+
+// IsImage checks if the file extension indicates an image file.
+func IsImage(filename string) bool {
+ return getRegistry().IsImage(filename)
+}
+
+// IsBinary checks if the file extension indicates a binary file.
+func IsBinary(filename string) bool {
+ return getRegistry().IsBinary(filename)
+}
+
+// GetLanguage returns the language identifier for the given filename based on its extension.
+func GetLanguage(filename string) string {
+ return getRegistry().GetLanguage(filename)
+}
+
+// Registry methods for detection
+
+// IsImage checks if the file extension indicates an image file.
+func (r *FileTypeRegistry) IsImage(filename string) bool {
+ result := r.getFileTypeResult(filename)
+ return result.IsImage
+}
+
+// IsBinary checks if the file extension indicates a binary file.
+func (r *FileTypeRegistry) IsBinary(filename string) bool {
+ result := r.getFileTypeResult(filename)
+ return result.IsBinary
+}
+
+// GetLanguage returns the language identifier for the given filename based on its extension.
+func (r *FileTypeRegistry) GetLanguage(filename string) string {
+ if len(filename) < minExtensionLength {
+ return ""
+ }
+ result := r.getFileTypeResult(filename)
+ return result.Language
+}
+
+// Extension management methods
+
+// AddImageExtension adds a new image extension to the registry.
+func (r *FileTypeRegistry) AddImageExtension(ext string) {
+ r.addExtension(ext, r.imageExts)
+}
+
+// AddBinaryExtension adds a new binary extension to the registry.
+func (r *FileTypeRegistry) AddBinaryExtension(ext string) {
+ r.addExtension(ext, r.binaryExts)
+}
+
+// AddLanguageMapping adds a new language mapping to the registry.
+func (r *FileTypeRegistry) AddLanguageMapping(ext, language string) {
+ r.languageMap[strings.ToLower(ext)] = language
+ r.invalidateCache()
+}
+
+// addExtension is a helper to add extensions to a map.
+func (r *FileTypeRegistry) addExtension(ext string, target map[string]bool) {
+ target[strings.ToLower(ext)] = true
+ r.invalidateCache()
+}
+
+// removeExtension is a helper to remove extensions from a map.
+func (r *FileTypeRegistry) removeExtension(ext string, target map[string]bool) {
+ delete(target, strings.ToLower(ext))
+}
+
+// DisableExtensions removes specified extensions from the registry.
+func (r *FileTypeRegistry) DisableExtensions(disabledImages, disabledBinary, disabledLanguages []string) {
+ // Disable image extensions
+ for _, ext := range disabledImages {
+ if ext != "" {
+ r.removeExtension(ext, r.imageExts)
+ }
+ }
+
+ // Disable binary extensions
+ for _, ext := range disabledBinary {
+ if ext != "" {
+ r.removeExtension(ext, r.binaryExts)
+ }
+ }
+
+ // Disable language extensions
+ for _, ext := range disabledLanguages {
+ if ext != "" {
+ delete(r.languageMap, strings.ToLower(ext))
+ }
+ }
+
+ // Invalidate cache after all modifications
+ r.invalidateCache()
+}
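With the default tables from extensions.go, the package-level helpers behave like this:

```go
fmt.Println(fileproc.IsImage("logo.png"))      // true
fmt.Println(fileproc.IsBinary("app.exe"))      // true
fmt.Println(fileproc.GetLanguage("main.go"))   // "go"
fmt.Println(fileproc.GetLanguage("notes.xyz")) // "" (unmapped extension)
```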
diff --git a/fileproc/extensions.go b/fileproc/extensions.go
new file mode 100644
index 0000000..602f107
--- /dev/null
+++ b/fileproc/extensions.go
@@ -0,0 +1,161 @@
+package fileproc
+
+// getImageExtensions returns the default image file extensions.
+func getImageExtensions() map[string]bool {
+ return map[string]bool{
+ ".png": true,
+ ".jpg": true,
+ ".jpeg": true,
+ ".gif": true,
+ ".bmp": true,
+ ".tiff": true,
+ ".tif": true,
+ ".svg": true,
+ ".webp": true,
+ ".ico": true,
+ }
+}
+
+// getBinaryExtensions returns the default binary file extensions.
+func getBinaryExtensions() map[string]bool {
+ return map[string]bool{
+ // Executables and libraries
+ ".exe": true,
+ ".dll": true,
+ ".so": true,
+ ".dylib": true,
+ ".bin": true,
+ ".o": true,
+ ".a": true,
+ ".lib": true,
+
+ // Compiled bytecode
+ ".jar": true,
+ ".class": true,
+ ".pyc": true,
+ ".pyo": true,
+
+ // Data files
+ ".dat": true,
+ ".db": true,
+ ".sqlite": true,
+ ".ds_store": true,
+
+ // Documents
+ ".pdf": true,
+
+ // Archives
+ ".zip": true,
+ ".tar": true,
+ ".gz": true,
+ ".bz2": true,
+ ".xz": true,
+ ".7z": true,
+ ".rar": true,
+
+ // Fonts
+ ".ttf": true,
+ ".otf": true,
+ ".woff": true,
+ ".woff2": true,
+
+ // Media files
+ ".mp3": true,
+ ".mp4": true,
+ ".avi": true,
+ ".mov": true,
+ ".wmv": true,
+ ".flv": true,
+ ".webm": true,
+ ".ogg": true,
+ ".wav": true,
+ ".flac": true,
+ }
+}
+
+// getLanguageMap returns the default language mappings.
+func getLanguageMap() map[string]string {
+ return map[string]string{
+ // Systems programming
+ ".go": "go",
+ ".c": "c",
+ ".cpp": "cpp",
+ ".h": "c",
+ ".hpp": "cpp",
+ ".rs": "rust",
+
+ // Scripting languages
+ ".py": "python",
+ ".rb": "ruby",
+ ".pl": "perl",
+ ".lua": "lua",
+ ".php": "php",
+
+ // Web technologies
+ ".js": "javascript",
+ ".ts": "typescript",
+ ".jsx": "javascript",
+ ".tsx": "typescript",
+ ".html": "html",
+ ".htm": "html",
+ ".css": "css",
+ ".scss": "scss",
+ ".sass": "sass",
+ ".less": "less",
+ ".vue": "vue",
+
+ // JVM languages
+ ".java": "java",
+ ".scala": "scala",
+ ".kt": "kotlin",
+ ".clj": "clojure",
+
+ // .NET languages
+ ".cs": "csharp",
+ ".vb": "vbnet",
+ ".fs": "fsharp",
+
+ // Apple platforms
+ ".swift": "swift",
+ ".m": "objc",
+ ".mm": "objcpp",
+
+ // Shell scripts
+ ".sh": "bash",
+ ".bash": "bash",
+ ".zsh": "zsh",
+ ".fish": "fish",
+ ".ps1": "powershell",
+ ".bat": "batch",
+ ".cmd": "batch",
+
+ // Data formats
+ ".json": "json",
+ ".yaml": "yaml",
+ ".yml": "yaml",
+ ".toml": "toml",
+ ".xml": "xml",
+ ".sql": "sql",
+
+ // Documentation
+ ".md": "markdown",
+ ".rst": "rst",
+ ".tex": "latex",
+
+ // Functional languages
+ ".hs": "haskell",
+ ".ml": "ocaml",
+ ".mli": "ocaml",
+ ".elm": "elm",
+ ".ex": "elixir",
+ ".exs": "elixir",
+ ".erl": "erlang",
+ ".hrl": "erlang",
+
+ // Other languages
+ ".r": "r",
+ ".dart": "dart",
+ ".nim": "nim",
+ ".nims": "nim",
+ }
+}
diff --git a/fileproc/fake_walker.go b/fileproc/fake_walker.go
index fc156fd..f809717 100644
--- a/fileproc/fake_walker.go
+++ b/fileproc/fake_walker.go
@@ -3,8 +3,8 @@ package fileproc
// FakeWalker implements Walker for testing purposes.
type FakeWalker struct {
- Files []string
Err error
+ Files []string
}
// Walk returns predetermined file paths or an error, depending on FakeWalker's configuration.
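Usage mirrors collector_test.go below: seed Files for the happy path or Err to force a failure:

```go
w := fileproc.FakeWalker{Files: []string{"a.go", "b.go"}}
files, err := w.Walk("any-root")
fmt.Println(files, err) // [a.go b.go] <nil>

bad := fileproc.FakeWalker{Err: errors.New("boom")}
if _, err := bad.Walk("any-root"); err == nil {
	panic("expected the configured error")
}
```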
diff --git a/fileproc/file_filters.go b/fileproc/file_filters.go
new file mode 100644
index 0000000..995d98f
--- /dev/null
+++ b/fileproc/file_filters.go
@@ -0,0 +1,55 @@
+package fileproc
+
+import (
+ "os"
+
+ "github.com/ivuorinen/gibidify/config"
+)
+
+// FileFilter defines filtering criteria for files and directories.
+type FileFilter struct {
+ ignoredDirs []string
+ sizeLimit int64
+}
+
+// NewFileFilter creates a new file filter with current configuration.
+func NewFileFilter() *FileFilter {
+ return &FileFilter{
+ ignoredDirs: config.GetIgnoredDirectories(),
+ sizeLimit: config.GetFileSizeLimit(),
+ }
+}
+
+// shouldSkipEntry determines if an entry should be skipped based on ignore rules and filters.
+func (f *FileFilter) shouldSkipEntry(entry os.DirEntry, fullPath string, rules []ignoreRule) bool {
+ if entry.IsDir() {
+ return f.shouldSkipDirectory(entry)
+ }
+
+ if f.shouldSkipFile(entry, fullPath) {
+ return true
+ }
+
+ return matchesIgnoreRules(fullPath, rules)
+}
+
+// shouldSkipDirectory checks if a directory should be skipped based on the ignored directories list.
+func (f *FileFilter) shouldSkipDirectory(entry os.DirEntry) bool {
+ for _, d := range f.ignoredDirs {
+ if entry.Name() == d {
+ return true
+ }
+ }
+ return false
+}
+
+// shouldSkipFile checks if a file should be skipped based on size limit and file type.
+func (f *FileFilter) shouldSkipFile(entry os.DirEntry, fullPath string) bool {
+ // Check if file exceeds the configured size limit.
+ if info, err := entry.Info(); err == nil && info.Size() > f.sizeLimit {
+ return true
+ }
+
+ // Apply the default filter to ignore binary and image files.
+ return IsBinary(fullPath) || IsImage(fullPath)
+}
diff --git a/fileproc/filetypes_test.go b/fileproc/filetypes_test.go
new file mode 100644
index 0000000..3053068
--- /dev/null
+++ b/fileproc/filetypes_test.go
@@ -0,0 +1,827 @@
+package fileproc
+
+import (
+ "fmt"
+ "sync"
+ "testing"
+)
+
+// TestFileTypeRegistry_ModificationMethods tests the modification methods of FileTypeRegistry.
+func TestFileTypeRegistry_ModificationMethods(t *testing.T) {
+ // Create a new registry instance for testing
+ registry := &FileTypeRegistry{
+ imageExts: make(map[string]bool),
+ binaryExts: make(map[string]bool),
+ languageMap: make(map[string]string),
+ }
+
+ // Test AddImageExtension
+ t.Run("AddImageExtension", func(t *testing.T) {
+ // Add a new image extension
+ registry.AddImageExtension(".webp")
+ if !registry.IsImage("test.webp") {
+ t.Errorf("Expected .webp to be recognized as image after adding")
+ }
+
+ // Test case insensitive addition
+ registry.AddImageExtension(".AVIF")
+ if !registry.IsImage("test.avif") {
+ t.Errorf("Expected .avif to be recognized as image after adding .AVIF")
+ }
+ if !registry.IsImage("test.AVIF") {
+ t.Errorf("Expected .AVIF to be recognized as image")
+ }
+
+ // Test without dot prefix (should not match)
+ registry.AddImageExtension("heic")
+ if registry.IsImage("test.heic") {
+ t.Errorf("Expected extension added without a dot prefix to not match")
+ }
+
+ // Test with proper dot prefix
+ registry.AddImageExtension(".heic")
+ if !registry.IsImage("test.heic") {
+ t.Errorf("Expected .heic to be recognized as image")
+ }
+ })
+
+ // Test AddBinaryExtension
+ t.Run("AddBinaryExtension", func(t *testing.T) {
+ // Add a new binary extension
+ registry.AddBinaryExtension(".custom")
+ if !registry.IsBinary("test.custom") {
+ t.Errorf("Expected .custom to be recognized as binary after adding")
+ }
+
+ // Test case insensitive addition
+ registry.AddBinaryExtension(".NEWBIN")
+ if !registry.IsBinary("test.newbin") {
+ t.Errorf("Expected .newbin to be recognized as binary after adding .NEWBIN")
+ }
+ if !registry.IsBinary("test.NEWBIN") {
+ t.Errorf("Expected .NEWBIN to be recognized as binary")
+ }
+
+ // Test overwriting existing extension
+ registry.AddBinaryExtension(".custom")
+ if !registry.IsBinary("test.custom") {
+ t.Errorf("Expected .custom to still be recognized as binary after re-adding")
+ }
+ })
+
+ // Test AddLanguageMapping
+ t.Run("AddLanguageMapping", func(t *testing.T) {
+ // Add a new language mapping
+ registry.AddLanguageMapping(".zig", "zig")
+ if registry.GetLanguage("test.zig") != "zig" {
+ t.Errorf("Expected .zig to map to 'zig', got '%s'", registry.GetLanguage("test.zig"))
+ }
+
+ // Test case insensitive addition
+ registry.AddLanguageMapping(".V", "vlang")
+ if registry.GetLanguage("test.v") != "vlang" {
+ t.Errorf("Expected .v to map to 'vlang' after adding .V, got '%s'", registry.GetLanguage("test.v"))
+ }
+ if registry.GetLanguage("test.V") != "vlang" {
+ t.Errorf("Expected .V to map to 'vlang', got '%s'", registry.GetLanguage("test.V"))
+ }
+
+ // Test overwriting existing mapping
+ registry.AddLanguageMapping(".zig", "ziglang")
+ if registry.GetLanguage("test.zig") != "ziglang" {
+ t.Errorf("Expected .zig to map to 'ziglang' after update, got '%s'", registry.GetLanguage("test.zig"))
+ }
+
+ // Test empty language
+ registry.AddLanguageMapping(".empty", "")
+ if registry.GetLanguage("test.empty") != "" {
+ t.Errorf("Expected .empty to map to empty string, got '%s'", registry.GetLanguage("test.empty"))
+ }
+ })
+}
+
+// TestFileTypeRegistry_LanguageDetection tests the language detection functionality.
+func TestFileTypeRegistry_LanguageDetection(t *testing.T) {
+ registry := GetDefaultRegistry()
+
+ tests := []struct {
+ filename string
+ expected string
+ }{
+ // Programming languages
+ {"main.go", "go"},
+ {"script.py", "python"},
+ {"app.js", "javascript"},
+ {"component.tsx", "typescript"},
+ {"service.ts", "typescript"},
+ {"App.java", "java"},
+ {"program.c", "c"},
+ {"program.cpp", "cpp"},
+ {"header.h", "c"},
+ {"header.hpp", "cpp"},
+ {"main.rs", "rust"},
+ {"script.rb", "ruby"},
+ {"index.php", "php"},
+ {"app.swift", "swift"},
+ {"MainActivity.kt", "kotlin"},
+ {"Main.scala", "scala"},
+ {"analysis.r", "r"},
+ {"ViewController.m", "objc"},
+ {"ViewController.mm", "objcpp"},
+ {"Program.cs", "csharp"},
+ {"Module.vb", "vbnet"},
+ {"program.fs", "fsharp"},
+ {"script.lua", "lua"},
+ {"script.pl", "perl"},
+
+ // Shell scripts
+ {"script.sh", "bash"},
+ {"script.bash", "bash"},
+ {"script.zsh", "zsh"},
+ {"script.fish", "fish"},
+ {"script.ps1", "powershell"},
+ {"script.bat", "batch"},
+ {"script.cmd", "batch"},
+
+ // Data and markup
+ {"query.sql", "sql"},
+ {"index.html", "html"},
+ {"page.htm", "html"},
+ {"data.xml", "xml"},
+ {"style.css", "css"},
+ {"style.scss", "scss"},
+ {"style.sass", "sass"},
+ {"style.less", "less"},
+ {"data.json", "json"},
+ {"config.yaml", "yaml"},
+ {"config.yml", "yaml"},
+ {"config.toml", "toml"},
+ {"README.md", "markdown"},
+ {"doc.rst", "rst"},
+ {"paper.tex", "latex"},
+
+ // Modern languages
+ {"main.dart", "dart"},
+ {"Main.elm", "elm"},
+ {"core.clj", "clojure"},
+ {"server.ex", "elixir"},
+ {"test.exs", "elixir"},
+ {"server.erl", "erlang"},
+ {"header.hrl", "erlang"},
+ {"main.hs", "haskell"},
+ {"module.ml", "ocaml"},
+ {"interface.mli", "ocaml"},
+ {"main.nim", "nim"},
+ {"config.nims", "nim"},
+
+ // Web frameworks
+ {"Component.vue", "vue"},
+ {"Component.jsx", "javascript"},
+
+ // Case sensitivity tests
+ {"MAIN.GO", "go"},
+ {"Script.PY", "python"},
+ {"APP.JS", "javascript"},
+
+ // Edge cases
+ {"", ""}, // Empty filename
+ {"a", ""}, // Too short (less than minExtensionLength)
+ {"noext", ""}, // No extension
+ {".hidden", ""}, // Hidden file with no name
+ {"file.", ""}, // Extension is just a dot
+ {"file.unknown", ""}, // Unknown extension
+ {"file.123", ""}, // Numeric extension
+ {"a.b", ""}, // Very short filename and extension
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.filename, func(t *testing.T) {
+ result := registry.GetLanguage(tt.filename)
+ if result != tt.expected {
+ t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected)
+ }
+ })
+ }
+}
+
+// TestFileTypeRegistry_ImageDetection tests the image detection functionality.
+func TestFileTypeRegistry_ImageDetection(t *testing.T) {
+ registry := GetDefaultRegistry()
+
+ tests := []struct {
+ filename string
+ expected bool
+ }{
+ // Common image formats
+ {"photo.png", true},
+ {"image.jpg", true},
+ {"picture.jpeg", true},
+ {"animation.gif", true},
+ {"bitmap.bmp", true},
+ {"image.tiff", true},
+ {"scan.tif", true},
+ {"vector.svg", true},
+ {"modern.webp", true},
+ {"favicon.ico", true},
+
+ // Case sensitivity tests
+ {"PHOTO.PNG", true},
+ {"IMAGE.JPG", true},
+ {"PICTURE.JPEG", true},
+
+ // Non-image files
+ {"document.txt", false},
+ {"script.js", false},
+ {"data.json", false},
+ {"archive.zip", false},
+ {"executable.exe", false},
+
+ // Edge cases
+ {"", false}, // Empty filename
+ {"image", false}, // No extension
+ {".png", true}, // Just extension
+ {"file.png.bak", false}, // Multiple extensions
+ {"image.unknown", false}, // Unknown extension
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.filename, func(t *testing.T) {
+ result := registry.IsImage(tt.filename)
+ if result != tt.expected {
+ t.Errorf("IsImage(%q) = %t, expected %t", tt.filename, result, tt.expected)
+ }
+ })
+ }
+}
+
+// TestFileTypeRegistry_BinaryDetection tests the binary detection functionality.
+func TestFileTypeRegistry_BinaryDetection(t *testing.T) {
+ registry := GetDefaultRegistry()
+
+ tests := []struct {
+ filename string
+ expected bool
+ }{
+ // Executable files
+ {"program.exe", true},
+ {"library.dll", true},
+ {"libfoo.so", true},
+ {"framework.dylib", true},
+ {"data.bin", true},
+
+ // Object and library files
+ {"object.o", true},
+ {"archive.a", true},
+ {"library.lib", true},
+ {"application.jar", true},
+ {"bytecode.class", true},
+ {"compiled.pyc", true},
+ {"optimized.pyo", true},
+
+ // System files
+ {".DS_Store", true},
+
+ // Document files (treated as binary)
+ {"document.pdf", true},
+
+ // Archive files
+ {"archive.zip", true},
+ {"backup.tar", true},
+ {"compressed.gz", true},
+ {"data.bz2", true},
+ {"package.xz", true},
+ {"archive.7z", true},
+ {"backup.rar", true},
+
+ // Font files
+ {"font.ttf", true},
+ {"font.otf", true},
+ {"font.woff", true},
+ {"font.woff2", true},
+
+ // Media files
+ {"song.mp3", true},
+ {"video.mp4", true},
+ {"movie.avi", true},
+ {"clip.mov", true},
+ {"video.wmv", true},
+ {"animation.flv", true},
+ {"modern.webm", true},
+ {"audio.ogg", true},
+ {"sound.wav", true},
+ {"music.flac", true},
+
+ // Database files
+ {"data.dat", true},
+ {"database.db", true},
+ {"app.sqlite", true},
+
+ // Case sensitivity tests
+ {"PROGRAM.EXE", true},
+ {"LIBRARY.DLL", true},
+
+ // Non-binary files
+ {"document.txt", false},
+ {"script.js", false},
+ {"data.json", false},
+ {"style.css", false},
+ {"page.html", false},
+
+ // Edge cases
+ {"", false}, // Empty filename
+ {"binary", false}, // No extension
+ {".exe", true}, // Just extension
+ {"file.exe.bak", false}, // Multiple extensions
+ {"file.unknown", false}, // Unknown extension
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.filename, func(t *testing.T) {
+ result := registry.IsBinary(tt.filename)
+ if result != tt.expected {
+ t.Errorf("IsBinary(%q) = %t, expected %t", tt.filename, result, tt.expected)
+ }
+ })
+ }
+}
+
+// TestFileTypeRegistry_DefaultRegistryConsistency tests that the default registry is consistent.
+func TestFileTypeRegistry_DefaultRegistryConsistency(t *testing.T) {
+ // Get registry multiple times and ensure it's the same instance
+ registry1 := GetDefaultRegistry()
+ registry2 := GetDefaultRegistry()
+ registry3 := getRegistry()
+
+ if registry1 != registry2 {
+ t.Error("GetDefaultRegistry() should return the same instance")
+ }
+ if registry1 != registry3 {
+ t.Error("getRegistry() should return the same instance as GetDefaultRegistry()")
+ }
+
+ // Test that global functions use the same registry
+ filename := "test.go"
+ if IsImage(filename) != registry1.IsImage(filename) {
+ t.Error("IsImage() global function should match registry method")
+ }
+ if IsBinary(filename) != registry1.IsBinary(filename) {
+ t.Error("IsBinary() global function should match registry method")
+ }
+ if GetLanguage(filename) != registry1.GetLanguage(filename) {
+ t.Error("GetLanguage() global function should match registry method")
+ }
+}
+
+// TestFileTypeRegistry_ThreadSafety tests the thread safety of the FileTypeRegistry.
+func TestFileTypeRegistry_ThreadSafety(t *testing.T) {
+ const numGoroutines = 100
+ const numOperationsPerGoroutine = 100
+
+ var wg sync.WaitGroup
+
+ // Test concurrent read operations
+ t.Run("ConcurrentReads", func(t *testing.T) {
+ for i := 0; i < numGoroutines; i++ {
+ wg.Add(1)
+ go func(id int) {
+ defer wg.Done()
+ registry := GetDefaultRegistry()
+
+ for j := 0; j < numOperationsPerGoroutine; j++ {
+ // Test various file detection operations
+ _ = registry.IsImage("test.png")
+ _ = registry.IsBinary("test.exe")
+ _ = registry.GetLanguage("test.go")
+
+ // Test global functions too
+ _ = IsImage("image.jpg")
+ _ = IsBinary("binary.dll")
+ _ = GetLanguage("script.py")
+ }
+ }(i)
+ }
+ wg.Wait()
+ })
+
+ // Test concurrent registry access (singleton creation)
+ t.Run("ConcurrentRegistryAccess", func(t *testing.T) {
+ // Reset the registry to test concurrent initialization.
+ // Note: this is not safe in a real application, but is needed for testing.
+ ResetRegistryForTesting()
+
+ registries := make([]*FileTypeRegistry, numGoroutines)
+
+ for i := 0; i < numGoroutines; i++ {
+ wg.Add(1)
+ go func(id int) {
+ defer wg.Done()
+ registries[id] = GetDefaultRegistry()
+ }(i)
+ }
+ wg.Wait()
+
+ // Verify all goroutines got the same registry instance
+ firstRegistry := registries[0]
+ for i := 1; i < numGoroutines; i++ {
+ if registries[i] != firstRegistry {
+ t.Errorf("Registry %d is different from registry 0", i)
+ }
+ }
+ })
+
+ // Test concurrent modifications on separate registry instances
+ t.Run("ConcurrentModifications", func(t *testing.T) {
+ // Create separate registry instances for each goroutine to test modification thread safety
+ for i := 0; i < numGoroutines; i++ {
+ wg.Add(1)
+ go func(id int) {
+ defer wg.Done()
+
+ // Create a new registry instance for this goroutine
+ registry := &FileTypeRegistry{
+ imageExts: make(map[string]bool),
+ binaryExts: make(map[string]bool),
+ languageMap: make(map[string]string),
+ }
+
+ for j := 0; j < numOperationsPerGoroutine; j++ {
+ // Add unique extensions for this goroutine
+ extSuffix := fmt.Sprintf("_%d_%d", id, j)
+
+ registry.AddImageExtension(".img" + extSuffix)
+ registry.AddBinaryExtension(".bin" + extSuffix)
+ registry.AddLanguageMapping(".lang"+extSuffix, "lang"+extSuffix)
+
+ // Verify the additions worked
+ if !registry.IsImage("test.img" + extSuffix) {
+ t.Errorf("Failed to add image extension .img%s", extSuffix)
+ }
+ if !registry.IsBinary("test.bin" + extSuffix) {
+ t.Errorf("Failed to add binary extension .bin%s", extSuffix)
+ }
+ if registry.GetLanguage("test.lang"+extSuffix) != "lang"+extSuffix {
+ t.Errorf("Failed to add language mapping .lang%s", extSuffix)
+ }
+ }
+ }(i)
+ }
+ wg.Wait()
+ })
+}
+
+// TestFileTypeRegistry_EdgeCases tests edge cases and boundary conditions.
+func TestFileTypeRegistry_EdgeCases(t *testing.T) {
+ registry := GetDefaultRegistry()
+
+ // Test various edge cases for filename handling
+ edgeCases := []struct {
+ name string
+ filename string
+ desc string
+ }{
+ {"empty", "", "empty filename"},
+ {"single_char", "a", "single character filename"},
+ {"just_dot", ".", "just a dot"},
+ {"double_dot", "..", "double dot"},
+ {"hidden_file", ".hidden", "hidden file"},
+ {"hidden_with_ext", ".hidden.txt", "hidden file with extension"},
+ {"multiple_dots", "file.tar.gz", "multiple extensions"},
+ {"trailing_dot", "file.", "trailing dot"},
+ {"unicode", "файл.txt", "unicode filename"},
+ {"spaces", "my file.txt", "filename with spaces"},
+ {"special_chars", "file@#$.txt", "filename with special characters"},
+ {"very_long", "very_long_filename_with_many_characters_in_it.extension", "very long filename"},
+ {"no_basename", ".gitignore", "dotfile with no basename"},
+ {"case_mixed", "FiLe.ExT", "mixed case"},
+ }
+
+ for _, tc := range edgeCases {
+ t.Run(tc.name, func(t *testing.T) {
+ // These should not panic
+ _ = registry.IsImage(tc.filename)
+ _ = registry.IsBinary(tc.filename)
+ _ = registry.GetLanguage(tc.filename)
+
+ // Global functions should also not panic
+ _ = IsImage(tc.filename)
+ _ = IsBinary(tc.filename)
+ _ = GetLanguage(tc.filename)
+ })
+ }
+}
+
+// TestFileTypeRegistry_MinimumExtensionLength tests the minimum extension length requirement.
+func TestFileTypeRegistry_MinimumExtensionLength(t *testing.T) {
+ registry := GetDefaultRegistry()
+
+ tests := []struct {
+ filename string
+ expected string
+ }{
+ {"", ""}, // Empty filename
+ {"a", ""}, // Single character (less than minExtensionLength)
+ {"ab", ""}, // Two characters, no extension
+ {"a.b", ""}, // Extension too short, but filename too short anyway
+ {"ab.c", "c"}, // Valid: filename >= minExtensionLength and .c is valid extension
+ {"a.go", "go"}, // Valid extension
+ {"ab.py", "python"}, // Valid extension
+ {"a.unknown", ""}, // Valid length but unknown extension
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.filename, func(t *testing.T) {
+ result := registry.GetLanguage(tt.filename)
+ if result != tt.expected {
+ t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected)
+ }
+ })
+ }
+}
+
+// BenchmarkFileTypeRegistry_IsImage measures IsImage lookups on the default registry.
+func BenchmarkFileTypeRegistry_IsImage(b *testing.B) {
+ registry := GetDefaultRegistry()
+ filename := "test.png"
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _ = registry.IsImage(filename)
+ }
+}
+
+func BenchmarkFileTypeRegistry_IsBinary(b *testing.B) {
+ registry := GetDefaultRegistry()
+ filename := "test.exe"
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _ = registry.IsBinary(filename)
+ }
+}
+
+func BenchmarkFileTypeRegistry_GetLanguage(b *testing.B) {
+ registry := GetDefaultRegistry()
+ filename := "test.go"
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _ = registry.GetLanguage(filename)
+ }
+}
+
+func BenchmarkFileTypeRegistry_GlobalFunctions(b *testing.B) {
+ filename := "test.go"
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _ = IsImage(filename)
+ _ = IsBinary(filename)
+ _ = GetLanguage(filename)
+ }
+}
+
+func BenchmarkFileTypeRegistry_ConcurrentAccess(b *testing.B) {
+ filename := "test.go"
+
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ _ = IsImage(filename)
+ _ = IsBinary(filename)
+ _ = GetLanguage(filename)
+ }
+ })
+}
+
+// TestFileTypeRegistry_Configuration tests the configuration functionality.
+func TestFileTypeRegistry_Configuration(t *testing.T) {
+ // Create a new registry instance for testing
+ registry := &FileTypeRegistry{
+ imageExts: make(map[string]bool),
+ binaryExts: make(map[string]bool),
+ languageMap: make(map[string]string),
+ }
+
+ // Test ApplyCustomExtensions
+ t.Run("ApplyCustomExtensions", func(t *testing.T) {
+ customImages := []string{".webp", ".avif", ".heic"}
+ customBinary := []string{".custom", ".mybin"}
+ customLanguages := map[string]string{
+ ".zig": "zig",
+ ".odin": "odin",
+ ".v": "vlang",
+ }
+
+ registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
+
+ // Test custom image extensions
+ for _, ext := range customImages {
+ if !registry.IsImage("test" + ext) {
+ t.Errorf("Expected %s to be recognized as image", ext)
+ }
+ }
+
+ // Test custom binary extensions
+ for _, ext := range customBinary {
+ if !registry.IsBinary("test" + ext) {
+ t.Errorf("Expected %s to be recognized as binary", ext)
+ }
+ }
+
+ // Test custom language mappings
+ for ext, expectedLang := range customLanguages {
+ if lang := registry.GetLanguage("test" + ext); lang != expectedLang {
+ t.Errorf("Expected %s to map to %s, got %s", ext, expectedLang, lang)
+ }
+ }
+ })
+
+ // Test DisableExtensions
+ t.Run("DisableExtensions", func(t *testing.T) {
+ // Add some extensions first
+ registry.AddImageExtension(".png")
+ registry.AddImageExtension(".jpg")
+ registry.AddBinaryExtension(".exe")
+ registry.AddBinaryExtension(".dll")
+ registry.AddLanguageMapping(".go", "go")
+ registry.AddLanguageMapping(".py", "python")
+
+ // Verify they work
+ if !registry.IsImage("test.png") {
+ t.Error("Expected .png to be image before disabling")
+ }
+ if !registry.IsBinary("test.exe") {
+ t.Error("Expected .exe to be binary before disabling")
+ }
+ if registry.GetLanguage("test.go") != "go" {
+ t.Error("Expected .go to map to go before disabling")
+ }
+
+ // Disable some extensions
+ disabledImages := []string{".png"}
+ disabledBinary := []string{".exe"}
+ disabledLanguages := []string{".go"}
+
+ registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages)
+
+ // Test that disabled extensions no longer work
+ if registry.IsImage("test.png") {
+ t.Error("Expected .png to not be image after disabling")
+ }
+ if registry.IsBinary("test.exe") {
+ t.Error("Expected .exe to not be binary after disabling")
+ }
+ if registry.GetLanguage("test.go") != "" {
+ t.Error("Expected .go to not map to language after disabling")
+ }
+
+ // Test that non-disabled extensions still work
+ if !registry.IsImage("test.jpg") {
+ t.Error("Expected .jpg to still be image after disabling .png")
+ }
+ if !registry.IsBinary("test.dll") {
+ t.Error("Expected .dll to still be binary after disabling .exe")
+ }
+ if registry.GetLanguage("test.py") != "python" {
+ t.Error("Expected .py to still map to python after disabling .go")
+ }
+ })
+
+ // Test empty values handling
+ t.Run("EmptyValuesHandling", func(t *testing.T) {
+ registry := &FileTypeRegistry{
+ imageExts: make(map[string]bool),
+ binaryExts: make(map[string]bool),
+ languageMap: make(map[string]string),
+ }
+
+ // Test with empty values
+ customImages := []string{"", ".valid", ""}
+ customBinary := []string{"", ".valid"}
+ customLanguages := map[string]string{
+ "": "invalid",
+ ".valid": "",
+ ".good": "good",
+ }
+
+ registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
+
+ // Only valid entries should be added
+ if registry.IsImage("test.") {
+ t.Error("Expected empty extension to not be added as image")
+ }
+ if !registry.IsImage("test.valid") {
+ t.Error("Expected .valid to be added as image")
+ }
+ if registry.IsBinary("test.") {
+ t.Error("Expected empty extension to not be added as binary")
+ }
+ if !registry.IsBinary("test.valid") {
+ t.Error("Expected .valid to be added as binary")
+ }
+ if registry.GetLanguage("test.") != "" {
+ t.Error("Expected empty extension to not be added as language")
+ }
+ if registry.GetLanguage("test.valid") != "" {
+ t.Error("Expected .valid with empty language to not be added")
+ }
+ if registry.GetLanguage("test.good") != "good" {
+ t.Error("Expected .good to map to good")
+ }
+ })
+
+ // Test case insensitive handling
+ t.Run("CaseInsensitiveHandling", func(t *testing.T) {
+ registry := &FileTypeRegistry{
+ imageExts: make(map[string]bool),
+ binaryExts: make(map[string]bool),
+ languageMap: make(map[string]string),
+ }
+
+ customImages := []string{".WEBP", ".Avif"}
+ customBinary := []string{".CUSTOM", ".MyBin"}
+ customLanguages := map[string]string{
+ ".ZIG": "zig",
+ ".Odin": "odin",
+ }
+
+ registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
+
+ // Test that both upper and lower case work
+ if !registry.IsImage("test.webp") {
+ t.Error("Expected .webp (lowercase) to work after adding .WEBP")
+ }
+ if !registry.IsImage("test.WEBP") {
+ t.Error("Expected .WEBP (uppercase) to work")
+ }
+ if !registry.IsBinary("test.custom") {
+ t.Error("Expected .custom (lowercase) to work after adding .CUSTOM")
+ }
+ if !registry.IsBinary("test.CUSTOM") {
+ t.Error("Expected .CUSTOM (uppercase) to work")
+ }
+ if registry.GetLanguage("test.zig") != "zig" {
+ t.Error("Expected .zig (lowercase) to work after adding .ZIG")
+ }
+ if registry.GetLanguage("test.ZIG") != "zig" {
+ t.Error("Expected .ZIG (uppercase) to work")
+ }
+ })
+}
+
+// TestConfigureFromSettings tests the global configuration function.
+func TestConfigureFromSettings(t *testing.T) {
+ // Reset registry to ensure a clean state.
+ ResetRegistryForTesting()
+
+ // Test configuration application
+ customImages := []string{".webp", ".avif"}
+ customBinary := []string{".custom"}
+ customLanguages := map[string]string{".zig": "zig"}
+ disabledImages := []string{".gif"} // Disable default extension
+ disabledBinary := []string{".exe"} // Disable default extension
+ disabledLanguages := []string{".rb"} // Disable default extension
+
+ ConfigureFromSettings(
+ customImages,
+ customBinary,
+ customLanguages,
+ disabledImages,
+ disabledBinary,
+ disabledLanguages,
+ )
+
+ // Test that custom extensions work
+ if !IsImage("test.webp") {
+ t.Error("Expected custom image extension .webp to work")
+ }
+ if !IsBinary("test.custom") {
+ t.Error("Expected custom binary extension .custom to work")
+ }
+ if GetLanguage("test.zig") != "zig" {
+ t.Error("Expected custom language .zig to work")
+ }
+
+ // Test that disabled extensions don't work
+ if IsImage("test.gif") {
+ t.Error("Expected disabled image extension .gif to not work")
+ }
+ if IsBinary("test.exe") {
+ t.Error("Expected disabled binary extension .exe to not work")
+ }
+ if GetLanguage("test.rb") != "" {
+ t.Error("Expected disabled language extension .rb to not work")
+ }
+
+ // Test that non-disabled defaults still work
+ if !IsImage("test.png") {
+ t.Error("Expected non-disabled image extension .png to still work")
+ }
+ if !IsBinary("test.dll") {
+ t.Error("Expected non-disabled binary extension .dll to still work")
+ }
+ if GetLanguage("test.go") != "go" {
+ t.Error("Expected non-disabled language extension .go to still work")
+ }
+}
diff --git a/fileproc/formats.go b/fileproc/formats.go
new file mode 100644
index 0000000..86795af
--- /dev/null
+++ b/fileproc/formats.go
@@ -0,0 +1,28 @@
+package fileproc
+
+// FileData represents a single file's path and content.
+type FileData struct {
+ Path string `json:"path" yaml:"path"`
+ Content string `json:"content" yaml:"content"`
+ Language string `json:"language" yaml:"language"`
+}
+
+// OutputData represents the full output structure.
+type OutputData struct {
+ Prefix string `json:"prefix,omitempty" yaml:"prefix,omitempty"`
+ Suffix string `json:"suffix,omitempty" yaml:"suffix,omitempty"`
+ Files []FileData `json:"files" yaml:"files"`
+}
+
+// FormatWriter defines the interface for format-specific writers.
+type FormatWriter interface {
+ Start(prefix, suffix string) error
+ WriteFile(req WriteRequest) error
+ Close() error
+}
+
+// detectLanguage tries to infer the code block language from the file extension.
+func detectLanguage(filePath string) string {
+ registry := GetDefaultRegistry()
+ return registry.GetLanguage(filePath)
+}
diff --git a/fileproc/ignore_rules.go b/fileproc/ignore_rules.go
new file mode 100644
index 0000000..827a0f0
--- /dev/null
+++ b/fileproc/ignore_rules.go
@@ -0,0 +1,66 @@
+package fileproc
+
+import (
+ "os"
+ "path/filepath"
+
+ ignore "github.com/sabhiram/go-gitignore"
+)
+
+// ignoreRule holds an ignore matcher along with the base directory where it was loaded.
+type ignoreRule struct {
+ gi *ignore.GitIgnore
+ base string
+}
+
+// loadIgnoreRules loads ignore rules from the current directory and combines them with parent rules.
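+//
+// Accumulation sketch (paths illustrative): rules from parent directories are
+// carried down and extended at each level.
+//
+//	rules := loadIgnoreRules("/repo", nil)      // picks up /repo/.gitignore and /repo/.ignore
+//	rules = loadIgnoreRules("/repo/pkg", rules) // appends /repo/pkg rules to the parent's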
+func loadIgnoreRules(currentDir string, parentRules []ignoreRule) []ignoreRule {
+ // Pre-allocate for parent rules plus possible .gitignore and .ignore
+ const expectedIgnoreFiles = 2
+ rules := make([]ignoreRule, 0, len(parentRules)+expectedIgnoreFiles)
+ rules = append(rules, parentRules...)
+
+ // Check for .gitignore and .ignore files in the current directory.
+ for _, fileName := range []string{".gitignore", ".ignore"} {
+ if rule := tryLoadIgnoreFile(currentDir, fileName); rule != nil {
+ rules = append(rules, *rule)
+ }
+ }
+
+ return rules
+}
+
+// tryLoadIgnoreFile attempts to load an ignore file from the given directory.
+func tryLoadIgnoreFile(dir, fileName string) *ignoreRule {
+ ignorePath := filepath.Join(dir, fileName)
+ if info, err := os.Stat(ignorePath); err == nil && !info.IsDir() {
+ if gi, err := ignore.CompileIgnoreFile(ignorePath); err == nil {
+ return &ignoreRule{
+ base: dir,
+ gi: gi,
+ }
+ }
+ }
+ return nil
+}
+
+// matchesIgnoreRules checks if a path matches any of the ignore rules.
+func matchesIgnoreRules(fullPath string, rules []ignoreRule) bool {
+ for _, rule := range rules {
+ if matchesRule(fullPath, rule) {
+ return true
+ }
+ }
+ return false
+}
+
+// matchesRule checks if a path matches a specific ignore rule.
+func matchesRule(fullPath string, rule ignoreRule) bool {
+ // Compute the path relative to the base where the ignore rule was defined.
+ rel, err := filepath.Rel(rule.base, fullPath)
+ if err != nil {
+ return false
+ }
+ // If the rule matches, skip this entry.
+ return rule.gi.MatchesPath(rel)
+}
diff --git a/fileproc/json_writer.go b/fileproc/json_writer.go
new file mode 100644
index 0000000..57bc6d9
--- /dev/null
+++ b/fileproc/json_writer.go
@@ -0,0 +1,188 @@
+package fileproc
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/ivuorinen/gibidify/utils"
+)
+
+// JSONWriter handles JSON format output with streaming support.
+type JSONWriter struct {
+ outFile *os.File
+ firstFile bool
+}
+
+// NewJSONWriter creates a new JSON writer.
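+//
+// Typical lifecycle (illustrative; error handling elided):
+//
+//	w := NewJSONWriter(outFile)
+//	_ = w.Start(prefix, suffix)
+//	for _, req := range requests {
+//		_ = w.WriteFile(req)
+//	}
+//	_ = w.Close()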
+func NewJSONWriter(outFile *os.File) *JSONWriter {
+ return &JSONWriter{
+ outFile: outFile,
+ firstFile: true,
+ }
+}
+
+// Start writes the JSON header.
+func (w *JSONWriter) Start(prefix, suffix string) error {
+ // Start JSON structure
+ if _, err := w.outFile.WriteString(`{"prefix":"`); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON start")
+ }
+
+ // Write escaped prefix
+ escapedPrefix := escapeJSONString(prefix)
+ if _, err := w.outFile.WriteString(escapedPrefix); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON prefix")
+ }
+
+ if _, err := w.outFile.WriteString(`","suffix":"`); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON middle")
+ }
+
+ // Write escaped suffix
+ escapedSuffix := escapeJSONString(suffix)
+ if _, err := w.outFile.WriteString(escapedSuffix); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON suffix")
+ }
+
+ if _, err := w.outFile.WriteString(`","files":[`); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON files start")
+ }
+
+ return nil
+}
+
+// WriteFile writes a file entry in JSON format.
+func (w *JSONWriter) WriteFile(req WriteRequest) error {
+ if !w.firstFile {
+ if _, err := w.outFile.WriteString(","); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON separator")
+ }
+ }
+ w.firstFile = false
+
+ if req.IsStream {
+ return w.writeStreaming(req)
+ }
+ return w.writeInline(req)
+}
+
+// Close writes the JSON footer.
+func (w *JSONWriter) Close() error {
+ // Close JSON structure
+ if _, err := w.outFile.WriteString("]}"); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON end")
+ }
+ return nil
+}
+
+// writeStreaming writes a large file as JSON in streaming chunks.
+func (w *JSONWriter) writeStreaming(req WriteRequest) error {
+ defer w.closeReader(req.Reader, req.Path)
+
+ language := detectLanguage(req.Path)
+
+ // Write file start
+ escapedPath := escapeJSONString(req.Path)
+ if _, err := fmt.Fprintf(w.outFile, `{"path":"%s","language":"%s","content":"`, escapedPath, language); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file start").WithFilePath(req.Path)
+ }
+
+ // Stream content with JSON escaping
+ if err := w.streamJSONContent(req.Reader, req.Path); err != nil {
+ return err
+ }
+
+ // Write file end
+ if _, err := w.outFile.WriteString(`"}`); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file end").WithFilePath(req.Path)
+ }
+
+ return nil
+}
+
+// writeInline writes a small file directly as JSON.
+func (w *JSONWriter) writeInline(req WriteRequest) error {
+ language := detectLanguage(req.Path)
+ fileData := FileData{
+ Path: req.Path,
+ Content: req.Content,
+ Language: language,
+ }
+
+ encoded, err := json.Marshal(fileData)
+ if err != nil {
+ return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingEncode, "failed to marshal JSON").WithFilePath(req.Path)
+ }
+
+ if _, err := w.outFile.Write(encoded); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file").WithFilePath(req.Path)
+ }
+ return nil
+}
+
+// streamJSONContent streams content with JSON escaping. Each chunk is escaped
+// independently, so a multi-byte UTF-8 rune split across a chunk boundary is
+// replaced with U+FFFD by json.Marshal; the output remains valid JSON.
+func (w *JSONWriter) streamJSONContent(reader io.Reader, path string) error {
+ buf := make([]byte, StreamChunkSize)
+ for {
+ n, err := reader.Read(buf)
+ if n > 0 {
+ escaped := escapeJSONString(string(buf[:n]))
+ if _, writeErr := w.outFile.WriteString(escaped); writeErr != nil {
+ return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON chunk").WithFilePath(path)
+ }
+ }
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to read JSON chunk").WithFilePath(path)
+ }
+ }
+ return nil
+}
+
+// closeReader safely closes a reader if it implements io.Closer.
+func (w *JSONWriter) closeReader(reader io.Reader, path string) {
+ if closer, ok := reader.(io.Closer); ok {
+ if err := closer.Close(); err != nil {
+ utils.LogError(
+ "Failed to close file reader",
+ utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
+ )
+ }
+ }
+}
+
+// escapeJSONString escapes a string for JSON output.
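+//
+// For example, escapeJSONString("a\"b\n") returns `a\"b\n` (no surrounding
+// quotes), ready to be embedded inside a JSON string literal.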
+func escapeJSONString(s string) string {
+ // Use json.Marshal to properly escape the string, then strip the surrounding quotes.
+ // Marshaling a string value never fails, so the error is safely ignored.
+ escaped, _ := json.Marshal(s)
+ return string(escaped[1 : len(escaped)-1])
+}
+
+// startJSONWriter handles JSON format output with streaming support.
+func startJSONWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
+ defer close(done)
+
+ writer := NewJSONWriter(outFile)
+
+ // Start writing
+ if err := writer.Start(prefix, suffix); err != nil {
+ utils.LogError("Failed to write JSON start", err)
+ return
+ }
+
+ // Process files
+ for req := range writeCh {
+ if err := writer.WriteFile(req); err != nil {
+ utils.LogError("Failed to write JSON file", err)
+ }
+ }
+
+ // Close writer
+ if err := writer.Close(); err != nil {
+ utils.LogError("Failed to write JSON end", err)
+ }
+}
diff --git a/fileproc/markdown_writer.go b/fileproc/markdown_writer.go
new file mode 100644
index 0000000..56e5fdf
--- /dev/null
+++ b/fileproc/markdown_writer.go
@@ -0,0 +1,139 @@
+package fileproc
+
+import (
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/ivuorinen/gibidify/utils"
+)
+
+// MarkdownWriter handles markdown format output with streaming support.
+type MarkdownWriter struct {
+ outFile *os.File
+}
+
+// NewMarkdownWriter creates a new markdown writer.
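+//
+// Typical lifecycle (illustrative; error handling elided):
+//
+//	w := NewMarkdownWriter(outFile)
+//	_ = w.Start(prefix, suffix)
+//	_ = w.WriteFile(req)
+//	_ = w.Close(suffix)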
+func NewMarkdownWriter(outFile *os.File) *MarkdownWriter {
+ return &MarkdownWriter{outFile: outFile}
+}
+
+// Start writes the markdown header. The suffix argument is unused here; the
+// suffix is written by Close instead.
+func (w *MarkdownWriter) Start(prefix, suffix string) error {
+ if prefix != "" {
+ if _, err := fmt.Fprintf(w.outFile, "# %s\n\n", prefix); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write prefix")
+ }
+ }
+ return nil
+}
+
+// WriteFile writes a file entry in markdown format.
+func (w *MarkdownWriter) WriteFile(req WriteRequest) error {
+ if req.IsStream {
+ return w.writeStreaming(req)
+ }
+ return w.writeInline(req)
+}
+
+// Close writes the markdown footer. Unlike FormatWriter's Close, it takes the
+// suffix explicitly, so MarkdownWriter does not satisfy FormatWriter directly.
+func (w *MarkdownWriter) Close(suffix string) error {
+ if suffix != "" {
+ if _, err := fmt.Fprintf(w.outFile, "\n# %s\n", suffix); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write suffix")
+ }
+ }
+ return nil
+}
+
+// writeStreaming writes a large file in streaming chunks.
+func (w *MarkdownWriter) writeStreaming(req WriteRequest) error {
+ defer w.closeReader(req.Reader, req.Path)
+
+ language := detectLanguage(req.Path)
+
+ // Write file header
+ if _, err := fmt.Fprintf(w.outFile, "## File: `%s`\n```%s\n", req.Path, language); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file header").WithFilePath(req.Path)
+ }
+
+ // Stream file content in chunks
+ if err := w.streamContent(req.Reader, req.Path); err != nil {
+ return err
+ }
+
+ // Write file footer
+ if _, err := w.outFile.WriteString("\n```\n\n"); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file footer").WithFilePath(req.Path)
+ }
+
+ return nil
+}
+
+// writeInline writes a small file directly from content.
+func (w *MarkdownWriter) writeInline(req WriteRequest) error {
+ language := detectLanguage(req.Path)
+ formatted := fmt.Sprintf("## File: `%s`\n```%s\n%s\n```\n\n", req.Path, language, req.Content)
+
+ if _, err := w.outFile.WriteString(formatted); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write inline content").WithFilePath(req.Path)
+ }
+ return nil
+}
+
+// streamContent streams file content in chunks.
+func (w *MarkdownWriter) streamContent(reader io.Reader, path string) error {
+ buf := make([]byte, StreamChunkSize)
+ for {
+ n, err := reader.Read(buf)
+ if n > 0 {
+ if _, writeErr := w.outFile.Write(buf[:n]); writeErr != nil {
+ return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write chunk").WithFilePath(path)
+ }
+ }
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to read chunk").WithFilePath(path)
+ }
+ }
+ return nil
+}
+
+// closeReader safely closes a reader if it implements io.Closer.
+func (w *MarkdownWriter) closeReader(reader io.Reader, path string) {
+ if closer, ok := reader.(io.Closer); ok {
+ if err := closer.Close(); err != nil {
+ utils.LogError(
+ "Failed to close file reader",
+ utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
+ )
+ }
+ }
+}
+
+// startMarkdownWriter handles markdown format output with streaming support.
+func startMarkdownWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
+ defer close(done)
+
+ writer := NewMarkdownWriter(outFile)
+
+ // Start writing
+ if err := writer.Start(prefix, suffix); err != nil {
+ utils.LogError("Failed to write markdown prefix", err)
+ return
+ }
+
+ // Process files
+ for req := range writeCh {
+ if err := writer.WriteFile(req); err != nil {
+ utils.LogError("Failed to write markdown file", err)
+ }
+ }
+
+ // Close writer
+ if err := writer.Close(suffix); err != nil {
+ utils.LogError("Failed to write markdown suffix", err)
+ }
+}
diff --git a/fileproc/processor.go b/fileproc/processor.go
index 97e0761..335c364 100644
--- a/fileproc/processor.go
+++ b/fileproc/processor.go
@@ -3,34 +3,157 @@ package fileproc
import (
+ "errors"
"fmt"
+ "io"
"os"
"path/filepath"
+ "strings"
- "github.com/sirupsen/logrus"
+ "github.com/ivuorinen/gibidify/config"
+ "github.com/ivuorinen/gibidify/utils"
+)
+
+const (
+ // StreamChunkSize is the size of chunks when streaming large files (64KB).
+ StreamChunkSize = 65536
+ // StreamThreshold is the file size above which we use streaming (1MB).
+ StreamThreshold = 1048576
+ // MaxMemoryBuffer is the maximum memory to use for buffering content (10MB).
+ MaxMemoryBuffer = 10485760
)
+
+// errFileTooLarge marks files skipped because they exceed the configured size limit.
+var errFileTooLarge = errors.New("file too large")
// WriteRequest represents the content to be written.
type WriteRequest struct {
- Path string
- Content string
+ Path string
+ Content string
+ IsStream bool
+ Reader io.Reader
+}
+
+// FileProcessor handles file processing operations.
+type FileProcessor struct {
+ rootPath string
+ sizeLimit int64
+}
+
+// NewFileProcessor creates a new file processor.
+func NewFileProcessor(rootPath string) *FileProcessor {
+ return &FileProcessor{
+ rootPath: rootPath,
+ sizeLimit: config.GetFileSizeLimit(),
+ }
}
// ProcessFile reads the file at filePath and sends a formatted output to outCh.
+// It automatically chooses between loading the entire file or streaming based on file size.
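+//
+// Illustrative call site (paths are assumptions, not part of this patch):
+//
+//	outCh := make(chan WriteRequest, 1)
+//	ProcessFile("/repo/main.go", outCh, "/repo")
+//	req := <-outCh // req.IsStream reports whether streaming was chosen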
func ProcessFile(filePath string, outCh chan<- WriteRequest, rootPath string) {
- content, err := os.ReadFile(filePath)
+ processor := NewFileProcessor(rootPath)
+ processor.Process(filePath, outCh)
+}
+
+// Process handles file processing with the configured settings.
+func (p *FileProcessor) Process(filePath string, outCh chan<- WriteRequest) {
+ // Validate file
+ fileInfo, err := p.validateFile(filePath)
if err != nil {
- logrus.Errorf("Failed to read file %s: %v", filePath, err)
+ return // Error already logged
+ }
+
+ // Get relative path
+ relPath := p.getRelativePath(filePath)
+
+ // Choose processing strategy based on file size
+ if fileInfo.Size() <= StreamThreshold {
+ p.processInMemory(filePath, relPath, outCh)
+ } else {
+ p.processStreaming(filePath, relPath, outCh)
+ }
+}
+
+// validateFile checks if the file can be processed.
+func (p *FileProcessor) validateFile(filePath string) (os.FileInfo, error) {
+ fileInfo, err := os.Stat(filePath)
+ if err != nil {
+ structErr := utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to stat file").WithFilePath(filePath)
+ utils.LogErrorf(structErr, "Failed to stat file %s", filePath)
+ return nil, err
+ }
+
+ // Check size limit
+ if fileInfo.Size() > p.sizeLimit {
+ utils.LogErrorf(
+ utils.NewStructuredError(
+ utils.ErrorTypeValidation,
+ utils.CodeValidationSize,
+ fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", fileInfo.Size(), p.sizeLimit),
+ ).WithFilePath(filePath).WithContext("file_size", fileInfo.Size()).WithContext("size_limit", p.sizeLimit),
+ "Skipping large file %s", filePath,
+ )
+ return nil, errFileTooLarge
+ }
+
+ return fileInfo, nil
+}
+
+// getRelativePath computes the path relative to rootPath.
+func (p *FileProcessor) getRelativePath(filePath string) string {
+ relPath, err := filepath.Rel(p.rootPath, filePath)
+ if err != nil {
+ return filePath // Fallback
+ }
+ return relPath
+}
+
+// processInMemory loads the entire file into memory (for small files).
+func (p *FileProcessor) processInMemory(filePath, relPath string, outCh chan<- WriteRequest) {
+ content, err := os.ReadFile(filePath) // #nosec G304 - filePath is validated by walker
+ if err != nil {
+ structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to read file").WithFilePath(filePath)
+ utils.LogErrorf(structErr, "Failed to read file %s", filePath)
return
}
- // Compute path relative to rootPath, so /a/b/c/d.c becomes c/d.c
- relPath, err := filepath.Rel(rootPath, filePath)
- if err != nil {
- // Fallback if something unexpected happens
- relPath = filePath
+ outCh <- WriteRequest{
+ Path: relPath,
+ Content: p.formatContent(relPath, string(content)),
+ IsStream: false,
+ }
+}
+
+// processStreaming creates a streaming reader for large files.
+func (p *FileProcessor) processStreaming(filePath, relPath string, outCh chan<- WriteRequest) {
+ reader := p.createStreamReader(filePath, relPath)
+ if reader == nil {
+ return // Error already logged
}
- // Format: separator, then relative path, then content
- formatted := fmt.Sprintf("\n---\n%s\n%s\n", relPath, string(content))
- outCh <- WriteRequest{Path: relPath, Content: formatted}
+ outCh <- WriteRequest{
+ Path: relPath,
+ Content: "", // Empty since content is in Reader
+ IsStream: true,
+ Reader: reader,
+ }
+}
+
+// createStreamReader creates a reader that combines header and file content.
+func (p *FileProcessor) createStreamReader(filePath, relPath string) io.Reader {
+ file, err := os.Open(filePath) // #nosec G304 - filePath is validated by walker
+ if err != nil {
+ structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to open file for streaming").WithFilePath(filePath)
+ utils.LogErrorf(structErr, "Failed to open file for streaming %s", filePath)
+ return nil
+ }
+ // Note: file will be closed by the writer
+
+ header := p.formatHeader(relPath)
+ return io.MultiReader(header, file)
+}
+
+// formatContent formats the file content with header.
+func (p *FileProcessor) formatContent(relPath, content string) string {
+ return fmt.Sprintf("\n---\n%s\n%s\n", relPath, content)
+}
+
+// formatHeader creates a reader for the file header.
+func (p *FileProcessor) formatHeader(relPath string) io.Reader {
+ return strings.NewReader(fmt.Sprintf("\n---\n%s\n", relPath))
}
diff --git a/fileproc/processor_test.go b/fileproc/processor_test.go
index d1c1077..e825399 100644
--- a/fileproc/processor_test.go
+++ b/fileproc/processor_test.go
@@ -6,12 +6,15 @@ import (
"sync"
"testing"
- fileproc "github.com/ivuorinen/gibidify/fileproc"
+ "github.com/ivuorinen/gibidify/fileproc"
+ "github.com/ivuorinen/gibidify/testutil"
)
func TestProcessFile(t *testing.T) {
+ // Reset and load default config to ensure proper file size limits
+ testutil.ResetViperConfig(t, "")
// Create a temporary file with known content.
- tmpFile, err := os.CreateTemp("", "testfile")
+ tmpFile, err := os.CreateTemp(t.TempDir(), "testfile")
if err != nil {
t.Fatal(err)
}
diff --git a/fileproc/registry.go b/fileproc/registry.go
new file mode 100644
index 0000000..1ea6f74
--- /dev/null
+++ b/fileproc/registry.go
@@ -0,0 +1,107 @@
+// Package fileproc provides file processing utilities.
+package fileproc
+
+import (
+ "path/filepath"
+ "strings"
+ "sync"
+)
+
+const minExtensionLength = 2
+
+var (
+ registry *FileTypeRegistry
+ registryOnce sync.Once
+)
+
+// FileTypeRegistry manages file type detection and classification.
+type FileTypeRegistry struct {
+ imageExts map[string]bool
+ binaryExts map[string]bool
+ languageMap map[string]string
+
+ // Cache for frequent lookups to avoid repeated string operations
+ extCache map[string]string // filename -> normalized extension
+ resultCache map[string]FileTypeResult // extension -> cached result
+ cacheMutex sync.RWMutex
+ maxCacheSize int
+
+ // Performance statistics
+ stats RegistryStats
+}
+
+// RegistryStats tracks performance metrics for the registry.
+type RegistryStats struct {
+ TotalLookups uint64
+ CacheHits uint64
+ CacheMisses uint64
+ CacheEvictions uint64
+}
+
+// FileTypeResult represents cached file type detection results.
+type FileTypeResult struct {
+ IsImage bool
+ IsBinary bool
+ Language string
+ Extension string
+}
+
+// initRegistry initializes the default file type registry with common extensions.
+func initRegistry() *FileTypeRegistry {
+ return &FileTypeRegistry{
+ imageExts: getImageExtensions(),
+ binaryExts: getBinaryExtensions(),
+ languageMap: getLanguageMap(),
+ extCache: make(map[string]string, 1000), // Cache for extension normalization
+ resultCache: make(map[string]FileTypeResult, 500), // Cache for type results
+ maxCacheSize: 500,
+ }
+}
+
+// getRegistry returns the singleton file type registry, creating it if necessary.
+func getRegistry() *FileTypeRegistry {
+ registryOnce.Do(func() {
+ registry = initRegistry()
+ })
+ return registry
+}
+
+// GetDefaultRegistry returns the default file type registry.
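+//
+// Illustrative lookups against the default registry:
+//
+//	reg := GetDefaultRegistry()
+//	reg.GetLanguage("main.go") // "go"
+//	reg.IsBinary("app.exe")    // true
+//	reg.IsImage("logo.png")    // true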
+func GetDefaultRegistry() *FileTypeRegistry {
+ return getRegistry()
+}
+
+// GetStats returns a copy of the current registry statistics.
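+//
+// Example (illustrative; counter values depend on prior lookups):
+//
+//	stats := GetDefaultRegistry().GetStats()
+//	hitRate := float64(stats.CacheHits) / float64(max(stats.TotalLookups, 1))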
+func (r *FileTypeRegistry) GetStats() RegistryStats {
+ r.cacheMutex.RLock()
+ defer r.cacheMutex.RUnlock()
+ return r.stats
+}
+
+// GetCacheInfo returns current cache size information.
+func (r *FileTypeRegistry) GetCacheInfo() (extCacheSize, resultCacheSize, maxCacheSize int) {
+ r.cacheMutex.RLock()
+ defer r.cacheMutex.RUnlock()
+ return len(r.extCache), len(r.resultCache), r.maxCacheSize
+}
+
+// ResetRegistryForTesting resets the registry to its initial state.
+// This function should only be used in tests.
+func ResetRegistryForTesting() {
+ registryOnce = sync.Once{}
+ registry = nil
+}
+
+// normalizeExtension extracts and normalizes the file extension.
+func normalizeExtension(filename string) string {
+ return strings.ToLower(filepath.Ext(filename))
+}
+
+// isSpecialFile checks if the filename matches special cases like .DS_Store.
+func isSpecialFile(filename string, extensions map[string]bool) bool {
+ if filepath.Ext(filename) == "" {
+ basename := strings.ToLower(filepath.Base(filename))
+ return extensions[basename]
+ }
+ return false
+}
diff --git a/fileproc/walker.go b/fileproc/walker.go
index f4bb776..58f9e64 100644
--- a/fileproc/walker.go
+++ b/fileproc/walker.go
@@ -4,10 +4,8 @@ package fileproc
import (
"os"
"path/filepath"
- "strings"
- "github.com/ivuorinen/gibidify/config"
- ignore "github.com/sabhiram/go-gitignore"
+ "github.com/ivuorinen/gibidify/utils"
)
// Walker defines an interface for scanning directories.
@@ -18,22 +16,25 @@ type Walker interface {
// ProdWalker implements Walker using a custom directory walker that
// respects .gitignore and .ignore files, configuration-defined ignore directories,
// and ignores binary and image files by default.
-type ProdWalker struct{}
+type ProdWalker struct {
+ filter *FileFilter
+}
-// ignoreRule holds an ignore matcher along with the base directory where it was loaded.
-type ignoreRule struct {
- base string
- gi *ignore.GitIgnore
+// NewProdWalker creates a new production walker with current configuration.
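+//
+// Typical use (mirrors the walker tests):
+//
+//	w := NewProdWalker()
+//	files, err := w.Walk("/path/to/project")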
+func NewProdWalker() *ProdWalker {
+ return &ProdWalker{
+ filter: NewFileFilter(),
+ }
}
// Walk scans the given root directory recursively and returns a slice of file paths
// that are not ignored based on .gitignore/.ignore files, the configuration, or the default binary/image filter.
-func (pw ProdWalker) Walk(root string) ([]string, error) {
- absRoot, err := filepath.Abs(root)
+func (w *ProdWalker) Walk(root string) ([]string, error) {
+ absRoot, err := utils.GetAbsolutePath(root)
if err != nil {
- return nil, err
+ return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSPathResolution, "failed to resolve root path").WithFilePath(root)
}
- return walkDir(absRoot, absRoot, []ignoreRule{})
+ return w.walkDir(absRoot, []ignoreRule{})
}
// walkDir recursively walks the directory tree starting at currentDir.
@@ -41,122 +42,34 @@ func (pw ProdWalker) Walk(root string) ([]string, error) {
// appends the corresponding rules to the inherited list. Each file/directory is
// then checked against the accumulated ignore rules, the configuration's list of ignored directories,
// and a default filter that ignores binary and image files.
-func walkDir(root string, currentDir string, parentRules []ignoreRule) ([]string, error) {
+func (w *ProdWalker) walkDir(currentDir string, parentRules []ignoreRule) ([]string, error) {
var results []string
entries, err := os.ReadDir(currentDir)
if err != nil {
- return nil, err
+ return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to read directory").WithFilePath(currentDir)
}
- // Start with the parent's ignore rules.
- rules := make([]ignoreRule, len(parentRules))
- copy(rules, parentRules)
-
- // Check for .gitignore and .ignore files in the current directory.
- for _, fileName := range []string{".gitignore", ".ignore"} {
- ignorePath := filepath.Join(currentDir, fileName)
- if info, err := os.Stat(ignorePath); err == nil && !info.IsDir() {
- gi, err := ignore.CompileIgnoreFile(ignorePath)
- if err == nil {
- rules = append(rules, ignoreRule{
- base: currentDir,
- gi: gi,
- })
- }
- }
- }
-
- // Get the list of directories to ignore from configuration.
- ignoredDirs := config.GetIgnoredDirectories()
- sizeLimit := config.GetFileSizeLimit() // e.g., 5242880 for 5 MB
+ rules := loadIgnoreRules(currentDir, parentRules)
for _, entry := range entries {
fullPath := filepath.Join(currentDir, entry.Name())
- // For directories, check if its name is in the config ignore list.
- if entry.IsDir() {
- for _, d := range ignoredDirs {
- if entry.Name() == d {
- // Skip this directory entirely.
- goto SkipEntry
- }
- }
- } else {
- // Check if file exceeds the configured size limit.
- info, err := entry.Info()
- if err == nil && info.Size() > sizeLimit {
- goto SkipEntry
- }
-
- // For files, apply the default filter to ignore binary and image files.
- if isBinaryOrImage(fullPath) {
- goto SkipEntry
- }
+ if w.filter.shouldSkipEntry(entry, fullPath, rules) {
+ continue
}
- // Check accumulated ignore rules.
- for _, rule := range rules {
- // Compute the path relative to the base where the ignore rule was defined.
- rel, err := filepath.Rel(rule.base, fullPath)
- if err != nil {
- continue
- }
- // If the rule matches, skip this entry.
- if rule.gi.MatchesPath(rel) {
- goto SkipEntry
- }
- }
-
- // If not ignored, then process the entry.
+ // Process entry
if entry.IsDir() {
- subFiles, err := walkDir(root, fullPath, rules)
+ subFiles, err := w.walkDir(fullPath, rules)
if err != nil {
- return nil, err
+ return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingTraversal, "failed to traverse subdirectory").WithFilePath(fullPath)
}
results = append(results, subFiles...)
} else {
results = append(results, fullPath)
}
- SkipEntry:
- continue
}
return results, nil
}
-
-// isBinaryOrImage checks if a file should be considered binary or an image based on its extension.
-// The check is case-insensitive.
-func isBinaryOrImage(filePath string) bool {
- ext := strings.ToLower(filepath.Ext(filePath))
- // Common image file extensions.
- imageExtensions := map[string]bool{
- ".png": true,
- ".jpg": true,
- ".jpeg": true,
- ".gif": true,
- ".bmp": true,
- ".tiff": true,
- ".ico": true,
- ".svg": true,
- ".webp": true,
- }
- // Common binary file extensions.
- binaryExtensions := map[string]bool{
- ".exe": true,
- ".dll": true,
- ".so": true,
- ".bin": true,
- ".dat": true,
- ".zip": true,
- ".tar": true,
- ".gz": true,
- ".7z": true,
- ".rar": true,
- ".DS_Store": true,
- }
- if imageExtensions[ext] || binaryExtensions[ext] {
- return true
- }
- return false
-}
diff --git a/fileproc/walker_test.go b/fileproc/walker_test.go
index f684947..dfee038 100644
--- a/fileproc/walker_test.go
+++ b/fileproc/walker_test.go
@@ -1,64 +1,42 @@
package fileproc_test
import (
- "os"
"path/filepath"
"testing"
- "github.com/ivuorinen/gibidify/config"
- fileproc "github.com/ivuorinen/gibidify/fileproc"
"github.com/spf13/viper"
+
+ "github.com/ivuorinen/gibidify/fileproc"
+ "github.com/ivuorinen/gibidify/testutil"
)
func TestProdWalkerWithIgnore(t *testing.T) {
// Create a temporary directory structure.
- rootDir, err := os.MkdirTemp("", "walker_test_root")
- if err != nil {
- t.Fatalf("Failed to create temp root directory: %v", err)
- }
- defer func() {
- if err := os.RemoveAll(rootDir); err != nil {
- t.Fatalf("cleanup failed: %v", err)
- }
- }()
+ rootDir := t.TempDir()
- subDir := filepath.Join(rootDir, "vendor")
- if err := os.Mkdir(subDir, 0755); err != nil {
- t.Fatalf("Failed to create subDir: %v", err)
- }
+ subDir := testutil.CreateTestDirectory(t, rootDir, "vendor")
// Write sample files
- filePaths := []string{
- filepath.Join(rootDir, "file1.go"),
- filepath.Join(rootDir, "file2.txt"),
- filepath.Join(subDir, "file_in_vendor.txt"), // should be ignored
- }
- for _, fp := range filePaths {
- if err := os.WriteFile(fp, []byte("content"), 0644); err != nil {
- t.Fatalf("Failed to write file %s: %v", fp, err)
- }
- }
+ testutil.CreateTestFiles(t, rootDir, []testutil.FileSpec{
+ {Name: "file1.go", Content: "content"},
+ {Name: "file2.txt", Content: "content"},
+ })
+ testutil.CreateTestFile(t, subDir, "file_in_vendor.txt", []byte("content")) // should be ignored
// .gitignore that ignores *.txt and itself
gitignoreContent := `*.txt
.gitignore
`
- gitignorePath := filepath.Join(rootDir, ".gitignore")
- if err := os.WriteFile(gitignorePath, []byte(gitignoreContent), 0644); err != nil {
- t.Fatalf("Failed to write .gitignore: %v", err)
- }
+ testutil.CreateTestFile(t, rootDir, ".gitignore", []byte(gitignoreContent))
// Initialize config to ignore "vendor" directory
- viper.Reset()
- config.LoadConfig()
+ testutil.ResetViperConfig(t, "")
viper.Set("ignoreDirectories", []string{"vendor"})
// Run walker
- var w fileproc.Walker = fileproc.ProdWalker{}
+ w := fileproc.NewProdWalker()
found, err := w.Walk(rootDir)
- if err != nil {
- t.Fatalf("Walk returned error: %v", err)
- }
+ testutil.MustSucceed(t, err, "walking directory")
// We expect only file1.go to appear
if len(found) != 1 {
@@ -70,38 +48,24 @@ func TestProdWalkerWithIgnore(t *testing.T) {
}
func TestProdWalkerBinaryCheck(t *testing.T) {
- rootDir, err := os.MkdirTemp("", "walker_test_bincheck")
- if err != nil {
- t.Fatalf("Failed to create temp root directory: %v", err)
- }
- defer func() {
- if err := os.RemoveAll(rootDir); err != nil {
- t.Fatalf("cleanup failed: %v", err)
- }
- }()
+ rootDir := t.TempDir()
- // Create a mock binary file
- binFile := filepath.Join(rootDir, "somefile.exe")
- if err := os.WriteFile(binFile, []byte("fake-binary-content"), 0644); err != nil {
- t.Fatalf("Failed to write file %s: %v", binFile, err)
- }
-
- // Create a normal file
- normalFile := filepath.Join(rootDir, "keep.go")
- if err := os.WriteFile(normalFile, []byte("package main"), 0644); err != nil {
- t.Fatalf("Failed to write file %s: %v", normalFile, err)
- }
+ // Create test files
+ testutil.CreateTestFiles(t, rootDir, []testutil.FileSpec{
+ {Name: "somefile.exe", Content: "fake-binary-content"},
+ {Name: "keep.go", Content: "package main"},
+ })
// Reset and load default config
- viper.Reset()
- config.LoadConfig()
+ testutil.ResetViperConfig(t, "")
+
+ // Reset FileTypeRegistry to ensure clean state
+ fileproc.ResetRegistryForTesting()
// Run walker
- var w fileproc.Walker = fileproc.ProdWalker{}
+ w := fileproc.NewProdWalker()
found, err := w.Walk(rootDir)
- if err != nil {
- t.Fatalf("Walk returned error: %v", err)
- }
+ testutil.MustSucceed(t, err, "walking directory")
// Only "keep.go" should be returned
if len(found) != 1 {
@@ -113,34 +77,17 @@ func TestProdWalkerBinaryCheck(t *testing.T) {
}
func TestProdWalkerSizeLimit(t *testing.T) {
- rootDir, err := os.MkdirTemp("", "walker_test_sizelimit")
- if err != nil {
- t.Fatalf("Failed to create temp root directory: %v", err)
- }
- defer func() {
- if err := os.RemoveAll(rootDir); err != nil {
- t.Fatalf("cleanup failed: %v", err)
- }
- }()
+ rootDir := t.TempDir()
- // Create a file exceeding the size limit
- largeFilePath := filepath.Join(rootDir, "largefile.txt")
+ // Create test files
largeFileData := make([]byte, 6*1024*1024) // 6 MB
- if err := os.WriteFile(largeFilePath, largeFileData, 0644); err != nil {
- t.Fatalf("Failed to write large file: %v", err)
- }
-
- // Create a small file
- smallFilePath := filepath.Join(rootDir, "smallfile.go")
- if err := os.WriteFile(smallFilePath, []byte("package main"), 0644); err != nil {
- t.Fatalf("Failed to write small file: %v", err)
- }
+ testutil.CreateTestFile(t, rootDir, "largefile.txt", largeFileData)
+ testutil.CreateTestFile(t, rootDir, "smallfile.go", []byte("package main"))
// Reset and load default config, which sets size limit to 5 MB
- viper.Reset()
- config.LoadConfig()
+ testutil.ResetViperConfig(t, "")
- var w fileproc.Walker = fileproc.ProdWalker{}
+ w := fileproc.NewProdWalker()
found, err := w.Walk(rootDir)
if err != nil {
t.Fatalf("Walk returned error: %v", err)
diff --git a/fileproc/writer.go b/fileproc/writer.go
index b530b25..8858b0e 100644
--- a/fileproc/writer.go
+++ b/fileproc/writer.go
@@ -1,101 +1,29 @@
// Package fileproc provides a writer for the output of the file processor.
-//
-// The StartWriter function writes the output in the specified format.
-// The formatMarkdown function formats the output in Markdown format.
-// The detectLanguage function tries to infer the code block language from the file extension.
-// The OutputData struct represents the full output structure.
-// The FileData struct represents a single file's path and content.
package fileproc
import (
- "encoding/json"
"fmt"
"os"
- "github.com/sirupsen/logrus"
- "gopkg.in/yaml.v3"
+ "github.com/ivuorinen/gibidify/utils"
)
-// FileData represents a single file's path and content.
-type FileData struct {
- Path string `json:"path" yaml:"path"`
- Content string `json:"content" yaml:"content"`
-}
-
-// OutputData represents the full output structure.
-type OutputData struct {
- Prefix string `json:"prefix,omitempty" yaml:"prefix,omitempty"`
- Files []FileData `json:"files" yaml:"files"`
- Suffix string `json:"suffix,omitempty" yaml:"suffix,omitempty"`
-}
-
-// StartWriter writes the output in the specified format.
-func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format string, prefix, suffix string) {
- var files []FileData
-
- // Read from channel until closed
- for req := range writeCh {
- files = append(files, FileData(req))
- }
-
- // Create output struct
- output := OutputData{Prefix: prefix, Files: files, Suffix: suffix}
-
- // Serialize based on format
- var outputData []byte
- var err error
-
+// StartWriter writes the output in the specified format, streaming each file
+// to the destination as it arrives instead of buffering the whole output in memory.
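+// Each format-specific writer drains writeCh until it is closed and then
+// closes done; for an unsupported format the error is logged and done is
+// closed immediately.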
+func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format, prefix, suffix string) {
switch format {
- case "json":
- outputData, err = json.MarshalIndent(output, "", " ")
- case "yaml":
- outputData, err = yaml.Marshal(output)
case "markdown":
- outputData = []byte(formatMarkdown(output))
+ startMarkdownWriter(outFile, writeCh, done, prefix, suffix)
+ case "json":
+ startJSONWriter(outFile, writeCh, done, prefix, suffix)
+ case "yaml":
+ startYAMLWriter(outFile, writeCh, done, prefix, suffix)
default:
- err = fmt.Errorf("unsupported format: %s", format)
- }
-
- if err != nil {
- logrus.Errorf("Error encoding output: %v", err)
+ err := utils.NewStructuredError(
+ utils.ErrorTypeValidation,
+ utils.CodeValidationFormat,
+ fmt.Sprintf("unsupported format: %s", format),
+ ).WithContext("format", format)
+ utils.LogError("Failed to encode output", err)
close(done)
- return
- }
-
- // Write to file
- if _, err := outFile.Write(outputData); err != nil {
- logrus.Errorf("Error writing to file: %v", err)
- }
-
- close(done)
-}
-
-func formatMarkdown(output OutputData) string {
- markdown := "# " + output.Prefix + "\n\n"
-
- for _, file := range output.Files {
- markdown += fmt.Sprintf("## File: `%s`\n```%s\n%s\n```\n\n", file.Path, detectLanguage(file.Path), file.Content)
- }
-
- markdown += "# " + output.Suffix
- return markdown
-}
-
-// detectLanguage tries to infer code block language from file extension.
-func detectLanguage(filename string) string {
- if len(filename) < 3 {
- return ""
- }
- switch {
- case len(filename) >= 3 && filename[len(filename)-3:] == ".go":
- return "go"
- case len(filename) >= 3 && filename[len(filename)-3:] == ".py":
- return "python"
- case len(filename) >= 2 && filename[len(filename)-2:] == ".c":
- return "c"
- case len(filename) >= 3 && filename[len(filename)-3:] == ".js":
- return "javascript"
- default:
- return ""
}
}
diff --git a/fileproc/writer_test.go b/fileproc/writer_test.go
index 2c3eaa4..0320e23 100644
--- a/fileproc/writer_test.go
+++ b/fileproc/writer_test.go
@@ -7,8 +7,9 @@ import (
"sync"
"testing"
- fileproc "github.com/ivuorinen/gibidify/fileproc"
"gopkg.in/yaml.v3"
+
+ "github.com/ivuorinen/gibidify/fileproc"
)
func TestStartWriter_Formats(t *testing.T) {
@@ -18,107 +19,109 @@ func TestStartWriter_Formats(t *testing.T) {
format string
expectError bool
}{
- {
- name: "JSON format",
- format: "json",
- expectError: false,
- },
- {
- name: "YAML format",
- format: "yaml",
- expectError: false,
- },
- {
- name: "Markdown format",
- format: "markdown",
- expectError: false,
- },
- {
- name: "Invalid format",
- format: "invalid",
- expectError: true,
- },
+ {"JSON format", "json", false},
+ {"YAML format", "yaml", false},
+ {"Markdown format", "markdown", false},
+ {"Invalid format", "invalid", true},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
- outFile, err := os.CreateTemp("", "gibidify_test_output")
- if err != nil {
- t.Fatalf("Failed to create temp file: %v", err)
- }
- defer func() {
- if err := outFile.Close(); err != nil {
- t.Errorf("close temp file: %v", err)
- }
- if err := os.Remove(outFile.Name()); err != nil {
- t.Errorf("remove temp file: %v", err)
- }
- }()
-
- // Prepare channels
- writeCh := make(chan fileproc.WriteRequest, 2)
- doneCh := make(chan struct{})
-
- // Write a couple of sample requests
- writeCh <- fileproc.WriteRequest{Path: "sample.go", Content: "package main"}
- writeCh <- fileproc.WriteRequest{Path: "example.py", Content: "def foo(): pass"}
- close(writeCh)
-
- // Start the writer
- var wg sync.WaitGroup
- wg.Add(1)
- go func() {
- defer wg.Done()
- fileproc.StartWriter(outFile, writeCh, doneCh, tc.format, "PREFIX", "SUFFIX")
- }()
-
- // Wait until writer signals completion
- wg.Wait()
- <-doneCh // make sure all writes finished
-
- // Read output
- data, err := os.ReadFile(outFile.Name())
- if err != nil {
- t.Fatalf("Error reading output file: %v", err)
- }
-
+ data := runWriterTest(t, tc.format)
if tc.expectError {
- // For an invalid format, we expect StartWriter to log an error
- // and produce no content or minimal content. There's no official
- // error returned, so check if it's empty or obviously incorrect.
- if len(data) != 0 {
- t.Errorf("Expected no output for invalid format, got:\n%s", data)
- }
+ verifyErrorOutput(t, data)
} else {
- // Valid format: check basic properties in the output
- content := string(data)
- switch tc.format {
- case "json":
- // Quick parse check
- var outStruct fileproc.OutputData
- if err := json.Unmarshal(data, &outStruct); err != nil {
- t.Errorf("JSON unmarshal failed: %v", err)
- }
- case "yaml":
- var outStruct fileproc.OutputData
- if err := yaml.Unmarshal(data, &outStruct); err != nil {
- t.Errorf("YAML unmarshal failed: %v", err)
- }
- case "markdown":
- // Check presence of code fences or "## File: ..."
- if !strings.Contains(content, "```") {
- t.Error("Expected markdown code fences not found")
- }
- }
-
- // Prefix and suffix checks (common to JSON, YAML, markdown)
- if !strings.Contains(string(data), "PREFIX") {
- t.Errorf("Missing prefix in output: %s", data)
- }
- if !strings.Contains(string(data), "SUFFIX") {
- t.Errorf("Missing suffix in output: %s", data)
- }
+ verifyValidOutput(t, data, tc.format)
+ verifyPrefixSuffix(t, data)
}
})
}
}
+
+// runWriterTest executes the writer with the given format and returns the output data.
+func runWriterTest(t *testing.T, format string) []byte {
+ t.Helper()
+ outFile, err := os.CreateTemp(t.TempDir(), "gibidify_test_output")
+ if err != nil {
+ t.Fatalf("Failed to create temp file: %v", err)
+ }
+ defer func() {
+ if closeErr := outFile.Close(); closeErr != nil {
+ t.Errorf("close temp file: %v", closeErr)
+ }
+ if removeErr := os.Remove(outFile.Name()); removeErr != nil {
+ t.Errorf("remove temp file: %v", removeErr)
+ }
+ }()
+
+ // Prepare channels
+ writeCh := make(chan fileproc.WriteRequest, 2)
+ doneCh := make(chan struct{})
+
+ // Write a couple of sample requests
+ writeCh <- fileproc.WriteRequest{Path: "sample.go", Content: "package main"}
+ writeCh <- fileproc.WriteRequest{Path: "example.py", Content: "def foo(): pass"}
+ close(writeCh)
+
+ // Start the writer
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ fileproc.StartWriter(outFile, writeCh, doneCh, format, "PREFIX", "SUFFIX")
+ }()
+
+ // Wait until writer signals completion
+ wg.Wait()
+ <-doneCh // make sure all writes finished
+
+ // Read output
+ data, err := os.ReadFile(outFile.Name())
+ if err != nil {
+ t.Fatalf("Error reading output file: %v", err)
+ }
+
+ return data
+}
+
+// verifyErrorOutput checks that error cases produce no output.
+func verifyErrorOutput(t *testing.T, data []byte) {
+ t.Helper()
+ if len(data) != 0 {
+ t.Errorf("Expected no output for invalid format, got:\n%s", data)
+ }
+}
+
+// verifyValidOutput checks format-specific output validity.
+func verifyValidOutput(t *testing.T, data []byte, format string) {
+ t.Helper()
+ content := string(data)
+ switch format {
+ case "json":
+ var outStruct fileproc.OutputData
+ if err := json.Unmarshal(data, &outStruct); err != nil {
+ t.Errorf("JSON unmarshal failed: %v", err)
+ }
+ case "yaml":
+ var outStruct fileproc.OutputData
+ if err := yaml.Unmarshal(data, &outStruct); err != nil {
+ t.Errorf("YAML unmarshal failed: %v", err)
+ }
+ case "markdown":
+ if !strings.Contains(content, "```") {
+ t.Error("Expected markdown code fences not found")
+ }
+ }
+}
+
+// verifyPrefixSuffix checks that output contains expected prefix and suffix.
+func verifyPrefixSuffix(t *testing.T, data []byte) {
+ t.Helper()
+ content := string(data)
+ if !strings.Contains(content, "PREFIX") {
+ t.Errorf("Missing prefix in output: %s", data)
+ }
+ if !strings.Contains(content, "SUFFIX") {
+ t.Errorf("Missing suffix in output: %s", data)
+ }
+}
diff --git a/fileproc/yaml_writer.go b/fileproc/yaml_writer.go
new file mode 100644
index 0000000..3ea60f2
--- /dev/null
+++ b/fileproc/yaml_writer.go
@@ -0,0 +1,148 @@
+package fileproc
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "os"
+ "strings"
+
+ "github.com/ivuorinen/gibidify/utils"
+)
+
+// YAMLWriter handles YAML format output with streaming support.
+type YAMLWriter struct {
+ outFile *os.File
+}
+
+// NewYAMLWriter creates a new YAML writer.
+func NewYAMLWriter(outFile *os.File) *YAMLWriter {
+ return &YAMLWriter{outFile: outFile}
+}
+
+// Start writes the YAML document header: the prefix and suffix scalars and
+// the opening "files:" key.
+func (w *YAMLWriter) Start(prefix, suffix string) error {
+ // Write YAML header
+ if _, err := fmt.Fprintf(w.outFile, "prefix: %s\nsuffix: %s\nfiles:\n", yamlQuoteString(prefix), yamlQuoteString(suffix)); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML header")
+ }
+ return nil
+}
+
+// WriteFile writes a file entry in YAML format.
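+// Requests with IsStream set are streamed from req.Reader line by line;
+// smaller requests are written inline from req.Content.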
+func (w *YAMLWriter) WriteFile(req WriteRequest) error {
+ if req.IsStream {
+ return w.writeStreaming(req)
+ }
+ return w.writeInline(req)
+}
+
+// Close is a no-op: the YAML output needs no footer.
+func (w *YAMLWriter) Close() error {
+ return nil
+}
+
+// writeStreaming writes a large file as YAML in streaming chunks.
+func (w *YAMLWriter) writeStreaming(req WriteRequest) error {
+ defer w.closeReader(req.Reader, req.Path)
+
+ language := detectLanguage(req.Path)
+
+ // Write YAML file entry start
+ if _, err := fmt.Fprintf(w.outFile, " - path: %s\n language: %s\n content: |\n", yamlQuoteString(req.Path), language); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML file start").WithFilePath(req.Path)
+ }
+
+ // Stream content with YAML indentation
+ return w.streamYAMLContent(req.Reader, req.Path)
+}
+
+// writeInline writes a small file directly as YAML.
+func (w *YAMLWriter) writeInline(req WriteRequest) error {
+ language := detectLanguage(req.Path)
+ fileData := FileData{
+ Path: req.Path,
+ Content: req.Content,
+ Language: language,
+ }
+
+ // Write YAML entry
+ if _, err := fmt.Fprintf(w.outFile, " - path: %s\n language: %s\n content: |\n", yamlQuoteString(fileData.Path), fileData.Language); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML entry start").WithFilePath(req.Path)
+ }
+
+ // Write indented content
+ lines := strings.Split(fileData.Content, "\n")
+ for _, line := range lines {
+ if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML content line").WithFilePath(req.Path)
+ }
+ }
+
+ return nil
+}
+
+// streamYAMLContent streams content with YAML indentation.
+func (w *YAMLWriter) streamYAMLContent(reader io.Reader, path string) error {
+ scanner := bufio.NewScanner(reader)
+ for scanner.Scan() {
+ line := scanner.Text()
+ if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML line").WithFilePath(path)
+ }
+ }
+
+ if err := scanner.Err(); err != nil {
+ return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to scan YAML content").WithFilePath(path)
+ }
+ return nil
+}
+
+// closeReader safely closes a reader if it implements io.Closer.
+func (w *YAMLWriter) closeReader(reader io.Reader, path string) {
+ if closer, ok := reader.(io.Closer); ok {
+ if err := closer.Close(); err != nil {
+ utils.LogError(
+ "Failed to close file reader",
+ utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
+ )
+ }
+ }
+}
+
+// yamlQuoteString quotes a string for YAML output if needed.
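+// For example, `a:b` becomes `"a:b"`, the empty string becomes `""`, and
+// `plain` is returned unchanged.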
+func yamlQuoteString(s string) string {
+ if s == "" {
+ return `""`
+ }
+ // Simple YAML quoting: use double quotes if the string contains special
+ // characters, escaping backslashes, quotes, and control characters so the
+ // scalar stays on one line and remains valid YAML.
+ if strings.ContainsAny(s, "\n\r\t:\"'\\") {
+ escaped := strings.NewReplacer(`\`, `\\`, `"`, `\"`, "\n", `\n`, "\r", `\r`, "\t", `\t`).Replace(s)
+ return fmt.Sprintf(`"%s"`, escaped)
+ }
+ return s
+}
+
+// startYAMLWriter handles YAML format output with streaming support.
+func startYAMLWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
+ defer close(done)
+
+ writer := NewYAMLWriter(outFile)
+
+ // Start writing
+ if err := writer.Start(prefix, suffix); err != nil {
+ utils.LogError("Failed to write YAML header", err)
+ return
+ }
+
+ // Process files
+ for req := range writeCh {
+ if err := writer.WriteFile(req); err != nil {
+ utils.LogError("Failed to write YAML file", err)
+ }
+ }
+
+ // Close writer
+ if err := writer.Close(); err != nil {
+ utils.LogError("Failed to write YAML end", err)
+ }
+}
diff --git a/go.mod b/go.mod
index dec5501..d83419a 100644
--- a/go.mod
+++ b/go.mod
@@ -3,7 +3,9 @@ module github.com/ivuorinen/gibidify
go 1.24.1
require (
+ github.com/fatih/color v1.18.0
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
+ github.com/schollz/progressbar/v3 v3.18.0
github.com/sirupsen/logrus v1.9.3
github.com/spf13/viper v1.20.0
gopkg.in/yaml.v3 v3.0.1
@@ -12,7 +14,11 @@ require (
require (
github.com/fsnotify/fsnotify v1.8.0 // indirect
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
+ github.com/mattn/go-colorable v0.1.13 // indirect
+ github.com/mattn/go-isatty v0.0.20 // indirect
+ github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/pelletier/go-toml/v2 v2.2.3 // indirect
+ github.com/rivo/uniseg v0.4.7 // indirect
github.com/sagikazarmark/locafero v0.8.0 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
github.com/spf13/afero v1.14.0 // indirect
@@ -21,5 +27,6 @@ require (
github.com/subosito/gotenv v1.6.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/sys v0.31.0 // indirect
+ golang.org/x/term v0.28.0 // indirect
golang.org/x/text v0.23.0 // indirect
)
diff --git a/go.sum b/go.sum
index 7ce23c5..e8a1c14 100644
--- a/go.sum
+++ b/go.sum
@@ -1,6 +1,10 @@
+github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM=
+github.com/chengxilo/virtualterm v1.0.4/go.mod h1:DyxxBZz/x1iqJjFxTFcr6/x+jSpqN0iwWCOK1q10rlY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
+github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
@@ -13,16 +17,29 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
+github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
+github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
+github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
+github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
+github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M=
github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
+github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI=
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs=
github.com/sagikazarmark/locafero v0.8.0 h1:mXaMVw7IqxNBxfv3LdWt9MDmcWDQ1fagDH918lOdVaQ=
github.com/sagikazarmark/locafero v0.8.0/go.mod h1:UBUyz37V+EdMS3hDF3QWIiVr/2dPrx49OMO0Bn0hJqk=
+github.com/schollz/progressbar/v3 v3.18.0 h1:uXdoHABRFmNIjUfte/Ex7WtuyVslrw2wVPQmCN62HpA=
+github.com/schollz/progressbar/v3 v3.18.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
@@ -45,8 +62,12 @@ github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSW
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg=
+golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek=
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
diff --git a/main.go b/main.go
index 6510a7c..c768396 100644
--- a/main.go
+++ b/main.go
@@ -4,152 +4,46 @@ package main
import (
"context"
- "flag"
- "fmt"
"os"
- "path/filepath"
- "runtime"
- "sync"
- "github.com/ivuorinen/gibidify/config"
- "github.com/ivuorinen/gibidify/fileproc"
"github.com/sirupsen/logrus"
-)
-var (
- sourceDir string
- destination string
- prefix string
- suffix string
- concurrency int
- format string
+ "github.com/ivuorinen/gibidify/cli"
+ "github.com/ivuorinen/gibidify/config"
)
-func init() {
- flag.StringVar(&sourceDir, "source", "", "Source directory to scan recursively")
- flag.StringVar(&destination, "destination", "", "Output file to write aggregated code")
- flag.StringVar(&prefix, "prefix", "", "Text to add at the beginning of the output file")
- flag.StringVar(&suffix, "suffix", "", "Text to add at the end of the output file")
- flag.StringVar(&format, "format", "markdown", "Output format (json, markdown, yaml)")
- flag.IntVar(&concurrency, "concurrency", runtime.NumCPU(), "Number of concurrent workers (default: number of CPU cores)")
-}
-
func main() {
+ // Initialize UI for error handling
+ ui := cli.NewUIManager()
+ errorFormatter := cli.NewErrorFormatter(ui)
+
// In production, use a background context.
if err := run(context.Background()); err != nil {
- fmt.Println("Error:", err)
- os.Exit(1)
+ // Handle errors with better formatting and suggestions
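+ // User-correctable errors exit with code 1; unexpected system errors exit with code 2.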
+ if cli.IsUserError(err) {
+ errorFormatter.FormatError(err)
+ os.Exit(1)
+ } else {
+ // System errors still go to logrus for debugging
+ logrus.Errorf("System error: %v", err)
+ ui.PrintError("An unexpected error occurred. Please check the logs.")
+ os.Exit(2)
+ }
}
}
// Run executes the main logic of the CLI application using the provided context.
func run(ctx context.Context) error {
- flag.Parse()
-
- if err := validateFlags(); err != nil {
- return err
- }
-
- if err := setDestination(); err != nil {
+ // Parse CLI flags
+ flags, err := cli.ParseFlags()
+ if err != nil {
return err
}
+ // Load configuration
config.LoadConfig()
- logrus.Infof(
- "Starting gibidify. Format: %s, Source: %s, Destination: %s, Workers: %d",
- format,
- sourceDir,
- destination,
- concurrency,
- )
-
- files, err := fileproc.CollectFiles(sourceDir)
- if err != nil {
- return fmt.Errorf("error collecting files: %w", err)
- }
- logrus.Infof("Found %d files to process", len(files))
-
- outFile, err := os.Create(destination)
- if err != nil {
- return fmt.Errorf("failed to create output file %s: %w", destination, err)
- }
- defer func(outFile *os.File) {
- if err := outFile.Close(); err != nil {
- logrus.Errorf("Error closing output file: %v", err)
- }
- }(outFile)
-
- fileCh := make(chan string)
- writeCh := make(chan fileproc.WriteRequest)
- writerDone := make(chan struct{})
-
- go fileproc.StartWriter(outFile, writeCh, writerDone, format, prefix, suffix)
-
- var wg sync.WaitGroup
-
- startWorkers(ctx, &wg, fileCh, writeCh)
-
- for _, fp := range files {
- select {
- case <-ctx.Done():
- close(fileCh)
- return ctx.Err()
- case fileCh <- fp:
- }
- }
- close(fileCh)
-
- wg.Wait()
- close(writeCh)
- <-writerDone
-
- logrus.Infof("Processing completed. Output saved to %s", destination)
- return nil
-}
-func validateFlags() error {
- if sourceDir == "" {
- return fmt.Errorf("usage: gibidify -source [--destination ] [--format=json|yaml|markdown] ")
- }
- return nil
-}
-
-func setDestination() error {
- if destination == "" {
- absRoot, err := filepath.Abs(sourceDir)
- if err != nil {
- return fmt.Errorf("failed to get absolute path for %s: %w", sourceDir, err)
- }
- baseName := filepath.Base(absRoot)
- if baseName == "." || baseName == "" {
- baseName = "output"
- }
- destination = baseName + "." + format
- }
- return nil
-}
-
-func startWorkers(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) {
- for i := 0; i < concurrency; i++ {
- wg.Add(1)
- go func() {
- defer wg.Done()
- for {
- select {
- case <-ctx.Done():
- return
- case filePath, ok := <-fileCh:
- if !ok {
- return
- }
- absRoot, err := filepath.Abs(sourceDir)
- if err != nil {
- logrus.Errorf("Failed to get absolute path for %s: %v", sourceDir, err)
- return
- }
- fileproc.ProcessFile(filePath, writeCh, absRoot)
- }
- }
- }()
- }
+ // Create and run processor
+ processor := cli.NewProcessor(flags)
+ return processor.Process(ctx)
}
diff --git a/main_test.go b/main_test.go
index a7372cf..ab89aac 100644
--- a/main_test.go
+++ b/main_test.go
@@ -4,139 +4,103 @@ import (
"context"
"fmt"
"os"
- "path/filepath"
- "strings"
"testing"
"time"
+
+ "github.com/ivuorinen/gibidify/testutil"
+)
+
+const (
+ testFileCount = 1000
)
// TestIntegrationFullCLI simulates a full run of the CLI application with a fixed concurrency of two workers.
func TestIntegrationFullCLI(t *testing.T) {
- // Create a temporary source directory and populate it with test files.
- srcDir, err := os.MkdirTemp("", "gibidify_src")
- if err != nil {
- t.Fatalf("Failed to create temp source directory: %v", err)
- }
- defer func() {
- if err := os.RemoveAll(srcDir); err != nil {
- t.Fatalf("cleanup failed: %v", err)
- }
- }()
-
- // Create two test files.
- file1 := filepath.Join(srcDir, "file1.txt")
- if err := os.WriteFile(file1, []byte("Hello World"), 0644); err != nil {
- t.Fatalf("Failed to write file1: %v", err)
- }
- file2 := filepath.Join(srcDir, "file2.go")
- if err := os.WriteFile(file2, []byte("package main\nfunc main() {}"), 0644); err != nil {
- t.Fatalf("Failed to write file2: %v", err)
- }
-
- // Create a temporary output file.
- outFile, err := os.CreateTemp("", "gibidify_output.txt")
- if err != nil {
- t.Fatalf("Failed to create temp output file: %v", err)
- }
- outFilePath := outFile.Name()
- if err := outFile.Close(); err != nil {
- t.Fatalf("close temp file: %v", err)
- }
- defer func() {
- if err := os.Remove(outFilePath); err != nil {
- t.Fatalf("cleanup output file: %v", err)
- }
- }()
-
- // Set up CLI arguments.
- os.Args = []string{
- "gibidify",
- "-source", srcDir,
- "-destination", outFilePath,
- "-prefix", "PREFIX",
- "-suffix", "SUFFIX",
- "-concurrency", "2", // For testing, set concurrency to 2.
- }
+ srcDir := setupTestFiles(t)
+ outFilePath := setupOutputFile(t)
+ setupCLIArgs(srcDir, outFilePath)
// Run the application with a background context.
- ctx := context.Background()
- if err := run(ctx); err != nil {
- t.Fatalf("Run failed: %v", err)
+ ctx := t.Context()
+ if runErr := run(ctx); runErr != nil {
+ t.Fatalf("Run failed: %v", runErr)
}
- // Verify the output file contains the expected prefix, file contents, and suffix.
+ verifyOutput(t, outFilePath)
+}
+
+// setupTestFiles creates test files and returns the source directory.
+func setupTestFiles(t *testing.T) string {
+ t.Helper()
+ srcDir := t.TempDir()
+
+ // Create two test files.
+ testutil.CreateTestFiles(t, srcDir, []testutil.FileSpec{
+ {Name: "file1.txt", Content: "Hello World"},
+ {Name: "file2.go", Content: "package main\nfunc main() {}"},
+ })
+
+ return srcDir
+}
+
+// setupOutputFile creates a temporary output file and returns its path.
+func setupOutputFile(t *testing.T) string {
+ t.Helper()
+ outFile, outFilePath := testutil.CreateTempOutputFile(t, "gibidify_output.txt")
+ testutil.CloseFile(t, outFile)
+
+ return outFilePath
+}
+
+// setupCLIArgs configures the CLI arguments for testing.
+func setupCLIArgs(srcDir, outFilePath string) {
+ testutil.SetupCLIArgs(srcDir, outFilePath, "PREFIX", "SUFFIX", 2)
+}
+
+// verifyOutput checks that the output file contains expected content.
+func verifyOutput(t *testing.T, outFilePath string) {
+ t.Helper()
data, err := os.ReadFile(outFilePath)
if err != nil {
t.Fatalf("Failed to read output file: %v", err)
}
output := string(data)
- if !strings.Contains(output, "PREFIX") {
- t.Error("Output missing prefix")
- }
- if !strings.Contains(output, "Hello World") {
- t.Error("Output missing content from file1.txt")
- }
- if !strings.Contains(output, "SUFFIX") {
- t.Error("Output missing suffix")
- }
+ testutil.VerifyContentContains(t, output, []string{"PREFIX", "Hello World", "SUFFIX"})
}
// TestIntegrationCancellation verifies that the application correctly cancels processing when the context times out.
func TestIntegrationCancellation(t *testing.T) {
// Create a temporary source directory with many files to simulate a long-running process.
- srcDir, err := os.MkdirTemp("", "gibidify_src_long")
- if err != nil {
- t.Fatalf("Failed to create temp source directory: %v", err)
- }
- defer func() {
- if err := os.RemoveAll(srcDir); err != nil {
- t.Fatalf("cleanup failed: %v", err)
- }
- }()
+ srcDir := t.TempDir()
// Create a large number of small files.
- for i := 0; i < 1000; i++ {
- filePath := filepath.Join(srcDir, fmt.Sprintf("file%d.txt", i))
- if err := os.WriteFile(filePath, []byte("Content"), 0644); err != nil {
- t.Fatalf("Failed to write %s: %v", filePath, err)
- }
+ for i := range testFileCount {
+ fileName := fmt.Sprintf("file%d.txt", i)
+ testutil.CreateTestFile(t, srcDir, fileName, []byte("Content"))
}
// Create a temporary output file.
- outFile, err := os.CreateTemp("", "gibidify_output.txt")
- if err != nil {
- t.Fatalf("Failed to create temp output file: %v", err)
- }
- outFilePath := outFile.Name()
- if err := outFile.Close(); err != nil {
- t.Fatalf("close temp file: %v", err)
- }
+ outFile, outFilePath := testutil.CreateTempOutputFile(t, "gibidify_output.txt")
+ testutil.CloseFile(t, outFile)
defer func() {
- if err := os.Remove(outFilePath); err != nil {
- t.Fatalf("cleanup output file: %v", err)
+ if removeErr := os.Remove(outFilePath); removeErr != nil {
+ t.Fatalf("cleanup output file: %v", removeErr)
}
}()
// Set up CLI arguments.
- os.Args = []string{
- "gibidify",
- "-source", srcDir,
- "-destination", outFilePath,
- "-prefix", "PREFIX",
- "-suffix", "SUFFIX",
- "-concurrency", "2",
- }
+ testutil.SetupCLIArgs(srcDir, outFilePath, "PREFIX", "SUFFIX", 2)
// Create a context with a very short timeout to force cancellation.
ctx, cancel := context.WithTimeout(
- context.Background(),
- 10*time.Millisecond,
+ t.Context(),
+ 1*time.Millisecond,
)
defer cancel()
// Run the application; we expect an error due to cancellation.
- err = run(ctx)
- if err == nil {
+ runErr := run(ctx)
+ if runErr == nil {
t.Error("Expected Run to fail due to cancellation, but it succeeded")
}
}
diff --git a/testutil/testutil.go b/testutil/testutil.go
new file mode 100644
index 0000000..ecc43c4
--- /dev/null
+++ b/testutil/testutil.go
@@ -0,0 +1,117 @@
+// Package testutil provides common testing utilities and helper functions.
+package testutil
+
+import (
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "testing"
+
+ "github.com/spf13/viper"
+
+ "github.com/ivuorinen/gibidify/config"
+)
+
+const (
+ // FilePermission is the default file permission for test files.
+ FilePermission = 0o644
+ // DirPermission is the default directory permission for test directories.
+ DirPermission = 0o755
+)
+
+// CreateTestFile creates a test file with the given content and returns its path.
+func CreateTestFile(t *testing.T, dir, filename string, content []byte) string {
+ t.Helper()
+ filePath := filepath.Join(dir, filename)
+ if err := os.WriteFile(filePath, content, FilePermission); err != nil {
+ t.Fatalf("Failed to write file %s: %v", filePath, err)
+ }
+ return filePath
+}
+
+// CreateTempOutputFile creates a temporary output file and returns the file handle and path.
+func CreateTempOutputFile(t *testing.T, pattern string) (file *os.File, path string) {
+ t.Helper()
+ outFile, err := os.CreateTemp(t.TempDir(), pattern)
+ if err != nil {
+ t.Fatalf("Failed to create temp output file: %v", err)
+ }
+ path = outFile.Name()
+ return outFile, path
+}
+
+// CreateTestDirectory creates a test directory and returns its path.
+func CreateTestDirectory(t *testing.T, parent, name string) string {
+ t.Helper()
+ dirPath := filepath.Join(parent, name)
+ if err := os.Mkdir(dirPath, DirPermission); err != nil {
+ t.Fatalf("Failed to create directory %s: %v", dirPath, err)
+ }
+ return dirPath
+}
+
+// FileSpec represents a file specification for creating test files.
+type FileSpec struct {
+ Name string
+ Content string
+}
+
+// CreateTestFiles creates multiple test files from specifications.
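+//
+// Example:
+//
+//	paths := CreateTestFiles(t, dir, []FileSpec{{Name: "a.go", Content: "package a"}})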
+func CreateTestFiles(t *testing.T, rootDir string, fileSpecs []FileSpec) []string {
+ t.Helper()
+ createdFiles := make([]string, 0, len(fileSpecs))
+ for _, spec := range fileSpecs {
+ filePath := CreateTestFile(t, rootDir, spec.Name, []byte(spec.Content))
+ createdFiles = append(createdFiles, filePath)
+ }
+ return createdFiles
+}
+
+// ResetViperConfig resets Viper configuration and optionally sets a config path.
+func ResetViperConfig(t *testing.T, configPath string) {
+ t.Helper()
+ viper.Reset()
+ if configPath != "" {
+ viper.AddConfigPath(configPath)
+ }
+ config.LoadConfig()
+}
+
+// SetupCLIArgs configures os.Args for CLI testing.
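+// It overwrites the process-wide os.Args, so callers should save and restore
+// the original value when isolation matters (as TestSetupCLIArgs does).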
+func SetupCLIArgs(srcDir, outFilePath, prefix, suffix string, concurrency int) {
+ os.Args = []string{
+ "gibidify",
+ "-source", srcDir,
+ "-destination", outFilePath,
+ "-prefix", prefix,
+ "-suffix", suffix,
+ "-concurrency", strconv.Itoa(concurrency),
+ }
+}
+
+// VerifyContentContains checks that content contains all expected substrings.
+func VerifyContentContains(t *testing.T, content string, expectedSubstrings []string) {
+ t.Helper()
+ for _, expected := range expectedSubstrings {
+ if !strings.Contains(content, expected) {
+ t.Errorf("Content missing expected substring: %s", expected)
+ }
+ }
+}
+
+// MustSucceed fails the test if the error is not nil.
+func MustSucceed(t *testing.T, err error, operation string) {
+ t.Helper()
+ if err != nil {
+ t.Fatalf("Operation %s failed: %v", operation, err)
+ }
+}
+
+// CloseFile closes a file and reports errors to the test.
+func CloseFile(t *testing.T, file *os.File) {
+ t.Helper()
+ if err := file.Close(); err != nil {
+ t.Errorf("Failed to close file: %v", err)
+ }
+}
diff --git a/testutil/testutil_test.go b/testutil/testutil_test.go
new file mode 100644
index 0000000..b2eaf30
--- /dev/null
+++ b/testutil/testutil_test.go
@@ -0,0 +1,591 @@
+package testutil
+
+import (
+ "errors"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "github.com/spf13/viper"
+)
+
+func TestCreateTestFile(t *testing.T) {
+ tests := []struct {
+ name string
+ dir string
+ filename string
+ content []byte
+ wantErr bool
+ }{
+ {
+ name: "create simple test file",
+ filename: "test.txt",
+ content: []byte("hello world"),
+ wantErr: false,
+ },
+ {
+ name: "create file with empty content",
+ filename: "empty.txt",
+ content: []byte{},
+ wantErr: false,
+ },
+ {
+ name: "create file with binary content",
+ filename: "binary.bin",
+ content: []byte{0x00, 0xFF, 0x42},
+ wantErr: false,
+ },
+ {
+ name: "create file with subdirectory",
+ filename: "subdir/test.txt",
+ content: []byte("nested file"),
+ wantErr: false,
+ },
+ {
+ name: "create file with special characters",
+ filename: "special-file_123.go",
+ content: []byte("package main"),
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Use a temporary directory for each test
+ tempDir := t.TempDir()
+ if tt.dir == "" {
+ tt.dir = tempDir
+ }
+
+ // Create subdirectory if needed
+ if strings.Contains(tt.filename, "/") {
+ subdir := filepath.Join(tt.dir, filepath.Dir(tt.filename))
+ if err := os.MkdirAll(subdir, DirPermission); err != nil {
+ t.Fatalf("Failed to create subdirectory: %v", err)
+ }
+ }
+
+ // Test CreateTestFile
+ filePath := CreateTestFile(t, tt.dir, tt.filename, tt.content)
+
+ // Verify file exists
+ info, err := os.Stat(filePath)
+ if err != nil {
+ t.Fatalf("Created file does not exist: %v", err)
+ }
+
+ // Verify it's a regular file
+ if !info.Mode().IsRegular() {
+ t.Errorf("Created path is not a regular file")
+ }
+
+ // Verify permissions
+ if info.Mode().Perm() != FilePermission {
+ t.Errorf("File permissions = %v, want %v", info.Mode().Perm(), FilePermission)
+ }
+
+ // Verify content
+ readContent, err := os.ReadFile(filePath)
+ if err != nil {
+ t.Fatalf("Failed to read created file: %v", err)
+ }
+ if string(readContent) != string(tt.content) {
+ t.Errorf("File content = %q, want %q", readContent, tt.content)
+ }
+ })
+ }
+}
+
+func TestCreateTempOutputFile(t *testing.T) {
+ tests := []struct {
+ name string
+ pattern string
+ }{
+ {
+ name: "simple pattern",
+ pattern: "output-*.txt",
+ },
+ {
+ name: "pattern with prefix only",
+ pattern: "test-",
+ },
+ {
+ name: "pattern with suffix only",
+ pattern: "*.json",
+ },
+ {
+ name: "empty pattern",
+ pattern: "",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ file, path := CreateTempOutputFile(t, tt.pattern)
+ defer CloseFile(t, file)
+
+ // Verify file exists
+ info, err := os.Stat(path)
+ if err != nil {
+ t.Fatalf("Temp file does not exist: %v", err)
+ }
+
+ // Verify it's a regular file
+ if !info.Mode().IsRegular() {
+ t.Errorf("Created path is not a regular file")
+ }
+
+ // Verify we can write to it
+ testContent := []byte("test content")
+ if _, err := file.Write(testContent); err != nil {
+ t.Errorf("Failed to write to temp file: %v", err)
+ }
+
+ // Verify the path is in a temp directory (any temp directory)
+ if !strings.Contains(path, os.TempDir()) {
+ t.Errorf("Temp file not in temp directory: %s", path)
+ }
+ })
+ }
+}
+
+func TestCreateTestDirectory(t *testing.T) {
+ tests := []struct {
+ name string
+ parent string
+ dir string
+ }{
+ {
+ name: "simple directory",
+ dir: "testdir",
+ },
+ {
+ name: "directory with special characters",
+ dir: "test-dir_123",
+ },
+ {
+ name: "nested directory name",
+ dir: "nested/dir",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ tempDir := t.TempDir()
+ if tt.parent == "" {
+ tt.parent = tempDir
+ }
+
+ // For nested directories, create parent first
+ if strings.Contains(tt.dir, "/") {
+ parentPath := filepath.Join(tt.parent, filepath.Dir(tt.dir))
+ if err := os.MkdirAll(parentPath, DirPermission); err != nil {
+ t.Fatalf("Failed to create parent directory: %v", err)
+ }
+ tt.dir = filepath.Base(tt.dir)
+ tt.parent = parentPath
+ }
+
+ dirPath := CreateTestDirectory(t, tt.parent, tt.dir)
+
+ // Verify directory exists
+ info, err := os.Stat(dirPath)
+ if err != nil {
+ t.Fatalf("Created directory does not exist: %v", err)
+ }
+
+ // Verify it's a directory
+ if !info.IsDir() {
+ t.Errorf("Created path is not a directory")
+ }
+
+ // Verify permissions
+ if info.Mode().Perm() != DirPermission {
+ t.Errorf("Directory permissions = %v, want %v", info.Mode().Perm(), DirPermission)
+ }
+
+ // Verify we can create files in it
+ testFile := filepath.Join(dirPath, "test.txt")
+ if err := os.WriteFile(testFile, []byte("test"), FilePermission); err != nil {
+ t.Errorf("Cannot create file in directory: %v", err)
+ }
+ })
+ }
+}
+
+func TestCreateTestFiles(t *testing.T) {
+ tests := []struct {
+ name string
+ fileSpecs []FileSpec
+ wantCount int
+ }{
+ {
+ name: "create multiple files",
+ fileSpecs: []FileSpec{
+ {Name: "file1.txt", Content: "content1"},
+ {Name: "file2.go", Content: "package main"},
+ {Name: "file3.json", Content: `{"key": "value"}`},
+ },
+ wantCount: 3,
+ },
+ {
+ name: "create files with subdirectories",
+ fileSpecs: []FileSpec{
+ {Name: "src/main.go", Content: "package main"},
+ {Name: "test/test.go", Content: "package test"},
+ },
+ wantCount: 2,
+ },
+ {
+ name: "empty file specs",
+ fileSpecs: []FileSpec{},
+ wantCount: 0,
+ },
+ {
+ name: "files with empty content",
+ fileSpecs: []FileSpec{
+ {Name: "empty1.txt", Content: ""},
+ {Name: "empty2.txt", Content: ""},
+ },
+ wantCount: 2,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ rootDir := t.TempDir()
+
+ // Create necessary subdirectories
+ for _, spec := range tt.fileSpecs {
+ if strings.Contains(spec.Name, "/") {
+ subdir := filepath.Join(rootDir, filepath.Dir(spec.Name))
+ if err := os.MkdirAll(subdir, DirPermission); err != nil {
+ t.Fatalf("Failed to create subdirectory: %v", err)
+ }
+ }
+ }
+
+ createdFiles := CreateTestFiles(t, rootDir, tt.fileSpecs)
+
+ // Verify count
+ if len(createdFiles) != tt.wantCount {
+ t.Errorf("Created %d files, want %d", len(createdFiles), tt.wantCount)
+ }
+
+ // Verify each file
+ for i, filePath := range createdFiles {
+ content, err := os.ReadFile(filePath)
+ if err != nil {
+ t.Errorf("Failed to read file %s: %v", filePath, err)
+ continue
+ }
+ if string(content) != tt.fileSpecs[i].Content {
+ t.Errorf("File %s content = %q, want %q", filePath, content, tt.fileSpecs[i].Content)
+ }
+ }
+ })
+ }
+}
+
+func TestResetViperConfig(t *testing.T) {
+ tests := []struct {
+ name string
+ configPath string
+ preSetup func()
+ verify func(t *testing.T)
+ }{
+ {
+ name: "reset with empty config path",
+ configPath: "",
+ preSetup: func() {
+ viper.Set("test.key", "value")
+ },
+ verify: func(t *testing.T) {
+ if viper.IsSet("test.key") {
+ t.Error("Viper config not reset properly")
+ }
+ },
+ },
+ {
+ name: "reset with config path",
+ configPath: t.TempDir(),
+ preSetup: func() {
+ viper.Set("test.key", "value")
+ },
+ verify: func(t *testing.T) {
+ if viper.IsSet("test.key") {
+ t.Error("Viper config not reset properly")
+ }
+ // Verify config path was added
+ paths := viper.ConfigFileUsed()
+ if paths == "" {
+ // This is expected as no config file exists
+ return
+ }
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ tt.preSetup()
+ ResetViperConfig(t, tt.configPath)
+ tt.verify(t)
+ })
+ }
+}
+
+func TestSetupCLIArgs(t *testing.T) {
+ // Save original args
+ originalArgs := os.Args
+ defer func() {
+ os.Args = originalArgs
+ }()
+
+ tests := []struct {
+ name string
+ srcDir string
+ outFile string
+ prefix string
+ suffix string
+ concurrency int
+ wantLen int
+ }{
+ {
+ name: "basic CLI args",
+ srcDir: "/src",
+ outFile: "/out.txt",
+ prefix: "PREFIX",
+ suffix: "SUFFIX",
+ concurrency: 4,
+ wantLen: 11,
+ },
+ {
+ name: "empty strings",
+ srcDir: "",
+ outFile: "",
+ prefix: "",
+ suffix: "",
+ concurrency: 1,
+ wantLen: 11,
+ },
+ {
+ name: "special characters in args",
+ srcDir: "/path with spaces/src",
+ outFile: "/path/to/output file.txt",
+ prefix: "Prefix with\nnewline",
+ suffix: "Suffix with\ttab",
+ concurrency: 8,
+ wantLen: 11,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ SetupCLIArgs(tt.srcDir, tt.outFile, tt.prefix, tt.suffix, tt.concurrency)
+
+ if len(os.Args) != tt.wantLen {
+ t.Errorf("os.Args length = %d, want %d", len(os.Args), tt.wantLen)
+ }
+
+ // Verify specific args
+ if os.Args[0] != "gibidify" {
+ t.Errorf("Program name = %s, want gibidify", os.Args[0])
+ }
+ if os.Args[2] != tt.srcDir {
+ t.Errorf("Source dir = %s, want %s", os.Args[2], tt.srcDir)
+ }
+ if os.Args[4] != tt.outFile {
+ t.Errorf("Output file = %s, want %s", os.Args[4], tt.outFile)
+ }
+ if os.Args[6] != tt.prefix {
+ t.Errorf("Prefix = %s, want %s", os.Args[6], tt.prefix)
+ }
+ if os.Args[8] != tt.suffix {
+ t.Errorf("Suffix = %s, want %s", os.Args[8], tt.suffix)
+ }
+ // Compare via strconv.Itoa so multi-digit concurrency values work.
+ if os.Args[10] != strconv.Itoa(tt.concurrency) {
+ t.Errorf("Concurrency = %s, want %d", os.Args[10], tt.concurrency)
+ }
+ })
+ }
+}
+
+func TestVerifyContentContains(t *testing.T) {
+ // Test successful verification
+ t.Run("all substrings present", func(t *testing.T) {
+ content := "This is a test file with multiple lines"
+ VerifyContentContains(t, content, []string{"test file", "multiple lines"})
+ // If we get here, the test passed
+ })
+
+ // Test empty expected substrings
+ t.Run("empty expected substrings", func(t *testing.T) {
+ content := "Any content"
+ VerifyContentContains(t, content, []string{})
+ // Should pass with no expected strings
+ })
+
+ // The failure path calls t.Errorf, so it cannot be exercised here without
+ // failing this test; keep it behind a skipped sub-test instead.
+ t.Run("verify error reporting", func(t *testing.T) {
+ content := "test"
+ expected := []string{"not found"}
+ // Sub-test that would fail if actually run
+ t.Run("expected_failure", func(t *testing.T) {
+ t.Skip("Skipping actual failure test")
+ VerifyContentContains(t, content, expected)
+ })
+ })
+}
+
+func TestMustSucceed(t *testing.T) {
+ // Test with nil error (should succeed)
+ t.Run("nil error", func(t *testing.T) {
+ MustSucceed(t, nil, "successful operation")
+ // If we get here, the test passed
+ })
+
+ // The failure path calls t.Fatalf, so it cannot be exercised here without
+ // aborting this test; keep it behind a skipped sub-test instead.
+ t.Run("verify error handling", func(t *testing.T) {
+ t.Run("expected_failure", func(t *testing.T) {
+ t.Skip("Skipping actual failure test")
+ MustSucceed(t, errors.New("test error"), "failed operation")
+ })
+ })
+}
+
+func TestCloseFile(t *testing.T) {
+ // Test closing a normal file
+ t.Run("close normal file", func(t *testing.T) {
+ file, err := os.CreateTemp(t.TempDir(), "test")
+ if err != nil {
+ t.Fatalf("Failed to create test file: %v", err)
+ }
+
+ CloseFile(t, file)
+
+ // Verify file is closed by trying to write to it
+ _, writeErr := file.Write([]byte("test"))
+ if writeErr == nil {
+ t.Error("Expected write to fail on closed file")
+ }
+ })
+
+ // Test that CloseFile doesn't panic on already closed files
+ // Note: We can't easily test the error case without causing test failure
+ // since CloseFile calls t.Errorf, which is the expected behavior
+ t.Run("verify CloseFile function exists and is callable", func(t *testing.T) {
+ // This test just verifies the function signature and basic functionality
+ // The error case is tested in integration tests where failures are expected
+ file, err := os.CreateTemp(t.TempDir(), "test")
+ if err != nil {
+ t.Fatalf("Failed to create test file: %v", err)
+ }
+
+ // Test normal case - file should close successfully
+ CloseFile(t, file)
+
+ // Verify file is closed
+ _, writeErr := file.Write([]byte("test"))
+ if writeErr == nil {
+ t.Error("Expected write to fail on closed file")
+ }
+ })
+}
+
+// Test thread safety of functions that might be called concurrently
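+// Note: the helpers report failures via t.Fatalf, which is only guaranteed
+// to work on the test goroutine, so these goroutines exercise the happy path.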
+func TestConcurrentOperations(t *testing.T) {
+ tempDir := t.TempDir()
+ done := make(chan bool)
+
+ // Test concurrent file creation
+ for i := 0; i < 5; i++ {
+ go func(n int) {
+ CreateTestFile(t, tempDir, string(rune('a'+n))+".txt", []byte("content"))
+ done <- true
+ }(i)
+ }
+
+ // Test concurrent directory creation
+ for i := 0; i < 5; i++ {
+ go func(n int) {
+ CreateTestDirectory(t, tempDir, "dir"+string(rune('0'+n)))
+ done <- true
+ }(i)
+ }
+
+ // Wait for all goroutines
+ for i := 0; i < 10; i++ {
+ <-done
+ }
+}
+
+// Benchmarks
+func BenchmarkCreateTestFile(b *testing.B) {
+ tempDir := b.TempDir()
+ content := []byte("benchmark content")
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ // Filenames cycle through 26 letters; os.WriteFile overwrites on reuse,
+ // so repeated names across iterations are harmless.
+ filename := "bench" + string(rune(i%26+'a')) + ".txt"
+ filePath := filepath.Join(tempDir, filename)
+ if err := os.WriteFile(filePath, content, FilePermission); err != nil {
+ b.Fatalf("Failed to write file: %v", err)
+ }
+ }
+}
+
+func BenchmarkCreateTestFiles(b *testing.B) {
+ tempDir := b.TempDir()
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ // Create specs whose names cycle per iteration; overwrites are harmless
+ specs := []FileSpec{
+ {Name: "file1_" + string(rune(i%26+'a')) + ".txt", Content: "content1"},
+ {Name: "file2_" + string(rune(i%26+'a')) + ".txt", Content: "content2"},
+ {Name: "file3_" + string(rune(i%26+'a')) + ".txt", Content: "content3"},
+ }
+
+ for _, spec := range specs {
+ filePath := filepath.Join(tempDir, spec.Name)
+ if err := os.WriteFile(filePath, []byte(spec.Content), FilePermission); err != nil {
+ b.Fatalf("Failed to write file: %v", err)
+ }
+ }
+ }
+}
+
+func BenchmarkVerifyContentContains(b *testing.B) {
+ content := strings.Repeat("test content with various words ", 100)
+ expected := []string{"test", "content", "various", "words"}
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ // We can't use the actual function in benchmark since it needs testing.T
+ // So we'll benchmark the core logic
+ for _, exp := range expected {
+ _ = strings.Contains(content, exp)
+ }
+ }
+}
diff --git a/utils/errors.go b/utils/errors.go
new file mode 100644
index 0000000..4e74402
--- /dev/null
+++ b/utils/errors.go
@@ -0,0 +1,228 @@
+// Package utils provides common utility functions.
+package utils
+
+import (
+ "errors"
+ "fmt"
+
+ "github.com/sirupsen/logrus"
+)
+
+// ErrorType represents the category of error.
+type ErrorType int
+
+const (
+ // ErrorTypeUnknown represents an unknown error type.
+ ErrorTypeUnknown ErrorType = iota
+ // ErrorTypeCLI represents command-line interface errors.
+ ErrorTypeCLI
+ // ErrorTypeFileSystem represents file system operation errors.
+ ErrorTypeFileSystem
+ // ErrorTypeProcessing represents file processing errors.
+ ErrorTypeProcessing
+ // ErrorTypeConfiguration represents configuration errors.
+ ErrorTypeConfiguration
+ // ErrorTypeIO represents input/output errors.
+ ErrorTypeIO
+ // ErrorTypeValidation represents validation errors.
+ ErrorTypeValidation
+)
+
+// String returns the string representation of the error type.
+func (e ErrorType) String() string {
+ switch e {
+ case ErrorTypeCLI:
+ return "CLI"
+ case ErrorTypeFileSystem:
+ return "FileSystem"
+ case ErrorTypeProcessing:
+ return "Processing"
+ case ErrorTypeConfiguration:
+ return "Configuration"
+ case ErrorTypeIO:
+ return "IO"
+ case ErrorTypeValidation:
+ return "Validation"
+ default:
+ return "Unknown"
+ }
+}
+
+// StructuredError represents a structured error with type, code, and context.
+type StructuredError struct {
+ Type ErrorType
+ Code string
+ Message string
+ Cause error
+ Context map[string]any
+ FilePath string
+ Line int
+}
+
+// Error implements the error interface.
+func (e *StructuredError) Error() string {
+ if e.Cause != nil {
+ return fmt.Sprintf("%s [%s]: %s: %v", e.Type, e.Code, e.Message, e.Cause)
+ }
+ return fmt.Sprintf("%s [%s]: %s", e.Type, e.Code, e.Message)
+}
+
+// Unwrap returns the underlying cause error.
+func (e *StructuredError) Unwrap() error {
+ return e.Cause
+}
+
+// WithContext adds context information to the error.
+func (e *StructuredError) WithContext(key string, value any) *StructuredError {
+ if e.Context == nil {
+ e.Context = make(map[string]any)
+ }
+ e.Context[key] = value
+ return e
+}
+
+// WithFilePath adds file path information to the error.
+func (e *StructuredError) WithFilePath(filePath string) *StructuredError {
+ e.FilePath = filePath
+ return e
+}
+
+// WithLine adds line number information to the error.
+func (e *StructuredError) WithLine(line int) *StructuredError {
+ e.Line = line
+ return e
+}
+
+// NewStructuredError creates a new structured error.
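+//
+// Example:
+//
+//	err := NewStructuredError(ErrorTypeIO, CodeIOWrite, "write failed").
+//		WithFilePath("out.txt").
+//		WithContext("bytes", 512)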
+func NewStructuredError(errorType ErrorType, code, message string) *StructuredError {
+ return &StructuredError{
+ Type: errorType,
+ Code: code,
+ Message: message,
+ }
+}
+
+// NewStructuredErrorf creates a new structured error with formatted message.
+func NewStructuredErrorf(errorType ErrorType, code, format string, args ...any) *StructuredError {
+ return &StructuredError{
+ Type: errorType,
+ Code: code,
+ Message: fmt.Sprintf(format, args...),
+ }
+}
+
+// WrapError wraps an existing error with structured error information.
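+// The original error remains reachable through Unwrap, so errors.Is and
+// errors.As keep working on the wrapped value.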
+func WrapError(err error, errorType ErrorType, code, message string) *StructuredError {
+ return &StructuredError{
+ Type: errorType,
+ Code: code,
+ Message: message,
+ Cause: err,
+ }
+}
+
+// WrapErrorf wraps an existing error with formatted message.
+func WrapErrorf(err error, errorType ErrorType, code, format string, args ...any) *StructuredError {
+ return &StructuredError{
+ Type: errorType,
+ Code: code,
+ Message: fmt.Sprintf(format, args...),
+ Cause: err,
+ }
+}
+
+// Common error codes for each type
+const (
+ // CLI Error Codes
+ CodeCLIMissingSource = "MISSING_SOURCE"
+ CodeCLIInvalidArgs = "INVALID_ARGS"
+
+ // FileSystem Error Codes
+ CodeFSPathResolution = "PATH_RESOLUTION"
+ CodeFSPermission = "PERMISSION_DENIED"
+ CodeFSNotFound = "NOT_FOUND"
+ CodeFSAccess = "ACCESS_DENIED"
+
+ // Processing Error Codes
+ CodeProcessingFileRead = "FILE_READ"
+ CodeProcessingCollection = "COLLECTION"
+ CodeProcessingTraversal = "TRAVERSAL"
+ CodeProcessingEncode = "ENCODE"
+
+ // Configuration Error Codes
+ CodeConfigValidation = "VALIDATION"
+ CodeConfigMissing = "MISSING"
+
+ // IO Error Codes
+ CodeIOFileCreate = "FILE_CREATE"
+ CodeIOFileWrite = "FILE_WRITE"
+ CodeIOEncoding = "ENCODING"
+ CodeIOWrite = "WRITE"
+ CodeIORead = "READ"
+ CodeIOClose = "CLOSE"
+
+ // Validation Error Codes
+ CodeValidationFormat = "FORMAT"
+ CodeValidationFileType = "FILE_TYPE"
+ CodeValidationSize = "SIZE_LIMIT"
+)
+
+// Predefined error constructors for common error scenarios
+
+// NewCLIMissingSourceError creates a CLI error for missing source argument.
+func NewCLIMissingSourceError() *StructuredError {
+ return NewStructuredError(ErrorTypeCLI, CodeCLIMissingSource, "usage: gibidify -source <directory> [--destination <file>] [--format=json|yaml|markdown]")
+}
+
+// NewFileSystemError creates a file system error.
+func NewFileSystemError(code, message string) *StructuredError {
+ return NewStructuredError(ErrorTypeFileSystem, code, message)
+}
+
+// NewProcessingError creates a processing error.
+func NewProcessingError(code, message string) *StructuredError {
+ return NewStructuredError(ErrorTypeProcessing, code, message)
+}
+
+// NewIOError creates an IO error.
+func NewIOError(code, message string) *StructuredError {
+ return NewStructuredError(ErrorTypeIO, code, message)
+}
+
+// NewValidationError creates a validation error.
+func NewValidationError(code, message string) *StructuredError {
+ return NewStructuredError(ErrorTypeValidation, code, message)
+}
+
+// LogError logs an error with a consistent format if the error is not nil.
+// The operation parameter describes what was being attempted.
+// Additional context can be provided via the args parameter.
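+//
+// Example:
+//
+//	LogError("failed to process file %s", err, path)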
+func LogError(operation string, err error, args ...any) {
+ if err != nil {
+ msg := operation
+ if len(args) > 0 {
+ // Format the operation string with the provided arguments
+ msg = fmt.Sprintf(operation, args...)
+ }
+
+ // Check if it's a structured error and log with additional context;
+ // errors.As also matches structured errors hidden behind wrapping.
+ var structErr *StructuredError
+ if errors.As(err, &structErr) {
+ logrus.WithFields(logrus.Fields{
+ "error_type": structErr.Type.String(),
+ "error_code": structErr.Code,
+ "context": structErr.Context,
+ "file_path": structErr.FilePath,
+ "line": structErr.Line,
+ }).Errorf("%s: %v", msg, err)
+ } else {
+ logrus.Errorf("%s: %v", msg, err)
+ }
+ }
+}
+
+// LogErrorf logs an error with a formatted message if the error is not nil.
+// It is a convenience wrapper around LogError for call sites that always
+// format the message; LogError already handles the nil-error case.
+func LogErrorf(err error, format string, args ...any) {
+	LogError(format, err, args...)
+}
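+
+// Illustrative sketch (hypothetical call site and variables):
+//
+//	LogErrorf(err, "failed to copy %s to %s", src, dst)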
diff --git a/utils/errors_test.go b/utils/errors_test.go
new file mode 100644
index 0000000..1831240
--- /dev/null
+++ b/utils/errors_test.go
@@ -0,0 +1,242 @@
+package utils
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"strings"
+	"testing"
+
+	"github.com/sirupsen/logrus"
+)
+
+// captureLogOutput captures logrus output produced by f for testing.
+func captureLogOutput(f func()) string {
+	var buf bytes.Buffer
+	// Save the current writer before redirecting: a defer of
+	// logrus.SetOutput(logrus.StandardLogger().Out) placed after
+	// SetOutput(&buf) would evaluate its argument as &buf and never restore
+	// the original writer.
+	orig := logrus.StandardLogger().Out
+	logrus.SetOutput(&buf)
+	defer logrus.SetOutput(orig)
+	f()
+	return buf.String()
+}
+
+func TestLogError(t *testing.T) {
+ tests := []struct {
+ name string
+ operation string
+ err error
+ args []any
+ wantLog string
+ wantEmpty bool
+ }{
+ {
+ name: "nil error should not log",
+ operation: "test operation",
+ err: nil,
+ args: nil,
+ wantEmpty: true,
+ },
+ {
+ name: "basic error logging",
+ operation: "failed to read file",
+ err: errors.New("permission denied"),
+ args: nil,
+ wantLog: "failed to read file: permission denied",
+ },
+ {
+ name: "error with formatting args",
+ operation: "failed to process file %s",
+ err: errors.New("file too large"),
+ args: []any{"test.txt"},
+ wantLog: "failed to process file test.txt: file too large",
+ },
+ {
+ name: "error with multiple formatting args",
+ operation: "failed to copy from %s to %s",
+ err: errors.New("disk full"),
+ args: []any{"source.txt", "dest.txt"},
+ wantLog: "failed to copy from source.txt to dest.txt: disk full",
+ },
+ {
+ name: "wrapped error",
+ operation: "database operation failed",
+ err: fmt.Errorf("connection error: %w", errors.New("timeout")),
+ args: nil,
+ wantLog: "database operation failed: connection error: timeout",
+ },
+ {
+ name: "empty operation string",
+ operation: "",
+ err: errors.New("some error"),
+ args: nil,
+ wantLog: ": some error",
+ },
+ {
+ name: "operation with percentage sign",
+ operation: "processing 50% complete",
+ err: errors.New("interrupted"),
+ args: nil,
+ wantLog: "processing 50% complete: interrupted",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ output := captureLogOutput(func() {
+ LogError(tt.operation, tt.err, tt.args...)
+ })
+
+ if tt.wantEmpty {
+ if output != "" {
+ t.Errorf("LogError() logged output when error was nil: %q", output)
+ }
+ return
+ }
+
+ if !strings.Contains(output, tt.wantLog) {
+ t.Errorf("LogError() output = %q, want to contain %q", output, tt.wantLog)
+ }
+
+ // Verify it's logged at ERROR level
+ if !strings.Contains(output, "level=error") {
+ t.Errorf("LogError() should log at ERROR level, got: %q", output)
+ }
+ })
+ }
+}
+
+func TestLogErrorf(t *testing.T) {
+ tests := []struct {
+ name string
+ err error
+ format string
+ args []any
+ wantLog string
+ wantEmpty bool
+ }{
+ {
+ name: "nil error should not log",
+ err: nil,
+ format: "operation %s failed",
+ args: []any{"test"},
+ wantEmpty: true,
+ },
+ {
+ name: "basic formatted error",
+ err: errors.New("not found"),
+ format: "file %s not found",
+ args: []any{"config.yaml"},
+ wantLog: "file config.yaml not found: not found",
+ },
+ {
+ name: "multiple format arguments",
+ err: errors.New("invalid range"),
+ format: "value %d is not between %d and %d",
+ args: []any{150, 0, 100},
+ wantLog: "value 150 is not between 0 and 100: invalid range",
+ },
+ {
+ name: "no format arguments",
+ err: errors.New("generic error"),
+ format: "operation failed",
+ args: nil,
+ wantLog: "operation failed: generic error",
+ },
+ {
+ name: "format with different types",
+ err: errors.New("type mismatch"),
+ format: "expected %s but got %d",
+ args: []any{"string", 42},
+ wantLog: "expected string but got 42: type mismatch",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ output := captureLogOutput(func() {
+ LogErrorf(tt.err, tt.format, tt.args...)
+ })
+
+ if tt.wantEmpty {
+ if output != "" {
+ t.Errorf("LogErrorf() logged output when error was nil: %q", output)
+ }
+ return
+ }
+
+ if !strings.Contains(output, tt.wantLog) {
+ t.Errorf("LogErrorf() output = %q, want to contain %q", output, tt.wantLog)
+ }
+
+ // Verify it's logged at ERROR level
+ if !strings.Contains(output, "level=error") {
+ t.Errorf("LogErrorf() should log at ERROR level, got: %q", output)
+ }
+ })
+ }
+}
+
+func TestLogErrorConcurrency(t *testing.T) {
+ // Test that LogError is safe for concurrent use
+ done := make(chan bool)
+ for i := 0; i < 10; i++ {
+ go func(n int) {
+ LogError("concurrent operation", fmt.Errorf("error %d", n))
+ done <- true
+ }(i)
+ }
+
+ // Wait for all goroutines to complete
+ for i := 0; i < 10; i++ {
+ <-done
+ }
+}
+
+func TestLogErrorfConcurrency(t *testing.T) {
+ // Test that LogErrorf is safe for concurrent use
+ done := make(chan bool)
+ for i := 0; i < 10; i++ {
+ go func(n int) {
+ LogErrorf(fmt.Errorf("error %d", n), "concurrent operation %d", n)
+ done <- true
+ }(i)
+ }
+
+ // Wait for all goroutines to complete
+ for i := 0; i < 10; i++ {
+ <-done
+ }
+}
+
+// BenchmarkLogError benchmarks the LogError function
+func BenchmarkLogError(b *testing.B) {
+	err := errors.New("benchmark error")
+	// Discard output during the benchmark so buffer growth does not skew the
+	// numbers; save the original writer first so the defer restores it rather
+	// than the discard writer.
+	orig := logrus.StandardLogger().Out
+	logrus.SetOutput(io.Discard)
+	defer logrus.SetOutput(orig)
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		LogError("benchmark operation", err)
+	}
+}
+
+// BenchmarkLogErrorf benchmarks the LogErrorf function
+func BenchmarkLogErrorf(b *testing.B) {
+	err := errors.New("benchmark error")
+	// Discard output during the benchmark; save the original writer first so
+	// the defer restores it rather than the discard writer.
+	orig := logrus.StandardLogger().Out
+	logrus.SetOutput(io.Discard)
+	defer logrus.SetOutput(orig)
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		LogErrorf(err, "benchmark operation %d", i)
+	}
+}
+
+// BenchmarkLogErrorNil benchmarks LogError with nil error (no-op case)
+func BenchmarkLogErrorNil(b *testing.B) {
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ LogError("benchmark operation", nil)
+ }
+}
diff --git a/utils/paths.go b/utils/paths.go
new file mode 100644
index 0000000..845d0ca
--- /dev/null
+++ b/utils/paths.go
@@ -0,0 +1,26 @@
+// Package utils provides common utility functions.
+package utils
+
+import (
+ "fmt"
+ "path/filepath"
+)
+
+// GetAbsolutePath returns the absolute path for the given path.
+// It wraps filepath.Abs with consistent error handling.
+func GetAbsolutePath(path string) (string, error) {
+ abs, err := filepath.Abs(path)
+ if err != nil {
+ return "", fmt.Errorf("failed to get absolute path for %s: %w", path, err)
+ }
+ return abs, nil
+}
+
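+// Illustrative sketch (hypothetical call site): relative inputs are resolved
+// against the current working directory:
+//
+//	abs, err := GetAbsolutePath("./src")
+//	if err != nil {
+//		return err
+//	}
+//	// abs is now an absolute, cleaned path, e.g. /home/user/project/src.
+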
+// GetBaseName returns the base name for the given path, handling special cases.
+func GetBaseName(absPath string) string {
+ baseName := filepath.Base(absPath)
+ if baseName == "." || baseName == "" {
+ return "output"
+ }
+ return baseName
+}
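+
+// Illustrative sketch (hypothetical inputs): the fallback covers a base of
+// ".", which filepath.Base also returns for an empty path:
+//
+//	GetBaseName("/home/user/project") // "project"
+//	GetBaseName(".")                  // "output"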
diff --git a/utils/paths_test.go b/utils/paths_test.go
new file mode 100644
index 0000000..fe5b80e
--- /dev/null
+++ b/utils/paths_test.go
@@ -0,0 +1,262 @@
+package utils
+
+import (
+ "os"
+ "path/filepath"
+ "runtime"
+ "strings"
+ "testing"
+)
+
+func TestGetAbsolutePath(t *testing.T) {
+ // Get current working directory for tests
+ cwd, err := os.Getwd()
+ if err != nil {
+ t.Fatalf("Failed to get current directory: %v", err)
+ }
+
+ tests := []struct {
+ name string
+ path string
+ wantPrefix string
+ wantErr bool
+ wantErrMsg string
+ skipWindows bool
+ }{
+ {
+ name: "absolute path unchanged",
+ path: cwd,
+ wantPrefix: cwd,
+ wantErr: false,
+ },
+ {
+ name: "relative path current directory",
+ path: ".",
+ wantPrefix: cwd,
+ wantErr: false,
+ },
+ {
+ name: "relative path parent directory",
+ path: "..",
+ wantPrefix: filepath.Dir(cwd),
+ wantErr: false,
+ },
+ {
+ name: "relative path with file",
+ path: "test.txt",
+ wantPrefix: filepath.Join(cwd, "test.txt"),
+ wantErr: false,
+ },
+ {
+ name: "relative path with subdirectory",
+ path: "subdir/file.go",
+ wantPrefix: filepath.Join(cwd, "subdir", "file.go"),
+ wantErr: false,
+ },
+ {
+ name: "empty path",
+ path: "",
+ wantPrefix: cwd,
+ wantErr: false,
+ },
+ {
+ name: "path with tilde",
+ path: "~/test",
+ wantPrefix: filepath.Join(cwd, "~", "test"),
+ wantErr: false,
+ skipWindows: false,
+ },
+ {
+ name: "path with multiple separators",
+ path: "path//to///file",
+ wantPrefix: filepath.Join(cwd, "path", "to", "file"),
+ wantErr: false,
+ },
+ {
+ name: "path with trailing separator",
+ path: "path/",
+ wantPrefix: filepath.Join(cwd, "path"),
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if tt.skipWindows && runtime.GOOS == "windows" {
+ t.Skip("Skipping test on Windows")
+ }
+
+ got, err := GetAbsolutePath(tt.path)
+
+ if tt.wantErr {
+ if err == nil {
+ t.Errorf("GetAbsolutePath() error = nil, wantErr %v", tt.wantErr)
+ return
+ }
+ if tt.wantErrMsg != "" && !strings.Contains(err.Error(), tt.wantErrMsg) {
+ t.Errorf("GetAbsolutePath() error = %v, want error containing %v", err, tt.wantErrMsg)
+ }
+ return
+ }
+
+ if err != nil {
+ t.Errorf("GetAbsolutePath() unexpected error = %v", err)
+ return
+ }
+
+ // Clean the expected path for comparison
+ wantClean := filepath.Clean(tt.wantPrefix)
+ gotClean := filepath.Clean(got)
+
+ if gotClean != wantClean {
+ t.Errorf("GetAbsolutePath() = %v, want %v", gotClean, wantClean)
+ }
+
+ // Verify the result is actually absolute
+ if !filepath.IsAbs(got) {
+ t.Errorf("GetAbsolutePath() returned non-absolute path: %v", got)
+ }
+ })
+ }
+}
+
+func TestGetAbsolutePathSpecialCases(t *testing.T) {
+ if runtime.GOOS == "windows" {
+ t.Skip("Skipping Unix-specific tests on Windows")
+ }
+
+ tests := []struct {
+ name string
+ setup func() (string, func())
+ path string
+ wantErr bool
+ }{
+ {
+ name: "symlink to directory",
+ setup: func() (string, func()) {
+ tmpDir := t.TempDir()
+ target := filepath.Join(tmpDir, "target")
+ link := filepath.Join(tmpDir, "link")
+
+ if err := os.Mkdir(target, 0o755); err != nil {
+ t.Fatalf("Failed to create target directory: %v", err)
+ }
+ if err := os.Symlink(target, link); err != nil {
+ t.Fatalf("Failed to create symlink: %v", err)
+ }
+
+ return link, func() {}
+ },
+ path: "",
+ wantErr: false,
+ },
+ {
+ name: "broken symlink",
+ setup: func() (string, func()) {
+ tmpDir := t.TempDir()
+ link := filepath.Join(tmpDir, "broken_link")
+
+ if err := os.Symlink("/nonexistent/path", link); err != nil {
+ t.Fatalf("Failed to create broken symlink: %v", err)
+ }
+
+ return link, func() {}
+ },
+ path: "",
+ wantErr: false, // filepath.Abs still works with broken symlinks
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ path, cleanup := tt.setup()
+ defer cleanup()
+
+ if tt.path == "" {
+ tt.path = path
+ }
+
+ got, err := GetAbsolutePath(tt.path)
+ if (err != nil) != tt.wantErr {
+ t.Errorf("GetAbsolutePath() error = %v, wantErr %v", err, tt.wantErr)
+ return
+ }
+
+ if err == nil && !filepath.IsAbs(got) {
+ t.Errorf("GetAbsolutePath() returned non-absolute path: %v", got)
+ }
+ })
+ }
+}
+
+func TestGetAbsolutePathConcurrency(t *testing.T) {
+ // Test that GetAbsolutePath is safe for concurrent use
+ paths := []string{".", "..", "test.go", "subdir/file.txt", "/tmp/test"}
+ done := make(chan bool)
+
+ for _, p := range paths {
+ go func(path string) {
+ _, _ = GetAbsolutePath(path)
+ done <- true
+ }(p)
+ }
+
+ // Wait for all goroutines to complete
+ for range paths {
+ <-done
+ }
+}
+
+func TestGetAbsolutePathErrorFormatting(t *testing.T) {
+	// filepath.Abs is hard to force into an error (on Unix it fails only when
+	// the working directory cannot be determined), so this test verifies the
+	// wrapped error format if an error does occur and otherwise checks the
+	// happy path.
+	path := "test/path"
+ got, err := GetAbsolutePath(path)
+ if err != nil {
+ // If we somehow get an error, verify it's properly formatted
+ if !strings.Contains(err.Error(), "failed to get absolute path for") {
+ t.Errorf("Error message format incorrect: %v", err)
+ }
+ if !strings.Contains(err.Error(), path) {
+ t.Errorf("Error message should contain original path: %v", err)
+ }
+ } else {
+ // Normal case - just verify we got a valid absolute path
+ if !filepath.IsAbs(got) {
+ t.Errorf("Expected absolute path, got: %v", got)
+ }
+ }
+}
+
+// BenchmarkGetAbsolutePath benchmarks the GetAbsolutePath function
+func BenchmarkGetAbsolutePath(b *testing.B) {
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, _ = GetAbsolutePath("test/path/file.go")
+ }
+}
+
+// BenchmarkGetAbsolutePathAbs benchmarks with already absolute path
+func BenchmarkGetAbsolutePathAbs(b *testing.B) {
+ absPath := "/home/user/test/file.go"
+ if runtime.GOOS == "windows" {
+ absPath = "C:\\Users\\test\\file.go"
+ }
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, _ = GetAbsolutePath(absPath)
+ }
+}
+
+// BenchmarkGetAbsolutePathCurrent benchmarks with current directory
+func BenchmarkGetAbsolutePathCurrent(b *testing.B) {
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, _ = GetAbsolutePath(".")
+ }
+}