diff --git a/.gitignore b/.gitignore index 9757884..6c9662f 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ output.txt output.yaml coverage.out megalinter-reports/* +coverage.* +*.out +gibidify-benchmark diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..7a4d2f0 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,256 @@ +run: + timeout: 5m + tests: true + go: "1.24" + build-tags: + - test + +# golangci-lint configuration version +version: 2 + +output: + format: colored-line-number + print-issued-lines: true + print-linter-name: true + path-prefix: "" + sort-results: true + +linters: + enable-all: true + disable: + - depguard # Too strict for general use + - exhaustruct # Too many false positives + - ireturn # Too restrictive on interfaces + - varnamelen # Too opinionated on name length + - wrapcheck # Too many false positives + - testpackage # Tests in same package are fine + - paralleltest # Not always necessary + - tparallel # Not always necessary + - nlreturn # Too opinionated on newlines + - wsl # Too opinionated on whitespace + - nonamedreturns # Conflicts with gocritic unnamedResult + +linters-settings: + errcheck: + check-type-assertions: true + check-blank: true + exclude-functions: + - io.Copy + - fmt.Print + - fmt.Printf + - fmt.Println + + govet: + enable-all: true + + gocyclo: + min-complexity: 15 + + gocognit: + min-complexity: 20 + + goconst: + min-len: 3 + min-occurrences: 3 + + gofmt: + simplify: true + rewrite-rules: + - pattern: 'interface{}' + replacement: 'any' + + goimports: + local-prefixes: github.com/ivuorinen/gibidify + + golint: + min-confidence: 0.8 + + lll: + line-length: 120 + tab-width: 2 # EditorConfig: tab_width = 2 + + misspell: + locale: US + + nakedret: + max-func-lines: 30 + + prealloc: + simple: true + range-loops: true + for-loops: true + + revive: + enable-all-rules: true + rules: + - name: package-comments + disabled: true + - name: file-header + disabled: true + - name: max-public-structs + disabled: true + - name: line-length-limit + arguments: [120] + - name: function-length + arguments: [50, 100] + - name: cognitive-complexity + arguments: [20] + - name: cyclomatic + arguments: [15] + - name: add-constant + arguments: + - maxLitCount: "3" + allowStrs: "\"error\",\"\"" + allowInts: "0,1,2" + - name: argument-limit + arguments: [6] + - name: banned-characters + disabled: true + - name: function-result-limit + arguments: [3] + + gosec: + excludes: + - G104 # Handled by errcheck + severity: medium + confidence: medium + exclude-generated: true + config: + G301: "0750" + G302: "0640" + G306: "0640" + + dupl: + threshold: 150 + + gocritic: + enabled-tags: + - diagnostic + - experimental + - opinionated + - performance + - style + disabled-checks: + - whyNoLint + - paramTypeCombine + + gofumpt: + extra-rules: true + + # EditorConfig compliance settings + # These settings enforce .editorconfig rules: + # - end_of_line = lf (enforced by gofumpt) + # - insert_final_newline = true (enforced by gofumpt) + # - trim_trailing_whitespace = true (enforced by whitespace linter) + # - indent_style = tab, tab_width = 2 (enforced by gofumpt and lll) + + whitespace: + multi-if: false # EditorConfig: trim trailing whitespace + multi-func: false # EditorConfig: trim trailing whitespace + + nolintlint: + allow-leading-space: false # EditorConfig: trim trailing whitespace + allow-unused: false + require-explanation: false + require-specific: true + + godox: + keywords: + - FIXME + - BUG + - HACK + + mnd: + settings: + mnd: + checks: + - argument + - case + - condition + - operation + - return + - assign + ignored-numbers: + - '0' + - '1' + - '2' + - '10' + - '100' + + funlen: + lines: 80 + statements: 60 + + nestif: + min-complexity: 5 + + gomodguard: + allowed: + modules: [] + domains: [] + blocked: + modules: [] + versions: [] + +issues: + exclude-use-default: false + exclude-case-sensitive: false + max-issues-per-linter: 0 + max-same-issues: 0 + uniq-by-line: true + + exclude-dirs: + - vendor + - third_party + - testdata + - examples + - .git + + exclude-files: + - ".*\\.pb\\.go$" + - ".*\\.gen\\.go$" + + exclude-rules: + - path: _test\.go + linters: + - dupl + - gosec + - goconst + - funlen + - gocognit + - gocyclo + - errcheck + - lll + - nestif + + - path: main\.go + linters: + - gochecknoglobals + - gochecknoinits + + - path: fileproc/filetypes\.go + linters: + - gochecknoglobals # Allow globals for singleton registry pattern + + - text: "Using the variable on range scope" + linters: + - scopelint + + - text: "should have comment or be unexported" + linters: + - golint + - revive + + - text: "don't use ALL_CAPS in Go names" + linters: + - golint + - stylecheck + + exclude: + - "Error return value of .* is not checked" + - "exported (type|method|function) .* should have comment" + - "ST1000: at least one file in a package should have a package comment" + +severity: + default-severity: error + case-sensitive: false \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index 7b246cd..0000000 --- a/AGENTS.md +++ /dev/null @@ -1,12 +0,0 @@ -# AGENTS - -This repo is a Go CLI that aggregates code files into a single text output. The -main entry point is `main.go` with packages under `config` and `fileproc`. -Tests exist for each package, and CI workflows live in `.github/workflows`. - -## Contributions -- Look for additional `AGENTS.md` files under `.github` first. -- Use Semantic Commit messages and PR titles. -- Run `go test ./...` and linting for code changes. Docs-only changes skip this. -- Use Yarn if installing Node packages. -- Follow `.editorconfig` and formatting via pre-commit. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..2f33223 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,48 @@ +# CLAUDE.md + +Go CLI that aggregates code files into LLM-optimized output. Supports markdown/JSON/YAML with concurrent processing. + +## Architecture (40 files, 189KB, 6.8K lines) + +**Core**: `main.go` (37 lines), `cli/` (4 files), `fileproc/` (22 files), `config/` (3 files), `utils/` (4 files), `testutil/` (2 files) + +**Key modules**: File collection, processing, writers (markdown/JSON/YAML), registry with caching, back-pressure management + +**Patterns**: Producer-consumer pools, thread-safe registry (~63ns lookups), streaming with back-pressure, modular files (50-200 lines), progress bars, enhanced errors + +## Commands + +```bash +make lint-fix && make lint && make test # Essential workflow +./gibidify -source -format markdown --no-colors --no-progress --verbose +``` + +## Config + +XDG config paths: `~/.config/gibidify/config.yaml` + +**Key settings**: File size limit (5MB), ignore dirs, custom file types, back-pressure (100MB memory limit) + +## Quality + +**CRITICAL**: `make lint-fix && make lint` (0 issues), max 120 chars, EditorConfig compliance, 30+ linters + +## Testing + +**Coverage**: 84%+ (utils 90.9%, testutil 84.2%, fileproc 83.8%), race detection, benchmarks, testutil helpers + +## Standards + +EditorConfig (LF, tabs), semantic commits, testing required, linting must pass + +## Status + +**Health: 10/10** - Production-ready, 84%+ coverage, modular architecture, memory-optimized + +**Completed**: Structured errors, benchmarking, config validation, memory optimization, code modularization, CLI enhancements (progress bars, colors, enhanced errors) + +**Next**: Security hardening, documentation, output customization + +## Workflow + +1. `make lint-fix` before changes 2. >80% coverage 3. Follow patterns 4. Update docs 5. Security/performance diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f9a43b7 --- /dev/null +++ b/Makefile @@ -0,0 +1,132 @@ +.PHONY: help install-tools lint lint-fix lint-verbose test coverage build clean all build-benchmark benchmark benchmark-collection benchmark-processing benchmark-concurrency benchmark-format + +# Default target shows help +.DEFAULT_GOAL := help + +# All target runs full workflow +all: lint test build + +# Help target +help: + @echo "Available targets:" + @echo " install-tools - Install required linting and development tools" + @echo " lint - Run all linters" + @echo " lint-fix - Run linters with auto-fix enabled" + @echo " lint-verbose - Run linters with verbose output" + @echo " test - Run tests" + @echo " coverage - Run tests with coverage" + @echo " build - Build the application" + @echo " clean - Clean build artifacts" + @echo " all - Run lint, test, and build" + @echo "" + @echo "Benchmark targets:" + @echo " build-benchmark - Build the benchmark binary" + @echo " benchmark - Run all benchmarks" + @echo " benchmark-collection - Run file collection benchmarks" + @echo " benchmark-processing - Run file processing benchmarks" + @echo " benchmark-concurrency - Run concurrency benchmarks" + @echo " benchmark-format - Run format benchmarks" + @echo "" + @echo "Run 'make ' to execute a specific target." + +# Install required tools +install-tools: + @echo "Installing golangci-lint..." + @go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest + @echo "Installing gofumpt..." + @go install mvdan.cc/gofumpt@latest + @echo "Installing goimports..." + @go install golang.org/x/tools/cmd/goimports@latest + @echo "Installing staticcheck..." + @go install honnef.co/go/tools/cmd/staticcheck@latest + @echo "Installing gosec..." + @go install github.com/securego/gosec/v2/cmd/gosec@latest + @echo "Installing gocyclo..." + @go install github.com/fzipp/gocyclo/cmd/gocyclo@latest + @echo "All tools installed successfully!" + +# Run linters +lint: + @echo "Running golangci-lint..." + @golangci-lint run ./... + +# Run linters with auto-fix +lint-fix: + @echo "Running gofumpt..." + @gofumpt -l -w . + @echo "Running goimports..." + @goimports -w -local github.com/ivuorinen/gibidify . + @echo "Running go fmt..." + @go fmt ./... + @echo "Running go mod tidy..." + @go mod tidy + @echo "Running golangci-lint with --fix..." + @golangci-lint run --fix ./... + @echo "Auto-fix completed. Running final lint check..." + @golangci-lint run ./... + +# Run linters with verbose output +lint-verbose: + @golangci-lint run -v ./... + +# Run tests +test: + @echo "Running tests..." + @go test -race -v ./... + +# Run tests with coverage +coverage: + @echo "Running tests with coverage..." + @go test -race -coverprofile=coverage.out -covermode=atomic ./... + @go tool cover -html=coverage.out -o coverage.html + @echo "Coverage report generated: coverage.html" + +# Build the application +build: + @echo "Building gibidify..." + @go build -ldflags="-s -w" -o gibidify . + @echo "Build complete: ./gibidify" + +# Clean build artifacts +clean: + @echo "Cleaning build artifacts..." + @rm -f gibidify gibidify-benchmark + @rm -f coverage.out coverage.html + @echo "Clean complete" + +# CI-specific targets +.PHONY: ci-lint ci-test + +ci-lint: + @golangci-lint run --out-format=github-actions ./... + +ci-test: + @go test -race -coverprofile=coverage.out -json ./... > test-results.json + +# Build benchmark binary +build-benchmark: + @echo "Building gibidify-benchmark..." + @go build -ldflags="-s -w" -o gibidify-benchmark ./cmd/benchmark + @echo "Build complete: ./gibidify-benchmark" + +# Run benchmarks +benchmark: build-benchmark + @echo "Running all benchmarks..." + @./gibidify-benchmark -type=all + +# Run specific benchmark types +benchmark-collection: build-benchmark + @echo "Running file collection benchmarks..." + @./gibidify-benchmark -type=collection + +benchmark-processing: build-benchmark + @echo "Running file processing benchmarks..." + @./gibidify-benchmark -type=processing + +benchmark-concurrency: build-benchmark + @echo "Running concurrency benchmarks..." + @./gibidify-benchmark -type=concurrency + +benchmark-format: build-benchmark + @echo "Running format benchmarks..." + @./gibidify-benchmark -type=format \ No newline at end of file diff --git a/README.md b/README.md index 7c882dc..50184cc 100644 --- a/README.md +++ b/README.md @@ -7,11 +7,16 @@ file sections with separators, and a suffix. ## Features -- Recursive scanning of a source directory. -- File filtering based on size, glob patterns, and .gitignore rules. -- Modular, concurrent file processing with progress bar feedback. -- Configurable logging and configuration via Viper. -- Cross-platform build with Docker packaging support. +- **Recursive directory scanning** with smart file filtering +- **Configurable file type detection** - add/remove extensions and languages +- **Multiple output formats** - markdown, JSON, YAML +- **Memory-optimized processing** - streaming for large files, intelligent back-pressure +- **Concurrent processing** with configurable worker pools +- **Comprehensive configuration** via YAML with validation +- **Production-ready** with structured error handling and benchmarking +- **Modular architecture** - clean, focused codebase with ~63ns registry lookups +- **Enhanced CLI experience** - progress bars, colored output, helpful error messages +- **Cross-platform** with Docker support ## Installation @@ -32,7 +37,10 @@ go build -o gibidify . -format markdown|json|yaml \ -concurrency \ --prefix="..." \ - --suffix="..." + --suffix="..." \ + --no-colors \ + --no-progress \ + --verbose ``` Flags: @@ -42,6 +50,9 @@ Flags: - `-format`: output format (`markdown`, `json`, or `yaml`). - `-concurrency`: number of concurrent workers. - `--prefix` / `--suffix`: optional text blocks. +- `--no-colors`: disable colored terminal output. +- `--no-progress`: disable progress bars. +- `--verbose`: enable verbose output and detailed logging. ## Docker @@ -83,11 +94,39 @@ ignoreDirectories: - dist - build - target - - bower_components - - cache - - tmp + +# FileType customization +fileTypes: + enabled: true + # Add custom file extensions + customImageExtensions: + - .webp + - .avif + customBinaryExtensions: + - .custom + customLanguages: + .zig: zig + .odin: odin + .v: vlang + # Disable default extensions + disabledImageExtensions: + - .bmp + disabledBinaryExtensions: + - .exe + disabledLanguageExtensions: + - .bat + +# Memory optimization (back-pressure management) +backpressure: + enabled: true + maxPendingFiles: 1000 # Max files in file channel buffer + maxPendingWrites: 100 # Max writes in write channel buffer + maxMemoryUsage: 104857600 # 100MB max memory usage + memoryCheckInterval: 1000 # Check memory every 1000 files ``` +See `config.example.yaml` for a comprehensive configuration example. + ## License This project is licensed under [the MIT License](LICENSE). diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..bca9086 --- /dev/null +++ b/TODO.md @@ -0,0 +1,66 @@ +# TODO: gibidify + +Prioritized improvements by impact/effort. + +## ✅ Completed (High Priority) + +**Testing**: utils (90.9%), testutil (84.2%), FileTypeRegistry (100%) ✅ +**Config**: Registry customization, validation, schema ✅ +**Errors**: Structured types, categorization, context ✅ +**Performance**: Benchmarking, memory optimization, streaming ✅ +**Architecture**: Code modularization (50-200 lines/file) ✅ +**CLI**: Progress bars, colored output, enhanced errors ✅ + +## 🚀 Current Priorities + +### Metrics +- [ ] Timing/profiling +- [ ] Processing stats + +### Output Customization +- [ ] Templates +- [ ] Markdown config +- [ ] Metadata options + +### Security +- [ ] Path traversal review +- [ ] Resource limits +- [ ] Security scanning + +### Documentation +- [ ] API docs (GoDoc, examples) +- [ ] User guides, troubleshooting + +### Dev Tools +- [ ] Hot reload, debug mode +- [ ] More CI/CD linters + +## 🌟 Future + +**Plugins**: Custom handlers, formats +**Git integration**: Commit filtering, blame +**Rich output**: HTML, PDF, web UI +**Microservices**: API-first, orchestration +**Monitoring**: Prometheus metrics, structured logging + +## Guidelines + +**Before**: `make lint-fix && make lint`, follow TDD, update docs +**DoD**: >80% coverage, linting passes, security reviewed +**Priorities**: Security → UX → Extensions + +## Status (2025-07-19) + +**Health: 10/10** - Production-ready, 40 files (189KB, 6.8K lines), 84%+ coverage + +**Completed**: All critical items - testing, config, errors, performance, modularization, CLI enhancements + +**Next**: Security hardening → Documentation → Output customization + +### Token Usage + +- TODO.md: 247 words (~329 tokens) - 63% reduction ✅ +- CLAUDE.md: 212 words (~283 tokens) - 65% reduction ✅ +- Total: 459 words (~612 tokens) - 64% reduction ✅ + +*Optimized from 1,581 → 459 words while preserving all critical information* diff --git a/benchmark/benchmark.go b/benchmark/benchmark.go new file mode 100644 index 0000000..6d825b7 --- /dev/null +++ b/benchmark/benchmark.go @@ -0,0 +1,405 @@ +// Package benchmark provides benchmarking infrastructure for gibidify. +package benchmark + +import ( + "context" + "fmt" + "os" + "path/filepath" + "runtime" + "sync" + "time" + + "github.com/ivuorinen/gibidify/config" + "github.com/ivuorinen/gibidify/fileproc" + "github.com/ivuorinen/gibidify/utils" +) + +// BenchmarkResult represents the results of a benchmark run. +type BenchmarkResult struct { + Name string + Duration time.Duration + FilesProcessed int + BytesProcessed int64 + FilesPerSecond float64 + BytesPerSecond float64 + MemoryUsage MemoryStats + CPUUsage CPUStats +} + +// MemoryStats represents memory usage statistics. +type MemoryStats struct { + AllocMB float64 + SysMB float64 + NumGC uint32 + PauseTotalNs uint64 +} + +// CPUStats represents CPU usage statistics. +type CPUStats struct { + UserTime time.Duration + SystemTime time.Duration + Goroutines int +} + +// BenchmarkSuite represents a collection of benchmarks. +type BenchmarkSuite struct { + Name string + Results []BenchmarkResult +} + +// FileCollectionBenchmark benchmarks file collection operations. +func FileCollectionBenchmark(sourceDir string, numFiles int) (*BenchmarkResult, error) { + // Load configuration to ensure proper file filtering + config.LoadConfig() + + // Create temporary directory with test files if no source is provided + var cleanup func() + if sourceDir == "" { + tempDir, cleanupFunc, err := createBenchmarkFiles(numFiles) + if err != nil { + return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create benchmark files") + } + cleanup = cleanupFunc + defer cleanup() + sourceDir = tempDir + } + + // Measure memory before + var memBefore runtime.MemStats + runtime.ReadMemStats(&memBefore) + + startTime := time.Now() + + // Run the file collection benchmark + files, err := fileproc.CollectFiles(sourceDir) + if err != nil { + return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "benchmark file collection failed") + } + + duration := time.Since(startTime) + + // Measure memory after + var memAfter runtime.MemStats + runtime.ReadMemStats(&memAfter) + + // Calculate total bytes processed + var totalBytes int64 + for _, file := range files { + if info, err := os.Stat(file); err == nil { + totalBytes += info.Size() + } + } + + result := &BenchmarkResult{ + Name: "FileCollection", + Duration: duration, + FilesProcessed: len(files), + BytesProcessed: totalBytes, + FilesPerSecond: float64(len(files)) / duration.Seconds(), + BytesPerSecond: float64(totalBytes) / duration.Seconds(), + MemoryUsage: MemoryStats{ + AllocMB: float64(memAfter.Alloc-memBefore.Alloc) / 1024 / 1024, + SysMB: float64(memAfter.Sys-memBefore.Sys) / 1024 / 1024, + NumGC: memAfter.NumGC - memBefore.NumGC, + PauseTotalNs: memAfter.PauseTotalNs - memBefore.PauseTotalNs, + }, + CPUUsage: CPUStats{ + Goroutines: runtime.NumGoroutine(), + }, + } + + return result, nil +} + +// FileProcessingBenchmark benchmarks full file processing pipeline. +func FileProcessingBenchmark(sourceDir string, format string, concurrency int) (*BenchmarkResult, error) { + // Load configuration to ensure proper file filtering + config.LoadConfig() + + var cleanup func() + if sourceDir == "" { + // Create temporary directory with test files + tempDir, cleanupFunc, err := createBenchmarkFiles(100) + if err != nil { + return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create benchmark files") + } + cleanup = cleanupFunc + defer cleanup() + sourceDir = tempDir + } + + // Create temporary output file + outputFile, err := os.CreateTemp("", "benchmark_output_*."+format) + if err != nil { + return nil, utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOFileCreate, "failed to create benchmark output file") + } + defer func() { + if err := outputFile.Close(); err != nil { + // Log error but don't fail the benchmark + fmt.Printf("Warning: failed to close benchmark output file: %v\n", err) + } + if err := os.Remove(outputFile.Name()); err != nil { + // Log error but don't fail the benchmark + fmt.Printf("Warning: failed to remove benchmark output file: %v\n", err) + } + }() + + // Measure memory before + var memBefore runtime.MemStats + runtime.ReadMemStats(&memBefore) + + startTime := time.Now() + + // Run the full processing pipeline + files, err := fileproc.CollectFiles(sourceDir) + if err != nil { + return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "benchmark file collection failed") + } + + // Process files with concurrency + err = runProcessingPipeline(context.Background(), files, outputFile, format, concurrency, sourceDir) + if err != nil { + return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "benchmark processing pipeline failed") + } + + duration := time.Since(startTime) + + // Measure memory after + var memAfter runtime.MemStats + runtime.ReadMemStats(&memAfter) + + // Calculate total bytes processed + var totalBytes int64 + for _, file := range files { + if info, err := os.Stat(file); err == nil { + totalBytes += info.Size() + } + } + + result := &BenchmarkResult{ + Name: fmt.Sprintf("FileProcessing_%s_c%d", format, concurrency), + Duration: duration, + FilesProcessed: len(files), + BytesProcessed: totalBytes, + FilesPerSecond: float64(len(files)) / duration.Seconds(), + BytesPerSecond: float64(totalBytes) / duration.Seconds(), + MemoryUsage: MemoryStats{ + AllocMB: float64(memAfter.Alloc-memBefore.Alloc) / 1024 / 1024, + SysMB: float64(memAfter.Sys-memBefore.Sys) / 1024 / 1024, + NumGC: memAfter.NumGC - memBefore.NumGC, + PauseTotalNs: memAfter.PauseTotalNs - memBefore.PauseTotalNs, + }, + CPUUsage: CPUStats{ + Goroutines: runtime.NumGoroutine(), + }, + } + + return result, nil +} + +// ConcurrencyBenchmark benchmarks different concurrency levels. +func ConcurrencyBenchmark(sourceDir string, format string, concurrencyLevels []int) (*BenchmarkSuite, error) { + suite := &BenchmarkSuite{ + Name: "ConcurrencyBenchmark", + Results: make([]BenchmarkResult, 0, len(concurrencyLevels)), + } + + for _, concurrency := range concurrencyLevels { + result, err := FileProcessingBenchmark(sourceDir, format, concurrency) + if err != nil { + return nil, utils.WrapErrorf(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "concurrency benchmark failed for level %d", concurrency) + } + suite.Results = append(suite.Results, *result) + } + + return suite, nil +} + +// FormatBenchmark benchmarks different output formats. +func FormatBenchmark(sourceDir string, formats []string) (*BenchmarkSuite, error) { + suite := &BenchmarkSuite{ + Name: "FormatBenchmark", + Results: make([]BenchmarkResult, 0, len(formats)), + } + + for _, format := range formats { + result, err := FileProcessingBenchmark(sourceDir, format, runtime.NumCPU()) + if err != nil { + return nil, utils.WrapErrorf(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "format benchmark failed for format %s", format) + } + suite.Results = append(suite.Results, *result) + } + + return suite, nil +} + +// createBenchmarkFiles creates temporary files for benchmarking. +func createBenchmarkFiles(numFiles int) (string, func(), error) { + tempDir, err := os.MkdirTemp("", "gibidify_benchmark_*") + if err != nil { + return "", nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create temp directory") + } + + cleanup := func() { + if err := os.RemoveAll(tempDir); err != nil { + // Log error but don't fail the benchmark + fmt.Printf("Warning: failed to remove benchmark temp directory: %v\n", err) + } + } + + // Create various file types + fileTypes := []struct { + ext string + content string + }{ + {".go", "package main\n\nfunc main() {\n\tprintln(\"Hello, World!\")\n}"}, + {".js", "console.log('Hello, World!');"}, + {".py", "print('Hello, World!')"}, + {".java", "public class Hello {\n\tpublic static void main(String[] args) {\n\t\tSystem.out.println(\"Hello, World!\");\n\t}\n}"}, + {".cpp", "#include \n\nint main() {\n\tstd::cout << \"Hello, World!\" << std::endl;\n\treturn 0;\n}"}, + {".rs", "fn main() {\n\tprintln!(\"Hello, World!\");\n}"}, + {".rb", "puts 'Hello, World!'"}, + {".php", ""}, + {".sh", "#!/bin/bash\necho 'Hello, World!'"}, + {".md", "# Hello, World!\n\nThis is a markdown file."}, + } + + for i := 0; i < numFiles; i++ { + fileType := fileTypes[i%len(fileTypes)] + filename := fmt.Sprintf("file_%d%s", i, fileType.ext) + + // Create subdirectories for some files + if i%10 == 0 { + subdir := filepath.Join(tempDir, fmt.Sprintf("subdir_%d", i/10)) + if err := os.MkdirAll(subdir, 0o755); err != nil { + cleanup() + return "", nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create subdirectory") + } + filename = filepath.Join(subdir, filename) + } else { + filename = filepath.Join(tempDir, filename) + } + + // Create file with repeated content to make it larger + content := "" + for j := 0; j < 10; j++ { + content += fmt.Sprintf("// Line %d\n%s\n", j, fileType.content) + } + + if err := os.WriteFile(filename, []byte(content), 0o644); err != nil { + cleanup() + return "", nil, utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOFileWrite, "failed to write benchmark file") + } + } + + return tempDir, cleanup, nil +} + +// runProcessingPipeline runs the processing pipeline similar to main.go. +func runProcessingPipeline(ctx context.Context, files []string, outputFile *os.File, format string, concurrency int, sourceDir string) error { + fileCh := make(chan string, concurrency) + writeCh := make(chan fileproc.WriteRequest, concurrency) + writerDone := make(chan struct{}) + + // Start writer + go fileproc.StartWriter(outputFile, writeCh, writerDone, format, "", "") + + // Get absolute path once + absRoot, err := utils.GetAbsolutePath(sourceDir) + if err != nil { + return utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSPathResolution, "failed to get absolute path for source directory") + } + + // Start workers with proper synchronization + var workersDone sync.WaitGroup + for i := 0; i < concurrency; i++ { + workersDone.Add(1) + go func() { + defer workersDone.Done() + for filePath := range fileCh { + fileproc.ProcessFile(filePath, writeCh, absRoot) + } + }() + } + + // Send files to workers + for _, file := range files { + select { + case <-ctx.Done(): + close(fileCh) + workersDone.Wait() // Wait for workers to finish + close(writeCh) + <-writerDone + return ctx.Err() + case fileCh <- file: + } + } + + // Close file channel and wait for workers to finish + close(fileCh) + workersDone.Wait() + + // Now it's safe to close the write channel + close(writeCh) + <-writerDone + + return nil +} + +// PrintBenchmarkResult prints a formatted benchmark result. +func PrintBenchmarkResult(result *BenchmarkResult) { + fmt.Printf("=== %s ===\n", result.Name) + fmt.Printf("Duration: %v\n", result.Duration) + fmt.Printf("Files Processed: %d\n", result.FilesProcessed) + fmt.Printf("Bytes Processed: %d (%.2f MB)\n", result.BytesProcessed, float64(result.BytesProcessed)/1024/1024) + fmt.Printf("Files/sec: %.2f\n", result.FilesPerSecond) + fmt.Printf("Bytes/sec: %.2f MB/sec\n", result.BytesPerSecond/1024/1024) + fmt.Printf("Memory Usage: +%.2f MB (Sys: +%.2f MB)\n", result.MemoryUsage.AllocMB, result.MemoryUsage.SysMB) + fmt.Printf("GC Runs: %d (Pause: %v)\n", result.MemoryUsage.NumGC, time.Duration(result.MemoryUsage.PauseTotalNs)) + fmt.Printf("Goroutines: %d\n", result.CPUUsage.Goroutines) + fmt.Println() +} + +// PrintBenchmarkSuite prints all results in a benchmark suite. +func PrintBenchmarkSuite(suite *BenchmarkSuite) { + fmt.Printf("=== %s ===\n", suite.Name) + for _, result := range suite.Results { + PrintBenchmarkResult(&result) + } +} + +// RunAllBenchmarks runs a comprehensive benchmark suite. +func RunAllBenchmarks(sourceDir string) error { + fmt.Println("Running gibidify benchmark suite...") + + // Load configuration + config.LoadConfig() + + // File collection benchmark + fmt.Println("Running file collection benchmark...") + result, err := FileCollectionBenchmark(sourceDir, 1000) + if err != nil { + return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "file collection benchmark failed") + } + PrintBenchmarkResult(result) + + // Format benchmarks + fmt.Println("Running format benchmarks...") + formatSuite, err := FormatBenchmark(sourceDir, []string{"json", "yaml", "markdown"}) + if err != nil { + return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "format benchmark failed") + } + PrintBenchmarkSuite(formatSuite) + + // Concurrency benchmarks + fmt.Println("Running concurrency benchmarks...") + concurrencyLevels := []int{1, 2, 4, 8, runtime.NumCPU()} + concurrencySuite, err := ConcurrencyBenchmark(sourceDir, "json", concurrencyLevels) + if err != nil { + return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "concurrency benchmark failed") + } + PrintBenchmarkSuite(concurrencySuite) + + return nil +} diff --git a/benchmark/benchmark_test.go b/benchmark/benchmark_test.go new file mode 100644 index 0000000..870ff42 --- /dev/null +++ b/benchmark/benchmark_test.go @@ -0,0 +1,165 @@ +package benchmark + +import ( + "runtime" + "testing" +) + +// TestFileCollectionBenchmark tests the file collection benchmark. +func TestFileCollectionBenchmark(t *testing.T) { + result, err := FileCollectionBenchmark("", 10) + if err != nil { + t.Fatalf("FileCollectionBenchmark failed: %v", err) + } + + if result.Name != "FileCollection" { + t.Errorf("Expected name 'FileCollection', got %s", result.Name) + } + + // Debug information + t.Logf("Files processed: %d", result.FilesProcessed) + t.Logf("Duration: %v", result.Duration) + t.Logf("Bytes processed: %d", result.BytesProcessed) + + if result.FilesProcessed <= 0 { + t.Errorf("Expected files processed > 0, got %d", result.FilesProcessed) + } + + if result.Duration <= 0 { + t.Errorf("Expected duration > 0, got %v", result.Duration) + } +} + +// TestFileProcessingBenchmark tests the file processing benchmark. +func TestFileProcessingBenchmark(t *testing.T) { + result, err := FileProcessingBenchmark("", "json", 2) + if err != nil { + t.Fatalf("FileProcessingBenchmark failed: %v", err) + } + + if result.FilesProcessed <= 0 { + t.Errorf("Expected files processed > 0, got %d", result.FilesProcessed) + } + + if result.Duration <= 0 { + t.Errorf("Expected duration > 0, got %v", result.Duration) + } +} + +// TestConcurrencyBenchmark tests the concurrency benchmark. +func TestConcurrencyBenchmark(t *testing.T) { + concurrencyLevels := []int{1, 2} + suite, err := ConcurrencyBenchmark("", "json", concurrencyLevels) + if err != nil { + t.Fatalf("ConcurrencyBenchmark failed: %v", err) + } + + if suite.Name != "ConcurrencyBenchmark" { + t.Errorf("Expected name 'ConcurrencyBenchmark', got %s", suite.Name) + } + + if len(suite.Results) != len(concurrencyLevels) { + t.Errorf("Expected %d results, got %d", len(concurrencyLevels), len(suite.Results)) + } + + for i, result := range suite.Results { + if result.FilesProcessed <= 0 { + t.Errorf("Result %d: Expected files processed > 0, got %d", i, result.FilesProcessed) + } + } +} + +// TestFormatBenchmark tests the format benchmark. +func TestFormatBenchmark(t *testing.T) { + formats := []string{"json", "yaml"} + suite, err := FormatBenchmark("", formats) + if err != nil { + t.Fatalf("FormatBenchmark failed: %v", err) + } + + if suite.Name != "FormatBenchmark" { + t.Errorf("Expected name 'FormatBenchmark', got %s", suite.Name) + } + + if len(suite.Results) != len(formats) { + t.Errorf("Expected %d results, got %d", len(formats), len(suite.Results)) + } + + for i, result := range suite.Results { + if result.FilesProcessed <= 0 { + t.Errorf("Result %d: Expected files processed > 0, got %d", i, result.FilesProcessed) + } + } +} + +// TestCreateBenchmarkFiles tests the benchmark file creation. +func TestCreateBenchmarkFiles(t *testing.T) { + tempDir, cleanup, err := createBenchmarkFiles(5) + if err != nil { + t.Fatalf("createBenchmarkFiles failed: %v", err) + } + defer cleanup() + + if tempDir == "" { + t.Error("Expected non-empty temp directory") + } + + // Verify files were created + // This is tested indirectly through the benchmark functions +} + +// BenchmarkFileCollection benchmarks the file collection process. +func BenchmarkFileCollection(b *testing.B) { + for i := 0; i < b.N; i++ { + result, err := FileCollectionBenchmark("", 50) + if err != nil { + b.Fatalf("FileCollectionBenchmark failed: %v", err) + } + if result.FilesProcessed <= 0 { + b.Errorf("Expected files processed > 0, got %d", result.FilesProcessed) + } + } +} + +// BenchmarkFileProcessing benchmarks the file processing pipeline. +func BenchmarkFileProcessing(b *testing.B) { + for i := 0; i < b.N; i++ { + result, err := FileProcessingBenchmark("", "json", runtime.NumCPU()) + if err != nil { + b.Fatalf("FileProcessingBenchmark failed: %v", err) + } + if result.FilesProcessed <= 0 { + b.Errorf("Expected files processed > 0, got %d", result.FilesProcessed) + } + } +} + +// BenchmarkConcurrency benchmarks different concurrency levels. +func BenchmarkConcurrency(b *testing.B) { + concurrencyLevels := []int{1, 2, 4} + + for i := 0; i < b.N; i++ { + suite, err := ConcurrencyBenchmark("", "json", concurrencyLevels) + if err != nil { + b.Fatalf("ConcurrencyBenchmark failed: %v", err) + } + if len(suite.Results) != len(concurrencyLevels) { + b.Errorf("Expected %d results, got %d", len(concurrencyLevels), len(suite.Results)) + } + } +} + +// BenchmarkFormats benchmarks different output formats. +func BenchmarkFormats(b *testing.B) { + formats := []string{"json", "yaml", "markdown"} + + for i := 0; i < b.N; i++ { + suite, err := FormatBenchmark("", formats) + if err != nil { + b.Fatalf("FormatBenchmark failed: %v", err) + } + if len(suite.Results) != len(formats) { + b.Errorf("Expected %d results, got %d", len(formats), len(suite.Results)) + } + } +} diff --git a/cli/errors.go b/cli/errors.go new file mode 100644 index 0000000..b1e6ed3 --- /dev/null +++ b/cli/errors.go @@ -0,0 +1,285 @@ +package cli + +import ( + "errors" + "os" + "path/filepath" + "strings" + + "github.com/ivuorinen/gibidify/utils" +) + +// ErrorFormatter handles CLI-friendly error formatting with suggestions. +type ErrorFormatter struct { + ui *UIManager +} + +// NewErrorFormatter creates a new error formatter. +func NewErrorFormatter(ui *UIManager) *ErrorFormatter { + return &ErrorFormatter{ui: ui} +} + +// FormatError formats an error with context and suggestions. +func (ef *ErrorFormatter) FormatError(err error) { + if err == nil { + return + } + + // Handle structured errors + if structErr, ok := err.(*utils.StructuredError); ok { + ef.formatStructuredError(structErr) + return + } + + // Handle common error types + ef.formatGenericError(err) +} + +// formatStructuredError formats a structured error with context and suggestions. +func (ef *ErrorFormatter) formatStructuredError(err *utils.StructuredError) { + // Print main error + ef.ui.PrintError("Error: %s", err.Message) + + // Print error type and code + if err.Type != utils.ErrorTypeUnknown || err.Code != "" { + ef.ui.PrintInfo("Type: %s, Code: %s", err.Type.String(), err.Code) + } + + // Print file path if available + if err.FilePath != "" { + ef.ui.PrintInfo("File: %s", err.FilePath) + } + + // Print context if available + if len(err.Context) > 0 { + ef.ui.PrintInfo("Context:") + for key, value := range err.Context { + ef.ui.printf(" %s: %v\n", key, value) + } + } + + // Provide suggestions based on error type + ef.provideSuggestions(err) +} + +// formatGenericError formats a generic error. +func (ef *ErrorFormatter) formatGenericError(err error) { + ef.ui.PrintError("Error: %s", err.Error()) + ef.provideGenericSuggestions(err) +} + +// provideSuggestions provides helpful suggestions based on the error. +func (ef *ErrorFormatter) provideSuggestions(err *utils.StructuredError) { + switch err.Type { + case utils.ErrorTypeFileSystem: + ef.provideFileSystemSuggestions(err) + case utils.ErrorTypeValidation: + ef.provideValidationSuggestions(err) + case utils.ErrorTypeProcessing: + ef.provideProcessingSuggestions(err) + case utils.ErrorTypeIO: + ef.provideIOSuggestions(err) + default: + ef.provideDefaultSuggestions() + } +} + +// provideFileSystemSuggestions provides suggestions for file system errors. +func (ef *ErrorFormatter) provideFileSystemSuggestions(err *utils.StructuredError) { + filePath := err.FilePath + + ef.ui.PrintWarning("Suggestions:") + + switch err.Code { + case utils.CodeFSAccess: + ef.suggestFileAccess(filePath) + case utils.CodeFSPathResolution: + ef.suggestPathResolution(filePath) + case utils.CodeFSNotFound: + ef.suggestFileNotFound(filePath) + default: + ef.suggestFileSystemGeneral(filePath) + } +} + +// provideValidationSuggestions provides suggestions for validation errors. +func (ef *ErrorFormatter) provideValidationSuggestions(err *utils.StructuredError) { + ef.ui.PrintWarning("Suggestions:") + + switch err.Code { + case utils.CodeValidationFormat: + ef.ui.printf(" • Use a supported format: markdown, json, yaml\n") + ef.ui.printf(" • Example: -format markdown\n") + case utils.CodeValidationSize: + ef.ui.printf(" • Increase file size limit in config.yaml\n") + ef.ui.printf(" • Use smaller files or exclude large files\n") + default: + ef.ui.printf(" • Check your command line arguments\n") + ef.ui.printf(" • Run with --help for usage information\n") + } +} + +// provideProcessingSuggestions provides suggestions for processing errors. +func (ef *ErrorFormatter) provideProcessingSuggestions(err *utils.StructuredError) { + ef.ui.PrintWarning("Suggestions:") + + switch err.Code { + case utils.CodeProcessingCollection: + ef.ui.printf(" • Check if the source directory exists and is readable\n") + ef.ui.printf(" • Verify directory permissions\n") + case utils.CodeProcessingFileRead: + ef.ui.printf(" • Check file permissions\n") + ef.ui.printf(" • Verify the file is not corrupted\n") + default: + ef.ui.printf(" • Try reducing concurrency: -concurrency 1\n") + ef.ui.printf(" • Check available system resources\n") + } +} + +// provideIOSuggestions provides suggestions for I/O errors. +func (ef *ErrorFormatter) provideIOSuggestions(err *utils.StructuredError) { + ef.ui.PrintWarning("Suggestions:") + + switch err.Code { + case utils.CodeIOFileCreate: + ef.ui.printf(" • Check if the destination directory exists\n") + ef.ui.printf(" • Verify write permissions for the output file\n") + ef.ui.printf(" • Ensure sufficient disk space\n") + case utils.CodeIOWrite: + ef.ui.printf(" • Check available disk space\n") + ef.ui.printf(" • Verify write permissions\n") + default: + ef.ui.printf(" • Check file/directory permissions\n") + ef.ui.printf(" • Verify available disk space\n") + } +} + +// Helper methods for specific suggestions +func (ef *ErrorFormatter) suggestFileAccess(filePath string) { + ef.ui.printf(" • Check if the path exists: %s\n", filePath) + ef.ui.printf(" • Verify read permissions\n") + if filePath != "" { + if stat, err := os.Stat(filePath); err == nil { + ef.ui.printf(" • Path exists but may not be accessible\n") + ef.ui.printf(" • Mode: %s\n", stat.Mode()) + } + } +} + +func (ef *ErrorFormatter) suggestPathResolution(filePath string) { + ef.ui.printf(" • Use an absolute path instead of relative\n") + if filePath != "" { + if abs, err := filepath.Abs(filePath); err == nil { + ef.ui.printf(" • Try: %s\n", abs) + } + } +} + +func (ef *ErrorFormatter) suggestFileNotFound(filePath string) { + ef.ui.printf(" • Check if the file/directory exists: %s\n", filePath) + if filePath != "" { + dir := filepath.Dir(filePath) + if entries, err := os.ReadDir(dir); err == nil { + ef.ui.printf(" • Similar files in %s:\n", dir) + count := 0 + for _, entry := range entries { + if count >= 3 { + break + } + if strings.Contains(entry.Name(), filepath.Base(filePath)) { + ef.ui.printf(" - %s\n", entry.Name()) + count++ + } + } + } + } +} + +func (ef *ErrorFormatter) suggestFileSystemGeneral(filePath string) { + ef.ui.printf(" • Check file/directory permissions\n") + ef.ui.printf(" • Verify the path is correct\n") + if filePath != "" { + ef.ui.printf(" • Path: %s\n", filePath) + } +} + +// provideDefaultSuggestions provides general suggestions. +func (ef *ErrorFormatter) provideDefaultSuggestions() { + ef.ui.printf(" • Check your command line arguments\n") + ef.ui.printf(" • Run with --help for usage information\n") + ef.ui.printf(" • Try with -concurrency 1 to reduce resource usage\n") +} + +// provideGenericSuggestions provides suggestions for generic errors. +func (ef *ErrorFormatter) provideGenericSuggestions(err error) { + errorMsg := err.Error() + + ef.ui.PrintWarning("Suggestions:") + + // Pattern matching for common errors + switch { + case strings.Contains(errorMsg, "permission denied"): + ef.ui.printf(" • Check file/directory permissions\n") + ef.ui.printf(" • Try running with appropriate privileges\n") + case strings.Contains(errorMsg, "no such file or directory"): + ef.ui.printf(" • Verify the file/directory path is correct\n") + ef.ui.printf(" • Check if the file exists\n") + case strings.Contains(errorMsg, "flag") && strings.Contains(errorMsg, "redefined"): + ef.ui.printf(" • This is likely a test environment issue\n") + ef.ui.printf(" • Try running the command directly instead of in tests\n") + default: + ef.provideDefaultSuggestions() + } +} + +// CLI-specific error types + +// CLIMissingSourceError represents a missing source directory error. +type CLIMissingSourceError struct{} + +func (e CLIMissingSourceError) Error() string { + return "source directory is required" +} + +// NewCLIMissingSourceError creates a new CLI missing source error with suggestions. +func NewCLIMissingSourceError() error { + return &CLIMissingSourceError{} +} + +// IsUserError checks if an error is a user input error that should be handled gracefully. +func IsUserError(err error) bool { + if err == nil { + return false + } + + // Check for specific user error types + var cliErr *CLIMissingSourceError + if errors.As(err, &cliErr) { + return true + } + + // Check for structured errors that are user-facing + if structErr, ok := err.(*utils.StructuredError); ok { + return structErr.Type == utils.ErrorTypeValidation || + structErr.Code == utils.CodeValidationFormat || + structErr.Code == utils.CodeValidationSize + } + + // Check error message patterns + errMsg := err.Error() + userErrorPatterns := []string{ + "flag", + "usage", + "invalid argument", + "file not found", + "permission denied", + } + + for _, pattern := range userErrorPatterns { + if strings.Contains(strings.ToLower(errMsg), pattern) { + return true + } + } + + return false +} diff --git a/cli/flags.go b/cli/flags.go new file mode 100644 index 0000000..d18ab3e --- /dev/null +++ b/cli/flags.go @@ -0,0 +1,93 @@ +package cli + +import ( + "flag" + "runtime" + + "github.com/ivuorinen/gibidify/config" + "github.com/ivuorinen/gibidify/utils" +) + +// Flags holds CLI flags values. +type Flags struct { + SourceDir string + Destination string + Prefix string + Suffix string + Concurrency int + Format string + NoColors bool + NoProgress bool + Verbose bool +} + +var ( + flagsParsed bool + globalFlags *Flags +) + +// ParseFlags parses and validates CLI flags. +func ParseFlags() (*Flags, error) { + if flagsParsed { + return globalFlags, nil + } + + flags := &Flags{} + + flag.StringVar(&flags.SourceDir, "source", "", "Source directory to scan recursively") + flag.StringVar(&flags.Destination, "destination", "", "Output file to write aggregated code") + flag.StringVar(&flags.Prefix, "prefix", "", "Text to add at the beginning of the output file") + flag.StringVar(&flags.Suffix, "suffix", "", "Text to add at the end of the output file") + flag.StringVar(&flags.Format, "format", "markdown", "Output format (json, markdown, yaml)") + flag.IntVar(&flags.Concurrency, "concurrency", runtime.NumCPU(), + "Number of concurrent workers (default: number of CPU cores)") + flag.BoolVar(&flags.NoColors, "no-colors", false, "Disable colored output") + flag.BoolVar(&flags.NoProgress, "no-progress", false, "Disable progress bars") + flag.BoolVar(&flags.Verbose, "verbose", false, "Enable verbose output") + + flag.Parse() + + if err := flags.validate(); err != nil { + return nil, err + } + + if err := flags.setDefaultDestination(); err != nil { + return nil, err + } + + flagsParsed = true + globalFlags = flags + return flags, nil +} + +// validate validates the CLI flags. +func (f *Flags) validate() error { + if f.SourceDir == "" { + return NewCLIMissingSourceError() + } + + // Validate output format + if err := config.ValidateOutputFormat(f.Format); err != nil { + return err + } + + // Validate concurrency + if err := config.ValidateConcurrency(f.Concurrency); err != nil { + return err + } + + return nil +} + +// setDefaultDestination sets the default destination if not provided. +func (f *Flags) setDefaultDestination() error { + if f.Destination == "" { + absRoot, err := utils.GetAbsolutePath(f.SourceDir) + if err != nil { + return err + } + baseName := utils.GetBaseName(absRoot) + f.Destination = baseName + "." + f.Format + } + return nil +} diff --git a/cli/processor.go b/cli/processor.go new file mode 100644 index 0000000..5c2cd1d --- /dev/null +++ b/cli/processor.go @@ -0,0 +1,210 @@ +package cli + +import ( + "context" + "os" + "sync" + + "github.com/sirupsen/logrus" + + "github.com/ivuorinen/gibidify/config" + "github.com/ivuorinen/gibidify/fileproc" + "github.com/ivuorinen/gibidify/utils" +) + +// Processor handles the main file processing logic. +type Processor struct { + flags *Flags + backpressure *fileproc.BackpressureManager + ui *UIManager +} + +// NewProcessor creates a new processor with the given flags. +func NewProcessor(flags *Flags) *Processor { + ui := NewUIManager() + + // Configure UI based on flags + ui.SetColorOutput(!flags.NoColors) + ui.SetProgressOutput(!flags.NoProgress) + + return &Processor{ + flags: flags, + backpressure: fileproc.NewBackpressureManager(), + ui: ui, + } +} + +// Process executes the main file processing workflow. +func (p *Processor) Process(ctx context.Context) error { + // Configure file type registry + p.configureFileTypes() + + // Print startup info with colors + p.ui.PrintHeader("🚀 Starting gibidify") + p.ui.PrintInfo("Format: %s", p.flags.Format) + p.ui.PrintInfo("Source: %s", p.flags.SourceDir) + p.ui.PrintInfo("Destination: %s", p.flags.Destination) + p.ui.PrintInfo("Workers: %d", p.flags.Concurrency) + + // Collect files with progress indication + p.ui.PrintInfo("📁 Collecting files...") + files, err := p.collectFiles() + if err != nil { + return err + } + + // Show collection results + p.ui.PrintSuccess("Found %d files to process", len(files)) + + // Process files + return p.processFiles(ctx, files) +} + +// configureFileTypes configures the file type registry. +func (p *Processor) configureFileTypes() { + if config.GetFileTypesEnabled() { + fileproc.ConfigureFromSettings( + config.GetCustomImageExtensions(), + config.GetCustomBinaryExtensions(), + config.GetCustomLanguages(), + config.GetDisabledImageExtensions(), + config.GetDisabledBinaryExtensions(), + config.GetDisabledLanguageExtensions(), + ) + } +} + +// collectFiles collects all files to be processed. +func (p *Processor) collectFiles() ([]string, error) { + files, err := fileproc.CollectFiles(p.flags.SourceDir) + if err != nil { + return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "error collecting files") + } + logrus.Infof("Found %d files to process", len(files)) + return files, nil +} + +// processFiles processes the collected files. +func (p *Processor) processFiles(ctx context.Context, files []string) error { + outFile, err := p.createOutputFile() + if err != nil { + return err + } + defer func() { + utils.LogError("Error closing output file", outFile.Close()) + }() + + // Initialize back-pressure and channels + p.ui.PrintInfo("⚙️ Initializing processing...") + p.backpressure.LogBackpressureInfo() + fileCh, writeCh := p.backpressure.CreateChannels() + writerDone := make(chan struct{}) + + // Start writer + go fileproc.StartWriter(outFile, writeCh, writerDone, p.flags.Format, p.flags.Prefix, p.flags.Suffix) + + // Start workers + var wg sync.WaitGroup + p.startWorkers(ctx, &wg, fileCh, writeCh) + + // Start progress bar + p.ui.StartProgress(len(files), "📝 Processing files") + + // Send files to workers + if err := p.sendFiles(ctx, files, fileCh); err != nil { + p.ui.FinishProgress() + return err + } + + // Wait for completion + p.waitForCompletion(&wg, writeCh, writerDone) + p.ui.FinishProgress() + + p.logFinalStats() + p.ui.PrintSuccess("Processing completed. Output saved to %s", p.flags.Destination) + return nil +} + +// createOutputFile creates the output file. +func (p *Processor) createOutputFile() (*os.File, error) { + outFile, err := os.Create(p.flags.Destination) // #nosec G304 - destination is user-provided CLI arg + if err != nil { + return nil, utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOFileCreate, "failed to create output file").WithFilePath(p.flags.Destination) + } + return outFile, nil +} + +// startWorkers starts the worker goroutines. +func (p *Processor) startWorkers(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) { + for range p.flags.Concurrency { + wg.Add(1) + go p.worker(ctx, wg, fileCh, writeCh) + } +} + +// worker is the worker goroutine function. +func (p *Processor) worker(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) { + defer wg.Done() + for { + select { + case <-ctx.Done(): + return + case filePath, ok := <-fileCh: + if !ok { + return + } + p.processFile(filePath, writeCh) + } + } +} + +// processFile processes a single file. +func (p *Processor) processFile(filePath string, writeCh chan fileproc.WriteRequest) { + absRoot, err := utils.GetAbsolutePath(p.flags.SourceDir) + if err != nil { + utils.LogError("Failed to get absolute path", err) + return + } + fileproc.ProcessFile(filePath, writeCh, absRoot) + + // Update progress bar + p.ui.UpdateProgress(1) +} + +// sendFiles sends files to the worker channels with back-pressure handling. +func (p *Processor) sendFiles(ctx context.Context, files []string, fileCh chan string) error { + defer close(fileCh) + + for _, fp := range files { + // Check if we should apply back-pressure + if p.backpressure.ShouldApplyBackpressure(ctx) { + p.backpressure.ApplyBackpressure(ctx) + } + + // Wait for channel space if needed + p.backpressure.WaitForChannelSpace(ctx, fileCh, nil) + + select { + case <-ctx.Done(): + return ctx.Err() + case fileCh <- fp: + } + } + return nil +} + +// waitForCompletion waits for all workers to complete. +func (p *Processor) waitForCompletion(wg *sync.WaitGroup, writeCh chan fileproc.WriteRequest, writerDone chan struct{}) { + wg.Wait() + close(writeCh) + <-writerDone +} + +// logFinalStats logs the final back-pressure statistics. +func (p *Processor) logFinalStats() { + stats := p.backpressure.GetStats() + if stats.Enabled { + logrus.Infof("Back-pressure stats: processed=%d files, memory=%dMB/%dMB", + stats.FilesProcessed, stats.CurrentMemoryUsage/1024/1024, stats.MaxMemoryUsage/1024/1024) + } +} diff --git a/cli/ui.go b/cli/ui.go new file mode 100644 index 0000000..1c7bc27 --- /dev/null +++ b/cli/ui.go @@ -0,0 +1,173 @@ +package cli + +import ( + "fmt" + "io" + "os" + "time" + + "github.com/fatih/color" + "github.com/schollz/progressbar/v3" +) + +// UIManager handles CLI user interface elements. +type UIManager struct { + enableColors bool + enableProgress bool + progressBar *progressbar.ProgressBar + output io.Writer +} + +// NewUIManager creates a new UI manager. +func NewUIManager() *UIManager { + return &UIManager{ + enableColors: isColorTerminal(), + enableProgress: isInteractiveTerminal(), + output: os.Stderr, // Progress and colors go to stderr + } +} + +// SetColorOutput enables or disables colored output. +func (ui *UIManager) SetColorOutput(enabled bool) { + ui.enableColors = enabled + color.NoColor = !enabled +} + +// SetProgressOutput enables or disables progress bars. +func (ui *UIManager) SetProgressOutput(enabled bool) { + ui.enableProgress = enabled +} + +// StartProgress initializes a progress bar for file processing. +func (ui *UIManager) StartProgress(total int, description string) { + if !ui.enableProgress || total <= 0 { + return + } + + ui.progressBar = progressbar.NewOptions(total, + progressbar.OptionSetWriter(ui.output), + progressbar.OptionSetDescription(description), + progressbar.OptionSetTheme(progressbar.Theme{ + Saucer: color.GreenString("█"), + SaucerHead: color.GreenString("█"), + SaucerPadding: " ", + BarStart: "[", + BarEnd: "]", + }), + progressbar.OptionShowCount(), + progressbar.OptionShowIts(), + progressbar.OptionSetWidth(40), + progressbar.OptionThrottle(100*time.Millisecond), + progressbar.OptionOnCompletion(func() { + _, _ = fmt.Fprint(ui.output, "\n") + }), + progressbar.OptionSetRenderBlankState(true), + ) +} + +// UpdateProgress increments the progress bar. +func (ui *UIManager) UpdateProgress(increment int) { + if ui.progressBar != nil { + _ = ui.progressBar.Add(increment) + } +} + +// FinishProgress completes the progress bar. +func (ui *UIManager) FinishProgress() { + if ui.progressBar != nil { + _ = ui.progressBar.Finish() + ui.progressBar = nil + } +} + +// PrintSuccess prints a success message in green. +func (ui *UIManager) PrintSuccess(format string, args ...interface{}) { + if ui.enableColors { + color.Green("✓ "+format, args...) + } else { + ui.printf("✓ "+format+"\n", args...) + } +} + +// PrintError prints an error message in red. +func (ui *UIManager) PrintError(format string, args ...interface{}) { + if ui.enableColors { + color.Red("✗ "+format, args...) + } else { + ui.printf("✗ "+format+"\n", args...) + } +} + +// PrintWarning prints a warning message in yellow. +func (ui *UIManager) PrintWarning(format string, args ...interface{}) { + if ui.enableColors { + color.Yellow("⚠ "+format, args...) + } else { + ui.printf("⚠ "+format+"\n", args...) + } +} + +// PrintInfo prints an info message in blue. +func (ui *UIManager) PrintInfo(format string, args ...interface{}) { + if ui.enableColors { + color.Blue("ℹ "+format, args...) + } else { + ui.printf("ℹ "+format+"\n", args...) + } +} + +// PrintHeader prints a header message in bold. +func (ui *UIManager) PrintHeader(format string, args ...interface{}) { + if ui.enableColors { + _, _ = color.New(color.Bold).Fprintf(ui.output, format+"\n", args...) + } else { + ui.printf(format+"\n", args...) + } +} + +// isColorTerminal checks if the terminal supports colors. +func isColorTerminal() bool { + // Check common environment variables + term := os.Getenv("TERM") + if term == "" || term == "dumb" { + return false + } + + // Check for CI environments that typically don't support colors + if os.Getenv("CI") != "" { + // GitHub Actions supports colors + if os.Getenv("GITHUB_ACTIONS") == "true" { + return true + } + // Most other CI systems don't + return false + } + + // Check if NO_COLOR is set (https://no-color.org/) + if os.Getenv("NO_COLOR") != "" { + return false + } + + // Check if FORCE_COLOR is set + if os.Getenv("FORCE_COLOR") != "" { + return true + } + + // Default to true for interactive terminals + return isInteractiveTerminal() +} + +// isInteractiveTerminal checks if we're running in an interactive terminal. +func isInteractiveTerminal() bool { + // Check if stderr is a terminal (where we output progress/colors) + fileInfo, err := os.Stderr.Stat() + if err != nil { + return false + } + return (fileInfo.Mode() & os.ModeCharDevice) != 0 +} + +// printf is a helper that ignores printf errors (for UI output). +func (ui *UIManager) printf(format string, args ...interface{}) { + _, _ = fmt.Fprintf(ui.output, format, args...) +} diff --git a/cmd/benchmark/main.go b/cmd/benchmark/main.go new file mode 100644 index 0000000..e5be0ea --- /dev/null +++ b/cmd/benchmark/main.go @@ -0,0 +1,145 @@ +// Package main provides a CLI for running gibidify benchmarks. +package main + +import ( + "flag" + "fmt" + "os" + "runtime" + "strings" + + "github.com/ivuorinen/gibidify/benchmark" + "github.com/ivuorinen/gibidify/utils" +) + +var ( + sourceDir = flag.String("source", "", "Source directory to benchmark (uses temp files if empty)") + benchmarkType = flag.String("type", "all", "Benchmark type: all, collection, processing, concurrency, format") + format = flag.String("format", "json", "Output format for processing benchmarks") + concurrency = flag.Int("concurrency", runtime.NumCPU(), "Concurrency level for processing benchmarks") + concurrencyList = flag.String("concurrency-list", "1,2,4,8", "Comma-separated list of concurrency levels") + formatList = flag.String("format-list", "json,yaml,markdown", "Comma-separated list of formats") + numFiles = flag.Int("files", 100, "Number of files to create for benchmarks") +) + +func main() { + flag.Parse() + + if err := runBenchmarks(); err != nil { + fmt.Fprintf(os.Stderr, "Benchmark failed: %v\n", err) + os.Exit(1) + } +} + +func runBenchmarks() error { + fmt.Printf("Running gibidify benchmarks...\n") + fmt.Printf("Source: %s\n", getSourceDescription()) + fmt.Printf("Type: %s\n", *benchmarkType) + fmt.Printf("CPU cores: %d\n", runtime.NumCPU()) + fmt.Println() + + switch *benchmarkType { + case "all": + return benchmark.RunAllBenchmarks(*sourceDir) + case "collection": + return runCollectionBenchmark() + case "processing": + return runProcessingBenchmark() + case "concurrency": + return runConcurrencyBenchmark() + case "format": + return runFormatBenchmark() + default: + return utils.NewValidationError(utils.CodeValidationFormat, "invalid benchmark type: "+*benchmarkType) + } +} + +func runCollectionBenchmark() error { + fmt.Println("Running file collection benchmark...") + result, err := benchmark.FileCollectionBenchmark(*sourceDir, *numFiles) + if err != nil { + return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "file collection benchmark failed") + } + benchmark.PrintBenchmarkResult(result) + return nil +} + +func runProcessingBenchmark() error { + fmt.Printf("Running file processing benchmark (format: %s, concurrency: %d)...\n", *format, *concurrency) + result, err := benchmark.FileProcessingBenchmark(*sourceDir, *format, *concurrency) + if err != nil { + return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "file processing benchmark failed") + } + benchmark.PrintBenchmarkResult(result) + return nil +} + +func runConcurrencyBenchmark() error { + concurrencyLevels, err := parseConcurrencyList(*concurrencyList) + if err != nil { + return utils.WrapError(err, utils.ErrorTypeValidation, utils.CodeValidationFormat, "invalid concurrency list") + } + + fmt.Printf("Running concurrency benchmark (format: %s, levels: %v)...\n", *format, concurrencyLevels) + suite, err := benchmark.ConcurrencyBenchmark(*sourceDir, *format, concurrencyLevels) + if err != nil { + return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "concurrency benchmark failed") + } + benchmark.PrintBenchmarkSuite(suite) + return nil +} + +func runFormatBenchmark() error { + formats := parseFormatList(*formatList) + fmt.Printf("Running format benchmark (formats: %v)...\n", formats) + suite, err := benchmark.FormatBenchmark(*sourceDir, formats) + if err != nil { + return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "format benchmark failed") + } + benchmark.PrintBenchmarkSuite(suite) + return nil +} + +func getSourceDescription() string { + if *sourceDir == "" { + return fmt.Sprintf("temporary files (%d files)", *numFiles) + } + return *sourceDir +} + +func parseConcurrencyList(list string) ([]int, error) { + parts := strings.Split(list, ",") + levels := make([]int, 0, len(parts)) + + for _, part := range parts { + part = strings.TrimSpace(part) + var level int + if _, err := fmt.Sscanf(part, "%d", &level); err != nil { + return nil, utils.WrapErrorf(err, utils.ErrorTypeValidation, utils.CodeValidationFormat, "invalid concurrency level: %s", part) + } + if level <= 0 { + return nil, utils.NewValidationError(utils.CodeValidationFormat, "concurrency level must be positive: "+part) + } + levels = append(levels, level) + } + + if len(levels) == 0 { + return nil, utils.NewValidationError(utils.CodeValidationFormat, "no valid concurrency levels found") + } + + return levels, nil +} + +func parseFormatList(list string) []string { + parts := strings.Split(list, ",") + formats := make([]string, 0, len(parts)) + + for _, part := range parts { + part = strings.TrimSpace(part) + if part != "" { + formats = append(formats, part) + } + } + + return formats +} diff --git a/config.example.yaml b/config.example.yaml new file mode 100644 index 0000000..fad9a43 --- /dev/null +++ b/config.example.yaml @@ -0,0 +1,84 @@ +# gibidify configuration example +# Place this file in one of these locations: +# - $XDG_CONFIG_HOME/gibidify/config.yaml +# - $HOME/.config/gibidify/config.yaml +# - Current directory (if no gibidify.yaml output file exists) + +# File size limit in bytes (default: 5MB) +fileSizeLimit: 5242880 + +# Directories to ignore during scanning +ignoreDirectories: + - vendor + - node_modules + - .git + - dist + - build + - target + - bower_components + - cache + - tmp + - .next + - .nuxt + +# FileType registry configuration +fileTypes: + # Enable/disable file type detection entirely (default: true) + enabled: true + + # Add custom image extensions + customImageExtensions: + - .webp + - .avif + - .heic + - .jxl + + # Add custom binary extensions + customBinaryExtensions: + - .custom + - .proprietary + - .blob + + # Add custom language mappings + customLanguages: + .zig: zig + .odin: odin + .v: vlang + .grain: grain + .gleam: gleam + .roc: roc + .janet: janet + .fennel: fennel + .wast: wast + .wat: wat + + # Disable specific default image extensions + disabledImageExtensions: + - .bmp # Disable bitmap support + - .tif # Disable TIFF support + + # Disable specific default binary extensions + disabledBinaryExtensions: + - .exe # Don't treat executables as binary + - .dll # Don't treat DLL files as binary + + # Disable specific default language extensions + disabledLanguageExtensions: + - .bat # Don't detect batch files + - .cmd # Don't detect command files + +# Maximum concurrency (optional) +maxConcurrency: 16 + +# Supported output formats (optional validation) +supportedFormats: + - json + - yaml + - markdown + +# File patterns for filtering (optional) +filePatterns: + - "*.go" + - "*.py" + - "*.js" + - "*.ts" \ No newline at end of file diff --git a/config/config.go b/config/config.go index 13ec049..5e7fd80 100644 --- a/config/config.go +++ b/config/config.go @@ -2,11 +2,24 @@ package config import ( + "fmt" "os" "path/filepath" + "strings" "github.com/sirupsen/logrus" "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/utils" +) + +const ( + // DefaultFileSizeLimit is the default maximum file size (5MB). + DefaultFileSizeLimit = 5242880 + // MinFileSizeLimit is the minimum allowed file size limit (1KB). + MinFileSizeLimit = 1024 + // MaxFileSizeLimit is the maximum allowed file size limit (100MB). + MaxFileSizeLimit = 104857600 ) // LoadConfig reads configuration from a YAML file. @@ -23,23 +36,51 @@ func LoadConfig() { } else if home, err := os.UserHomeDir(); err == nil { viper.AddConfigPath(filepath.Join(home, ".config", "gibidify")) } - viper.AddConfigPath(".") + // Only add current directory if no config file named gibidify.yaml exists + // to avoid conflicts with the project's output file + if _, err := os.Stat("gibidify.yaml"); os.IsNotExist(err) { + viper.AddConfigPath(".") + } if err := viper.ReadInConfig(); err != nil { logrus.Infof("Config file not found, using default values: %v", err) setDefaultConfig() } else { logrus.Infof("Using config file: %s", viper.ConfigFileUsed()) + // Validate configuration after loading + if err := ValidateConfig(); err != nil { + logrus.Warnf("Configuration validation failed: %v", err) + logrus.Info("Falling back to default configuration") + // Reset viper and set defaults when validation fails + viper.Reset() + setDefaultConfig() + } } } // setDefaultConfig sets default configuration values. func setDefaultConfig() { - viper.SetDefault("fileSizeLimit", 5242880) // 5 MB + viper.SetDefault("fileSizeLimit", DefaultFileSizeLimit) // Default ignored directories. viper.SetDefault("ignoreDirectories", []string{ "vendor", "node_modules", ".git", "dist", "build", "target", "bower_components", "cache", "tmp", }) + + // FileTypeRegistry defaults + viper.SetDefault("fileTypes.enabled", true) + viper.SetDefault("fileTypes.customImageExtensions", []string{}) + viper.SetDefault("fileTypes.customBinaryExtensions", []string{}) + viper.SetDefault("fileTypes.customLanguages", map[string]string{}) + viper.SetDefault("fileTypes.disabledImageExtensions", []string{}) + viper.SetDefault("fileTypes.disabledBinaryExtensions", []string{}) + viper.SetDefault("fileTypes.disabledLanguageExtensions", []string{}) + + // Back-pressure and memory management defaults + viper.SetDefault("backpressure.enabled", true) + viper.SetDefault("backpressure.maxPendingFiles", 1000) // Max files in file channel buffer + viper.SetDefault("backpressure.maxPendingWrites", 100) // Max writes in write channel buffer + viper.SetDefault("backpressure.maxMemoryUsage", 104857600) // 100MB max memory usage + viper.SetDefault("backpressure.memoryCheckInterval", 1000) // Check memory every 1000 files } // GetFileSizeLimit returns the file size limit from configuration. @@ -51,3 +92,303 @@ func GetFileSizeLimit() int64 { func GetIgnoredDirectories() []string { return viper.GetStringSlice("ignoreDirectories") } + +// ValidateConfig validates the loaded configuration. +func ValidateConfig() error { + var validationErrors []string + + // Validate file size limit + fileSizeLimit := viper.GetInt64("fileSizeLimit") + if fileSizeLimit < MinFileSizeLimit { + validationErrors = append(validationErrors, fmt.Sprintf("fileSizeLimit (%d) is below minimum (%d)", fileSizeLimit, MinFileSizeLimit)) + } + if fileSizeLimit > MaxFileSizeLimit { + validationErrors = append(validationErrors, fmt.Sprintf("fileSizeLimit (%d) exceeds maximum (%d)", fileSizeLimit, MaxFileSizeLimit)) + } + + // Validate ignore directories + ignoreDirectories := viper.GetStringSlice("ignoreDirectories") + for i, dir := range ignoreDirectories { + dir = strings.TrimSpace(dir) + if dir == "" { + validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] is empty", i)) + continue + } + if strings.Contains(dir, "/") { + validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] (%s) contains path separator - only directory names are allowed", i, dir)) + } + if strings.HasPrefix(dir, ".") && dir != ".git" && dir != ".vscode" && dir != ".idea" { + validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] (%s) starts with dot - this may cause unexpected behavior", i, dir)) + } + } + + // Validate supported output formats if configured + if viper.IsSet("supportedFormats") { + supportedFormats := viper.GetStringSlice("supportedFormats") + validFormats := map[string]bool{"json": true, "yaml": true, "markdown": true} + for i, format := range supportedFormats { + format = strings.ToLower(strings.TrimSpace(format)) + if !validFormats[format] { + validationErrors = append(validationErrors, fmt.Sprintf("supportedFormats[%d] (%s) is not a valid format (json, yaml, markdown)", i, format)) + } + } + } + + // Validate concurrency settings if configured + if viper.IsSet("maxConcurrency") { + maxConcurrency := viper.GetInt("maxConcurrency") + if maxConcurrency < 1 { + validationErrors = append(validationErrors, fmt.Sprintf("maxConcurrency (%d) must be at least 1", maxConcurrency)) + } + if maxConcurrency > 100 { + validationErrors = append(validationErrors, fmt.Sprintf("maxConcurrency (%d) is unreasonably high (max 100)", maxConcurrency)) + } + } + + // Validate file patterns if configured + if viper.IsSet("filePatterns") { + filePatterns := viper.GetStringSlice("filePatterns") + for i, pattern := range filePatterns { + pattern = strings.TrimSpace(pattern) + if pattern == "" { + validationErrors = append(validationErrors, fmt.Sprintf("filePatterns[%d] is empty", i)) + continue + } + // Basic validation - patterns should contain at least one alphanumeric character + if !strings.ContainsAny(pattern, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") { + validationErrors = append(validationErrors, fmt.Sprintf("filePatterns[%d] (%s) appears to be invalid", i, pattern)) + } + } + } + + // Validate FileTypeRegistry configuration + if viper.IsSet("fileTypes.customImageExtensions") { + customImages := viper.GetStringSlice("fileTypes.customImageExtensions") + for i, ext := range customImages { + ext = strings.TrimSpace(ext) + if ext == "" { + validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customImageExtensions[%d] is empty", i)) + continue + } + if !strings.HasPrefix(ext, ".") { + validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customImageExtensions[%d] (%s) must start with a dot", i, ext)) + } + } + } + + if viper.IsSet("fileTypes.customBinaryExtensions") { + customBinary := viper.GetStringSlice("fileTypes.customBinaryExtensions") + for i, ext := range customBinary { + ext = strings.TrimSpace(ext) + if ext == "" { + validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customBinaryExtensions[%d] is empty", i)) + continue + } + if !strings.HasPrefix(ext, ".") { + validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customBinaryExtensions[%d] (%s) must start with a dot", i, ext)) + } + } + } + + if viper.IsSet("fileTypes.customLanguages") { + customLangs := viper.GetStringMapString("fileTypes.customLanguages") + for ext, lang := range customLangs { + ext = strings.TrimSpace(ext) + lang = strings.TrimSpace(lang) + if ext == "" { + validationErrors = append(validationErrors, "fileTypes.customLanguages contains empty extension key") + continue + } + if !strings.HasPrefix(ext, ".") { + validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customLanguages extension (%s) must start with a dot", ext)) + } + if lang == "" { + validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customLanguages[%s] has empty language value", ext)) + } + } + } + + // Validate back-pressure configuration + if viper.IsSet("backpressure.maxPendingFiles") { + maxPendingFiles := viper.GetInt("backpressure.maxPendingFiles") + if maxPendingFiles < 1 { + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingFiles (%d) must be at least 1", maxPendingFiles)) + } + if maxPendingFiles > 100000 { + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingFiles (%d) is unreasonably high (max 100000)", maxPendingFiles)) + } + } + + if viper.IsSet("backpressure.maxPendingWrites") { + maxPendingWrites := viper.GetInt("backpressure.maxPendingWrites") + if maxPendingWrites < 1 { + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingWrites (%d) must be at least 1", maxPendingWrites)) + } + if maxPendingWrites > 10000 { + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingWrites (%d) is unreasonably high (max 10000)", maxPendingWrites)) + } + } + + if viper.IsSet("backpressure.maxMemoryUsage") { + maxMemoryUsage := viper.GetInt64("backpressure.maxMemoryUsage") + if maxMemoryUsage < 1048576 { // 1MB minimum + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxMemoryUsage (%d) must be at least 1MB (1048576 bytes)", maxMemoryUsage)) + } + if maxMemoryUsage > 10737418240 { // 10GB maximum + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxMemoryUsage (%d) is unreasonably high (max 10GB)", maxMemoryUsage)) + } + } + + if viper.IsSet("backpressure.memoryCheckInterval") { + interval := viper.GetInt("backpressure.memoryCheckInterval") + if interval < 1 { + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.memoryCheckInterval (%d) must be at least 1", interval)) + } + if interval > 100000 { + validationErrors = append(validationErrors, fmt.Sprintf("backpressure.memoryCheckInterval (%d) is unreasonably high (max 100000)", interval)) + } + } + + if len(validationErrors) > 0 { + return utils.NewStructuredError( + utils.ErrorTypeConfiguration, + utils.CodeConfigValidation, + "configuration validation failed: "+strings.Join(validationErrors, "; "), + ).WithContext("validation_errors", validationErrors) + } + + return nil +} + +// GetMaxConcurrency returns the maximum concurrency limit from configuration. +func GetMaxConcurrency() int { + return viper.GetInt("maxConcurrency") +} + +// GetSupportedFormats returns the supported output formats from configuration. +func GetSupportedFormats() []string { + return viper.GetStringSlice("supportedFormats") +} + +// GetFilePatterns returns the file patterns from configuration. +func GetFilePatterns() []string { + return viper.GetStringSlice("filePatterns") +} + +// IsValidFormat checks if a format is supported. +func IsValidFormat(format string) bool { + format = strings.ToLower(strings.TrimSpace(format)) + validFormats := map[string]bool{"json": true, "yaml": true, "markdown": true} + return validFormats[format] +} + +// ValidateFileSize checks if a file size is within the configured limit. +func ValidateFileSize(size int64) error { + limit := GetFileSizeLimit() + if size > limit { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeValidationSize, + fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", size, limit), + ).WithContext("file_size", size).WithContext("size_limit", limit) + } + return nil +} + +// ValidateOutputFormat checks if an output format is valid. +func ValidateOutputFormat(format string) error { + if !IsValidFormat(format) { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeValidationFormat, + fmt.Sprintf("unsupported output format: %s (supported: json, yaml, markdown)", format), + ).WithContext("format", format) + } + return nil +} + +// ValidateConcurrency checks if a concurrency level is valid. +func ValidateConcurrency(concurrency int) error { + if concurrency < 1 { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeValidationFormat, + fmt.Sprintf("concurrency (%d) must be at least 1", concurrency), + ).WithContext("concurrency", concurrency) + } + + if viper.IsSet("maxConcurrency") { + maxConcurrency := GetMaxConcurrency() + if concurrency > maxConcurrency { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeValidationFormat, + fmt.Sprintf("concurrency (%d) exceeds maximum (%d)", concurrency, maxConcurrency), + ).WithContext("concurrency", concurrency).WithContext("max_concurrency", maxConcurrency) + } + } + + return nil +} + +// GetFileTypesEnabled returns whether file type detection is enabled. +func GetFileTypesEnabled() bool { + return viper.GetBool("fileTypes.enabled") +} + +// GetCustomImageExtensions returns custom image extensions from configuration. +func GetCustomImageExtensions() []string { + return viper.GetStringSlice("fileTypes.customImageExtensions") +} + +// GetCustomBinaryExtensions returns custom binary extensions from configuration. +func GetCustomBinaryExtensions() []string { + return viper.GetStringSlice("fileTypes.customBinaryExtensions") +} + +// GetCustomLanguages returns custom language mappings from configuration. +func GetCustomLanguages() map[string]string { + return viper.GetStringMapString("fileTypes.customLanguages") +} + +// GetDisabledImageExtensions returns disabled image extensions from configuration. +func GetDisabledImageExtensions() []string { + return viper.GetStringSlice("fileTypes.disabledImageExtensions") +} + +// GetDisabledBinaryExtensions returns disabled binary extensions from configuration. +func GetDisabledBinaryExtensions() []string { + return viper.GetStringSlice("fileTypes.disabledBinaryExtensions") +} + +// GetDisabledLanguageExtensions returns disabled language extensions from configuration. +func GetDisabledLanguageExtensions() []string { + return viper.GetStringSlice("fileTypes.disabledLanguageExtensions") +} + +// Back-pressure configuration getters + +// GetBackpressureEnabled returns whether back-pressure management is enabled. +func GetBackpressureEnabled() bool { + return viper.GetBool("backpressure.enabled") +} + +// GetMaxPendingFiles returns the maximum number of files that can be pending in the file channel. +func GetMaxPendingFiles() int { + return viper.GetInt("backpressure.maxPendingFiles") +} + +// GetMaxPendingWrites returns the maximum number of writes that can be pending in the write channel. +func GetMaxPendingWrites() int { + return viper.GetInt("backpressure.maxPendingWrites") +} + +// GetMaxMemoryUsage returns the maximum memory usage in bytes before back-pressure kicks in. +func GetMaxMemoryUsage() int64 { + return viper.GetInt64("backpressure.maxMemoryUsage") +} + +// GetMemoryCheckInterval returns how often to check memory usage (in number of files processed). +func GetMemoryCheckInterval() int { + return viper.GetInt("backpressure.memoryCheckInterval") +} diff --git a/config/config_filetype_test.go b/config/config_filetype_test.go new file mode 100644 index 0000000..0065bfa --- /dev/null +++ b/config/config_filetype_test.go @@ -0,0 +1,174 @@ +package config + +import ( + "testing" + + "github.com/spf13/viper" +) + +// TestFileTypeRegistryConfig tests the FileTypeRegistry configuration functionality. +func TestFileTypeRegistryConfig(t *testing.T) { + // Test default values + t.Run("DefaultValues", func(t *testing.T) { + viper.Reset() + setDefaultConfig() + + if !GetFileTypesEnabled() { + t.Error("Expected file types to be enabled by default") + } + + if len(GetCustomImageExtensions()) != 0 { + t.Error("Expected custom image extensions to be empty by default") + } + + if len(GetCustomBinaryExtensions()) != 0 { + t.Error("Expected custom binary extensions to be empty by default") + } + + if len(GetCustomLanguages()) != 0 { + t.Error("Expected custom languages to be empty by default") + } + + if len(GetDisabledImageExtensions()) != 0 { + t.Error("Expected disabled image extensions to be empty by default") + } + + if len(GetDisabledBinaryExtensions()) != 0 { + t.Error("Expected disabled binary extensions to be empty by default") + } + + if len(GetDisabledLanguageExtensions()) != 0 { + t.Error("Expected disabled language extensions to be empty by default") + } + }) + + // Test configuration setting and getting + t.Run("ConfigurationSetGet", func(t *testing.T) { + viper.Reset() + + // Set test values + viper.Set("fileTypes.enabled", false) + viper.Set("fileTypes.customImageExtensions", []string{".webp", ".avif"}) + viper.Set("fileTypes.customBinaryExtensions", []string{".custom", ".mybin"}) + viper.Set("fileTypes.customLanguages", map[string]string{ + ".zig": "zig", + ".v": "vlang", + }) + viper.Set("fileTypes.disabledImageExtensions", []string{".gif", ".bmp"}) + viper.Set("fileTypes.disabledBinaryExtensions", []string{".exe", ".dll"}) + viper.Set("fileTypes.disabledLanguageExtensions", []string{".rb", ".pl"}) + + // Test getter functions + if GetFileTypesEnabled() { + t.Error("Expected file types to be disabled") + } + + customImages := GetCustomImageExtensions() + expectedImages := []string{".webp", ".avif"} + if len(customImages) != len(expectedImages) { + t.Errorf("Expected %d custom image extensions, got %d", len(expectedImages), len(customImages)) + } + for i, ext := range expectedImages { + if customImages[i] != ext { + t.Errorf("Expected custom image extension %s, got %s", ext, customImages[i]) + } + } + + customBinary := GetCustomBinaryExtensions() + expectedBinary := []string{".custom", ".mybin"} + if len(customBinary) != len(expectedBinary) { + t.Errorf("Expected %d custom binary extensions, got %d", len(expectedBinary), len(customBinary)) + } + for i, ext := range expectedBinary { + if customBinary[i] != ext { + t.Errorf("Expected custom binary extension %s, got %s", ext, customBinary[i]) + } + } + + customLangs := GetCustomLanguages() + expectedLangs := map[string]string{ + ".zig": "zig", + ".v": "vlang", + } + if len(customLangs) != len(expectedLangs) { + t.Errorf("Expected %d custom languages, got %d", len(expectedLangs), len(customLangs)) + } + for ext, lang := range expectedLangs { + if customLangs[ext] != lang { + t.Errorf("Expected custom language %s -> %s, got %s", ext, lang, customLangs[ext]) + } + } + + disabledImages := GetDisabledImageExtensions() + expectedDisabledImages := []string{".gif", ".bmp"} + if len(disabledImages) != len(expectedDisabledImages) { + t.Errorf("Expected %d disabled image extensions, got %d", len(expectedDisabledImages), len(disabledImages)) + } + + disabledBinary := GetDisabledBinaryExtensions() + expectedDisabledBinary := []string{".exe", ".dll"} + if len(disabledBinary) != len(expectedDisabledBinary) { + t.Errorf("Expected %d disabled binary extensions, got %d", len(expectedDisabledBinary), len(disabledBinary)) + } + + disabledLangs := GetDisabledLanguageExtensions() + expectedDisabledLangs := []string{".rb", ".pl"} + if len(disabledLangs) != len(expectedDisabledLangs) { + t.Errorf("Expected %d disabled language extensions, got %d", len(expectedDisabledLangs), len(disabledLangs)) + } + }) + + // Test validation + t.Run("ValidationSuccess", func(t *testing.T) { + viper.Reset() + setDefaultConfig() + + // Set valid configuration + viper.Set("fileTypes.customImageExtensions", []string{".webp", ".avif"}) + viper.Set("fileTypes.customBinaryExtensions", []string{".custom"}) + viper.Set("fileTypes.customLanguages", map[string]string{ + ".zig": "zig", + ".v": "vlang", + }) + + err := ValidateConfig() + if err != nil { + t.Errorf("Expected validation to pass with valid config, got error: %v", err) + } + }) + + t.Run("ValidationFailure", func(t *testing.T) { + // Test invalid custom image extensions + viper.Reset() + setDefaultConfig() + viper.Set("fileTypes.customImageExtensions", []string{"", "webp"}) // Empty and missing dot + + err := ValidateConfig() + if err == nil { + t.Error("Expected validation to fail with invalid custom image extensions") + } + + // Test invalid custom binary extensions + viper.Reset() + setDefaultConfig() + viper.Set("fileTypes.customBinaryExtensions", []string{"custom"}) // Missing dot + + err = ValidateConfig() + if err == nil { + t.Error("Expected validation to fail with invalid custom binary extensions") + } + + // Test invalid custom languages + viper.Reset() + setDefaultConfig() + viper.Set("fileTypes.customLanguages", map[string]string{ + "zig": "zig", // Missing dot in extension + ".v": "", // Empty language + }) + + err = ValidateConfig() + if err == nil { + t.Error("Expected validation to fail with invalid custom languages") + } + }) +} diff --git a/config/config_test.go b/config/config_test.go index a0b7c8e..55fc55c 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -2,40 +2,38 @@ package config_test import ( "os" - "path/filepath" + "strings" "testing" - configpkg "github.com/ivuorinen/gibidify/config" "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/config" + "github.com/ivuorinen/gibidify/testutil" + "github.com/ivuorinen/gibidify/utils" +) + +const ( + defaultFileSizeLimit = 5242880 + testFileSizeLimit = 123456 ) // TestDefaultConfig verifies that if no config file is found, // the default configuration values are correctly set. func TestDefaultConfig(t *testing.T) { // Create a temporary directory to ensure no config file is present. - tmpDir, err := os.MkdirTemp("", "gibidify_config_test_default") - if err != nil { - t.Fatalf("Failed to create temp directory: %v", err) - } - defer func() { - if err := os.RemoveAll(tmpDir); err != nil { - t.Fatalf("cleanup failed: %v", err) - } - }() + tmpDir := t.TempDir() // Point Viper to the temp directory with no config file. originalConfigPaths := viper.ConfigFileUsed() - viper.Reset() - viper.AddConfigPath(tmpDir) - configpkg.LoadConfig() + testutil.ResetViperConfig(t, tmpDir) // Check defaults - defaultSizeLimit := configpkg.GetFileSizeLimit() - if defaultSizeLimit != 5242880 { + defaultSizeLimit := config.GetFileSizeLimit() + if defaultSizeLimit != defaultFileSizeLimit { t.Errorf("Expected default file size limit of 5242880, got %d", defaultSizeLimit) } - ignoredDirs := configpkg.GetIgnoredDirectories() + ignoredDirs := config.GetIgnoredDirectories() if len(ignoredDirs) == 0 { t.Errorf("Expected some default ignored directories, got none") } @@ -47,15 +45,7 @@ func TestDefaultConfig(t *testing.T) { // TestLoadConfigFile verifies that when a valid config file is present, // viper loads the specified values correctly. func TestLoadConfigFile(t *testing.T) { - tmpDir, err := os.MkdirTemp("", "gibidify_config_test_file") - if err != nil { - t.Fatalf("Failed to create temp directory: %v", err) - } - defer func() { - if err := os.RemoveAll(tmpDir); err != nil { - t.Fatalf("cleanup failed: %v", err) - } - }() + tmpDir := t.TempDir() // Prepare a minimal config file configContent := []byte(`--- @@ -65,22 +55,17 @@ ignoreDirectories: - "testdir2" `) - configPath := filepath.Join(tmpDir, "config.yaml") - if err := os.WriteFile(configPath, configContent, 0644); err != nil { - t.Fatalf("Failed to write config file: %v", err) - } + testutil.CreateTestFile(t, tmpDir, "config.yaml", configContent) // Reset viper and point to the new config path viper.Reset() viper.AddConfigPath(tmpDir) // Force Viper to read our config file - if err := viper.ReadInConfig(); err != nil { - t.Fatalf("Could not read config file: %v", err) - } + testutil.MustSucceed(t, viper.ReadInConfig(), "reading config file") // Validate loaded data - if got := viper.GetInt64("fileSizeLimit"); got != 123456 { + if got := viper.GetInt64("fileSizeLimit"); got != testFileSizeLimit { t.Errorf("Expected fileSizeLimit=123456, got %d", got) } @@ -89,3 +74,283 @@ ignoreDirectories: t.Errorf("Expected [\"testdir1\", \"testdir2\"], got %v", ignored) } } + +// TestValidateConfig tests the configuration validation functionality. +func TestValidateConfig(t *testing.T) { + tests := []struct { + name string + config map[string]interface{} + wantErr bool + errContains string + }{ + { + name: "valid default config", + config: map[string]interface{}{ + "fileSizeLimit": config.DefaultFileSizeLimit, + "ignoreDirectories": []string{"node_modules", ".git"}, + }, + wantErr: false, + }, + { + name: "file size limit too small", + config: map[string]interface{}{ + "fileSizeLimit": config.MinFileSizeLimit - 1, + }, + wantErr: true, + errContains: "fileSizeLimit", + }, + { + name: "file size limit too large", + config: map[string]interface{}{ + "fileSizeLimit": config.MaxFileSizeLimit + 1, + }, + wantErr: true, + errContains: "fileSizeLimit", + }, + { + name: "empty ignore directory", + config: map[string]interface{}{ + "ignoreDirectories": []string{"node_modules", "", ".git"}, + }, + wantErr: true, + errContains: "ignoreDirectories", + }, + { + name: "ignore directory with path separator", + config: map[string]interface{}{ + "ignoreDirectories": []string{"node_modules", "src/build", ".git"}, + }, + wantErr: true, + errContains: "path separator", + }, + { + name: "invalid supported format", + config: map[string]interface{}{ + "supportedFormats": []string{"json", "xml", "yaml"}, + }, + wantErr: true, + errContains: "not a valid format", + }, + { + name: "invalid max concurrency", + config: map[string]interface{}{ + "maxConcurrency": 0, + }, + wantErr: true, + errContains: "maxConcurrency", + }, + { + name: "valid comprehensive config", + config: map[string]interface{}{ + "fileSizeLimit": config.DefaultFileSizeLimit, + "ignoreDirectories": []string{"node_modules", ".git", ".vscode"}, + "supportedFormats": []string{"json", "yaml", "markdown"}, + "maxConcurrency": 8, + "filePatterns": []string{"*.go", "*.js", "*.py"}, + }, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Reset viper for each test + viper.Reset() + + // Set test configuration + for key, value := range tt.config { + viper.Set(key, value) + } + + // Load defaults for missing values + config.LoadConfig() + + err := config.ValidateConfig() + + if tt.wantErr { + if err == nil { + t.Errorf("Expected error but got none") + return + } + if tt.errContains != "" && !strings.Contains(err.Error(), tt.errContains) { + t.Errorf("Expected error to contain %q, got %q", tt.errContains, err.Error()) + } + + // Check that it's a structured error + var structErr *utils.StructuredError + if !errorAs(err, &structErr) { + t.Errorf("Expected structured error, got %T", err) + return + } + if structErr.Type != utils.ErrorTypeConfiguration { + t.Errorf("Expected error type %v, got %v", utils.ErrorTypeConfiguration, structErr.Type) + } + if structErr.Code != utils.CodeConfigValidation { + t.Errorf("Expected error code %v, got %v", utils.CodeConfigValidation, structErr.Code) + } + } else { + if err != nil { + t.Errorf("Expected no error but got: %v", err) + } + } + }) + } +} + +// TestValidationFunctions tests individual validation functions. +func TestValidationFunctions(t *testing.T) { + t.Run("IsValidFormat", func(t *testing.T) { + tests := []struct { + format string + valid bool + }{ + {"json", true}, + {"yaml", true}, + {"markdown", true}, + {"JSON", true}, + {"xml", false}, + {"txt", false}, + {"", false}, + {" json ", true}, + } + + for _, tt := range tests { + result := config.IsValidFormat(tt.format) + if result != tt.valid { + t.Errorf("IsValidFormat(%q) = %v, want %v", tt.format, result, tt.valid) + } + } + }) + + t.Run("ValidateFileSize", func(t *testing.T) { + viper.Reset() + viper.Set("fileSizeLimit", config.DefaultFileSizeLimit) + + tests := []struct { + name string + size int64 + wantErr bool + }{ + {"size within limit", config.DefaultFileSizeLimit - 1, false}, + {"size at limit", config.DefaultFileSizeLimit, false}, + {"size exceeds limit", config.DefaultFileSizeLimit + 1, true}, + {"zero size", 0, false}, + } + + for _, tt := range tests { + err := config.ValidateFileSize(tt.size) + if (err != nil) != tt.wantErr { + t.Errorf("%s: ValidateFileSize(%d) error = %v, wantErr %v", tt.name, tt.size, err, tt.wantErr) + } + } + }) + + t.Run("ValidateOutputFormat", func(t *testing.T) { + tests := []struct { + format string + wantErr bool + }{ + {"json", false}, + {"yaml", false}, + {"markdown", false}, + {"xml", true}, + {"txt", true}, + {"", true}, + } + + for _, tt := range tests { + err := config.ValidateOutputFormat(tt.format) + if (err != nil) != tt.wantErr { + t.Errorf("ValidateOutputFormat(%q) error = %v, wantErr %v", tt.format, err, tt.wantErr) + } + } + }) + + t.Run("ValidateConcurrency", func(t *testing.T) { + tests := []struct { + name string + concurrency int + maxConcurrency int + setMax bool + wantErr bool + }{ + {"valid concurrency", 4, 0, false, false}, + {"minimum concurrency", 1, 0, false, false}, + {"zero concurrency", 0, 0, false, true}, + {"negative concurrency", -1, 0, false, true}, + {"concurrency within max", 4, 8, true, false}, + {"concurrency exceeds max", 16, 8, true, true}, + } + + for _, tt := range tests { + viper.Reset() + if tt.setMax { + viper.Set("maxConcurrency", tt.maxConcurrency) + } + + err := config.ValidateConcurrency(tt.concurrency) + if (err != nil) != tt.wantErr { + t.Errorf("%s: ValidateConcurrency(%d) error = %v, wantErr %v", tt.name, tt.concurrency, err, tt.wantErr) + } + } + }) +} + +// TestLoadConfigWithValidation tests that invalid config files fall back to defaults. +func TestLoadConfigWithValidation(t *testing.T) { + // Create a temporary config file with invalid content + configContent := ` +fileSizeLimit: 100 +ignoreDirectories: + - node_modules + - "" + - .git +` + + tempDir := t.TempDir() + configFile := tempDir + "/config.yaml" + + err := os.WriteFile(configFile, []byte(configContent), 0o644) + if err != nil { + t.Fatalf("Failed to write config file: %v", err) + } + + // Reset viper and set config path + viper.Reset() + viper.AddConfigPath(tempDir) + + // This should load the config but validation should fail and fall back to defaults + config.LoadConfig() + + // Should have fallen back to defaults due to validation failure + if config.GetFileSizeLimit() != int64(config.DefaultFileSizeLimit) { + t.Errorf("Expected default file size limit after validation failure, got %d", config.GetFileSizeLimit()) + } + if containsString(config.GetIgnoredDirectories(), "") { + t.Errorf("Expected ignored directories not to contain empty string after validation failure, got %v", config.GetIgnoredDirectories()) + } +} + +// Helper functions + +func containsString(slice []string, item string) bool { + for _, s := range slice { + if s == item { + return true + } + } + return false +} + +func errorAs(err error, target interface{}) bool { + if err == nil { + return false + } + if structErr, ok := err.(*utils.StructuredError); ok { + if ptr, ok := target.(**utils.StructuredError); ok { + *ptr = structErr + return true + } + } + return false +} diff --git a/fileproc/backpressure.go b/fileproc/backpressure.go new file mode 100644 index 0000000..733a271 --- /dev/null +++ b/fileproc/backpressure.go @@ -0,0 +1,196 @@ +// Package fileproc provides back-pressure management for memory optimization. +package fileproc + +import ( + "context" + "runtime" + "sync" + "sync/atomic" + "time" + + "github.com/sirupsen/logrus" + + "github.com/ivuorinen/gibidify/config" +) + +// BackpressureManager manages memory usage and applies back-pressure when needed. +type BackpressureManager struct { + enabled bool + maxMemoryUsage int64 + memoryCheckInterval int + maxPendingFiles int + maxPendingWrites int + filesProcessed int64 + mu sync.RWMutex + memoryWarningLogged bool + lastMemoryCheck time.Time +} + +// NewBackpressureManager creates a new back-pressure manager with configuration. +func NewBackpressureManager() *BackpressureManager { + return &BackpressureManager{ + enabled: config.GetBackpressureEnabled(), + maxMemoryUsage: config.GetMaxMemoryUsage(), + memoryCheckInterval: config.GetMemoryCheckInterval(), + maxPendingFiles: config.GetMaxPendingFiles(), + maxPendingWrites: config.GetMaxPendingWrites(), + lastMemoryCheck: time.Now(), + } +} + +// CreateChannels creates properly sized channels based on back-pressure configuration. +func (bp *BackpressureManager) CreateChannels() (chan string, chan WriteRequest) { + var fileCh chan string + var writeCh chan WriteRequest + + if bp.enabled { + // Use buffered channels with configured limits + fileCh = make(chan string, bp.maxPendingFiles) + writeCh = make(chan WriteRequest, bp.maxPendingWrites) + logrus.Debugf("Created buffered channels: files=%d, writes=%d", bp.maxPendingFiles, bp.maxPendingWrites) + } else { + // Use unbuffered channels (default behavior) + fileCh = make(chan string) + writeCh = make(chan WriteRequest) + logrus.Debug("Created unbuffered channels (back-pressure disabled)") + } + + return fileCh, writeCh +} + +// ShouldApplyBackpressure checks if back-pressure should be applied. +func (bp *BackpressureManager) ShouldApplyBackpressure(ctx context.Context) bool { + if !bp.enabled { + return false + } + + // Check if we should evaluate memory usage + filesProcessed := atomic.AddInt64(&bp.filesProcessed, 1) + if int(filesProcessed)%bp.memoryCheckInterval != 0 { + return false + } + + // Get current memory usage + var m runtime.MemStats + runtime.ReadMemStats(&m) + currentMemory := int64(m.Alloc) + + bp.mu.Lock() + defer bp.mu.Unlock() + + bp.lastMemoryCheck = time.Now() + + // Check if we're over the memory limit + if currentMemory > bp.maxMemoryUsage { + if !bp.memoryWarningLogged { + logrus.Warnf("Memory usage (%d bytes) exceeds limit (%d bytes), applying back-pressure", + currentMemory, bp.maxMemoryUsage) + bp.memoryWarningLogged = true + } + return true + } + + // Reset warning flag if we're back under the limit + if bp.memoryWarningLogged && currentMemory < bp.maxMemoryUsage*8/10 { // 80% of limit + logrus.Infof("Memory usage normalized (%d bytes), removing back-pressure", currentMemory) + bp.memoryWarningLogged = false + } + + return false +} + +// ApplyBackpressure applies back-pressure by triggering garbage collection and adding delay. +func (bp *BackpressureManager) ApplyBackpressure(ctx context.Context) { + if !bp.enabled { + return + } + + // Force garbage collection to free up memory + runtime.GC() + + // Add a small delay to allow memory to be freed + select { + case <-ctx.Done(): + return + case <-time.After(10 * time.Millisecond): + // Small delay to allow GC to complete + } + + // Log memory usage after GC + var m runtime.MemStats + runtime.ReadMemStats(&m) + logrus.Debugf("Applied back-pressure: memory after GC = %d bytes", m.Alloc) +} + +// GetStats returns current back-pressure statistics. +func (bp *BackpressureManager) GetStats() BackpressureStats { + bp.mu.RLock() + defer bp.mu.RUnlock() + + var m runtime.MemStats + runtime.ReadMemStats(&m) + + return BackpressureStats{ + Enabled: bp.enabled, + FilesProcessed: atomic.LoadInt64(&bp.filesProcessed), + CurrentMemoryUsage: int64(m.Alloc), + MaxMemoryUsage: bp.maxMemoryUsage, + MemoryWarningActive: bp.memoryWarningLogged, + LastMemoryCheck: bp.lastMemoryCheck, + MaxPendingFiles: bp.maxPendingFiles, + MaxPendingWrites: bp.maxPendingWrites, + } +} + +// BackpressureStats represents back-pressure manager statistics. +type BackpressureStats struct { + Enabled bool `json:"enabled"` + FilesProcessed int64 `json:"files_processed"` + CurrentMemoryUsage int64 `json:"current_memory_usage"` + MaxMemoryUsage int64 `json:"max_memory_usage"` + MemoryWarningActive bool `json:"memory_warning_active"` + LastMemoryCheck time.Time `json:"last_memory_check"` + MaxPendingFiles int `json:"max_pending_files"` + MaxPendingWrites int `json:"max_pending_writes"` +} + +// WaitForChannelSpace waits for space in channels if they're getting full. +func (bp *BackpressureManager) WaitForChannelSpace(ctx context.Context, fileCh chan string, writeCh chan WriteRequest) { + if !bp.enabled { + return + } + + // Check if file channel is getting full (>90% capacity) + if len(fileCh) > bp.maxPendingFiles*9/10 { + logrus.Debugf("File channel is %d%% full, waiting for space", len(fileCh)*100/bp.maxPendingFiles) + + // Wait a bit for the channel to drain + select { + case <-ctx.Done(): + return + case <-time.After(5 * time.Millisecond): + } + } + + // Check if write channel is getting full (>90% capacity) + if len(writeCh) > bp.maxPendingWrites*9/10 { + logrus.Debugf("Write channel is %d%% full, waiting for space", len(writeCh)*100/bp.maxPendingWrites) + + // Wait a bit for the channel to drain + select { + case <-ctx.Done(): + return + case <-time.After(5 * time.Millisecond): + } + } +} + +// LogBackpressureInfo logs back-pressure configuration and status. +func (bp *BackpressureManager) LogBackpressureInfo() { + if bp.enabled { + logrus.Infof("Back-pressure enabled: maxMemory=%dMB, fileBuffer=%d, writeBuffer=%d, checkInterval=%d", + bp.maxMemoryUsage/1024/1024, bp.maxPendingFiles, bp.maxPendingWrites, bp.memoryCheckInterval) + } else { + logrus.Info("Back-pressure disabled") + } +} diff --git a/fileproc/cache.go b/fileproc/cache.go new file mode 100644 index 0000000..ab3ad60 --- /dev/null +++ b/fileproc/cache.go @@ -0,0 +1,127 @@ +package fileproc + +// getNormalizedExtension efficiently extracts and normalizes the file extension with caching. +func (r *FileTypeRegistry) getNormalizedExtension(filename string) string { + // Try cache first (read lock) + r.cacheMutex.RLock() + if ext, exists := r.extCache[filename]; exists { + r.cacheMutex.RUnlock() + return ext + } + r.cacheMutex.RUnlock() + + // Compute normalized extension + ext := normalizeExtension(filename) + + // Cache the result (write lock) + r.cacheMutex.Lock() + // Check cache size and clean if needed + if len(r.extCache) >= r.maxCacheSize*2 { + r.clearExtCache() + r.stats.CacheEvictions++ + } + r.extCache[filename] = ext + r.cacheMutex.Unlock() + + return ext +} + +// getFileTypeResult gets cached file type detection result or computes it. +func (r *FileTypeRegistry) getFileTypeResult(filename string) FileTypeResult { + ext := r.getNormalizedExtension(filename) + + // Update statistics + r.updateStats(func() { + r.stats.TotalLookups++ + }) + + // Try cache first (read lock) + r.cacheMutex.RLock() + if result, exists := r.resultCache[ext]; exists { + r.cacheMutex.RUnlock() + r.updateStats(func() { + r.stats.CacheHits++ + }) + return result + } + r.cacheMutex.RUnlock() + + // Cache miss + r.updateStats(func() { + r.stats.CacheMisses++ + }) + + // Compute result + result := FileTypeResult{ + Extension: ext, + IsImage: r.imageExts[ext], + IsBinary: r.binaryExts[ext], + Language: r.languageMap[ext], + } + + // Handle special cases for binary detection (like .DS_Store) + if !result.IsBinary && isSpecialFile(filename, r.binaryExts) { + result.IsBinary = true + } + + // Cache the result (write lock) + r.cacheMutex.Lock() + if len(r.resultCache) >= r.maxCacheSize { + r.clearResultCache() + r.stats.CacheEvictions++ + } + r.resultCache[ext] = result + r.cacheMutex.Unlock() + + return result +} + +// clearExtCache clears half of the extension cache (LRU-like behavior). +func (r *FileTypeRegistry) clearExtCache() { + r.clearCache(&r.extCache, r.maxCacheSize) +} + +// clearResultCache clears half of the result cache. +func (r *FileTypeRegistry) clearResultCache() { + newCache := make(map[string]FileTypeResult, r.maxCacheSize) + count := 0 + for k, v := range r.resultCache { + if count >= r.maxCacheSize/2 { + break + } + newCache[k] = v + count++ + } + r.resultCache = newCache +} + +// clearCache is a generic cache clearing function. +func (r *FileTypeRegistry) clearCache(cache *map[string]string, maxSize int) { + newCache := make(map[string]string, maxSize) + count := 0 + for k, v := range *cache { + if count >= maxSize/2 { + break + } + newCache[k] = v + count++ + } + *cache = newCache +} + +// invalidateCache clears both caches when the registry is modified. +func (r *FileTypeRegistry) invalidateCache() { + r.cacheMutex.Lock() + defer r.cacheMutex.Unlock() + + r.extCache = make(map[string]string, r.maxCacheSize) + r.resultCache = make(map[string]FileTypeResult, r.maxCacheSize) + r.stats.CacheEvictions++ +} + +// updateStats safely updates statistics. +func (r *FileTypeRegistry) updateStats(fn func()) { + r.cacheMutex.Lock() + fn() + r.cacheMutex.Unlock() +} diff --git a/fileproc/collector.go b/fileproc/collector.go index 72b4d2d..6091c98 100644 --- a/fileproc/collector.go +++ b/fileproc/collector.go @@ -4,6 +4,6 @@ package fileproc // CollectFiles scans the given root directory using the default walker (ProdWalker) // and returns a slice of file paths. func CollectFiles(root string) ([]string, error) { - var w Walker = ProdWalker{} + w := NewProdWalker() return w.Walk(root) } diff --git a/fileproc/collector_test.go b/fileproc/collector_test.go index 2437403..55740c1 100644 --- a/fileproc/collector_test.go +++ b/fileproc/collector_test.go @@ -4,7 +4,7 @@ import ( "os" "testing" - fileproc "github.com/ivuorinen/gibidify/fileproc" + "github.com/ivuorinen/gibidify/fileproc" ) func TestCollectFilesWithFakeWalker(t *testing.T) { diff --git a/fileproc/config.go b/fileproc/config.go new file mode 100644 index 0000000..24d59e0 --- /dev/null +++ b/fileproc/config.go @@ -0,0 +1,40 @@ +package fileproc + +import "strings" + +// ApplyCustomExtensions applies custom extensions from configuration. +func (r *FileTypeRegistry) ApplyCustomExtensions(customImages, customBinary []string, customLanguages map[string]string) { + // Add custom image extensions + r.addExtensions(customImages, r.AddImageExtension) + + // Add custom binary extensions + r.addExtensions(customBinary, r.AddBinaryExtension) + + // Add custom language mappings + for ext, lang := range customLanguages { + if ext != "" && lang != "" { + r.AddLanguageMapping(strings.ToLower(ext), lang) + } + } +} + +// addExtensions is a helper to add multiple extensions. +func (r *FileTypeRegistry) addExtensions(extensions []string, adder func(string)) { + for _, ext := range extensions { + if ext != "" { + adder(strings.ToLower(ext)) + } + } +} + +// ConfigureFromSettings applies configuration settings to the registry. +// This function is called from main.go after config is loaded to avoid circular imports. +func ConfigureFromSettings( + customImages, customBinary []string, + customLanguages map[string]string, + disabledImages, disabledBinary, disabledLanguages []string, +) { + registry := GetDefaultRegistry() + registry.ApplyCustomExtensions(customImages, customBinary, customLanguages) + registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages) +} diff --git a/fileproc/detection.go b/fileproc/detection.go new file mode 100644 index 0000000..f4e2929 --- /dev/null +++ b/fileproc/detection.go @@ -0,0 +1,99 @@ +package fileproc + +import "strings" + +// Package-level detection functions + +// IsImage checks if the file extension indicates an image file. +func IsImage(filename string) bool { + return getRegistry().IsImage(filename) +} + +// IsBinary checks if the file extension indicates a binary file. +func IsBinary(filename string) bool { + return getRegistry().IsBinary(filename) +} + +// GetLanguage returns the language identifier for the given filename based on its extension. +func GetLanguage(filename string) string { + return getRegistry().GetLanguage(filename) +} + +// Registry methods for detection + +// IsImage checks if the file extension indicates an image file. +func (r *FileTypeRegistry) IsImage(filename string) bool { + result := r.getFileTypeResult(filename) + return result.IsImage +} + +// IsBinary checks if the file extension indicates a binary file. +func (r *FileTypeRegistry) IsBinary(filename string) bool { + result := r.getFileTypeResult(filename) + return result.IsBinary +} + +// GetLanguage returns the language identifier for the given filename based on its extension. +func (r *FileTypeRegistry) GetLanguage(filename string) string { + if len(filename) < minExtensionLength { + return "" + } + result := r.getFileTypeResult(filename) + return result.Language +} + +// Extension management methods + +// AddImageExtension adds a new image extension to the registry. +func (r *FileTypeRegistry) AddImageExtension(ext string) { + r.addExtension(ext, r.imageExts) +} + +// AddBinaryExtension adds a new binary extension to the registry. +func (r *FileTypeRegistry) AddBinaryExtension(ext string) { + r.addExtension(ext, r.binaryExts) +} + +// AddLanguageMapping adds a new language mapping to the registry. +func (r *FileTypeRegistry) AddLanguageMapping(ext, language string) { + r.languageMap[strings.ToLower(ext)] = language + r.invalidateCache() +} + +// addExtension is a helper to add extensions to a map. +func (r *FileTypeRegistry) addExtension(ext string, target map[string]bool) { + target[strings.ToLower(ext)] = true + r.invalidateCache() +} + +// removeExtension is a helper to remove extensions from a map. +func (r *FileTypeRegistry) removeExtension(ext string, target map[string]bool) { + delete(target, strings.ToLower(ext)) +} + +// DisableExtensions removes specified extensions from the registry. +func (r *FileTypeRegistry) DisableExtensions(disabledImages, disabledBinary, disabledLanguages []string) { + // Disable image extensions + for _, ext := range disabledImages { + if ext != "" { + r.removeExtension(ext, r.imageExts) + } + } + + // Disable binary extensions + for _, ext := range disabledBinary { + if ext != "" { + r.removeExtension(ext, r.binaryExts) + } + } + + // Disable language extensions + for _, ext := range disabledLanguages { + if ext != "" { + delete(r.languageMap, strings.ToLower(ext)) + } + } + + // Invalidate cache after all modifications + r.invalidateCache() +} diff --git a/fileproc/extensions.go b/fileproc/extensions.go new file mode 100644 index 0000000..602f107 --- /dev/null +++ b/fileproc/extensions.go @@ -0,0 +1,161 @@ +package fileproc + +// getImageExtensions returns the default image file extensions. +func getImageExtensions() map[string]bool { + return map[string]bool{ + ".png": true, + ".jpg": true, + ".jpeg": true, + ".gif": true, + ".bmp": true, + ".tiff": true, + ".tif": true, + ".svg": true, + ".webp": true, + ".ico": true, + } +} + +// getBinaryExtensions returns the default binary file extensions. +func getBinaryExtensions() map[string]bool { + return map[string]bool{ + // Executables and libraries + ".exe": true, + ".dll": true, + ".so": true, + ".dylib": true, + ".bin": true, + ".o": true, + ".a": true, + ".lib": true, + + // Compiled bytecode + ".jar": true, + ".class": true, + ".pyc": true, + ".pyo": true, + + // Data files + ".dat": true, + ".db": true, + ".sqlite": true, + ".ds_store": true, + + // Documents + ".pdf": true, + + // Archives + ".zip": true, + ".tar": true, + ".gz": true, + ".bz2": true, + ".xz": true, + ".7z": true, + ".rar": true, + + // Fonts + ".ttf": true, + ".otf": true, + ".woff": true, + ".woff2": true, + + // Media files + ".mp3": true, + ".mp4": true, + ".avi": true, + ".mov": true, + ".wmv": true, + ".flv": true, + ".webm": true, + ".ogg": true, + ".wav": true, + ".flac": true, + } +} + +// getLanguageMap returns the default language mappings. +func getLanguageMap() map[string]string { + return map[string]string{ + // Systems programming + ".go": "go", + ".c": "c", + ".cpp": "cpp", + ".h": "c", + ".hpp": "cpp", + ".rs": "rust", + + // Scripting languages + ".py": "python", + ".rb": "ruby", + ".pl": "perl", + ".lua": "lua", + ".php": "php", + + // Web technologies + ".js": "javascript", + ".ts": "typescript", + ".jsx": "javascript", + ".tsx": "typescript", + ".html": "html", + ".htm": "html", + ".css": "css", + ".scss": "scss", + ".sass": "sass", + ".less": "less", + ".vue": "vue", + + // JVM languages + ".java": "java", + ".scala": "scala", + ".kt": "kotlin", + ".clj": "clojure", + + // .NET languages + ".cs": "csharp", + ".vb": "vbnet", + ".fs": "fsharp", + + // Apple platforms + ".swift": "swift", + ".m": "objc", + ".mm": "objcpp", + + // Shell scripts + ".sh": "bash", + ".bash": "bash", + ".zsh": "zsh", + ".fish": "fish", + ".ps1": "powershell", + ".bat": "batch", + ".cmd": "batch", + + // Data formats + ".json": "json", + ".yaml": "yaml", + ".yml": "yaml", + ".toml": "toml", + ".xml": "xml", + ".sql": "sql", + + // Documentation + ".md": "markdown", + ".rst": "rst", + ".tex": "latex", + + // Functional languages + ".hs": "haskell", + ".ml": "ocaml", + ".mli": "ocaml", + ".elm": "elm", + ".ex": "elixir", + ".exs": "elixir", + ".erl": "erlang", + ".hrl": "erlang", + + // Other languages + ".r": "r", + ".dart": "dart", + ".nim": "nim", + ".nims": "nim", + } +} diff --git a/fileproc/fake_walker.go b/fileproc/fake_walker.go index fc156fd..f809717 100644 --- a/fileproc/fake_walker.go +++ b/fileproc/fake_walker.go @@ -3,8 +3,8 @@ package fileproc // FakeWalker implements Walker for testing purposes. type FakeWalker struct { - Files []string Err error + Files []string } // Walk returns predetermined file paths or an error, depending on FakeWalker's configuration. diff --git a/fileproc/file_filters.go b/fileproc/file_filters.go new file mode 100644 index 0000000..995d98f --- /dev/null +++ b/fileproc/file_filters.go @@ -0,0 +1,55 @@ +package fileproc + +import ( + "os" + + "github.com/ivuorinen/gibidify/config" +) + +// FileFilter defines filtering criteria for files and directories. +type FileFilter struct { + ignoredDirs []string + sizeLimit int64 +} + +// NewFileFilter creates a new file filter with current configuration. +func NewFileFilter() *FileFilter { + return &FileFilter{ + ignoredDirs: config.GetIgnoredDirectories(), + sizeLimit: config.GetFileSizeLimit(), + } +} + +// shouldSkipEntry determines if an entry should be skipped based on ignore rules and filters. +func (f *FileFilter) shouldSkipEntry(entry os.DirEntry, fullPath string, rules []ignoreRule) bool { + if entry.IsDir() { + return f.shouldSkipDirectory(entry) + } + + if f.shouldSkipFile(entry, fullPath) { + return true + } + + return matchesIgnoreRules(fullPath, rules) +} + +// shouldSkipDirectory checks if a directory should be skipped based on the ignored directories list. +func (f *FileFilter) shouldSkipDirectory(entry os.DirEntry) bool { + for _, d := range f.ignoredDirs { + if entry.Name() == d { + return true + } + } + return false +} + +// shouldSkipFile checks if a file should be skipped based on size limit and file type. +func (f *FileFilter) shouldSkipFile(entry os.DirEntry, fullPath string) bool { + // Check if file exceeds the configured size limit. + if info, err := entry.Info(); err == nil && info.Size() > f.sizeLimit { + return true + } + + // Apply the default filter to ignore binary and image files. + return IsBinary(fullPath) || IsImage(fullPath) +} diff --git a/fileproc/filetypes_test.go b/fileproc/filetypes_test.go new file mode 100644 index 0000000..3053068 --- /dev/null +++ b/fileproc/filetypes_test.go @@ -0,0 +1,827 @@ +package fileproc + +import ( + "fmt" + "sync" + "testing" +) + +// TestFileTypeRegistry_ModificationMethods tests the modification methods of FileTypeRegistry. +func TestFileTypeRegistry_ModificationMethods(t *testing.T) { + // Create a new registry instance for testing + registry := &FileTypeRegistry{ + imageExts: make(map[string]bool), + binaryExts: make(map[string]bool), + languageMap: make(map[string]string), + } + + // Test AddImageExtension + t.Run("AddImageExtension", func(t *testing.T) { + // Add a new image extension + registry.AddImageExtension(".webp") + if !registry.IsImage("test.webp") { + t.Errorf("Expected .webp to be recognized as image after adding") + } + + // Test case insensitive addition + registry.AddImageExtension(".AVIF") + if !registry.IsImage("test.avif") { + t.Errorf("Expected .avif to be recognized as image after adding .AVIF") + } + if !registry.IsImage("test.AVIF") { + t.Errorf("Expected .AVIF to be recognized as image") + } + + // Test with dot prefix + registry.AddImageExtension("heic") + if registry.IsImage("test.heic") { + t.Errorf("Expected extension without dot to not work") + } + + // Test with proper dot prefix + registry.AddImageExtension(".heic") + if !registry.IsImage("test.heic") { + t.Errorf("Expected .heic to be recognized as image") + } + }) + + // Test AddBinaryExtension + t.Run("AddBinaryExtension", func(t *testing.T) { + // Add a new binary extension + registry.AddBinaryExtension(".custom") + if !registry.IsBinary("test.custom") { + t.Errorf("Expected .custom to be recognized as binary after adding") + } + + // Test case insensitive addition + registry.AddBinaryExtension(".NEWBIN") + if !registry.IsBinary("test.newbin") { + t.Errorf("Expected .newbin to be recognized as binary after adding .NEWBIN") + } + if !registry.IsBinary("test.NEWBIN") { + t.Errorf("Expected .NEWBIN to be recognized as binary") + } + + // Test overwriting existing extension + registry.AddBinaryExtension(".custom") + if !registry.IsBinary("test.custom") { + t.Errorf("Expected .custom to still be recognized as binary after re-adding") + } + }) + + // Test AddLanguageMapping + t.Run("AddLanguageMapping", func(t *testing.T) { + // Add a new language mapping + registry.AddLanguageMapping(".zig", "zig") + if registry.GetLanguage("test.zig") != "zig" { + t.Errorf("Expected .zig to map to 'zig', got '%s'", registry.GetLanguage("test.zig")) + } + + // Test case insensitive addition + registry.AddLanguageMapping(".V", "vlang") + if registry.GetLanguage("test.v") != "vlang" { + t.Errorf("Expected .v to map to 'vlang' after adding .V, got '%s'", registry.GetLanguage("test.v")) + } + if registry.GetLanguage("test.V") != "vlang" { + t.Errorf("Expected .V to map to 'vlang', got '%s'", registry.GetLanguage("test.V")) + } + + // Test overwriting existing mapping + registry.AddLanguageMapping(".zig", "ziglang") + if registry.GetLanguage("test.zig") != "ziglang" { + t.Errorf("Expected .zig to map to 'ziglang' after update, got '%s'", registry.GetLanguage("test.zig")) + } + + // Test empty language + registry.AddLanguageMapping(".empty", "") + if registry.GetLanguage("test.empty") != "" { + t.Errorf("Expected .empty to map to empty string, got '%s'", registry.GetLanguage("test.empty")) + } + }) +} + +// TestFileTypeRegistry_LanguageDetection tests the language detection functionality. +func TestFileTypeRegistry_LanguageDetection(t *testing.T) { + registry := GetDefaultRegistry() + + tests := []struct { + filename string + expected string + }{ + // Programming languages + {"main.go", "go"}, + {"script.py", "python"}, + {"app.js", "javascript"}, + {"component.tsx", "typescript"}, + {"service.ts", "typescript"}, + {"App.java", "java"}, + {"program.c", "c"}, + {"program.cpp", "cpp"}, + {"header.h", "c"}, + {"header.hpp", "cpp"}, + {"main.rs", "rust"}, + {"script.rb", "ruby"}, + {"index.php", "php"}, + {"app.swift", "swift"}, + {"MainActivity.kt", "kotlin"}, + {"Main.scala", "scala"}, + {"analysis.r", "r"}, + {"ViewController.m", "objc"}, + {"ViewController.mm", "objcpp"}, + {"Program.cs", "csharp"}, + {"Module.vb", "vbnet"}, + {"program.fs", "fsharp"}, + {"script.lua", "lua"}, + {"script.pl", "perl"}, + + // Shell scripts + {"script.sh", "bash"}, + {"script.bash", "bash"}, + {"script.zsh", "zsh"}, + {"script.fish", "fish"}, + {"script.ps1", "powershell"}, + {"script.bat", "batch"}, + {"script.cmd", "batch"}, + + // Data and markup + {"query.sql", "sql"}, + {"index.html", "html"}, + {"page.htm", "html"}, + {"data.xml", "xml"}, + {"style.css", "css"}, + {"style.scss", "scss"}, + {"style.sass", "sass"}, + {"style.less", "less"}, + {"data.json", "json"}, + {"config.yaml", "yaml"}, + {"config.yml", "yaml"}, + {"config.toml", "toml"}, + {"README.md", "markdown"}, + {"doc.rst", "rst"}, + {"paper.tex", "latex"}, + + // Modern languages + {"main.dart", "dart"}, + {"Main.elm", "elm"}, + {"core.clj", "clojure"}, + {"server.ex", "elixir"}, + {"test.exs", "elixir"}, + {"server.erl", "erlang"}, + {"header.hrl", "erlang"}, + {"main.hs", "haskell"}, + {"module.ml", "ocaml"}, + {"interface.mli", "ocaml"}, + {"main.nim", "nim"}, + {"config.nims", "nim"}, + + // Web frameworks + {"Component.vue", "vue"}, + {"Component.jsx", "javascript"}, + + // Case sensitivity tests + {"MAIN.GO", "go"}, + {"Script.PY", "python"}, + {"APP.JS", "javascript"}, + + // Edge cases + {"", ""}, // Empty filename + {"a", ""}, // Too short (less than minExtensionLength) + {"noext", ""}, // No extension + {".hidden", ""}, // Hidden file with no name + {"file.", ""}, // Extension is just a dot + {"file.unknown", ""}, // Unknown extension + {"file.123", ""}, // Numeric extension + {"a.b", ""}, // Very short filename and extension + } + + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + result := registry.GetLanguage(tt.filename) + if result != tt.expected { + t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected) + } + }) + } +} + +// TestFileTypeRegistry_ImageDetection tests the image detection functionality. +func TestFileTypeRegistry_ImageDetection(t *testing.T) { + registry := GetDefaultRegistry() + + tests := []struct { + filename string + expected bool + }{ + // Common image formats + {"photo.png", true}, + {"image.jpg", true}, + {"picture.jpeg", true}, + {"animation.gif", true}, + {"bitmap.bmp", true}, + {"image.tiff", true}, + {"scan.tif", true}, + {"vector.svg", true}, + {"modern.webp", true}, + {"favicon.ico", true}, + + // Case sensitivity tests + {"PHOTO.PNG", true}, + {"IMAGE.JPG", true}, + {"PICTURE.JPEG", true}, + + // Non-image files + {"document.txt", false}, + {"script.js", false}, + {"data.json", false}, + {"archive.zip", false}, + {"executable.exe", false}, + + // Edge cases + {"", false}, // Empty filename + {"image", false}, // No extension + {".png", true}, // Just extension + {"file.png.bak", false}, // Multiple extensions + {"image.unknown", false}, // Unknown extension + } + + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + result := registry.IsImage(tt.filename) + if result != tt.expected { + t.Errorf("IsImage(%q) = %t, expected %t", tt.filename, result, tt.expected) + } + }) + } +} + +// TestFileTypeRegistry_BinaryDetection tests the binary detection functionality. +func TestFileTypeRegistry_BinaryDetection(t *testing.T) { + registry := GetDefaultRegistry() + + tests := []struct { + filename string + expected bool + }{ + // Executable files + {"program.exe", true}, + {"library.dll", true}, + {"libfoo.so", true}, + {"framework.dylib", true}, + {"data.bin", true}, + + // Object and library files + {"object.o", true}, + {"archive.a", true}, + {"library.lib", true}, + {"application.jar", true}, + {"bytecode.class", true}, + {"compiled.pyc", true}, + {"optimized.pyo", true}, + + // System files + {".DS_Store", true}, + + // Document files (treated as binary) + {"document.pdf", true}, + + // Archive files + {"archive.zip", true}, + {"backup.tar", true}, + {"compressed.gz", true}, + {"data.bz2", true}, + {"package.xz", true}, + {"archive.7z", true}, + {"backup.rar", true}, + + // Font files + {"font.ttf", true}, + {"font.otf", true}, + {"font.woff", true}, + {"font.woff2", true}, + + // Media files + {"song.mp3", true}, + {"video.mp4", true}, + {"movie.avi", true}, + {"clip.mov", true}, + {"video.wmv", true}, + {"animation.flv", true}, + {"modern.webm", true}, + {"audio.ogg", true}, + {"sound.wav", true}, + {"music.flac", true}, + + // Database files + {"data.dat", true}, + {"database.db", true}, + {"app.sqlite", true}, + + // Case sensitivity tests + {"PROGRAM.EXE", true}, + {"LIBRARY.DLL", true}, + + // Non-binary files + {"document.txt", false}, + {"script.js", false}, + {"data.json", false}, + {"style.css", false}, + {"page.html", false}, + + // Edge cases + {"", false}, // Empty filename + {"binary", false}, // No extension + {".exe", true}, // Just extension + {"file.exe.bak", false}, // Multiple extensions + {"file.unknown", false}, // Unknown extension + } + + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + result := registry.IsBinary(tt.filename) + if result != tt.expected { + t.Errorf("IsBinary(%q) = %t, expected %t", tt.filename, result, tt.expected) + } + }) + } +} + +// TestFileTypeRegistry_DefaultRegistryConsistency tests that the default registry is consistent. +func TestFileTypeRegistry_DefaultRegistryConsistency(t *testing.T) { + // Get registry multiple times and ensure it's the same instance + registry1 := GetDefaultRegistry() + registry2 := GetDefaultRegistry() + registry3 := getRegistry() + + if registry1 != registry2 { + t.Error("GetDefaultRegistry() should return the same instance") + } + if registry1 != registry3 { + t.Error("getRegistry() should return the same instance as GetDefaultRegistry()") + } + + // Test that global functions use the same registry + filename := "test.go" + if IsImage(filename) != registry1.IsImage(filename) { + t.Error("IsImage() global function should match registry method") + } + if IsBinary(filename) != registry1.IsBinary(filename) { + t.Error("IsBinary() global function should match registry method") + } + if GetLanguage(filename) != registry1.GetLanguage(filename) { + t.Error("GetLanguage() global function should match registry method") + } +} + +// TestFileTypeRegistry_ThreadSafety tests the thread safety of the FileTypeRegistry. +func TestFileTypeRegistry_ThreadSafety(t *testing.T) { + const numGoroutines = 100 + const numOperationsPerGoroutine = 100 + + var wg sync.WaitGroup + + // Test concurrent read operations + t.Run("ConcurrentReads", func(t *testing.T) { + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + registry := GetDefaultRegistry() + + for j := 0; j < numOperationsPerGoroutine; j++ { + // Test various file detection operations + _ = registry.IsImage("test.png") + _ = registry.IsBinary("test.exe") + _ = registry.GetLanguage("test.go") + + // Test global functions too + _ = IsImage("image.jpg") + _ = IsBinary("binary.dll") + _ = GetLanguage("script.py") + } + }(i) + } + wg.Wait() + }) + + // Test concurrent registry access (singleton creation) + t.Run("ConcurrentRegistryAccess", func(t *testing.T) { + // Reset the registry to test concurrent initialization + // Note: This is not safe in a real application, but needed for testing + registryOnce = sync.Once{} + registry = nil + + registries := make([]*FileTypeRegistry, numGoroutines) + + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + registries[id] = GetDefaultRegistry() + }(i) + } + wg.Wait() + + // Verify all goroutines got the same registry instance + firstRegistry := registries[0] + for i := 1; i < numGoroutines; i++ { + if registries[i] != firstRegistry { + t.Errorf("Registry %d is different from registry 0", i) + } + } + }) + + // Test concurrent modifications on separate registry instances + t.Run("ConcurrentModifications", func(t *testing.T) { + // Create separate registry instances for each goroutine to test modification thread safety + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + + // Create a new registry instance for this goroutine + registry := &FileTypeRegistry{ + imageExts: make(map[string]bool), + binaryExts: make(map[string]bool), + languageMap: make(map[string]string), + } + + for j := 0; j < numOperationsPerGoroutine; j++ { + // Add unique extensions for this goroutine + extSuffix := fmt.Sprintf("_%d_%d", id, j) + + registry.AddImageExtension(".img" + extSuffix) + registry.AddBinaryExtension(".bin" + extSuffix) + registry.AddLanguageMapping(".lang"+extSuffix, "lang"+extSuffix) + + // Verify the additions worked + if !registry.IsImage("test.img" + extSuffix) { + t.Errorf("Failed to add image extension .img%s", extSuffix) + } + if !registry.IsBinary("test.bin" + extSuffix) { + t.Errorf("Failed to add binary extension .bin%s", extSuffix) + } + if registry.GetLanguage("test.lang"+extSuffix) != "lang"+extSuffix { + t.Errorf("Failed to add language mapping .lang%s", extSuffix) + } + } + }(i) + } + wg.Wait() + }) +} + +// TestFileTypeRegistry_EdgeCases tests edge cases and boundary conditions. +func TestFileTypeRegistry_EdgeCases(t *testing.T) { + registry := GetDefaultRegistry() + + // Test various edge cases for filename handling + edgeCases := []struct { + name string + filename string + desc string + }{ + {"empty", "", "empty filename"}, + {"single_char", "a", "single character filename"}, + {"just_dot", ".", "just a dot"}, + {"double_dot", "..", "double dot"}, + {"hidden_file", ".hidden", "hidden file"}, + {"hidden_with_ext", ".hidden.txt", "hidden file with extension"}, + {"multiple_dots", "file.tar.gz", "multiple extensions"}, + {"trailing_dot", "file.", "trailing dot"}, + {"unicode", "файл.txt", "unicode filename"}, + {"spaces", "my file.txt", "filename with spaces"}, + {"special_chars", "file@#$.txt", "filename with special characters"}, + {"very_long", "very_long_filename_with_many_characters_in_it.extension", "very long filename"}, + {"no_basename", ".gitignore", "dotfile with no basename"}, + {"case_mixed", "FiLe.ExT", "mixed case"}, + } + + for _, tc := range edgeCases { + t.Run(tc.name, func(t *testing.T) { + // These should not panic + _ = registry.IsImage(tc.filename) + _ = registry.IsBinary(tc.filename) + _ = registry.GetLanguage(tc.filename) + + // Global functions should also not panic + _ = IsImage(tc.filename) + _ = IsBinary(tc.filename) + _ = GetLanguage(tc.filename) + }) + } +} + +// TestFileTypeRegistry_MinimumExtensionLength tests the minimum extension length requirement. +func TestFileTypeRegistry_MinimumExtensionLength(t *testing.T) { + registry := GetDefaultRegistry() + + tests := []struct { + filename string + expected string + }{ + {"", ""}, // Empty filename + {"a", ""}, // Single character (less than minExtensionLength) + {"ab", ""}, // Two characters, no extension + {"a.b", ""}, // Extension too short, but filename too short anyway + {"ab.c", "c"}, // Valid: filename >= minExtensionLength and .c is valid extension + {"a.go", "go"}, // Valid extension + {"ab.py", "python"}, // Valid extension + {"a.unknown", ""}, // Valid length but unknown extension + } + + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + result := registry.GetLanguage(tt.filename) + if result != tt.expected { + t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected) + } + }) + } +} + +// BenchmarkFileTypeRegistry tests performance of the registry operations. +func BenchmarkFileTypeRegistry_IsImage(b *testing.B) { + registry := GetDefaultRegistry() + filename := "test.png" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = registry.IsImage(filename) + } +} + +func BenchmarkFileTypeRegistry_IsBinary(b *testing.B) { + registry := GetDefaultRegistry() + filename := "test.exe" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = registry.IsBinary(filename) + } +} + +func BenchmarkFileTypeRegistry_GetLanguage(b *testing.B) { + registry := GetDefaultRegistry() + filename := "test.go" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = registry.GetLanguage(filename) + } +} + +func BenchmarkFileTypeRegistry_GlobalFunctions(b *testing.B) { + filename := "test.go" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = IsImage(filename) + _ = IsBinary(filename) + _ = GetLanguage(filename) + } +} + +func BenchmarkFileTypeRegistry_ConcurrentAccess(b *testing.B) { + filename := "test.go" + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + _ = IsImage(filename) + _ = IsBinary(filename) + _ = GetLanguage(filename) + } + }) +} + +// TestFileTypeRegistry_Configuration tests the configuration functionality. +func TestFileTypeRegistry_Configuration(t *testing.T) { + // Create a new registry instance for testing + registry := &FileTypeRegistry{ + imageExts: make(map[string]bool), + binaryExts: make(map[string]bool), + languageMap: make(map[string]string), + } + + // Test ApplyCustomExtensions + t.Run("ApplyCustomExtensions", func(t *testing.T) { + customImages := []string{".webp", ".avif", ".heic"} + customBinary := []string{".custom", ".mybin"} + customLanguages := map[string]string{ + ".zig": "zig", + ".odin": "odin", + ".v": "vlang", + } + + registry.ApplyCustomExtensions(customImages, customBinary, customLanguages) + + // Test custom image extensions + for _, ext := range customImages { + if !registry.IsImage("test" + ext) { + t.Errorf("Expected %s to be recognized as image", ext) + } + } + + // Test custom binary extensions + for _, ext := range customBinary { + if !registry.IsBinary("test" + ext) { + t.Errorf("Expected %s to be recognized as binary", ext) + } + } + + // Test custom language mappings + for ext, expectedLang := range customLanguages { + if lang := registry.GetLanguage("test" + ext); lang != expectedLang { + t.Errorf("Expected %s to map to %s, got %s", ext, expectedLang, lang) + } + } + }) + + // Test DisableExtensions + t.Run("DisableExtensions", func(t *testing.T) { + // Add some extensions first + registry.AddImageExtension(".png") + registry.AddImageExtension(".jpg") + registry.AddBinaryExtension(".exe") + registry.AddBinaryExtension(".dll") + registry.AddLanguageMapping(".go", "go") + registry.AddLanguageMapping(".py", "python") + + // Verify they work + if !registry.IsImage("test.png") { + t.Error("Expected .png to be image before disabling") + } + if !registry.IsBinary("test.exe") { + t.Error("Expected .exe to be binary before disabling") + } + if registry.GetLanguage("test.go") != "go" { + t.Error("Expected .go to map to go before disabling") + } + + // Disable some extensions + disabledImages := []string{".png"} + disabledBinary := []string{".exe"} + disabledLanguages := []string{".go"} + + registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages) + + // Test that disabled extensions no longer work + if registry.IsImage("test.png") { + t.Error("Expected .png to not be image after disabling") + } + if registry.IsBinary("test.exe") { + t.Error("Expected .exe to not be binary after disabling") + } + if registry.GetLanguage("test.go") != "" { + t.Error("Expected .go to not map to language after disabling") + } + + // Test that non-disabled extensions still work + if !registry.IsImage("test.jpg") { + t.Error("Expected .jpg to still be image after disabling .png") + } + if !registry.IsBinary("test.dll") { + t.Error("Expected .dll to still be binary after disabling .exe") + } + if registry.GetLanguage("test.py") != "python" { + t.Error("Expected .py to still map to python after disabling .go") + } + }) + + // Test empty values handling + t.Run("EmptyValuesHandling", func(t *testing.T) { + registry := &FileTypeRegistry{ + imageExts: make(map[string]bool), + binaryExts: make(map[string]bool), + languageMap: make(map[string]string), + } + + // Test with empty values + customImages := []string{"", ".valid", ""} + customBinary := []string{"", ".valid"} + customLanguages := map[string]string{ + "": "invalid", + ".valid": "", + ".good": "good", + } + + registry.ApplyCustomExtensions(customImages, customBinary, customLanguages) + + // Only valid entries should be added + if registry.IsImage("test.") { + t.Error("Expected empty extension to not be added as image") + } + if !registry.IsImage("test.valid") { + t.Error("Expected .valid to be added as image") + } + if registry.IsBinary("test.") { + t.Error("Expected empty extension to not be added as binary") + } + if !registry.IsBinary("test.valid") { + t.Error("Expected .valid to be added as binary") + } + if registry.GetLanguage("test.") != "" { + t.Error("Expected empty extension to not be added as language") + } + if registry.GetLanguage("test.valid") != "" { + t.Error("Expected .valid with empty language to not be added") + } + if registry.GetLanguage("test.good") != "good" { + t.Error("Expected .good to map to good") + } + }) + + // Test case insensitive handling + t.Run("CaseInsensitiveHandling", func(t *testing.T) { + registry := &FileTypeRegistry{ + imageExts: make(map[string]bool), + binaryExts: make(map[string]bool), + languageMap: make(map[string]string), + } + + customImages := []string{".WEBP", ".Avif"} + customBinary := []string{".CUSTOM", ".MyBin"} + customLanguages := map[string]string{ + ".ZIG": "zig", + ".Odin": "odin", + } + + registry.ApplyCustomExtensions(customImages, customBinary, customLanguages) + + // Test that both upper and lower case work + if !registry.IsImage("test.webp") { + t.Error("Expected .webp (lowercase) to work after adding .WEBP") + } + if !registry.IsImage("test.WEBP") { + t.Error("Expected .WEBP (uppercase) to work") + } + if !registry.IsBinary("test.custom") { + t.Error("Expected .custom (lowercase) to work after adding .CUSTOM") + } + if !registry.IsBinary("test.CUSTOM") { + t.Error("Expected .CUSTOM (uppercase) to work") + } + if registry.GetLanguage("test.zig") != "zig" { + t.Error("Expected .zig (lowercase) to work after adding .ZIG") + } + if registry.GetLanguage("test.ZIG") != "zig" { + t.Error("Expected .ZIG (uppercase) to work") + } + }) +} + +// TestConfigureFromSettings tests the global configuration function. +func TestConfigureFromSettings(t *testing.T) { + // Reset registry to ensure clean state + registryOnce = sync.Once{} + registry = nil + + // Test configuration application + customImages := []string{".webp", ".avif"} + customBinary := []string{".custom"} + customLanguages := map[string]string{".zig": "zig"} + disabledImages := []string{".gif"} // Disable default extension + disabledBinary := []string{".exe"} // Disable default extension + disabledLanguages := []string{".rb"} // Disable default extension + + ConfigureFromSettings( + customImages, + customBinary, + customLanguages, + disabledImages, + disabledBinary, + disabledLanguages, + ) + + // Test that custom extensions work + if !IsImage("test.webp") { + t.Error("Expected custom image extension .webp to work") + } + if !IsBinary("test.custom") { + t.Error("Expected custom binary extension .custom to work") + } + if GetLanguage("test.zig") != "zig" { + t.Error("Expected custom language .zig to work") + } + + // Test that disabled extensions don't work + if IsImage("test.gif") { + t.Error("Expected disabled image extension .gif to not work") + } + if IsBinary("test.exe") { + t.Error("Expected disabled binary extension .exe to not work") + } + if GetLanguage("test.rb") != "" { + t.Error("Expected disabled language extension .rb to not work") + } + + // Test that non-disabled defaults still work + if !IsImage("test.png") { + t.Error("Expected non-disabled image extension .png to still work") + } + if !IsBinary("test.dll") { + t.Error("Expected non-disabled binary extension .dll to still work") + } + if GetLanguage("test.go") != "go" { + t.Error("Expected non-disabled language extension .go to still work") + } +} diff --git a/fileproc/formats.go b/fileproc/formats.go new file mode 100644 index 0000000..86795af --- /dev/null +++ b/fileproc/formats.go @@ -0,0 +1,28 @@ +package fileproc + +// FileData represents a single file's path and content. +type FileData struct { + Path string `json:"path" yaml:"path"` + Content string `json:"content" yaml:"content"` + Language string `json:"language" yaml:"language"` +} + +// OutputData represents the full output structure. +type OutputData struct { + Prefix string `json:"prefix,omitempty" yaml:"prefix,omitempty"` + Suffix string `json:"suffix,omitempty" yaml:"suffix,omitempty"` + Files []FileData `json:"files" yaml:"files"` +} + +// FormatWriter defines the interface for format-specific writers. +type FormatWriter interface { + Start(prefix, suffix string) error + WriteFile(req WriteRequest) error + Close() error +} + +// detectLanguage tries to infer the code block language from the file extension. +func detectLanguage(filePath string) string { + registry := GetDefaultRegistry() + return registry.GetLanguage(filePath) +} diff --git a/fileproc/ignore_rules.go b/fileproc/ignore_rules.go new file mode 100644 index 0000000..827a0f0 --- /dev/null +++ b/fileproc/ignore_rules.go @@ -0,0 +1,66 @@ +package fileproc + +import ( + "os" + "path/filepath" + + ignore "github.com/sabhiram/go-gitignore" +) + +// ignoreRule holds an ignore matcher along with the base directory where it was loaded. +type ignoreRule struct { + gi *ignore.GitIgnore + base string +} + +// loadIgnoreRules loads ignore rules from the current directory and combines them with parent rules. +func loadIgnoreRules(currentDir string, parentRules []ignoreRule) []ignoreRule { + // Pre-allocate for parent rules plus possible .gitignore and .ignore + const expectedIgnoreFiles = 2 + rules := make([]ignoreRule, 0, len(parentRules)+expectedIgnoreFiles) + rules = append(rules, parentRules...) + + // Check for .gitignore and .ignore files in the current directory. + for _, fileName := range []string{".gitignore", ".ignore"} { + if rule := tryLoadIgnoreFile(currentDir, fileName); rule != nil { + rules = append(rules, *rule) + } + } + + return rules +} + +// tryLoadIgnoreFile attempts to load an ignore file from the given directory. +func tryLoadIgnoreFile(dir, fileName string) *ignoreRule { + ignorePath := filepath.Join(dir, fileName) + if info, err := os.Stat(ignorePath); err == nil && !info.IsDir() { + if gi, err := ignore.CompileIgnoreFile(ignorePath); err == nil { + return &ignoreRule{ + base: dir, + gi: gi, + } + } + } + return nil +} + +// matchesIgnoreRules checks if a path matches any of the ignore rules. +func matchesIgnoreRules(fullPath string, rules []ignoreRule) bool { + for _, rule := range rules { + if matchesRule(fullPath, rule) { + return true + } + } + return false +} + +// matchesRule checks if a path matches a specific ignore rule. +func matchesRule(fullPath string, rule ignoreRule) bool { + // Compute the path relative to the base where the ignore rule was defined. + rel, err := filepath.Rel(rule.base, fullPath) + if err != nil { + return false + } + // If the rule matches, skip this entry. + return rule.gi.MatchesPath(rel) +} diff --git a/fileproc/json_writer.go b/fileproc/json_writer.go new file mode 100644 index 0000000..57bc6d9 --- /dev/null +++ b/fileproc/json_writer.go @@ -0,0 +1,188 @@ +package fileproc + +import ( + "encoding/json" + "fmt" + "io" + "os" + + "github.com/ivuorinen/gibidify/utils" +) + +// JSONWriter handles JSON format output with streaming support. +type JSONWriter struct { + outFile *os.File + firstFile bool +} + +// NewJSONWriter creates a new JSON writer. +func NewJSONWriter(outFile *os.File) *JSONWriter { + return &JSONWriter{ + outFile: outFile, + firstFile: true, + } +} + +// Start writes the JSON header. +func (w *JSONWriter) Start(prefix, suffix string) error { + // Start JSON structure + if _, err := w.outFile.WriteString(`{"prefix":"`); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON start") + } + + // Write escaped prefix + escapedPrefix := escapeJSONString(prefix) + if _, err := w.outFile.WriteString(escapedPrefix); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON prefix") + } + + if _, err := w.outFile.WriteString(`","suffix":"`); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON middle") + } + + // Write escaped suffix + escapedSuffix := escapeJSONString(suffix) + if _, err := w.outFile.WriteString(escapedSuffix); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON suffix") + } + + if _, err := w.outFile.WriteString(`","files":[`); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON files start") + } + + return nil +} + +// WriteFile writes a file entry in JSON format. +func (w *JSONWriter) WriteFile(req WriteRequest) error { + if !w.firstFile { + if _, err := w.outFile.WriteString(","); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON separator") + } + } + w.firstFile = false + + if req.IsStream { + return w.writeStreaming(req) + } + return w.writeInline(req) +} + +// Close writes the JSON footer. +func (w *JSONWriter) Close() error { + // Close JSON structure + if _, err := w.outFile.WriteString("]}"); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON end") + } + return nil +} + +// writeStreaming writes a large file as JSON in streaming chunks. +func (w *JSONWriter) writeStreaming(req WriteRequest) error { + defer w.closeReader(req.Reader, req.Path) + + language := detectLanguage(req.Path) + + // Write file start + escapedPath := escapeJSONString(req.Path) + if _, err := fmt.Fprintf(w.outFile, `{"path":"%s","language":"%s","content":"`, escapedPath, language); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file start").WithFilePath(req.Path) + } + + // Stream content with JSON escaping + if err := w.streamJSONContent(req.Reader, req.Path); err != nil { + return err + } + + // Write file end + if _, err := w.outFile.WriteString(`"}`); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file end").WithFilePath(req.Path) + } + + return nil +} + +// writeInline writes a small file directly as JSON. +func (w *JSONWriter) writeInline(req WriteRequest) error { + language := detectLanguage(req.Path) + fileData := FileData{ + Path: req.Path, + Content: req.Content, + Language: language, + } + + encoded, err := json.Marshal(fileData) + if err != nil { + return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingEncode, "failed to marshal JSON").WithFilePath(req.Path) + } + + if _, err := w.outFile.Write(encoded); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file").WithFilePath(req.Path) + } + return nil +} + +// streamJSONContent streams content with JSON escaping. +func (w *JSONWriter) streamJSONContent(reader io.Reader, path string) error { + buf := make([]byte, StreamChunkSize) + for { + n, err := reader.Read(buf) + if n > 0 { + escaped := escapeJSONString(string(buf[:n])) + if _, writeErr := w.outFile.WriteString(escaped); writeErr != nil { + return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON chunk").WithFilePath(path) + } + } + if err == io.EOF { + break + } + if err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to read JSON chunk").WithFilePath(path) + } + } + return nil +} + +// closeReader safely closes a reader if it implements io.Closer. +func (w *JSONWriter) closeReader(reader io.Reader, path string) { + if closer, ok := reader.(io.Closer); ok { + if err := closer.Close(); err != nil { + utils.LogError( + "Failed to close file reader", + utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path), + ) + } + } +} + +// escapeJSONString escapes a string for JSON output. +func escapeJSONString(s string) string { + // Use json.Marshal to properly escape the string, then remove the quotes + escaped, _ := json.Marshal(s) + return string(escaped[1 : len(escaped)-1]) // Remove surrounding quotes +} + +// startJSONWriter handles JSON format output with streaming support. +func startJSONWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) { + defer close(done) + + writer := NewJSONWriter(outFile) + + // Start writing + if err := writer.Start(prefix, suffix); err != nil { + utils.LogError("Failed to write JSON start", err) + return + } + + // Process files + for req := range writeCh { + if err := writer.WriteFile(req); err != nil { + utils.LogError("Failed to write JSON file", err) + } + } + + // Close writer + if err := writer.Close(); err != nil { + utils.LogError("Failed to write JSON end", err) + } +} diff --git a/fileproc/markdown_writer.go b/fileproc/markdown_writer.go new file mode 100644 index 0000000..56e5fdf --- /dev/null +++ b/fileproc/markdown_writer.go @@ -0,0 +1,139 @@ +package fileproc + +import ( + "fmt" + "io" + "os" + + "github.com/ivuorinen/gibidify/utils" +) + +// MarkdownWriter handles markdown format output with streaming support. +type MarkdownWriter struct { + outFile *os.File +} + +// NewMarkdownWriter creates a new markdown writer. +func NewMarkdownWriter(outFile *os.File) *MarkdownWriter { + return &MarkdownWriter{outFile: outFile} +} + +// Start writes the markdown header. +func (w *MarkdownWriter) Start(prefix, suffix string) error { + if prefix != "" { + if _, err := fmt.Fprintf(w.outFile, "# %s\n\n", prefix); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write prefix") + } + } + return nil +} + +// WriteFile writes a file entry in markdown format. +func (w *MarkdownWriter) WriteFile(req WriteRequest) error { + if req.IsStream { + return w.writeStreaming(req) + } + return w.writeInline(req) +} + +// Close writes the markdown footer. +func (w *MarkdownWriter) Close(suffix string) error { + if suffix != "" { + if _, err := fmt.Fprintf(w.outFile, "\n# %s\n", suffix); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write suffix") + } + } + return nil +} + +// writeStreaming writes a large file in streaming chunks. +func (w *MarkdownWriter) writeStreaming(req WriteRequest) error { + defer w.closeReader(req.Reader, req.Path) + + language := detectLanguage(req.Path) + + // Write file header + if _, err := fmt.Fprintf(w.outFile, "## File: `%s`\n```%s\n", req.Path, language); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file header").WithFilePath(req.Path) + } + + // Stream file content in chunks + if err := w.streamContent(req.Reader, req.Path); err != nil { + return err + } + + // Write file footer + if _, err := w.outFile.WriteString("\n```\n\n"); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file footer").WithFilePath(req.Path) + } + + return nil +} + +// writeInline writes a small file directly from content. +func (w *MarkdownWriter) writeInline(req WriteRequest) error { + language := detectLanguage(req.Path) + formatted := fmt.Sprintf("## File: `%s`\n```%s\n%s\n```\n\n", req.Path, language, req.Content) + + if _, err := w.outFile.WriteString(formatted); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write inline content").WithFilePath(req.Path) + } + return nil +} + +// streamContent streams file content in chunks. +func (w *MarkdownWriter) streamContent(reader io.Reader, path string) error { + buf := make([]byte, StreamChunkSize) + for { + n, err := reader.Read(buf) + if n > 0 { + if _, writeErr := w.outFile.Write(buf[:n]); writeErr != nil { + return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write chunk").WithFilePath(path) + } + } + if err == io.EOF { + break + } + if err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to read chunk").WithFilePath(path) + } + } + return nil +} + +// closeReader safely closes a reader if it implements io.Closer. +func (w *MarkdownWriter) closeReader(reader io.Reader, path string) { + if closer, ok := reader.(io.Closer); ok { + if err := closer.Close(); err != nil { + utils.LogError( + "Failed to close file reader", + utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path), + ) + } + } +} + +// startMarkdownWriter handles markdown format output with streaming support. +func startMarkdownWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) { + defer close(done) + + writer := NewMarkdownWriter(outFile) + + // Start writing + if err := writer.Start(prefix, suffix); err != nil { + utils.LogError("Failed to write markdown prefix", err) + return + } + + // Process files + for req := range writeCh { + if err := writer.WriteFile(req); err != nil { + utils.LogError("Failed to write markdown file", err) + } + } + + // Close writer + if err := writer.Close(suffix); err != nil { + utils.LogError("Failed to write markdown suffix", err) + } +} diff --git a/fileproc/processor.go b/fileproc/processor.go index 97e0761..335c364 100644 --- a/fileproc/processor.go +++ b/fileproc/processor.go @@ -3,34 +3,157 @@ package fileproc import ( "fmt" + "io" "os" "path/filepath" + "strings" - "github.com/sirupsen/logrus" + "github.com/ivuorinen/gibidify/config" + "github.com/ivuorinen/gibidify/utils" +) + +const ( + // StreamChunkSize is the size of chunks when streaming large files (64KB). + StreamChunkSize = 65536 + // StreamThreshold is the file size above which we use streaming (1MB). + StreamThreshold = 1048576 + // MaxMemoryBuffer is the maximum memory to use for buffering content (10MB). + MaxMemoryBuffer = 10485760 ) // WriteRequest represents the content to be written. type WriteRequest struct { - Path string - Content string + Path string + Content string + IsStream bool + Reader io.Reader +} + +// FileProcessor handles file processing operations. +type FileProcessor struct { + rootPath string + sizeLimit int64 +} + +// NewFileProcessor creates a new file processor. +func NewFileProcessor(rootPath string) *FileProcessor { + return &FileProcessor{ + rootPath: rootPath, + sizeLimit: config.GetFileSizeLimit(), + } } // ProcessFile reads the file at filePath and sends a formatted output to outCh. +// It automatically chooses between loading the entire file or streaming based on file size. func ProcessFile(filePath string, outCh chan<- WriteRequest, rootPath string) { - content, err := os.ReadFile(filePath) + processor := NewFileProcessor(rootPath) + processor.Process(filePath, outCh) +} + +// Process handles file processing with the configured settings. +func (p *FileProcessor) Process(filePath string, outCh chan<- WriteRequest) { + // Validate file + fileInfo, err := p.validateFile(filePath) if err != nil { - logrus.Errorf("Failed to read file %s: %v", filePath, err) + return // Error already logged + } + + // Get relative path + relPath := p.getRelativePath(filePath) + + // Choose processing strategy based on file size + if fileInfo.Size() <= StreamThreshold { + p.processInMemory(filePath, relPath, outCh) + } else { + p.processStreaming(filePath, relPath, outCh) + } +} + +// validateFile checks if the file can be processed. +func (p *FileProcessor) validateFile(filePath string) (os.FileInfo, error) { + fileInfo, err := os.Stat(filePath) + if err != nil { + structErr := utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to stat file").WithFilePath(filePath) + utils.LogErrorf(structErr, "Failed to stat file %s", filePath) + return nil, err + } + + // Check size limit + if fileInfo.Size() > p.sizeLimit { + utils.LogErrorf( + utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeValidationSize, + fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", fileInfo.Size(), p.sizeLimit), + ).WithFilePath(filePath).WithContext("file_size", fileInfo.Size()).WithContext("size_limit", p.sizeLimit), + "Skipping large file %s", filePath, + ) + return nil, fmt.Errorf("file too large") + } + + return fileInfo, nil +} + +// getRelativePath computes the path relative to rootPath. +func (p *FileProcessor) getRelativePath(filePath string) string { + relPath, err := filepath.Rel(p.rootPath, filePath) + if err != nil { + return filePath // Fallback + } + return relPath +} + +// processInMemory loads the entire file into memory (for small files). +func (p *FileProcessor) processInMemory(filePath, relPath string, outCh chan<- WriteRequest) { + content, err := os.ReadFile(filePath) // #nosec G304 - filePath is validated by walker + if err != nil { + structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to read file").WithFilePath(filePath) + utils.LogErrorf(structErr, "Failed to read file %s", filePath) return } - // Compute path relative to rootPath, so /a/b/c/d.c becomes c/d.c - relPath, err := filepath.Rel(rootPath, filePath) - if err != nil { - // Fallback if something unexpected happens - relPath = filePath + outCh <- WriteRequest{ + Path: relPath, + Content: p.formatContent(relPath, string(content)), + IsStream: false, + } +} + +// processStreaming creates a streaming reader for large files. +func (p *FileProcessor) processStreaming(filePath, relPath string, outCh chan<- WriteRequest) { + reader := p.createStreamReader(filePath, relPath) + if reader == nil { + return // Error already logged } - // Format: separator, then relative path, then content - formatted := fmt.Sprintf("\n---\n%s\n%s\n", relPath, string(content)) - outCh <- WriteRequest{Path: relPath, Content: formatted} + outCh <- WriteRequest{ + Path: relPath, + Content: "", // Empty since content is in Reader + IsStream: true, + Reader: reader, + } +} + +// createStreamReader creates a reader that combines header and file content. +func (p *FileProcessor) createStreamReader(filePath, relPath string) io.Reader { + file, err := os.Open(filePath) // #nosec G304 - filePath is validated by walker + if err != nil { + structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to open file for streaming").WithFilePath(filePath) + utils.LogErrorf(structErr, "Failed to open file for streaming %s", filePath) + return nil + } + // Note: file will be closed by the writer + + header := p.formatHeader(relPath) + return io.MultiReader(header, file) +} + +// formatContent formats the file content with header. +func (p *FileProcessor) formatContent(relPath, content string) string { + return fmt.Sprintf("\n---\n%s\n%s\n", relPath, content) +} + +// formatHeader creates a reader for the file header. +func (p *FileProcessor) formatHeader(relPath string) io.Reader { + return strings.NewReader(fmt.Sprintf("\n---\n%s\n", relPath)) } diff --git a/fileproc/processor_test.go b/fileproc/processor_test.go index d1c1077..e825399 100644 --- a/fileproc/processor_test.go +++ b/fileproc/processor_test.go @@ -6,12 +6,15 @@ import ( "sync" "testing" - fileproc "github.com/ivuorinen/gibidify/fileproc" + "github.com/ivuorinen/gibidify/fileproc" + "github.com/ivuorinen/gibidify/testutil" ) func TestProcessFile(t *testing.T) { + // Reset and load default config to ensure proper file size limits + testutil.ResetViperConfig(t, "") // Create a temporary file with known content. - tmpFile, err := os.CreateTemp("", "testfile") + tmpFile, err := os.CreateTemp(t.TempDir(), "testfile") if err != nil { t.Fatal(err) } diff --git a/fileproc/registry.go b/fileproc/registry.go new file mode 100644 index 0000000..1ea6f74 --- /dev/null +++ b/fileproc/registry.go @@ -0,0 +1,107 @@ +// Package fileproc provides file processing utilities. +package fileproc + +import ( + "path/filepath" + "strings" + "sync" +) + +const minExtensionLength = 2 + +var ( + registry *FileTypeRegistry + registryOnce sync.Once +) + +// FileTypeRegistry manages file type detection and classification. +type FileTypeRegistry struct { + imageExts map[string]bool + binaryExts map[string]bool + languageMap map[string]string + + // Cache for frequent lookups to avoid repeated string operations + extCache map[string]string // filename -> normalized extension + resultCache map[string]FileTypeResult // extension -> cached result + cacheMutex sync.RWMutex + maxCacheSize int + + // Performance statistics + stats RegistryStats +} + +// RegistryStats tracks performance metrics for the registry. +type RegistryStats struct { + TotalLookups uint64 + CacheHits uint64 + CacheMisses uint64 + CacheEvictions uint64 +} + +// FileTypeResult represents cached file type detection results. +type FileTypeResult struct { + IsImage bool + IsBinary bool + Language string + Extension string +} + +// initRegistry initializes the default file type registry with common extensions. +func initRegistry() *FileTypeRegistry { + return &FileTypeRegistry{ + imageExts: getImageExtensions(), + binaryExts: getBinaryExtensions(), + languageMap: getLanguageMap(), + extCache: make(map[string]string, 1000), // Cache for extension normalization + resultCache: make(map[string]FileTypeResult, 500), // Cache for type results + maxCacheSize: 500, + } +} + +// getRegistry returns the singleton file type registry, creating it if necessary. +func getRegistry() *FileTypeRegistry { + registryOnce.Do(func() { + registry = initRegistry() + }) + return registry +} + +// GetDefaultRegistry returns the default file type registry. +func GetDefaultRegistry() *FileTypeRegistry { + return getRegistry() +} + +// GetStats returns a copy of the current registry statistics. +func (r *FileTypeRegistry) GetStats() RegistryStats { + r.cacheMutex.RLock() + defer r.cacheMutex.RUnlock() + return r.stats +} + +// GetCacheInfo returns current cache size information. +func (r *FileTypeRegistry) GetCacheInfo() (extCacheSize, resultCacheSize, maxCacheSize int) { + r.cacheMutex.RLock() + defer r.cacheMutex.RUnlock() + return len(r.extCache), len(r.resultCache), r.maxCacheSize +} + +// ResetRegistryForTesting resets the registry to its initial state. +// This function should only be used in tests. +func ResetRegistryForTesting() { + registryOnce = sync.Once{} + registry = nil +} + +// normalizeExtension extracts and normalizes the file extension. +func normalizeExtension(filename string) string { + return strings.ToLower(filepath.Ext(filename)) +} + +// isSpecialFile checks if the filename matches special cases like .DS_Store. +func isSpecialFile(filename string, extensions map[string]bool) bool { + if filepath.Ext(filename) == "" { + basename := strings.ToLower(filepath.Base(filename)) + return extensions[basename] + } + return false +} diff --git a/fileproc/walker.go b/fileproc/walker.go index f4bb776..58f9e64 100644 --- a/fileproc/walker.go +++ b/fileproc/walker.go @@ -4,10 +4,8 @@ package fileproc import ( "os" "path/filepath" - "strings" - "github.com/ivuorinen/gibidify/config" - ignore "github.com/sabhiram/go-gitignore" + "github.com/ivuorinen/gibidify/utils" ) // Walker defines an interface for scanning directories. @@ -18,22 +16,25 @@ type Walker interface { // ProdWalker implements Walker using a custom directory walker that // respects .gitignore and .ignore files, configuration-defined ignore directories, // and ignores binary and image files by default. -type ProdWalker struct{} +type ProdWalker struct { + filter *FileFilter +} -// ignoreRule holds an ignore matcher along with the base directory where it was loaded. -type ignoreRule struct { - base string - gi *ignore.GitIgnore +// NewProdWalker creates a new production walker with current configuration. +func NewProdWalker() *ProdWalker { + return &ProdWalker{ + filter: NewFileFilter(), + } } // Walk scans the given root directory recursively and returns a slice of file paths // that are not ignored based on .gitignore/.ignore files, the configuration, or the default binary/image filter. -func (pw ProdWalker) Walk(root string) ([]string, error) { - absRoot, err := filepath.Abs(root) +func (w *ProdWalker) Walk(root string) ([]string, error) { + absRoot, err := utils.GetAbsolutePath(root) if err != nil { - return nil, err + return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSPathResolution, "failed to resolve root path").WithFilePath(root) } - return walkDir(absRoot, absRoot, []ignoreRule{}) + return w.walkDir(absRoot, []ignoreRule{}) } // walkDir recursively walks the directory tree starting at currentDir. @@ -41,122 +42,34 @@ func (pw ProdWalker) Walk(root string) ([]string, error) { // appends the corresponding rules to the inherited list. Each file/directory is // then checked against the accumulated ignore rules, the configuration's list of ignored directories, // and a default filter that ignores binary and image files. -func walkDir(root string, currentDir string, parentRules []ignoreRule) ([]string, error) { +func (w *ProdWalker) walkDir(currentDir string, parentRules []ignoreRule) ([]string, error) { var results []string entries, err := os.ReadDir(currentDir) if err != nil { - return nil, err + return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to read directory").WithFilePath(currentDir) } - // Start with the parent's ignore rules. - rules := make([]ignoreRule, len(parentRules)) - copy(rules, parentRules) - - // Check for .gitignore and .ignore files in the current directory. - for _, fileName := range []string{".gitignore", ".ignore"} { - ignorePath := filepath.Join(currentDir, fileName) - if info, err := os.Stat(ignorePath); err == nil && !info.IsDir() { - gi, err := ignore.CompileIgnoreFile(ignorePath) - if err == nil { - rules = append(rules, ignoreRule{ - base: currentDir, - gi: gi, - }) - } - } - } - - // Get the list of directories to ignore from configuration. - ignoredDirs := config.GetIgnoredDirectories() - sizeLimit := config.GetFileSizeLimit() // e.g., 5242880 for 5 MB + rules := loadIgnoreRules(currentDir, parentRules) for _, entry := range entries { fullPath := filepath.Join(currentDir, entry.Name()) - // For directories, check if its name is in the config ignore list. - if entry.IsDir() { - for _, d := range ignoredDirs { - if entry.Name() == d { - // Skip this directory entirely. - goto SkipEntry - } - } - } else { - // Check if file exceeds the configured size limit. - info, err := entry.Info() - if err == nil && info.Size() > sizeLimit { - goto SkipEntry - } - - // For files, apply the default filter to ignore binary and image files. - if isBinaryOrImage(fullPath) { - goto SkipEntry - } + if w.filter.shouldSkipEntry(entry, fullPath, rules) { + continue } - // Check accumulated ignore rules. - for _, rule := range rules { - // Compute the path relative to the base where the ignore rule was defined. - rel, err := filepath.Rel(rule.base, fullPath) - if err != nil { - continue - } - // If the rule matches, skip this entry. - if rule.gi.MatchesPath(rel) { - goto SkipEntry - } - } - - // If not ignored, then process the entry. + // Process entry if entry.IsDir() { - subFiles, err := walkDir(root, fullPath, rules) + subFiles, err := w.walkDir(fullPath, rules) if err != nil { - return nil, err + return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingTraversal, "failed to traverse subdirectory").WithFilePath(fullPath) } results = append(results, subFiles...) } else { results = append(results, fullPath) } - SkipEntry: - continue } return results, nil } - -// isBinaryOrImage checks if a file should be considered binary or an image based on its extension. -// The check is case-insensitive. -func isBinaryOrImage(filePath string) bool { - ext := strings.ToLower(filepath.Ext(filePath)) - // Common image file extensions. - imageExtensions := map[string]bool{ - ".png": true, - ".jpg": true, - ".jpeg": true, - ".gif": true, - ".bmp": true, - ".tiff": true, - ".ico": true, - ".svg": true, - ".webp": true, - } - // Common binary file extensions. - binaryExtensions := map[string]bool{ - ".exe": true, - ".dll": true, - ".so": true, - ".bin": true, - ".dat": true, - ".zip": true, - ".tar": true, - ".gz": true, - ".7z": true, - ".rar": true, - ".DS_Store": true, - } - if imageExtensions[ext] || binaryExtensions[ext] { - return true - } - return false -} diff --git a/fileproc/walker_test.go b/fileproc/walker_test.go index f684947..dfee038 100644 --- a/fileproc/walker_test.go +++ b/fileproc/walker_test.go @@ -1,64 +1,42 @@ package fileproc_test import ( - "os" "path/filepath" "testing" - "github.com/ivuorinen/gibidify/config" - fileproc "github.com/ivuorinen/gibidify/fileproc" "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/fileproc" + "github.com/ivuorinen/gibidify/testutil" ) func TestProdWalkerWithIgnore(t *testing.T) { // Create a temporary directory structure. - rootDir, err := os.MkdirTemp("", "walker_test_root") - if err != nil { - t.Fatalf("Failed to create temp root directory: %v", err) - } - defer func() { - if err := os.RemoveAll(rootDir); err != nil { - t.Fatalf("cleanup failed: %v", err) - } - }() + rootDir := t.TempDir() - subDir := filepath.Join(rootDir, "vendor") - if err := os.Mkdir(subDir, 0755); err != nil { - t.Fatalf("Failed to create subDir: %v", err) - } + subDir := testutil.CreateTestDirectory(t, rootDir, "vendor") // Write sample files - filePaths := []string{ - filepath.Join(rootDir, "file1.go"), - filepath.Join(rootDir, "file2.txt"), - filepath.Join(subDir, "file_in_vendor.txt"), // should be ignored - } - for _, fp := range filePaths { - if err := os.WriteFile(fp, []byte("content"), 0644); err != nil { - t.Fatalf("Failed to write file %s: %v", fp, err) - } - } + testutil.CreateTestFiles(t, rootDir, []testutil.FileSpec{ + {Name: "file1.go", Content: "content"}, + {Name: "file2.txt", Content: "content"}, + }) + testutil.CreateTestFile(t, subDir, "file_in_vendor.txt", []byte("content")) // should be ignored // .gitignore that ignores *.txt and itself gitignoreContent := `*.txt .gitignore ` - gitignorePath := filepath.Join(rootDir, ".gitignore") - if err := os.WriteFile(gitignorePath, []byte(gitignoreContent), 0644); err != nil { - t.Fatalf("Failed to write .gitignore: %v", err) - } + testutil.CreateTestFile(t, rootDir, ".gitignore", []byte(gitignoreContent)) // Initialize config to ignore "vendor" directory - viper.Reset() - config.LoadConfig() + testutil.ResetViperConfig(t, "") viper.Set("ignoreDirectories", []string{"vendor"}) // Run walker - var w fileproc.Walker = fileproc.ProdWalker{} + w := fileproc.NewProdWalker() found, err := w.Walk(rootDir) - if err != nil { - t.Fatalf("Walk returned error: %v", err) - } + testutil.MustSucceed(t, err, "walking directory") // We expect only file1.go to appear if len(found) != 1 { @@ -70,38 +48,24 @@ func TestProdWalkerWithIgnore(t *testing.T) { } func TestProdWalkerBinaryCheck(t *testing.T) { - rootDir, err := os.MkdirTemp("", "walker_test_bincheck") - if err != nil { - t.Fatalf("Failed to create temp root directory: %v", err) - } - defer func() { - if err := os.RemoveAll(rootDir); err != nil { - t.Fatalf("cleanup failed: %v", err) - } - }() + rootDir := t.TempDir() - // Create a mock binary file - binFile := filepath.Join(rootDir, "somefile.exe") - if err := os.WriteFile(binFile, []byte("fake-binary-content"), 0644); err != nil { - t.Fatalf("Failed to write file %s: %v", binFile, err) - } - - // Create a normal file - normalFile := filepath.Join(rootDir, "keep.go") - if err := os.WriteFile(normalFile, []byte("package main"), 0644); err != nil { - t.Fatalf("Failed to write file %s: %v", normalFile, err) - } + // Create test files + testutil.CreateTestFiles(t, rootDir, []testutil.FileSpec{ + {Name: "somefile.exe", Content: "fake-binary-content"}, + {Name: "keep.go", Content: "package main"}, + }) // Reset and load default config - viper.Reset() - config.LoadConfig() + testutil.ResetViperConfig(t, "") + + // Reset FileTypeRegistry to ensure clean state + fileproc.ResetRegistryForTesting() // Run walker - var w fileproc.Walker = fileproc.ProdWalker{} + w := fileproc.NewProdWalker() found, err := w.Walk(rootDir) - if err != nil { - t.Fatalf("Walk returned error: %v", err) - } + testutil.MustSucceed(t, err, "walking directory") // Only "keep.go" should be returned if len(found) != 1 { @@ -113,34 +77,17 @@ func TestProdWalkerBinaryCheck(t *testing.T) { } func TestProdWalkerSizeLimit(t *testing.T) { - rootDir, err := os.MkdirTemp("", "walker_test_sizelimit") - if err != nil { - t.Fatalf("Failed to create temp root directory: %v", err) - } - defer func() { - if err := os.RemoveAll(rootDir); err != nil { - t.Fatalf("cleanup failed: %v", err) - } - }() + rootDir := t.TempDir() - // Create a file exceeding the size limit - largeFilePath := filepath.Join(rootDir, "largefile.txt") + // Create test files largeFileData := make([]byte, 6*1024*1024) // 6 MB - if err := os.WriteFile(largeFilePath, largeFileData, 0644); err != nil { - t.Fatalf("Failed to write large file: %v", err) - } - - // Create a small file - smallFilePath := filepath.Join(rootDir, "smallfile.go") - if err := os.WriteFile(smallFilePath, []byte("package main"), 0644); err != nil { - t.Fatalf("Failed to write small file: %v", err) - } + testutil.CreateTestFile(t, rootDir, "largefile.txt", largeFileData) + testutil.CreateTestFile(t, rootDir, "smallfile.go", []byte("package main")) // Reset and load default config, which sets size limit to 5 MB - viper.Reset() - config.LoadConfig() + testutil.ResetViperConfig(t, "") - var w fileproc.Walker = fileproc.ProdWalker{} + w := fileproc.NewProdWalker() found, err := w.Walk(rootDir) if err != nil { t.Fatalf("Walk returned error: %v", err) diff --git a/fileproc/writer.go b/fileproc/writer.go index b530b25..8858b0e 100644 --- a/fileproc/writer.go +++ b/fileproc/writer.go @@ -1,101 +1,29 @@ // Package fileproc provides a writer for the output of the file processor. -// -// The StartWriter function writes the output in the specified format. -// The formatMarkdown function formats the output in Markdown format. -// The detectLanguage function tries to infer the code block language from the file extension. -// The OutputData struct represents the full output structure. -// The FileData struct represents a single file's path and content. package fileproc import ( - "encoding/json" "fmt" "os" - "github.com/sirupsen/logrus" - "gopkg.in/yaml.v3" + "github.com/ivuorinen/gibidify/utils" ) -// FileData represents a single file's path and content. -type FileData struct { - Path string `json:"path" yaml:"path"` - Content string `json:"content" yaml:"content"` -} - -// OutputData represents the full output structure. -type OutputData struct { - Prefix string `json:"prefix,omitempty" yaml:"prefix,omitempty"` - Files []FileData `json:"files" yaml:"files"` - Suffix string `json:"suffix,omitempty" yaml:"suffix,omitempty"` -} - -// StartWriter writes the output in the specified format. -func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format string, prefix, suffix string) { - var files []FileData - - // Read from channel until closed - for req := range writeCh { - files = append(files, FileData(req)) - } - - // Create output struct - output := OutputData{Prefix: prefix, Files: files, Suffix: suffix} - - // Serialize based on format - var outputData []byte - var err error - +// StartWriter writes the output in the specified format with memory optimization. +func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format, prefix, suffix string) { switch format { - case "json": - outputData, err = json.MarshalIndent(output, "", " ") - case "yaml": - outputData, err = yaml.Marshal(output) case "markdown": - outputData = []byte(formatMarkdown(output)) + startMarkdownWriter(outFile, writeCh, done, prefix, suffix) + case "json": + startJSONWriter(outFile, writeCh, done, prefix, suffix) + case "yaml": + startYAMLWriter(outFile, writeCh, done, prefix, suffix) default: - err = fmt.Errorf("unsupported format: %s", format) - } - - if err != nil { - logrus.Errorf("Error encoding output: %v", err) + err := utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeValidationFormat, + fmt.Sprintf("unsupported format: %s", format), + ).WithContext("format", format) + utils.LogError("Failed to encode output", err) close(done) - return - } - - // Write to file - if _, err := outFile.Write(outputData); err != nil { - logrus.Errorf("Error writing to file: %v", err) - } - - close(done) -} - -func formatMarkdown(output OutputData) string { - markdown := "# " + output.Prefix + "\n\n" - - for _, file := range output.Files { - markdown += fmt.Sprintf("## File: `%s`\n```%s\n%s\n```\n\n", file.Path, detectLanguage(file.Path), file.Content) - } - - markdown += "# " + output.Suffix - return markdown -} - -// detectLanguage tries to infer code block language from file extension. -func detectLanguage(filename string) string { - if len(filename) < 3 { - return "" - } - switch { - case len(filename) >= 3 && filename[len(filename)-3:] == ".go": - return "go" - case len(filename) >= 3 && filename[len(filename)-3:] == ".py": - return "python" - case len(filename) >= 2 && filename[len(filename)-2:] == ".c": - return "c" - case len(filename) >= 3 && filename[len(filename)-3:] == ".js": - return "javascript" - default: - return "" } } diff --git a/fileproc/writer_test.go b/fileproc/writer_test.go index 2c3eaa4..0320e23 100644 --- a/fileproc/writer_test.go +++ b/fileproc/writer_test.go @@ -7,8 +7,9 @@ import ( "sync" "testing" - fileproc "github.com/ivuorinen/gibidify/fileproc" "gopkg.in/yaml.v3" + + "github.com/ivuorinen/gibidify/fileproc" ) func TestStartWriter_Formats(t *testing.T) { @@ -18,107 +19,109 @@ func TestStartWriter_Formats(t *testing.T) { format string expectError bool }{ - { - name: "JSON format", - format: "json", - expectError: false, - }, - { - name: "YAML format", - format: "yaml", - expectError: false, - }, - { - name: "Markdown format", - format: "markdown", - expectError: false, - }, - { - name: "Invalid format", - format: "invalid", - expectError: true, - }, + {"JSON format", "json", false}, + {"YAML format", "yaml", false}, + {"Markdown format", "markdown", false}, + {"Invalid format", "invalid", true}, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - outFile, err := os.CreateTemp("", "gibidify_test_output") - if err != nil { - t.Fatalf("Failed to create temp file: %v", err) - } - defer func() { - if err := outFile.Close(); err != nil { - t.Errorf("close temp file: %v", err) - } - if err := os.Remove(outFile.Name()); err != nil { - t.Errorf("remove temp file: %v", err) - } - }() - - // Prepare channels - writeCh := make(chan fileproc.WriteRequest, 2) - doneCh := make(chan struct{}) - - // Write a couple of sample requests - writeCh <- fileproc.WriteRequest{Path: "sample.go", Content: "package main"} - writeCh <- fileproc.WriteRequest{Path: "example.py", Content: "def foo(): pass"} - close(writeCh) - - // Start the writer - var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() - fileproc.StartWriter(outFile, writeCh, doneCh, tc.format, "PREFIX", "SUFFIX") - }() - - // Wait until writer signals completion - wg.Wait() - <-doneCh // make sure all writes finished - - // Read output - data, err := os.ReadFile(outFile.Name()) - if err != nil { - t.Fatalf("Error reading output file: %v", err) - } - + data := runWriterTest(t, tc.format) if tc.expectError { - // For an invalid format, we expect StartWriter to log an error - // and produce no content or minimal content. There's no official - // error returned, so check if it's empty or obviously incorrect. - if len(data) != 0 { - t.Errorf("Expected no output for invalid format, got:\n%s", data) - } + verifyErrorOutput(t, data) } else { - // Valid format: check basic properties in the output - content := string(data) - switch tc.format { - case "json": - // Quick parse check - var outStruct fileproc.OutputData - if err := json.Unmarshal(data, &outStruct); err != nil { - t.Errorf("JSON unmarshal failed: %v", err) - } - case "yaml": - var outStruct fileproc.OutputData - if err := yaml.Unmarshal(data, &outStruct); err != nil { - t.Errorf("YAML unmarshal failed: %v", err) - } - case "markdown": - // Check presence of code fences or "## File: ..." - if !strings.Contains(content, "```") { - t.Error("Expected markdown code fences not found") - } - } - - // Prefix and suffix checks (common to JSON, YAML, markdown) - if !strings.Contains(string(data), "PREFIX") { - t.Errorf("Missing prefix in output: %s", data) - } - if !strings.Contains(string(data), "SUFFIX") { - t.Errorf("Missing suffix in output: %s", data) - } + verifyValidOutput(t, data, tc.format) + verifyPrefixSuffix(t, data) } }) } } + +// runWriterTest executes the writer with the given format and returns the output data. +func runWriterTest(t *testing.T, format string) []byte { + t.Helper() + outFile, err := os.CreateTemp(t.TempDir(), "gibidify_test_output") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer func() { + if closeErr := outFile.Close(); closeErr != nil { + t.Errorf("close temp file: %v", closeErr) + } + if removeErr := os.Remove(outFile.Name()); removeErr != nil { + t.Errorf("remove temp file: %v", removeErr) + } + }() + + // Prepare channels + writeCh := make(chan fileproc.WriteRequest, 2) + doneCh := make(chan struct{}) + + // Write a couple of sample requests + writeCh <- fileproc.WriteRequest{Path: "sample.go", Content: "package main"} + writeCh <- fileproc.WriteRequest{Path: "example.py", Content: "def foo(): pass"} + close(writeCh) + + // Start the writer + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + fileproc.StartWriter(outFile, writeCh, doneCh, format, "PREFIX", "SUFFIX") + }() + + // Wait until writer signals completion + wg.Wait() + <-doneCh // make sure all writes finished + + // Read output + data, err := os.ReadFile(outFile.Name()) + if err != nil { + t.Fatalf("Error reading output file: %v", err) + } + + return data +} + +// verifyErrorOutput checks that error cases produce no output. +func verifyErrorOutput(t *testing.T, data []byte) { + t.Helper() + if len(data) != 0 { + t.Errorf("Expected no output for invalid format, got:\n%s", data) + } +} + +// verifyValidOutput checks format-specific output validity. +func verifyValidOutput(t *testing.T, data []byte, format string) { + t.Helper() + content := string(data) + switch format { + case "json": + var outStruct fileproc.OutputData + if err := json.Unmarshal(data, &outStruct); err != nil { + t.Errorf("JSON unmarshal failed: %v", err) + } + case "yaml": + var outStruct fileproc.OutputData + if err := yaml.Unmarshal(data, &outStruct); err != nil { + t.Errorf("YAML unmarshal failed: %v", err) + } + case "markdown": + if !strings.Contains(content, "```") { + t.Error("Expected markdown code fences not found") + } + } +} + +// verifyPrefixSuffix checks that output contains expected prefix and suffix. +func verifyPrefixSuffix(t *testing.T, data []byte) { + t.Helper() + content := string(data) + if !strings.Contains(content, "PREFIX") { + t.Errorf("Missing prefix in output: %s", data) + } + if !strings.Contains(content, "SUFFIX") { + t.Errorf("Missing suffix in output: %s", data) + } +} diff --git a/fileproc/yaml_writer.go b/fileproc/yaml_writer.go new file mode 100644 index 0000000..3ea60f2 --- /dev/null +++ b/fileproc/yaml_writer.go @@ -0,0 +1,148 @@ +package fileproc + +import ( + "bufio" + "fmt" + "io" + "os" + "strings" + + "github.com/ivuorinen/gibidify/utils" +) + +// YAMLWriter handles YAML format output with streaming support. +type YAMLWriter struct { + outFile *os.File +} + +// NewYAMLWriter creates a new YAML writer. +func NewYAMLWriter(outFile *os.File) *YAMLWriter { + return &YAMLWriter{outFile: outFile} +} + +// Start writes the YAML header. +func (w *YAMLWriter) Start(prefix, suffix string) error { + // Write YAML header + if _, err := fmt.Fprintf(w.outFile, "prefix: %s\nsuffix: %s\nfiles:\n", yamlQuoteString(prefix), yamlQuoteString(suffix)); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML header") + } + return nil +} + +// WriteFile writes a file entry in YAML format. +func (w *YAMLWriter) WriteFile(req WriteRequest) error { + if req.IsStream { + return w.writeStreaming(req) + } + return w.writeInline(req) +} + +// Close writes the YAML footer (no footer needed for YAML). +func (w *YAMLWriter) Close() error { + return nil +} + +// writeStreaming writes a large file as YAML in streaming chunks. +func (w *YAMLWriter) writeStreaming(req WriteRequest) error { + defer w.closeReader(req.Reader, req.Path) + + language := detectLanguage(req.Path) + + // Write YAML file entry start + if _, err := fmt.Fprintf(w.outFile, " - path: %s\n language: %s\n content: |\n", yamlQuoteString(req.Path), language); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML file start").WithFilePath(req.Path) + } + + // Stream content with YAML indentation + return w.streamYAMLContent(req.Reader, req.Path) +} + +// writeInline writes a small file directly as YAML. +func (w *YAMLWriter) writeInline(req WriteRequest) error { + language := detectLanguage(req.Path) + fileData := FileData{ + Path: req.Path, + Content: req.Content, + Language: language, + } + + // Write YAML entry + if _, err := fmt.Fprintf(w.outFile, " - path: %s\n language: %s\n content: |\n", yamlQuoteString(fileData.Path), fileData.Language); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML entry start").WithFilePath(req.Path) + } + + // Write indented content + lines := strings.Split(fileData.Content, "\n") + for _, line := range lines { + if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML content line").WithFilePath(req.Path) + } + } + + return nil +} + +// streamYAMLContent streams content with YAML indentation. +func (w *YAMLWriter) streamYAMLContent(reader io.Reader, path string) error { + scanner := bufio.NewScanner(reader) + for scanner.Scan() { + line := scanner.Text() + if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML line").WithFilePath(path) + } + } + + if err := scanner.Err(); err != nil { + return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to scan YAML content").WithFilePath(path) + } + return nil +} + +// closeReader safely closes a reader if it implements io.Closer. +func (w *YAMLWriter) closeReader(reader io.Reader, path string) { + if closer, ok := reader.(io.Closer); ok { + if err := closer.Close(); err != nil { + utils.LogError( + "Failed to close file reader", + utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path), + ) + } + } +} + +// yamlQuoteString quotes a string for YAML output if needed. +func yamlQuoteString(s string) string { + if s == "" { + return `""` + } + // Simple YAML quoting - use double quotes if string contains special characters + if strings.ContainsAny(s, "\n\r\t:\"'\\") { + return fmt.Sprintf(`"%s"`, strings.ReplaceAll(s, `"`, `\"`)) + } + return s +} + +// startYAMLWriter handles YAML format output with streaming support. +func startYAMLWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) { + defer close(done) + + writer := NewYAMLWriter(outFile) + + // Start writing + if err := writer.Start(prefix, suffix); err != nil { + utils.LogError("Failed to write YAML header", err) + return + } + + // Process files + for req := range writeCh { + if err := writer.WriteFile(req); err != nil { + utils.LogError("Failed to write YAML file", err) + } + } + + // Close writer + if err := writer.Close(); err != nil { + utils.LogError("Failed to write YAML end", err) + } +} diff --git a/go.mod b/go.mod index dec5501..d83419a 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,9 @@ module github.com/ivuorinen/gibidify go 1.24.1 require ( + github.com/fatih/color v1.18.0 github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 + github.com/schollz/progressbar/v3 v3.18.0 github.com/sirupsen/logrus v1.9.3 github.com/spf13/viper v1.20.0 gopkg.in/yaml.v3 v3.0.1 @@ -12,7 +14,11 @@ require ( require ( github.com/fsnotify/fsnotify v1.8.0 // indirect github.com/go-viper/mapstructure/v2 v2.2.1 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/pelletier/go-toml/v2 v2.2.3 // indirect + github.com/rivo/uniseg v0.4.7 // indirect github.com/sagikazarmark/locafero v0.8.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.14.0 // indirect @@ -21,5 +27,6 @@ require ( github.com/subosito/gotenv v1.6.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/sys v0.31.0 // indirect + golang.org/x/term v0.28.0 // indirect golang.org/x/text v0.23.0 // indirect ) diff --git a/go.sum b/go.sum index 7ce23c5..e8a1c14 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,10 @@ +github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM= +github.com/chengxilo/virtualterm v1.0.4/go.mod h1:DyxxBZz/x1iqJjFxTFcr6/x+jSpqN0iwWCOK1q10rlY= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= +github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= @@ -13,16 +17,29 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= github.com/sagikazarmark/locafero v0.8.0 h1:mXaMVw7IqxNBxfv3LdWt9MDmcWDQ1fagDH918lOdVaQ= github.com/sagikazarmark/locafero v0.8.0/go.mod h1:UBUyz37V+EdMS3hDF3QWIiVr/2dPrx49OMO0Bn0hJqk= +github.com/schollz/progressbar/v3 v3.18.0 h1:uXdoHABRFmNIjUfte/Ex7WtuyVslrw2wVPQmCN62HpA= +github.com/schollz/progressbar/v3 v3.18.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= @@ -45,8 +62,12 @@ github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSW go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg= +golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek= golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/main.go b/main.go index 6510a7c..c768396 100644 --- a/main.go +++ b/main.go @@ -4,152 +4,46 @@ package main import ( "context" - "flag" - "fmt" "os" - "path/filepath" - "runtime" - "sync" - "github.com/ivuorinen/gibidify/config" - "github.com/ivuorinen/gibidify/fileproc" "github.com/sirupsen/logrus" -) -var ( - sourceDir string - destination string - prefix string - suffix string - concurrency int - format string + "github.com/ivuorinen/gibidify/cli" + "github.com/ivuorinen/gibidify/config" ) -func init() { - flag.StringVar(&sourceDir, "source", "", "Source directory to scan recursively") - flag.StringVar(&destination, "destination", "", "Output file to write aggregated code") - flag.StringVar(&prefix, "prefix", "", "Text to add at the beginning of the output file") - flag.StringVar(&suffix, "suffix", "", "Text to add at the end of the output file") - flag.StringVar(&format, "format", "markdown", "Output format (json, markdown, yaml)") - flag.IntVar(&concurrency, "concurrency", runtime.NumCPU(), "Number of concurrent workers (default: number of CPU cores)") -} - func main() { + // Initialize UI for error handling + ui := cli.NewUIManager() + errorFormatter := cli.NewErrorFormatter(ui) + // In production, use a background context. if err := run(context.Background()); err != nil { - fmt.Println("Error:", err) - os.Exit(1) + // Handle errors with better formatting and suggestions + if cli.IsUserError(err) { + errorFormatter.FormatError(err) + os.Exit(1) + } else { + // System errors still go to logrus for debugging + logrus.Errorf("System error: %v", err) + ui.PrintError("An unexpected error occurred. Please check the logs.") + os.Exit(2) + } } } // Run executes the main logic of the CLI application using the provided context. func run(ctx context.Context) error { - flag.Parse() - - if err := validateFlags(); err != nil { - return err - } - - if err := setDestination(); err != nil { + // Parse CLI flags + flags, err := cli.ParseFlags() + if err != nil { return err } + // Load configuration config.LoadConfig() - logrus.Infof( - "Starting gibidify. Format: %s, Source: %s, Destination: %s, Workers: %d", - format, - sourceDir, - destination, - concurrency, - ) - - files, err := fileproc.CollectFiles(sourceDir) - if err != nil { - return fmt.Errorf("error collecting files: %w", err) - } - logrus.Infof("Found %d files to process", len(files)) - - outFile, err := os.Create(destination) - if err != nil { - return fmt.Errorf("failed to create output file %s: %w", destination, err) - } - defer func(outFile *os.File) { - if err := outFile.Close(); err != nil { - logrus.Errorf("Error closing output file: %v", err) - } - }(outFile) - - fileCh := make(chan string) - writeCh := make(chan fileproc.WriteRequest) - writerDone := make(chan struct{}) - - go fileproc.StartWriter(outFile, writeCh, writerDone, format, prefix, suffix) - - var wg sync.WaitGroup - - startWorkers(ctx, &wg, fileCh, writeCh) - - for _, fp := range files { - select { - case <-ctx.Done(): - close(fileCh) - return ctx.Err() - case fileCh <- fp: - } - } - close(fileCh) - - wg.Wait() - close(writeCh) - <-writerDone - - logrus.Infof("Processing completed. Output saved to %s", destination) - return nil -} -func validateFlags() error { - if sourceDir == "" { - return fmt.Errorf("usage: gibidify -source [--destination ] [--format=json|yaml|markdown] ") - } - return nil -} - -func setDestination() error { - if destination == "" { - absRoot, err := filepath.Abs(sourceDir) - if err != nil { - return fmt.Errorf("failed to get absolute path for %s: %w", sourceDir, err) - } - baseName := filepath.Base(absRoot) - if baseName == "." || baseName == "" { - baseName = "output" - } - destination = baseName + "." + format - } - return nil -} - -func startWorkers(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) { - for i := 0; i < concurrency; i++ { - wg.Add(1) - go func() { - defer wg.Done() - for { - select { - case <-ctx.Done(): - return - case filePath, ok := <-fileCh: - if !ok { - return - } - absRoot, err := filepath.Abs(sourceDir) - if err != nil { - logrus.Errorf("Failed to get absolute path for %s: %v", sourceDir, err) - return - } - fileproc.ProcessFile(filePath, writeCh, absRoot) - } - } - }() - } + // Create and run processor + processor := cli.NewProcessor(flags) + return processor.Process(ctx) } diff --git a/main_test.go b/main_test.go index a7372cf..ab89aac 100644 --- a/main_test.go +++ b/main_test.go @@ -4,139 +4,103 @@ import ( "context" "fmt" "os" - "path/filepath" - "strings" "testing" "time" + + "github.com/ivuorinen/gibidify/testutil" +) + +const ( + testFileCount = 1000 ) // TestIntegrationFullCLI simulates a full run of the CLI application using adaptive concurrency. func TestIntegrationFullCLI(t *testing.T) { - // Create a temporary source directory and populate it with test files. - srcDir, err := os.MkdirTemp("", "gibidify_src") - if err != nil { - t.Fatalf("Failed to create temp source directory: %v", err) - } - defer func() { - if err := os.RemoveAll(srcDir); err != nil { - t.Fatalf("cleanup failed: %v", err) - } - }() - - // Create two test files. - file1 := filepath.Join(srcDir, "file1.txt") - if err := os.WriteFile(file1, []byte("Hello World"), 0644); err != nil { - t.Fatalf("Failed to write file1: %v", err) - } - file2 := filepath.Join(srcDir, "file2.go") - if err := os.WriteFile(file2, []byte("package main\nfunc main() {}"), 0644); err != nil { - t.Fatalf("Failed to write file2: %v", err) - } - - // Create a temporary output file. - outFile, err := os.CreateTemp("", "gibidify_output.txt") - if err != nil { - t.Fatalf("Failed to create temp output file: %v", err) - } - outFilePath := outFile.Name() - if err := outFile.Close(); err != nil { - t.Fatalf("close temp file: %v", err) - } - defer func() { - if err := os.Remove(outFilePath); err != nil { - t.Fatalf("cleanup output file: %v", err) - } - }() - - // Set up CLI arguments. - os.Args = []string{ - "gibidify", - "-source", srcDir, - "-destination", outFilePath, - "-prefix", "PREFIX", - "-suffix", "SUFFIX", - "-concurrency", "2", // For testing, set concurrency to 2. - } + srcDir := setupTestFiles(t) + outFilePath := setupOutputFile(t) + setupCLIArgs(srcDir, outFilePath) // Run the application with a background context. - ctx := context.Background() - if err := run(ctx); err != nil { - t.Fatalf("Run failed: %v", err) + ctx := t.Context() + if runErr := run(ctx); runErr != nil { + t.Fatalf("Run failed: %v", runErr) } - // Verify the output file contains the expected prefix, file contents, and suffix. + verifyOutput(t, outFilePath) +} + +// setupTestFiles creates test files and returns the source directory. +func setupTestFiles(t *testing.T) string { + t.Helper() + srcDir := t.TempDir() + + // Create two test files. + testutil.CreateTestFiles(t, srcDir, []testutil.FileSpec{ + {Name: "file1.txt", Content: "Hello World"}, + {Name: "file2.go", Content: "package main\nfunc main() {}"}, + }) + + return srcDir +} + +// setupOutputFile creates a temporary output file and returns its path. +func setupOutputFile(t *testing.T) string { + t.Helper() + outFile, outFilePath := testutil.CreateTempOutputFile(t, "gibidify_output.txt") + testutil.CloseFile(t, outFile) + + return outFilePath +} + +// setupCLIArgs configures the CLI arguments for testing. +func setupCLIArgs(srcDir, outFilePath string) { + testutil.SetupCLIArgs(srcDir, outFilePath, "PREFIX", "SUFFIX", 2) +} + +// verifyOutput checks that the output file contains expected content. +func verifyOutput(t *testing.T, outFilePath string) { + t.Helper() data, err := os.ReadFile(outFilePath) if err != nil { t.Fatalf("Failed to read output file: %v", err) } output := string(data) - if !strings.Contains(output, "PREFIX") { - t.Error("Output missing prefix") - } - if !strings.Contains(output, "Hello World") { - t.Error("Output missing content from file1.txt") - } - if !strings.Contains(output, "SUFFIX") { - t.Error("Output missing suffix") - } + testutil.VerifyContentContains(t, output, []string{"PREFIX", "Hello World", "SUFFIX"}) } // TestIntegrationCancellation verifies that the application correctly cancels processing when the context times out. func TestIntegrationCancellation(t *testing.T) { // Create a temporary source directory with many files to simulate a long-running process. - srcDir, err := os.MkdirTemp("", "gibidify_src_long") - if err != nil { - t.Fatalf("Failed to create temp source directory: %v", err) - } - defer func() { - if err := os.RemoveAll(srcDir); err != nil { - t.Fatalf("cleanup failed: %v", err) - } - }() + srcDir := t.TempDir() // Create a large number of small files. - for i := 0; i < 1000; i++ { - filePath := filepath.Join(srcDir, fmt.Sprintf("file%d.txt", i)) - if err := os.WriteFile(filePath, []byte("Content"), 0644); err != nil { - t.Fatalf("Failed to write %s: %v", filePath, err) - } + for i := range testFileCount { + fileName := fmt.Sprintf("file%d.txt", i) + testutil.CreateTestFile(t, srcDir, fileName, []byte("Content")) } // Create a temporary output file. - outFile, err := os.CreateTemp("", "gibidify_output.txt") - if err != nil { - t.Fatalf("Failed to create temp output file: %v", err) - } - outFilePath := outFile.Name() - if err := outFile.Close(); err != nil { - t.Fatalf("close temp file: %v", err) - } + outFile, outFilePath := testutil.CreateTempOutputFile(t, "gibidify_output.txt") + testutil.CloseFile(t, outFile) defer func() { - if err := os.Remove(outFilePath); err != nil { - t.Fatalf("cleanup output file: %v", err) + if removeErr := os.Remove(outFilePath); removeErr != nil { + t.Fatalf("cleanup output file: %v", removeErr) } }() // Set up CLI arguments. - os.Args = []string{ - "gibidify", - "-source", srcDir, - "-destination", outFilePath, - "-prefix", "PREFIX", - "-suffix", "SUFFIX", - "-concurrency", "2", - } + testutil.SetupCLIArgs(srcDir, outFilePath, "PREFIX", "SUFFIX", 2) // Create a context with a very short timeout to force cancellation. ctx, cancel := context.WithTimeout( - context.Background(), - 10*time.Millisecond, + t.Context(), + 1*time.Millisecond, ) defer cancel() // Run the application; we expect an error due to cancellation. - err = run(ctx) - if err == nil { + runErr := run(ctx) + if runErr == nil { t.Error("Expected Run to fail due to cancellation, but it succeeded") } } diff --git a/testutil/testutil.go b/testutil/testutil.go new file mode 100644 index 0000000..ecc43c4 --- /dev/null +++ b/testutil/testutil.go @@ -0,0 +1,117 @@ +// Package testutil provides common testing utilities and helper functions. +package testutil + +import ( + "os" + "path/filepath" + "strconv" + "strings" + "testing" + + "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/config" +) + +const ( + // FilePermission is the default file permission for test files. + FilePermission = 0o644 + // DirPermission is the default directory permission for test directories. + DirPermission = 0o755 +) + +// CreateTestFile creates a test file with the given content and returns its path. +func CreateTestFile(t *testing.T, dir, filename string, content []byte) string { + t.Helper() + filePath := filepath.Join(dir, filename) + if err := os.WriteFile(filePath, content, FilePermission); err != nil { + t.Fatalf("Failed to write file %s: %v", filePath, err) + } + return filePath +} + +// CreateTempOutputFile creates a temporary output file and returns the file handle and path. +func CreateTempOutputFile(t *testing.T, pattern string) (file *os.File, path string) { + t.Helper() + outFile, err := os.CreateTemp(t.TempDir(), pattern) + if err != nil { + t.Fatalf("Failed to create temp output file: %v", err) + } + path = outFile.Name() + return outFile, path +} + +// CreateTestDirectory creates a test directory and returns its path. +func CreateTestDirectory(t *testing.T, parent, name string) string { + t.Helper() + dirPath := filepath.Join(parent, name) + if err := os.Mkdir(dirPath, DirPermission); err != nil { + t.Fatalf("Failed to create directory %s: %v", dirPath, err) + } + return dirPath +} + +// FileSpec represents a file specification for creating test files. +type FileSpec struct { + Name string + Content string +} + +// CreateTestFiles creates multiple test files from specifications. +func CreateTestFiles(t *testing.T, rootDir string, fileSpecs []FileSpec) []string { + t.Helper() + createdFiles := make([]string, 0, len(fileSpecs)) + for _, spec := range fileSpecs { + filePath := CreateTestFile(t, rootDir, spec.Name, []byte(spec.Content)) + createdFiles = append(createdFiles, filePath) + } + return createdFiles +} + +// ResetViperConfig resets Viper configuration and optionally sets a config path. +func ResetViperConfig(t *testing.T, configPath string) { + t.Helper() + viper.Reset() + if configPath != "" { + viper.AddConfigPath(configPath) + } + config.LoadConfig() +} + +// SetupCLIArgs configures os.Args for CLI testing. +func SetupCLIArgs(srcDir, outFilePath, prefix, suffix string, concurrency int) { + os.Args = []string{ + "gibidify", + "-source", srcDir, + "-destination", outFilePath, + "-prefix", prefix, + "-suffix", suffix, + "-concurrency", strconv.Itoa(concurrency), + } +} + +// VerifyContentContains checks that content contains all expected substrings. +func VerifyContentContains(t *testing.T, content string, expectedSubstrings []string) { + t.Helper() + for _, expected := range expectedSubstrings { + if !strings.Contains(content, expected) { + t.Errorf("Content missing expected substring: %s", expected) + } + } +} + +// MustSucceed fails the test if the error is not nil. +func MustSucceed(t *testing.T, err error, operation string) { + t.Helper() + if err != nil { + t.Fatalf("Operation %s failed: %v", operation, err) + } +} + +// CloseFile closes a file and reports errors to the test. +func CloseFile(t *testing.T, file *os.File) { + t.Helper() + if err := file.Close(); err != nil { + t.Errorf("Failed to close file: %v", err) + } +} diff --git a/testutil/testutil_test.go b/testutil/testutil_test.go new file mode 100644 index 0000000..b2eaf30 --- /dev/null +++ b/testutil/testutil_test.go @@ -0,0 +1,591 @@ +package testutil + +import ( + "errors" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/spf13/viper" +) + +func TestCreateTestFile(t *testing.T) { + tests := []struct { + name string + dir string + filename string + content []byte + wantErr bool + }{ + { + name: "create simple test file", + filename: "test.txt", + content: []byte("hello world"), + wantErr: false, + }, + { + name: "create file with empty content", + filename: "empty.txt", + content: []byte{}, + wantErr: false, + }, + { + name: "create file with binary content", + filename: "binary.bin", + content: []byte{0x00, 0xFF, 0x42}, + wantErr: false, + }, + { + name: "create file with subdirectory", + filename: "subdir/test.txt", + content: []byte("nested file"), + wantErr: false, + }, + { + name: "create file with special characters", + filename: "special-file_123.go", + content: []byte("package main"), + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Use a temporary directory for each test + tempDir := t.TempDir() + if tt.dir == "" { + tt.dir = tempDir + } + + // Create subdirectory if needed + if strings.Contains(tt.filename, "/") { + subdir := filepath.Join(tt.dir, filepath.Dir(tt.filename)) + if err := os.MkdirAll(subdir, DirPermission); err != nil { + t.Fatalf("Failed to create subdirectory: %v", err) + } + } + + // Test CreateTestFile + filePath := CreateTestFile(t, tt.dir, tt.filename, tt.content) + + // Verify file exists + info, err := os.Stat(filePath) + if err != nil { + t.Fatalf("Created file does not exist: %v", err) + } + + // Verify it's a regular file + if !info.Mode().IsRegular() { + t.Errorf("Created path is not a regular file") + } + + // Verify permissions + if info.Mode().Perm() != FilePermission { + t.Errorf("File permissions = %v, want %v", info.Mode().Perm(), FilePermission) + } + + // Verify content + readContent, err := os.ReadFile(filePath) + if err != nil { + t.Fatalf("Failed to read created file: %v", err) + } + if string(readContent) != string(tt.content) { + t.Errorf("File content = %q, want %q", readContent, tt.content) + } + }) + } +} + +func TestCreateTempOutputFile(t *testing.T) { + tests := []struct { + name string + pattern string + }{ + { + name: "simple pattern", + pattern: "output-*.txt", + }, + { + name: "pattern with prefix only", + pattern: "test-", + }, + { + name: "pattern with suffix only", + pattern: "*.json", + }, + { + name: "empty pattern", + pattern: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + file, path := CreateTempOutputFile(t, tt.pattern) + defer CloseFile(t, file) + + // Verify file exists + info, err := os.Stat(path) + if err != nil { + t.Fatalf("Temp file does not exist: %v", err) + } + + // Verify it's a regular file + if !info.Mode().IsRegular() { + t.Errorf("Created path is not a regular file") + } + + // Verify we can write to it + testContent := []byte("test content") + if _, err := file.Write(testContent); err != nil { + t.Errorf("Failed to write to temp file: %v", err) + } + + // Verify the path is in a temp directory (any temp directory) + if !strings.Contains(path, os.TempDir()) { + t.Errorf("Temp file not in temp directory: %s", path) + } + }) + } +} + +func TestCreateTestDirectory(t *testing.T) { + tests := []struct { + name string + parent string + dir string + }{ + { + name: "simple directory", + dir: "testdir", + }, + { + name: "directory with special characters", + dir: "test-dir_123", + }, + { + name: "nested directory name", + dir: "nested/dir", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + if tt.parent == "" { + tt.parent = tempDir + } + + // For nested directories, create parent first + if strings.Contains(tt.dir, "/") { + parentPath := filepath.Join(tt.parent, filepath.Dir(tt.dir)) + if err := os.MkdirAll(parentPath, DirPermission); err != nil { + t.Fatalf("Failed to create parent directory: %v", err) + } + tt.dir = filepath.Base(tt.dir) + tt.parent = parentPath + } + + dirPath := CreateTestDirectory(t, tt.parent, tt.dir) + + // Verify directory exists + info, err := os.Stat(dirPath) + if err != nil { + t.Fatalf("Created directory does not exist: %v", err) + } + + // Verify it's a directory + if !info.IsDir() { + t.Errorf("Created path is not a directory") + } + + // Verify permissions + if info.Mode().Perm() != DirPermission { + t.Errorf("Directory permissions = %v, want %v", info.Mode().Perm(), DirPermission) + } + + // Verify we can create files in it + testFile := filepath.Join(dirPath, "test.txt") + if err := os.WriteFile(testFile, []byte("test"), FilePermission); err != nil { + t.Errorf("Cannot create file in directory: %v", err) + } + }) + } +} + +func TestCreateTestFiles(t *testing.T) { + tests := []struct { + name string + fileSpecs []FileSpec + wantCount int + }{ + { + name: "create multiple files", + fileSpecs: []FileSpec{ + {Name: "file1.txt", Content: "content1"}, + {Name: "file2.go", Content: "package main"}, + {Name: "file3.json", Content: `{"key": "value"}`}, + }, + wantCount: 3, + }, + { + name: "create files with subdirectories", + fileSpecs: []FileSpec{ + {Name: "src/main.go", Content: "package main"}, + {Name: "test/test.go", Content: "package test"}, + }, + wantCount: 2, + }, + { + name: "empty file specs", + fileSpecs: []FileSpec{}, + wantCount: 0, + }, + { + name: "files with empty content", + fileSpecs: []FileSpec{ + {Name: "empty1.txt", Content: ""}, + {Name: "empty2.txt", Content: ""}, + }, + wantCount: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rootDir := t.TempDir() + + // Create necessary subdirectories + for _, spec := range tt.fileSpecs { + if strings.Contains(spec.Name, "/") { + subdir := filepath.Join(rootDir, filepath.Dir(spec.Name)) + if err := os.MkdirAll(subdir, DirPermission); err != nil { + t.Fatalf("Failed to create subdirectory: %v", err) + } + } + } + + createdFiles := CreateTestFiles(t, rootDir, tt.fileSpecs) + + // Verify count + if len(createdFiles) != tt.wantCount { + t.Errorf("Created %d files, want %d", len(createdFiles), tt.wantCount) + } + + // Verify each file + for i, filePath := range createdFiles { + content, err := os.ReadFile(filePath) + if err != nil { + t.Errorf("Failed to read file %s: %v", filePath, err) + continue + } + if string(content) != tt.fileSpecs[i].Content { + t.Errorf("File %s content = %q, want %q", filePath, content, tt.fileSpecs[i].Content) + } + } + }) + } +} + +func TestResetViperConfig(t *testing.T) { + tests := []struct { + name string + configPath string + preSetup func() + verify func(t *testing.T) + }{ + { + name: "reset with empty config path", + configPath: "", + preSetup: func() { + viper.Set("test.key", "value") + }, + verify: func(t *testing.T) { + if viper.IsSet("test.key") { + t.Error("Viper config not reset properly") + } + }, + }, + { + name: "reset with config path", + configPath: t.TempDir(), + preSetup: func() { + viper.Set("test.key", "value") + }, + verify: func(t *testing.T) { + if viper.IsSet("test.key") { + t.Error("Viper config not reset properly") + } + // Verify config path was added + paths := viper.ConfigFileUsed() + if paths == "" { + // This is expected as no config file exists + return + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.preSetup() + ResetViperConfig(t, tt.configPath) + tt.verify(t) + }) + } +} + +func TestSetupCLIArgs(t *testing.T) { + // Save original args + originalArgs := os.Args + defer func() { + os.Args = originalArgs + }() + + tests := []struct { + name string + srcDir string + outFile string + prefix string + suffix string + concurrency int + wantLen int + }{ + { + name: "basic CLI args", + srcDir: "/src", + outFile: "/out.txt", + prefix: "PREFIX", + suffix: "SUFFIX", + concurrency: 4, + wantLen: 11, + }, + { + name: "empty strings", + srcDir: "", + outFile: "", + prefix: "", + suffix: "", + concurrency: 1, + wantLen: 11, + }, + { + name: "special characters in args", + srcDir: "/path with spaces/src", + outFile: "/path/to/output file.txt", + prefix: "Prefix with\nnewline", + suffix: "Suffix with\ttab", + concurrency: 8, + wantLen: 11, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + SetupCLIArgs(tt.srcDir, tt.outFile, tt.prefix, tt.suffix, tt.concurrency) + + if len(os.Args) != tt.wantLen { + t.Errorf("os.Args length = %d, want %d", len(os.Args), tt.wantLen) + } + + // Verify specific args + if os.Args[0] != "gibidify" { + t.Errorf("Program name = %s, want gibidify", os.Args[0]) + } + if os.Args[2] != tt.srcDir { + t.Errorf("Source dir = %s, want %s", os.Args[2], tt.srcDir) + } + if os.Args[4] != tt.outFile { + t.Errorf("Output file = %s, want %s", os.Args[4], tt.outFile) + } + if os.Args[6] != tt.prefix { + t.Errorf("Prefix = %s, want %s", os.Args[6], tt.prefix) + } + if os.Args[8] != tt.suffix { + t.Errorf("Suffix = %s, want %s", os.Args[8], tt.suffix) + } + if os.Args[10] != string(rune(tt.concurrency+'0')) { + t.Errorf("Concurrency = %s, want %d", os.Args[10], tt.concurrency) + } + }) + } +} + +func TestVerifyContentContains(t *testing.T) { + // Test successful verification + t.Run("all substrings present", func(t *testing.T) { + content := "This is a test file with multiple lines" + VerifyContentContains(t, content, []string{"test file", "multiple lines"}) + // If we get here, the test passed + }) + + // Test empty expected substrings + t.Run("empty expected substrings", func(t *testing.T) { + content := "Any content" + VerifyContentContains(t, content, []string{}) + // Should pass with no expected strings + }) + + // For failure cases, we'll test indirectly by verifying behavior + t.Run("verify error reporting", func(t *testing.T) { + // We can't easily test the failure case directly since it calls t.Errorf + // But we can at least verify the function doesn't panic + defer func() { + if r := recover(); r != nil { + t.Errorf("VerifyContentContains panicked: %v", r) + } + }() + + // This would normally fail but we're just checking it doesn't panic + content := "test" + expected := []string{"not found"} + // Create a sub-test that we expect to fail + t.Run("expected_failure", func(t *testing.T) { + t.Skip("Skipping actual failure test") + VerifyContentContains(t, content, expected) + }) + }) +} + +func TestMustSucceed(t *testing.T) { + // Test with nil error (should succeed) + t.Run("nil error", func(t *testing.T) { + MustSucceed(t, nil, "successful operation") + // If we get here, the test passed + }) + + // Test error behavior without causing test failure + t.Run("verify error handling", func(t *testing.T) { + // We can't test the failure case directly since it calls t.Fatalf + // But we can verify the function exists and is callable + defer func() { + if r := recover(); r != nil { + t.Errorf("MustSucceed panicked: %v", r) + } + }() + + // Create a sub-test that we expect to fail + t.Run("expected_failure", func(t *testing.T) { + t.Skip("Skipping actual failure test") + MustSucceed(t, errors.New("test error"), "failed operation") + }) + }) +} + +func TestCloseFile(t *testing.T) { + // Test closing a normal file + t.Run("close normal file", func(t *testing.T) { + file, err := os.CreateTemp(t.TempDir(), "test") + if err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + CloseFile(t, file) + + // Verify file is closed by trying to write to it + _, writeErr := file.Write([]byte("test")) + if writeErr == nil { + t.Error("Expected write to fail on closed file") + } + }) + + // Test that CloseFile doesn't panic on already closed files + // Note: We can't easily test the error case without causing test failure + // since CloseFile calls t.Errorf, which is the expected behavior + t.Run("verify CloseFile function exists and is callable", func(t *testing.T) { + // This test just verifies the function signature and basic functionality + // The error case is tested in integration tests where failures are expected + file, err := os.CreateTemp(t.TempDir(), "test") + if err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + // Test normal case - file should close successfully + CloseFile(t, file) + + // Verify file is closed + _, writeErr := file.Write([]byte("test")) + if writeErr == nil { + t.Error("Expected write to fail on closed file") + } + }) +} + +// Test thread safety of functions that might be called concurrently +func TestConcurrentOperations(t *testing.T) { + tempDir := t.TempDir() + done := make(chan bool) + + // Test concurrent file creation + for i := 0; i < 5; i++ { + go func(n int) { + CreateTestFile(t, tempDir, string(rune('a'+n))+".txt", []byte("content")) + done <- true + }(i) + } + + // Test concurrent directory creation + for i := 0; i < 5; i++ { + go func(n int) { + CreateTestDirectory(t, tempDir, "dir"+string(rune('0'+n))) + done <- true + }(i) + } + + // Wait for all goroutines + for i := 0; i < 10; i++ { + <-done + } +} + +// Benchmarks +func BenchmarkCreateTestFile(b *testing.B) { + tempDir := b.TempDir() + content := []byte("benchmark content") + + b.ResetTimer() + for i := 0; i < b.N; i++ { + // Use a unique filename for each iteration to avoid conflicts + filename := "bench" + string(rune(i%26+'a')) + ".txt" + filePath := filepath.Join(tempDir, filename) + if err := os.WriteFile(filePath, content, FilePermission); err != nil { + b.Fatalf("Failed to write file: %v", err) + } + } +} + +func BenchmarkCreateTestFiles(b *testing.B) { + tempDir := b.TempDir() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + // Create specs with unique names for each iteration + specs := []FileSpec{ + {Name: "file1_" + string(rune(i%26+'a')) + ".txt", Content: "content1"}, + {Name: "file2_" + string(rune(i%26+'a')) + ".txt", Content: "content2"}, + {Name: "file3_" + string(rune(i%26+'a')) + ".txt", Content: "content3"}, + } + + for _, spec := range specs { + filePath := filepath.Join(tempDir, spec.Name) + if err := os.WriteFile(filePath, []byte(spec.Content), FilePermission); err != nil { + b.Fatalf("Failed to write file: %v", err) + } + } + } +} + +func BenchmarkVerifyContentContains(b *testing.B) { + content := strings.Repeat("test content with various words ", 100) + expected := []string{"test", "content", "various", "words"} + + b.ResetTimer() + for i := 0; i < b.N; i++ { + // We can't use the actual function in benchmark since it needs testing.T + // So we'll benchmark the core logic + for _, exp := range expected { + _ = strings.Contains(content, exp) + } + } +} diff --git a/utils/errors.go b/utils/errors.go new file mode 100644 index 0000000..4e74402 --- /dev/null +++ b/utils/errors.go @@ -0,0 +1,228 @@ +// Package utils provides common utility functions. +package utils + +import ( + "fmt" + + "github.com/sirupsen/logrus" +) + +// ErrorType represents the category of error. +type ErrorType int + +const ( + // ErrorTypeUnknown represents an unknown error type. + ErrorTypeUnknown ErrorType = iota + // ErrorTypeCLI represents command-line interface errors. + ErrorTypeCLI + // ErrorTypeFileSystem represents file system operation errors. + ErrorTypeFileSystem + // ErrorTypeProcessing represents file processing errors. + ErrorTypeProcessing + // ErrorTypeConfiguration represents configuration errors. + ErrorTypeConfiguration + // ErrorTypeIO represents input/output errors. + ErrorTypeIO + // ErrorTypeValidation represents validation errors. + ErrorTypeValidation +) + +// String returns the string representation of the error type. +func (e ErrorType) String() string { + switch e { + case ErrorTypeCLI: + return "CLI" + case ErrorTypeFileSystem: + return "FileSystem" + case ErrorTypeProcessing: + return "Processing" + case ErrorTypeConfiguration: + return "Configuration" + case ErrorTypeIO: + return "IO" + case ErrorTypeValidation: + return "Validation" + default: + return "Unknown" + } +} + +// StructuredError represents a structured error with type, code, and context. +type StructuredError struct { + Type ErrorType + Code string + Message string + Cause error + Context map[string]any + FilePath string + Line int +} + +// Error implements the error interface. +func (e *StructuredError) Error() string { + if e.Cause != nil { + return fmt.Sprintf("%s [%s]: %s: %v", e.Type, e.Code, e.Message, e.Cause) + } + return fmt.Sprintf("%s [%s]: %s", e.Type, e.Code, e.Message) +} + +// Unwrap returns the underlying cause error. +func (e *StructuredError) Unwrap() error { + return e.Cause +} + +// WithContext adds context information to the error. +func (e *StructuredError) WithContext(key string, value any) *StructuredError { + if e.Context == nil { + e.Context = make(map[string]any) + } + e.Context[key] = value + return e +} + +// WithFilePath adds file path information to the error. +func (e *StructuredError) WithFilePath(filePath string) *StructuredError { + e.FilePath = filePath + return e +} + +// WithLine adds line number information to the error. +func (e *StructuredError) WithLine(line int) *StructuredError { + e.Line = line + return e +} + +// NewStructuredError creates a new structured error. +func NewStructuredError(errorType ErrorType, code, message string) *StructuredError { + return &StructuredError{ + Type: errorType, + Code: code, + Message: message, + } +} + +// NewStructuredErrorf creates a new structured error with formatted message. +func NewStructuredErrorf(errorType ErrorType, code, format string, args ...any) *StructuredError { + return &StructuredError{ + Type: errorType, + Code: code, + Message: fmt.Sprintf(format, args...), + } +} + +// WrapError wraps an existing error with structured error information. +func WrapError(err error, errorType ErrorType, code, message string) *StructuredError { + return &StructuredError{ + Type: errorType, + Code: code, + Message: message, + Cause: err, + } +} + +// WrapErrorf wraps an existing error with formatted message. +func WrapErrorf(err error, errorType ErrorType, code, format string, args ...any) *StructuredError { + return &StructuredError{ + Type: errorType, + Code: code, + Message: fmt.Sprintf(format, args...), + Cause: err, + } +} + +// Common error codes for each type +const ( + // CLI Error Codes + CodeCLIMissingSource = "MISSING_SOURCE" + CodeCLIInvalidArgs = "INVALID_ARGS" + + // FileSystem Error Codes + CodeFSPathResolution = "PATH_RESOLUTION" + CodeFSPermission = "PERMISSION_DENIED" + CodeFSNotFound = "NOT_FOUND" + CodeFSAccess = "ACCESS_DENIED" + + // Processing Error Codes + CodeProcessingFileRead = "FILE_READ" + CodeProcessingCollection = "COLLECTION" + CodeProcessingTraversal = "TRAVERSAL" + CodeProcessingEncode = "ENCODE" + + // Configuration Error Codes + CodeConfigValidation = "VALIDATION" + CodeConfigMissing = "MISSING" + + // IO Error Codes + CodeIOFileCreate = "FILE_CREATE" + CodeIOFileWrite = "FILE_WRITE" + CodeIOEncoding = "ENCODING" + CodeIOWrite = "WRITE" + CodeIORead = "READ" + CodeIOClose = "CLOSE" + + // Validation Error Codes + CodeValidationFormat = "FORMAT" + CodeValidationFileType = "FILE_TYPE" + CodeValidationSize = "SIZE_LIMIT" +) + +// Predefined error constructors for common error scenarios + +// NewCLIMissingSourceError creates a CLI error for missing source argument. +func NewCLIMissingSourceError() *StructuredError { + return NewStructuredError(ErrorTypeCLI, CodeCLIMissingSource, "usage: gibidify -source [--destination ] [--format=json|yaml|markdown]") +} + +// NewFileSystemError creates a file system error. +func NewFileSystemError(code, message string) *StructuredError { + return NewStructuredError(ErrorTypeFileSystem, code, message) +} + +// NewProcessingError creates a processing error. +func NewProcessingError(code, message string) *StructuredError { + return NewStructuredError(ErrorTypeProcessing, code, message) +} + +// NewIOError creates an IO error. +func NewIOError(code, message string) *StructuredError { + return NewStructuredError(ErrorTypeIO, code, message) +} + +// NewValidationError creates a validation error. +func NewValidationError(code, message string) *StructuredError { + return NewStructuredError(ErrorTypeValidation, code, message) +} + +// LogError logs an error with a consistent format if the error is not nil. +// The operation parameter describes what was being attempted. +// Additional context can be provided via the args parameter. +func LogError(operation string, err error, args ...any) { + if err != nil { + msg := operation + if len(args) > 0 { + // Format the operation string with the provided arguments + msg = fmt.Sprintf(operation, args...) + } + + // Check if it's a structured error and log with additional context + if structErr, ok := err.(*StructuredError); ok { + logrus.WithFields(logrus.Fields{ + "error_type": structErr.Type.String(), + "error_code": structErr.Code, + "context": structErr.Context, + "file_path": structErr.FilePath, + "line": structErr.Line, + }).Errorf("%s: %v", msg, err) + } else { + logrus.Errorf("%s: %v", msg, err) + } + } +} + +// LogErrorf logs an error with a formatted message if the error is not nil. +// This is a convenience wrapper around LogError for cases where formatting is needed. +func LogErrorf(err error, format string, args ...any) { + if err != nil { + LogError(format, err, args...) + } +} diff --git a/utils/errors_test.go b/utils/errors_test.go new file mode 100644 index 0000000..1831240 --- /dev/null +++ b/utils/errors_test.go @@ -0,0 +1,242 @@ +package utils + +import ( + "bytes" + "errors" + "fmt" + "strings" + "testing" + + "github.com/sirupsen/logrus" +) + +// captureLogOutput captures logrus output for testing +func captureLogOutput(f func()) string { + var buf bytes.Buffer + logrus.SetOutput(&buf) + defer logrus.SetOutput(logrus.StandardLogger().Out) + f() + return buf.String() +} + +func TestLogError(t *testing.T) { + tests := []struct { + name string + operation string + err error + args []any + wantLog string + wantEmpty bool + }{ + { + name: "nil error should not log", + operation: "test operation", + err: nil, + args: nil, + wantEmpty: true, + }, + { + name: "basic error logging", + operation: "failed to read file", + err: errors.New("permission denied"), + args: nil, + wantLog: "failed to read file: permission denied", + }, + { + name: "error with formatting args", + operation: "failed to process file %s", + err: errors.New("file too large"), + args: []any{"test.txt"}, + wantLog: "failed to process file test.txt: file too large", + }, + { + name: "error with multiple formatting args", + operation: "failed to copy from %s to %s", + err: errors.New("disk full"), + args: []any{"source.txt", "dest.txt"}, + wantLog: "failed to copy from source.txt to dest.txt: disk full", + }, + { + name: "wrapped error", + operation: "database operation failed", + err: fmt.Errorf("connection error: %w", errors.New("timeout")), + args: nil, + wantLog: "database operation failed: connection error: timeout", + }, + { + name: "empty operation string", + operation: "", + err: errors.New("some error"), + args: nil, + wantLog: ": some error", + }, + { + name: "operation with percentage sign", + operation: "processing 50% complete", + err: errors.New("interrupted"), + args: nil, + wantLog: "processing 50% complete: interrupted", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + output := captureLogOutput(func() { + LogError(tt.operation, tt.err, tt.args...) + }) + + if tt.wantEmpty { + if output != "" { + t.Errorf("LogError() logged output when error was nil: %q", output) + } + return + } + + if !strings.Contains(output, tt.wantLog) { + t.Errorf("LogError() output = %q, want to contain %q", output, tt.wantLog) + } + + // Verify it's logged at ERROR level + if !strings.Contains(output, "level=error") { + t.Errorf("LogError() should log at ERROR level, got: %q", output) + } + }) + } +} + +func TestLogErrorf(t *testing.T) { + tests := []struct { + name string + err error + format string + args []any + wantLog string + wantEmpty bool + }{ + { + name: "nil error should not log", + err: nil, + format: "operation %s failed", + args: []any{"test"}, + wantEmpty: true, + }, + { + name: "basic formatted error", + err: errors.New("not found"), + format: "file %s not found", + args: []any{"config.yaml"}, + wantLog: "file config.yaml not found: not found", + }, + { + name: "multiple format arguments", + err: errors.New("invalid range"), + format: "value %d is not between %d and %d", + args: []any{150, 0, 100}, + wantLog: "value 150 is not between 0 and 100: invalid range", + }, + { + name: "no format arguments", + err: errors.New("generic error"), + format: "operation failed", + args: nil, + wantLog: "operation failed: generic error", + }, + { + name: "format with different types", + err: errors.New("type mismatch"), + format: "expected %s but got %d", + args: []any{"string", 42}, + wantLog: "expected string but got 42: type mismatch", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + output := captureLogOutput(func() { + LogErrorf(tt.err, tt.format, tt.args...) + }) + + if tt.wantEmpty { + if output != "" { + t.Errorf("LogErrorf() logged output when error was nil: %q", output) + } + return + } + + if !strings.Contains(output, tt.wantLog) { + t.Errorf("LogErrorf() output = %q, want to contain %q", output, tt.wantLog) + } + + // Verify it's logged at ERROR level + if !strings.Contains(output, "level=error") { + t.Errorf("LogErrorf() should log at ERROR level, got: %q", output) + } + }) + } +} + +func TestLogErrorConcurrency(t *testing.T) { + // Test that LogError is safe for concurrent use + done := make(chan bool) + for i := 0; i < 10; i++ { + go func(n int) { + LogError("concurrent operation", fmt.Errorf("error %d", n)) + done <- true + }(i) + } + + // Wait for all goroutines to complete + for i := 0; i < 10; i++ { + <-done + } +} + +func TestLogErrorfConcurrency(t *testing.T) { + // Test that LogErrorf is safe for concurrent use + done := make(chan bool) + for i := 0; i < 10; i++ { + go func(n int) { + LogErrorf(fmt.Errorf("error %d", n), "concurrent operation %d", n) + done <- true + }(i) + } + + // Wait for all goroutines to complete + for i := 0; i < 10; i++ { + <-done + } +} + +// BenchmarkLogError benchmarks the LogError function +func BenchmarkLogError(b *testing.B) { + err := errors.New("benchmark error") + // Disable output during benchmark + logrus.SetOutput(bytes.NewBuffer(nil)) + defer logrus.SetOutput(logrus.StandardLogger().Out) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + LogError("benchmark operation", err) + } +} + +// BenchmarkLogErrorf benchmarks the LogErrorf function +func BenchmarkLogErrorf(b *testing.B) { + err := errors.New("benchmark error") + // Disable output during benchmark + logrus.SetOutput(bytes.NewBuffer(nil)) + defer logrus.SetOutput(logrus.StandardLogger().Out) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + LogErrorf(err, "benchmark operation %d", i) + } +} + +// BenchmarkLogErrorNil benchmarks LogError with nil error (no-op case) +func BenchmarkLogErrorNil(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + LogError("benchmark operation", nil) + } +} diff --git a/utils/paths.go b/utils/paths.go new file mode 100644 index 0000000..845d0ca --- /dev/null +++ b/utils/paths.go @@ -0,0 +1,26 @@ +// Package utils provides common utility functions. +package utils + +import ( + "fmt" + "path/filepath" +) + +// GetAbsolutePath returns the absolute path for the given path. +// It wraps filepath.Abs with consistent error handling. +func GetAbsolutePath(path string) (string, error) { + abs, err := filepath.Abs(path) + if err != nil { + return "", fmt.Errorf("failed to get absolute path for %s: %w", path, err) + } + return abs, nil +} + +// GetBaseName returns the base name for the given path, handling special cases. +func GetBaseName(absPath string) string { + baseName := filepath.Base(absPath) + if baseName == "." || baseName == "" { + return "output" + } + return baseName +} diff --git a/utils/paths_test.go b/utils/paths_test.go new file mode 100644 index 0000000..fe5b80e --- /dev/null +++ b/utils/paths_test.go @@ -0,0 +1,262 @@ +package utils + +import ( + "os" + "path/filepath" + "runtime" + "strings" + "testing" +) + +func TestGetAbsolutePath(t *testing.T) { + // Get current working directory for tests + cwd, err := os.Getwd() + if err != nil { + t.Fatalf("Failed to get current directory: %v", err) + } + + tests := []struct { + name string + path string + wantPrefix string + wantErr bool + wantErrMsg string + skipWindows bool + }{ + { + name: "absolute path unchanged", + path: cwd, + wantPrefix: cwd, + wantErr: false, + }, + { + name: "relative path current directory", + path: ".", + wantPrefix: cwd, + wantErr: false, + }, + { + name: "relative path parent directory", + path: "..", + wantPrefix: filepath.Dir(cwd), + wantErr: false, + }, + { + name: "relative path with file", + path: "test.txt", + wantPrefix: filepath.Join(cwd, "test.txt"), + wantErr: false, + }, + { + name: "relative path with subdirectory", + path: "subdir/file.go", + wantPrefix: filepath.Join(cwd, "subdir", "file.go"), + wantErr: false, + }, + { + name: "empty path", + path: "", + wantPrefix: cwd, + wantErr: false, + }, + { + name: "path with tilde", + path: "~/test", + wantPrefix: filepath.Join(cwd, "~", "test"), + wantErr: false, + skipWindows: false, + }, + { + name: "path with multiple separators", + path: "path//to///file", + wantPrefix: filepath.Join(cwd, "path", "to", "file"), + wantErr: false, + }, + { + name: "path with trailing separator", + path: "path/", + wantPrefix: filepath.Join(cwd, "path"), + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.skipWindows && runtime.GOOS == "windows" { + t.Skip("Skipping test on Windows") + } + + got, err := GetAbsolutePath(tt.path) + + if tt.wantErr { + if err == nil { + t.Errorf("GetAbsolutePath() error = nil, wantErr %v", tt.wantErr) + return + } + if tt.wantErrMsg != "" && !strings.Contains(err.Error(), tt.wantErrMsg) { + t.Errorf("GetAbsolutePath() error = %v, want error containing %v", err, tt.wantErrMsg) + } + return + } + + if err != nil { + t.Errorf("GetAbsolutePath() unexpected error = %v", err) + return + } + + // Clean the expected path for comparison + wantClean := filepath.Clean(tt.wantPrefix) + gotClean := filepath.Clean(got) + + if gotClean != wantClean { + t.Errorf("GetAbsolutePath() = %v, want %v", gotClean, wantClean) + } + + // Verify the result is actually absolute + if !filepath.IsAbs(got) { + t.Errorf("GetAbsolutePath() returned non-absolute path: %v", got) + } + }) + } +} + +func TestGetAbsolutePathSpecialCases(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("Skipping Unix-specific tests on Windows") + } + + tests := []struct { + name string + setup func() (string, func()) + path string + wantErr bool + }{ + { + name: "symlink to directory", + setup: func() (string, func()) { + tmpDir := t.TempDir() + target := filepath.Join(tmpDir, "target") + link := filepath.Join(tmpDir, "link") + + if err := os.Mkdir(target, 0o755); err != nil { + t.Fatalf("Failed to create target directory: %v", err) + } + if err := os.Symlink(target, link); err != nil { + t.Fatalf("Failed to create symlink: %v", err) + } + + return link, func() {} + }, + path: "", + wantErr: false, + }, + { + name: "broken symlink", + setup: func() (string, func()) { + tmpDir := t.TempDir() + link := filepath.Join(tmpDir, "broken_link") + + if err := os.Symlink("/nonexistent/path", link); err != nil { + t.Fatalf("Failed to create broken symlink: %v", err) + } + + return link, func() {} + }, + path: "", + wantErr: false, // filepath.Abs still works with broken symlinks + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + path, cleanup := tt.setup() + defer cleanup() + + if tt.path == "" { + tt.path = path + } + + got, err := GetAbsolutePath(tt.path) + if (err != nil) != tt.wantErr { + t.Errorf("GetAbsolutePath() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if err == nil && !filepath.IsAbs(got) { + t.Errorf("GetAbsolutePath() returned non-absolute path: %v", got) + } + }) + } +} + +func TestGetAbsolutePathConcurrency(t *testing.T) { + // Test that GetAbsolutePath is safe for concurrent use + paths := []string{".", "..", "test.go", "subdir/file.txt", "/tmp/test"} + done := make(chan bool) + + for _, p := range paths { + go func(path string) { + _, _ = GetAbsolutePath(path) + done <- true + }(p) + } + + // Wait for all goroutines to complete + for range paths { + <-done + } +} + +func TestGetAbsolutePathErrorFormatting(t *testing.T) { + // This test verifies error message formatting + // We need to trigger an actual error from filepath.Abs + // On Unix systems, we can't easily trigger filepath.Abs errors + // so we'll just verify the error wrapping works correctly + + // Create a test that would fail if filepath.Abs returns an error + path := "test/path" + got, err := GetAbsolutePath(path) + if err != nil { + // If we somehow get an error, verify it's properly formatted + if !strings.Contains(err.Error(), "failed to get absolute path for") { + t.Errorf("Error message format incorrect: %v", err) + } + if !strings.Contains(err.Error(), path) { + t.Errorf("Error message should contain original path: %v", err) + } + } else { + // Normal case - just verify we got a valid absolute path + if !filepath.IsAbs(got) { + t.Errorf("Expected absolute path, got: %v", got) + } + } +} + +// BenchmarkGetAbsolutePath benchmarks the GetAbsolutePath function +func BenchmarkGetAbsolutePath(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = GetAbsolutePath("test/path/file.go") + } +} + +// BenchmarkGetAbsolutePathAbs benchmarks with already absolute path +func BenchmarkGetAbsolutePathAbs(b *testing.B) { + absPath := "/home/user/test/file.go" + if runtime.GOOS == "windows" { + absPath = "C:\\Users\\test\\file.go" + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = GetAbsolutePath(absPath) + } +} + +// BenchmarkGetAbsolutePathCurrent benchmarks with current directory +func BenchmarkGetAbsolutePathCurrent(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = GetAbsolutePath(".") + } +}