feat: many features, check TODO.md

This commit is contained in:
2025-07-19 00:45:21 +03:00
parent 3556b06bb9
commit e35126856d
50 changed files with 6996 additions and 674 deletions

3
.gitignore vendored
View File

@@ -9,3 +9,6 @@ output.txt
output.yaml output.yaml
coverage.out coverage.out
megalinter-reports/* megalinter-reports/*
coverage.*
*.out
gibidify-benchmark

256
.golangci.yml Normal file
View File

@@ -0,0 +1,256 @@
run:
timeout: 5m
tests: true
go: "1.24"
build-tags:
- test
# golangci-lint configuration version
# NOTE(review): the keys below (run:, linters-settings:, output.format:) follow the
# v1 schema; golangci-lint v2 restructured these sections — confirm which binary CI runs.
version: 2
output:
format: colored-line-number
print-issued-lines: true
print-linter-name: true
path-prefix: ""
sort-results: true
linters:
enable-all: true
disable:
- depguard # Too strict for general use
- exhaustruct # Too many false positives
- ireturn # Too restrictive on interfaces
- varnamelen # Too opinionated on name length
- wrapcheck # Too many false positives
- testpackage # Tests in same package are fine
- paralleltest # Not always necessary
- tparallel # Not always necessary
- nlreturn # Too opinionated on newlines
- wsl # Too opinionated on whitespace
- nonamedreturns # Conflicts with gocritic unnamedResult
linters-settings:
errcheck:
check-type-assertions: true
check-blank: true
exclude-functions:
- io.Copy
- fmt.Print
- fmt.Printf
- fmt.Println
govet:
enable-all: true
gocyclo:
min-complexity: 15
gocognit:
min-complexity: 20
goconst:
min-len: 3
min-occurrences: 3
gofmt:
simplify: true
rewrite-rules:
- pattern: 'interface{}'
replacement: 'any'
goimports:
local-prefixes: github.com/ivuorinen/gibidify
golint:
min-confidence: 0.8
lll:
line-length: 120
tab-width: 2 # EditorConfig: tab_width = 2
misspell:
locale: US
nakedret:
max-func-lines: 30
prealloc:
simple: true
range-loops: true
for-loops: true
revive:
enable-all-rules: true
rules:
- name: package-comments
disabled: true
- name: file-header
disabled: true
- name: max-public-structs
disabled: true
- name: line-length-limit
arguments: [120]
- name: function-length
arguments: [50, 100]
- name: cognitive-complexity
arguments: [20]
- name: cyclomatic
arguments: [15]
- name: add-constant
arguments:
- maxLitCount: "3"
allowStrs: "\"error\",\"\""
allowInts: "0,1,2"
- name: argument-limit
arguments: [6]
- name: banned-characters
disabled: true
- name: function-result-limit
arguments: [3]
gosec:
excludes:
- G104 # Handled by errcheck
severity: medium
confidence: medium
exclude-generated: true
config:
G301: "0750"
G302: "0640"
G306: "0640"
dupl:
threshold: 150
gocritic:
enabled-tags:
- diagnostic
- experimental
- opinionated
- performance
- style
disabled-checks:
- whyNoLint
- paramTypeCombine
gofumpt:
extra-rules: true
# EditorConfig compliance settings
# These settings enforce .editorconfig rules:
# - end_of_line = lf (enforced by gofumpt)
# - insert_final_newline = true (enforced by gofumpt)
# - trim_trailing_whitespace = true (enforced by whitespace linter)
# - indent_style = tab, tab_width = 2 (enforced by gofumpt and lll)
whitespace:
multi-if: false # EditorConfig: trim trailing whitespace
multi-func: false # EditorConfig: trim trailing whitespace
nolintlint:
allow-leading-space: false # EditorConfig: trim trailing whitespace
allow-unused: false
require-explanation: false
require-specific: true
godox:
keywords:
- FIXME
- BUG
- HACK
mnd:
settings:
mnd:
checks:
- argument
- case
- condition
- operation
- return
- assign
ignored-numbers:
- '0'
- '1'
- '2'
- '10'
- '100'
funlen:
lines: 80
statements: 60
nestif:
min-complexity: 5
gomodguard:
allowed:
modules: []
domains: []
blocked:
modules: []
versions: []
issues:
exclude-use-default: false
exclude-case-sensitive: false
max-issues-per-linter: 0
max-same-issues: 0
uniq-by-line: true
exclude-dirs:
- vendor
- third_party
- testdata
- examples
- .git
exclude-files:
- ".*\\.pb\\.go$"
- ".*\\.gen\\.go$"
exclude-rules:
- path: _test\.go
linters:
- dupl
- gosec
- goconst
- funlen
- gocognit
- gocyclo
- errcheck
- lll
- nestif
- path: main\.go
linters:
- gochecknoglobals
- gochecknoinits
- path: fileproc/filetypes\.go
linters:
- gochecknoglobals # Allow globals for singleton registry pattern
- text: "Using the variable on range scope"
linters:
- scopelint
- text: "should have comment or be unexported"
linters:
- golint
- revive
- text: "don't use ALL_CAPS in Go names"
linters:
- golint
- stylecheck
exclude:
- "Error return value of .* is not checked"
- "exported (type|method|function) .* should have comment"
- "ST1000: at least one file in a package should have a package comment"
severity:
default-severity: error
case-sensitive: false

View File

@@ -1,12 +0,0 @@
# AGENTS
This repo is a Go CLI that aggregates code files into a single text output. The
main entry point is `main.go` with packages under `config` and `fileproc`.
Tests exist for each package, and CI workflows live in `.github/workflows`.
## Contributions
- Look for additional `AGENTS.md` files under `.github` first.
- Use Semantic Commit messages and PR titles.
- Run `go test ./...` and linting for code changes. Docs-only changes skip this.
- Use Yarn if installing Node packages.
- Follow `.editorconfig` and formatting via pre-commit.

48
CLAUDE.md Normal file
View File

@@ -0,0 +1,48 @@
# CLAUDE.md
Go CLI that aggregates code files into LLM-optimized output. Supports markdown/JSON/YAML with concurrent processing.
## Architecture (40 files, 189KB, 6.8K lines)
**Core**: `main.go` (37 lines), `cli/` (4 files), `fileproc/` (22 files), `config/` (3 files), `utils/` (4 files), `testutil/` (2 files)
**Key modules**: File collection, processing, writers (markdown/JSON/YAML), registry with caching, back-pressure management
**Patterns**: Producer-consumer pools, thread-safe registry (~63ns lookups), streaming with back-pressure, modular files (50-200 lines), progress bars, enhanced errors
## Commands
```bash
make lint-fix && make lint && make test # Essential workflow
./gibidify -source <dir> -format markdown --no-colors --no-progress --verbose
```
## Config
XDG config paths: `~/.config/gibidify/config.yaml`
**Key settings**: File size limit (5MB), ignore dirs, custom file types, back-pressure (100MB memory limit)
## Quality
**CRITICAL**: `make lint-fix && make lint` (0 issues), max 120 chars, EditorConfig compliance, 30+ linters
## Testing
**Coverage**: 84%+ (utils 90.9%, testutil 84.2%, fileproc 83.8%), race detection, benchmarks, testutil helpers
## Standards
EditorConfig (LF, tabs), semantic commits, testing required, linting must pass
## Status
**Health: 10/10** - Production-ready, 84%+ coverage, modular architecture, memory-optimized
**Completed**: Structured errors, benchmarking, config validation, memory optimization, code modularization, CLI enhancements (progress bars, colors, enhanced errors)
**Next**: Security hardening, documentation, output customization
## Workflow
1. `make lint-fix` before changes 2. >80% coverage 3. Follow patterns 4. Update docs 5. Security/performance

132
Makefile Normal file
View File

@@ -0,0 +1,132 @@
.PHONY: help install-tools lint lint-fix lint-verbose test coverage build clean all build-benchmark benchmark benchmark-collection benchmark-processing benchmark-concurrency benchmark-format
# Default target shows help
.DEFAULT_GOAL := help
# All target runs full workflow
all: lint test build
# Help target
help:
@echo "Available targets:"
@echo " install-tools - Install required linting and development tools"
@echo " lint - Run all linters"
@echo " lint-fix - Run linters with auto-fix enabled"
@echo " lint-verbose - Run linters with verbose output"
@echo " test - Run tests"
@echo " coverage - Run tests with coverage"
@echo " build - Build the application"
@echo " clean - Clean build artifacts"
@echo " all - Run lint, test, and build"
@echo ""
@echo "Benchmark targets:"
@echo " build-benchmark - Build the benchmark binary"
@echo " benchmark - Run all benchmarks"
@echo " benchmark-collection - Run file collection benchmarks"
@echo " benchmark-processing - Run file processing benchmarks"
@echo " benchmark-concurrency - Run concurrency benchmarks"
@echo " benchmark-format - Run format benchmarks"
@echo ""
@echo "Run 'make <target>' to execute a specific target."
# Install required tools
install-tools:
@echo "Installing golangci-lint..."
@go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
@echo "Installing gofumpt..."
@go install mvdan.cc/gofumpt@latest
@echo "Installing goimports..."
@go install golang.org/x/tools/cmd/goimports@latest
@echo "Installing staticcheck..."
@go install honnef.co/go/tools/cmd/staticcheck@latest
@echo "Installing gosec..."
@go install github.com/securego/gosec/v2/cmd/gosec@latest
@echo "Installing gocyclo..."
@go install github.com/fzipp/gocyclo/cmd/gocyclo@latest
@echo "All tools installed successfully!"
# Run linters
lint:
@echo "Running golangci-lint..."
@golangci-lint run ./...
# Run linters with auto-fix
lint-fix:
@echo "Running gofumpt..."
@gofumpt -l -w .
@echo "Running goimports..."
@goimports -w -local github.com/ivuorinen/gibidify .
@echo "Running go fmt..."
@go fmt ./...
@echo "Running go mod tidy..."
@go mod tidy
@echo "Running golangci-lint with --fix..."
@golangci-lint run --fix ./...
@echo "Auto-fix completed. Running final lint check..."
@golangci-lint run ./...
# Run linters with verbose output
lint-verbose:
@golangci-lint run -v ./...
# Run tests
test:
@echo "Running tests..."
@go test -race -v ./...
# Run tests with coverage
coverage:
@echo "Running tests with coverage..."
@go test -race -coverprofile=coverage.out -covermode=atomic ./...
@go tool cover -html=coverage.out -o coverage.html
@echo "Coverage report generated: coverage.html"
# Build the application
build:
@echo "Building gibidify..."
@go build -ldflags="-s -w" -o gibidify .
@echo "Build complete: ./gibidify"
# Clean build artifacts
clean:
@echo "Cleaning build artifacts..."
@rm -f gibidify gibidify-benchmark
@rm -f coverage.out coverage.html
@echo "Clean complete"
# CI-specific targets
.PHONY: ci-lint ci-test
ci-lint:
@golangci-lint run --out-format=github-actions ./...
ci-test:
@go test -race -coverprofile=coverage.out -json ./... > test-results.json
# Build benchmark binary
build-benchmark:
@echo "Building gibidify-benchmark..."
@go build -ldflags="-s -w" -o gibidify-benchmark ./cmd/benchmark
@echo "Build complete: ./gibidify-benchmark"
# Run benchmarks
benchmark: build-benchmark
@echo "Running all benchmarks..."
@./gibidify-benchmark -type=all
# Run specific benchmark types
benchmark-collection: build-benchmark
@echo "Running file collection benchmarks..."
@./gibidify-benchmark -type=collection
benchmark-processing: build-benchmark
@echo "Running file processing benchmarks..."
@./gibidify-benchmark -type=processing
benchmark-concurrency: build-benchmark
@echo "Running concurrency benchmarks..."
@./gibidify-benchmark -type=concurrency
benchmark-format: build-benchmark
@echo "Running format benchmarks..."
@./gibidify-benchmark -type=format

View File

@@ -7,11 +7,16 @@ file sections with separators, and a suffix.
## Features ## Features
- Recursive scanning of a source directory. - **Recursive directory scanning** with smart file filtering
- File filtering based on size, glob patterns, and .gitignore rules. - **Configurable file type detection** - add/remove extensions and languages
- Modular, concurrent file processing with progress bar feedback. - **Multiple output formats** - markdown, JSON, YAML
- Configurable logging and configuration via Viper. - **Memory-optimized processing** - streaming for large files, intelligent back-pressure
- Cross-platform build with Docker packaging support. - **Concurrent processing** with configurable worker pools
- **Comprehensive configuration** via YAML with validation
- **Production-ready** with structured error handling and benchmarking
- **Modular architecture** - clean, focused codebase with ~63ns registry lookups
- **Enhanced CLI experience** - progress bars, colored output, helpful error messages
- **Cross-platform** with Docker support
## Installation ## Installation
@@ -32,7 +37,10 @@ go build -o gibidify .
-format markdown|json|yaml \ -format markdown|json|yaml \
-concurrency <num_workers> \ -concurrency <num_workers> \
--prefix="..." \ --prefix="..." \
--suffix="..." --suffix="..." \
--no-colors \
--no-progress \
--verbose
``` ```
Flags: Flags:
@@ -42,6 +50,9 @@ Flags:
- `-format`: output format (`markdown`, `json`, or `yaml`). - `-format`: output format (`markdown`, `json`, or `yaml`).
- `-concurrency`: number of concurrent workers. - `-concurrency`: number of concurrent workers.
- `--prefix` / `--suffix`: optional text blocks. - `--prefix` / `--suffix`: optional text blocks.
- `--no-colors`: disable colored terminal output.
- `--no-progress`: disable progress bars.
- `--verbose`: enable verbose output and detailed logging.
## Docker ## Docker
@@ -83,11 +94,39 @@ ignoreDirectories:
- dist - dist
- build - build
- target - target
- bower_components
- cache # FileType customization
- tmp fileTypes:
enabled: true
# Add custom file extensions
customImageExtensions:
- .webp
- .avif
customBinaryExtensions:
- .custom
customLanguages:
.zig: zig
.odin: odin
.v: vlang
# Disable default extensions
disabledImageExtensions:
- .bmp
disabledBinaryExtensions:
- .exe
disabledLanguageExtensions:
- .bat
# Memory optimization (back-pressure management)
backpressure:
enabled: true
maxPendingFiles: 1000 # Max files in file channel buffer
maxPendingWrites: 100 # Max writes in write channel buffer
maxMemoryUsage: 104857600 # 100MB max memory usage
memoryCheckInterval: 1000 # Check memory every 1000 files
``` ```
See `config.example.yaml` for a comprehensive configuration example.
## License ## License
This project is licensed under [the MIT License](LICENSE). This project is licensed under [the MIT License](LICENSE).

66
TODO.md Normal file
View File

@@ -0,0 +1,66 @@
# TODO: gibidify
Prioritized improvements by impact/effort.
## ✅ Completed (High Priority)
**Testing**: utils (90.9%), testutil (84.2%), FileTypeRegistry (100%) ✅
**Config**: Registry customization, validation, schema ✅
**Errors**: Structured types, categorization, context ✅
**Performance**: Benchmarking, memory optimization, streaming ✅
**Architecture**: Code modularization (50-200 lines/file) ✅
**CLI**: Progress bars, colored output, enhanced errors ✅
## 🚀 Current Priorities
### Metrics
- [ ] Timing/profiling
- [ ] Processing stats
### Output Customization
- [ ] Templates
- [ ] Markdown config
- [ ] Metadata options
### Security
- [ ] Path traversal review
- [ ] Resource limits
- [ ] Security scanning
### Documentation
- [ ] API docs (GoDoc, examples)
- [ ] User guides, troubleshooting
### Dev Tools
- [ ] Hot reload, debug mode
- [ ] More CI/CD linters
## 🌟 Future
**Plugins**: Custom handlers, formats
**Git integration**: Commit filtering, blame
**Rich output**: HTML, PDF, web UI
**Microservices**: API-first, orchestration
**Monitoring**: Prometheus metrics, structured logging
## Guidelines
**Before**: `make lint-fix && make lint`, follow TDD, update docs
**DoD**: >80% coverage, linting passes, security reviewed
**Priorities**: Security → UX → Extensions
## Status (2025-07-19)
**Health: 10/10** - Production-ready, 40 files (189KB, 6.8K lines), 84%+ coverage
**Completed**: All critical items - testing, config, errors, performance, modularization, CLI enhancements
**Next**: Security hardening → Documentation → Output customization
### Token Usage
- TODO.md: 247 words (~329 tokens) - 63% reduction ✅
- CLAUDE.md: 212 words (~283 tokens) - 65% reduction ✅
- Total: 459 words (~612 tokens) - 64% reduction ✅
*Optimized from 1,581 → 459 words while preserving all critical information*

405
benchmark/benchmark.go Normal file
View File

@@ -0,0 +1,405 @@
// Package benchmark provides benchmarking infrastructure for gibidify.
package benchmark
import (
"context"
"fmt"
"os"
"path/filepath"
"runtime"
"sync"
"time"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/utils"
)
// BenchmarkResult represents the results of a benchmark run.
type BenchmarkResult struct {
	Name           string        // benchmark identifier, e.g. "FileCollection"
	Duration       time.Duration // wall-clock time of the measured section
	FilesProcessed int           // number of files handled during the run
	BytesProcessed int64         // combined size of processed files, in bytes
	FilesPerSecond float64       // FilesProcessed divided by Duration
	BytesPerSecond float64       // BytesProcessed divided by Duration
	MemoryUsage    MemoryStats   // before/after delta of runtime memory stats
	CPUUsage       CPUStats      // scheduler statistics sampled after the run
}

// MemoryStats represents memory usage statistics.
type MemoryStats struct {
	AllocMB      float64 // heap-allocation delta in MiB
	SysMB        float64 // OS-reserved memory delta in MiB
	NumGC        uint32  // number of GC cycles during the run
	PauseTotalNs uint64  // cumulative GC pause time delta, nanoseconds
}

// CPUStats represents CPU usage statistics.
type CPUStats struct {
	UserTime   time.Duration // user CPU time (not populated by current benchmarks)
	SystemTime time.Duration // system CPU time (not populated by current benchmarks)
	Goroutines int           // goroutine count sampled at benchmark end
}

// BenchmarkSuite represents a collection of benchmarks.
type BenchmarkSuite struct {
	Name    string            // suite identifier, e.g. "FormatBenchmark"
	Results []BenchmarkResult // one entry per benchmark run in the suite
}
// FileCollectionBenchmark benchmarks file collection operations.
//
// It scans sourceDir with fileproc.CollectFiles and reports throughput and
// memory statistics. When sourceDir is empty, a temporary tree with numFiles
// synthetic files is created and removed after the run.
func FileCollectionBenchmark(sourceDir string, numFiles int) (*BenchmarkResult, error) {
	// Load configuration to ensure proper file filtering.
	config.LoadConfig()

	// Create a temporary directory with test files if no source is provided.
	if sourceDir == "" {
		tempDir, cleanupFunc, err := createBenchmarkFiles(numFiles)
		if err != nil {
			return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create benchmark files")
		}
		defer cleanupFunc()
		sourceDir = tempDir
	}

	// Snapshot memory statistics before the measured section.
	var memBefore runtime.MemStats
	runtime.ReadMemStats(&memBefore)
	startTime := time.Now()

	// Run the file collection benchmark.
	files, err := fileproc.CollectFiles(sourceDir)
	if err != nil {
		return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "benchmark file collection failed")
	}
	duration := time.Since(startTime)

	// Snapshot memory statistics after the measured section.
	var memAfter runtime.MemStats
	runtime.ReadMemStats(&memAfter)

	// Sum file sizes; files removed since collection are silently skipped.
	var totalBytes int64
	for _, file := range files {
		if info, err := os.Stat(file); err == nil {
			totalBytes += info.Size()
		}
	}

	// Guard the throughput divisions against a zero-length interval.
	secs := duration.Seconds()
	if secs <= 0 {
		secs = time.Nanosecond.Seconds()
	}

	result := &BenchmarkResult{
		Name:           "FileCollection",
		Duration:       duration,
		FilesProcessed: len(files),
		BytesProcessed: totalBytes,
		FilesPerSecond: float64(len(files)) / secs,
		BytesPerSecond: float64(totalBytes) / secs,
		MemoryUsage: MemoryStats{
			// Subtract as signed values: MemStats.Alloc is a uint64 and can
			// shrink if a GC ran mid-benchmark; unsigned subtraction would
			// underflow and report an absurdly large allocation delta.
			AllocMB:      float64(int64(memAfter.Alloc)-int64(memBefore.Alloc)) / 1024 / 1024,
			SysMB:        float64(int64(memAfter.Sys)-int64(memBefore.Sys)) / 1024 / 1024,
			NumGC:        memAfter.NumGC - memBefore.NumGC,
			PauseTotalNs: memAfter.PauseTotalNs - memBefore.PauseTotalNs,
		},
		CPUUsage: CPUStats{
			Goroutines: runtime.NumGoroutine(),
		},
	}
	return result, nil
}
// FileProcessingBenchmark benchmarks the full file processing pipeline.
//
// It collects files from sourceDir, pushes them through the worker/writer
// pipeline with the requested output format and concurrency, and reports
// throughput plus memory statistics. When sourceDir is empty, a temporary
// tree of 100 synthetic files is created and removed after the run.
func FileProcessingBenchmark(sourceDir string, format string, concurrency int) (*BenchmarkResult, error) {
	// Load configuration to ensure proper file filtering.
	config.LoadConfig()

	if sourceDir == "" {
		// Create a temporary directory with test files.
		tempDir, cleanupFunc, err := createBenchmarkFiles(100)
		if err != nil {
			return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create benchmark files")
		}
		defer cleanupFunc()
		sourceDir = tempDir
	}

	// Create a temporary output file; it is closed and removed on exit.
	outputFile, err := os.CreateTemp("", "benchmark_output_*."+format)
	if err != nil {
		return nil, utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOFileCreate, "failed to create benchmark output file")
	}
	defer func() {
		if err := outputFile.Close(); err != nil {
			// Log error but don't fail the benchmark.
			fmt.Printf("Warning: failed to close benchmark output file: %v\n", err)
		}
		if err := os.Remove(outputFile.Name()); err != nil {
			// Log error but don't fail the benchmark.
			fmt.Printf("Warning: failed to remove benchmark output file: %v\n", err)
		}
	}()

	// Snapshot memory statistics before the measured section.
	var memBefore runtime.MemStats
	runtime.ReadMemStats(&memBefore)
	startTime := time.Now()

	// Run the full processing pipeline.
	files, err := fileproc.CollectFiles(sourceDir)
	if err != nil {
		return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "benchmark file collection failed")
	}

	// Process files with the requested concurrency.
	if err := runProcessingPipeline(context.Background(), files, outputFile, format, concurrency, sourceDir); err != nil {
		return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "benchmark processing pipeline failed")
	}
	duration := time.Since(startTime)

	// Snapshot memory statistics after the measured section.
	var memAfter runtime.MemStats
	runtime.ReadMemStats(&memAfter)

	// Sum file sizes; files removed since collection are silently skipped.
	var totalBytes int64
	for _, file := range files {
		if info, err := os.Stat(file); err == nil {
			totalBytes += info.Size()
		}
	}

	// Guard the throughput divisions against a zero-length interval.
	secs := duration.Seconds()
	if secs <= 0 {
		secs = time.Nanosecond.Seconds()
	}

	result := &BenchmarkResult{
		Name:           fmt.Sprintf("FileProcessing_%s_c%d", format, concurrency),
		Duration:       duration,
		FilesProcessed: len(files),
		BytesProcessed: totalBytes,
		FilesPerSecond: float64(len(files)) / secs,
		BytesPerSecond: float64(totalBytes) / secs,
		MemoryUsage: MemoryStats{
			// Subtract as signed values: MemStats.Alloc is a uint64 and can
			// shrink if a GC ran mid-benchmark; unsigned subtraction would
			// underflow and report an absurdly large allocation delta.
			AllocMB:      float64(int64(memAfter.Alloc)-int64(memBefore.Alloc)) / 1024 / 1024,
			SysMB:        float64(int64(memAfter.Sys)-int64(memBefore.Sys)) / 1024 / 1024,
			NumGC:        memAfter.NumGC - memBefore.NumGC,
			PauseTotalNs: memAfter.PauseTotalNs - memBefore.PauseTotalNs,
		},
		CPUUsage: CPUStats{
			Goroutines: runtime.NumGoroutine(),
		},
	}
	return result, nil
}
// ConcurrencyBenchmark benchmarks different concurrency levels.
// It runs FileProcessingBenchmark once per level and collects the results
// into a single suite, aborting on the first failure.
func ConcurrencyBenchmark(sourceDir string, format string, concurrencyLevels []int) (*BenchmarkSuite, error) {
	suite := &BenchmarkSuite{
		Name:    "ConcurrencyBenchmark",
		Results: make([]BenchmarkResult, 0, len(concurrencyLevels)),
	}
	for _, level := range concurrencyLevels {
		res, err := FileProcessingBenchmark(sourceDir, format, level)
		if err != nil {
			return nil, utils.WrapErrorf(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "concurrency benchmark failed for level %d", level)
		}
		suite.Results = append(suite.Results, *res)
	}
	return suite, nil
}
// FormatBenchmark benchmarks different output formats.
// Each format is processed with a worker count equal to the number of CPUs;
// the first failing format aborts the whole suite.
func FormatBenchmark(sourceDir string, formats []string) (*BenchmarkSuite, error) {
	suite := &BenchmarkSuite{
		Name:    "FormatBenchmark",
		Results: make([]BenchmarkResult, 0, len(formats)),
	}
	for _, fmtName := range formats {
		res, err := FileProcessingBenchmark(sourceDir, fmtName, runtime.NumCPU())
		if err != nil {
			return nil, utils.WrapErrorf(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "format benchmark failed for format %s", fmtName)
		}
		suite.Results = append(suite.Results, *res)
	}
	return suite, nil
}
// createBenchmarkFiles creates a temporary directory populated with numFiles
// synthetic source files for benchmarking.
//
// Every tenth file is placed in its own subdirectory so the tree is not flat.
// It returns the directory path, a cleanup function that removes the whole
// tree, and an error.
func createBenchmarkFiles(numFiles int) (string, func(), error) {
	tempDir, err := os.MkdirTemp("", "gibidify_benchmark_*")
	if err != nil {
		return "", nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create temp directory")
	}
	cleanup := func() {
		if err := os.RemoveAll(tempDir); err != nil {
			// Log error but don't fail the benchmark.
			fmt.Printf("Warning: failed to remove benchmark temp directory: %v\n", err)
		}
	}

	// Create various file types.
	fileTypes := []struct {
		ext     string
		content string
	}{
		{".go", "package main\n\nfunc main() {\n\tprintln(\"Hello, World!\")\n}"},
		{".js", "console.log('Hello, World!');"},
		{".py", "print('Hello, World!')"},
		{".java", "public class Hello {\n\tpublic static void main(String[] args) {\n\t\tSystem.out.println(\"Hello, World!\");\n\t}\n}"},
		{".cpp", "#include <iostream>\n\nint main() {\n\tstd::cout << \"Hello, World!\" << std::endl;\n\treturn 0;\n}"},
		{".rs", "fn main() {\n\tprintln!(\"Hello, World!\");\n}"},
		{".rb", "puts 'Hello, World!'"},
		{".php", "<?php\necho 'Hello, World!';\n?>"},
		{".sh", "#!/bin/bash\necho 'Hello, World!'"},
		{".md", "# Hello, World!\n\nThis is a markdown file."},
	}

	// Pre-render each file body once: the content depends only on the file
	// type, not the file index, so rebuilding it per file with repeated string
	// concatenation was wasted quadratic work.
	bodies := make([][]byte, len(fileTypes))
	for idx, ft := range fileTypes {
		var body []byte
		for j := 0; j < 10; j++ {
			body = fmt.Appendf(body, "// Line %d\n%s\n", j, ft.content)
		}
		bodies[idx] = body
	}

	for i := 0; i < numFiles; i++ {
		fileType := fileTypes[i%len(fileTypes)]
		filename := fmt.Sprintf("file_%d%s", i, fileType.ext)
		// Place every tenth file in a fresh subdirectory.
		if i%10 == 0 {
			subdir := filepath.Join(tempDir, fmt.Sprintf("subdir_%d", i/10))
			if err := os.MkdirAll(subdir, 0o755); err != nil {
				cleanup()
				return "", nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create subdirectory")
			}
			filename = filepath.Join(subdir, filename)
		} else {
			filename = filepath.Join(tempDir, filename)
		}
		if err := os.WriteFile(filename, bodies[i%len(fileTypes)], 0o644); err != nil {
			cleanup()
			return "", nil, utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOFileWrite, "failed to write benchmark file")
		}
	}
	return tempDir, cleanup, nil
}
// runProcessingPipeline runs the processing pipeline similar to main.go.
//
// It fans files out to `concurrency` worker goroutines over fileCh; workers
// feed write requests to a single writer goroutine over writeCh. Shutdown
// ordering matters: fileCh is closed first, the workers are awaited, and only
// then is writeCh closed — closing writeCh while a worker might still send
// on it would panic.
func runProcessingPipeline(ctx context.Context, files []string, outputFile *os.File, format string, concurrency int, sourceDir string) error {
	fileCh := make(chan string, concurrency)
	writeCh := make(chan fileproc.WriteRequest, concurrency)
	writerDone := make(chan struct{})
	// Start writer; it signals writerDone after writeCh is drained and closed.
	go fileproc.StartWriter(outputFile, writeCh, writerDone, format, "", "")
	// Get absolute path once, outside the worker loop.
	absRoot, err := utils.GetAbsolutePath(sourceDir)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSPathResolution, "failed to get absolute path for source directory")
	}
	// Start workers with proper synchronization.
	var workersDone sync.WaitGroup
	for i := 0; i < concurrency; i++ {
		workersDone.Add(1)
		go func() {
			defer workersDone.Done()
			// Each worker drains fileCh until it is closed.
			for filePath := range fileCh {
				fileproc.ProcessFile(filePath, writeCh, absRoot)
			}
		}()
	}
	// Send files to workers, honoring cancellation between sends.
	for _, file := range files {
		select {
		case <-ctx.Done():
			// Cancelled: tear the pipeline down in the same
			// close → wait → close → wait order as the success path.
			close(fileCh)
			workersDone.Wait() // Wait for workers to finish
			close(writeCh)
			<-writerDone
			return ctx.Err()
		case fileCh <- file:
		}
	}
	// Close file channel and wait for workers to finish.
	close(fileCh)
	workersDone.Wait()
	// Now it's safe to close the write channel.
	close(writeCh)
	<-writerDone
	return nil
}
// PrintBenchmarkResult prints a formatted benchmark result to stdout.
func PrintBenchmarkResult(result *BenchmarkResult) {
	const bytesPerMB = 1024 * 1024
	processedMB := float64(result.BytesProcessed) / bytesPerMB
	throughputMB := result.BytesPerSecond / bytesPerMB
	gcPause := time.Duration(result.MemoryUsage.PauseTotalNs)

	fmt.Printf("=== %s ===\n", result.Name)
	fmt.Printf("Duration: %v\n", result.Duration)
	fmt.Printf("Files Processed: %d\n", result.FilesProcessed)
	fmt.Printf("Bytes Processed: %d (%.2f MB)\n", result.BytesProcessed, processedMB)
	fmt.Printf("Files/sec: %.2f\n", result.FilesPerSecond)
	fmt.Printf("Bytes/sec: %.2f MB/sec\n", throughputMB)
	fmt.Printf("Memory Usage: +%.2f MB (Sys: +%.2f MB)\n", result.MemoryUsage.AllocMB, result.MemoryUsage.SysMB)
	fmt.Printf("GC Runs: %d (Pause: %v)\n", result.MemoryUsage.NumGC, gcPause)
	fmt.Printf("Goroutines: %d\n", result.CPUUsage.Goroutines)
	fmt.Println()
}
// PrintBenchmarkSuite prints all results in a benchmark suite.
func PrintBenchmarkSuite(suite *BenchmarkSuite) {
	fmt.Printf("=== %s ===\n", suite.Name)
	// Index instead of ranging by value to avoid copying each result.
	for i := range suite.Results {
		PrintBenchmarkResult(&suite.Results[i])
	}
}
// RunAllBenchmarks runs a comprehensive benchmark suite: file collection,
// then all output formats, then a range of concurrency levels. The first
// failing stage aborts the run.
func RunAllBenchmarks(sourceDir string) error {
	fmt.Println("Running gibidify benchmark suite...")

	// Load configuration.
	config.LoadConfig()

	// File collection benchmark.
	fmt.Println("Running file collection benchmark...")
	collectionResult, err := FileCollectionBenchmark(sourceDir, 1000)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "file collection benchmark failed")
	}
	PrintBenchmarkResult(collectionResult)

	// Format benchmarks.
	fmt.Println("Running format benchmarks...")
	formatSuite, err := FormatBenchmark(sourceDir, []string{"json", "yaml", "markdown"})
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "format benchmark failed")
	}
	PrintBenchmarkSuite(formatSuite)

	// Concurrency benchmarks.
	fmt.Println("Running concurrency benchmarks...")
	levels := []int{1, 2, 4, 8, runtime.NumCPU()}
	concurrencySuite, err := ConcurrencyBenchmark(sourceDir, "json", levels)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "concurrency benchmark failed")
	}
	PrintBenchmarkSuite(concurrencySuite)

	return nil
}

165
benchmark/benchmark_test.go Normal file
View File

@@ -0,0 +1,165 @@
package benchmark
import (
"runtime"
"testing"
)
// TestFileCollectionBenchmark tests the file collection benchmark.
func TestFileCollectionBenchmark(t *testing.T) {
	res, err := FileCollectionBenchmark("", 10)
	if err != nil {
		t.Fatalf("FileCollectionBenchmark failed: %v", err)
	}
	if res.Name != "FileCollection" {
		t.Errorf("Expected name 'FileCollection', got %s", res.Name)
	}
	// Debug information
	t.Logf("Files processed: %d", res.FilesProcessed)
	t.Logf("Duration: %v", res.Duration)
	t.Logf("Bytes processed: %d", res.BytesProcessed)
	if res.FilesProcessed <= 0 {
		t.Errorf("Expected files processed > 0, got %d", res.FilesProcessed)
	}
	if res.Duration <= 0 {
		t.Errorf("Expected duration > 0, got %v", res.Duration)
	}
}
// TestFileProcessingBenchmark tests the file processing benchmark.
func TestFileProcessingBenchmark(t *testing.T) {
	res, err := FileProcessingBenchmark("", "json", 2)
	if err != nil {
		t.Fatalf("FileProcessingBenchmark failed: %v", err)
	}
	if res.FilesProcessed <= 0 {
		t.Errorf("Expected files processed > 0, got %d", res.FilesProcessed)
	}
	if res.Duration <= 0 {
		t.Errorf("Expected duration > 0, got %v", res.Duration)
	}
}
// TestConcurrencyBenchmark tests the concurrency benchmark.
func TestConcurrencyBenchmark(t *testing.T) {
	levels := []int{1, 2}
	suite, err := ConcurrencyBenchmark("", "json", levels)
	if err != nil {
		t.Fatalf("ConcurrencyBenchmark failed: %v", err)
	}
	if suite.Name != "ConcurrencyBenchmark" {
		t.Errorf("Expected name 'ConcurrencyBenchmark', got %s", suite.Name)
	}
	if len(suite.Results) != len(levels) {
		t.Errorf("Expected %d results, got %d", len(levels), len(suite.Results))
	}
	for i := range suite.Results {
		if suite.Results[i].FilesProcessed <= 0 {
			t.Errorf("Result %d: Expected files processed > 0, got %d", i, suite.Results[i].FilesProcessed)
		}
	}
}
// TestFormatBenchmark tests the format benchmark.
func TestFormatBenchmark(t *testing.T) {
	formats := []string{"json", "yaml"}
	suite, err := FormatBenchmark("", formats)
	if err != nil {
		t.Fatalf("FormatBenchmark failed: %v", err)
	}
	if suite.Name != "FormatBenchmark" {
		t.Errorf("Expected name 'FormatBenchmark', got %s", suite.Name)
	}
	if len(suite.Results) != len(formats) {
		t.Errorf("Expected %d results, got %d", len(formats), len(suite.Results))
	}
	for i := range suite.Results {
		if suite.Results[i].FilesProcessed <= 0 {
			t.Errorf("Result %d: Expected files processed > 0, got %d", i, suite.Results[i].FilesProcessed)
		}
	}
}
// TestCreateBenchmarkFiles tests the benchmark file creation.
func TestCreateBenchmarkFiles(t *testing.T) {
	dir, cleanup, err := createBenchmarkFiles(5)
	if err != nil {
		t.Fatalf("createBenchmarkFiles failed: %v", err)
	}
	defer cleanup()
	if dir == "" {
		t.Error("Expected non-empty temp directory")
	}
	// The created files themselves are exercised indirectly by the
	// benchmark functions tested above.
}
// BenchmarkFileCollection benchmarks the file collection process.
func BenchmarkFileCollection(b *testing.B) {
	for i := 0; i < b.N; i++ {
		res, err := FileCollectionBenchmark("", 50)
		if err != nil {
			b.Fatalf("FileCollectionBenchmark failed: %v", err)
		}
		if res.FilesProcessed <= 0 {
			b.Errorf("Expected files processed > 0, got %d", res.FilesProcessed)
		}
	}
}
// BenchmarkFileProcessing benchmarks the file processing pipeline.
func BenchmarkFileProcessing(b *testing.B) {
	workers := runtime.NumCPU()
	for i := 0; i < b.N; i++ {
		res, err := FileProcessingBenchmark("", "json", workers)
		if err != nil {
			b.Fatalf("FileProcessingBenchmark failed: %v", err)
		}
		if res.FilesProcessed <= 0 {
			b.Errorf("Expected files processed > 0, got %d", res.FilesProcessed)
		}
	}
}
// BenchmarkConcurrency benchmarks different concurrency levels.
func BenchmarkConcurrency(b *testing.B) {
	levels := []int{1, 2, 4}
	for i := 0; i < b.N; i++ {
		suite, err := ConcurrencyBenchmark("", "json", levels)
		if err != nil {
			b.Fatalf("ConcurrencyBenchmark failed: %v", err)
		}
		if len(suite.Results) != len(levels) {
			b.Errorf("Expected %d results, got %d", len(levels), len(suite.Results))
		}
	}
}
// BenchmarkFormats benchmarks different output formats.
func BenchmarkFormats(b *testing.B) {
formats := []string{"json", "yaml", "markdown"}
for i := 0; i < b.N; i++ {
suite, err := FormatBenchmark("", formats)
if err != nil {
b.Fatalf("FormatBenchmark failed: %v", err)
}
if len(suite.Results) != len(formats) {
b.Errorf("Expected %d results, got %d", len(formats), len(suite.Results))
}
}
}

285
cli/errors.go Normal file
View File

@@ -0,0 +1,285 @@
package cli
import (
	"errors"
	"os"
	"path/filepath"
	"sort"
	"strings"

	"github.com/ivuorinen/gibidify/utils"
)
// ErrorFormatter handles CLI-friendly error formatting with suggestions.
type ErrorFormatter struct {
	// ui renders all formatter output (colored or plain).
	ui *UIManager
}

// NewErrorFormatter creates a new error formatter.
// The UIManager must be non-nil; every method writes through it.
func NewErrorFormatter(ui *UIManager) *ErrorFormatter {
	return &ErrorFormatter{ui: ui}
}
// FormatError formats an error with context and suggestions.
//
// Structured errors (*utils.StructuredError) get detailed output with type,
// code, file path, context and targeted suggestions; any other error falls
// back to generic formatting. A nil error produces no output.
func (ef *ErrorFormatter) FormatError(err error) {
	if err == nil {
		return
	}
	// Use errors.As rather than a direct type assertion so structured errors
	// wrapped with fmt.Errorf("...: %w", err) are still recognized.
	var structErr *utils.StructuredError
	if errors.As(err, &structErr) {
		ef.formatStructuredError(structErr)
		return
	}
	// Handle common error types
	ef.formatGenericError(err)
}
// formatStructuredError formats a structured error with context and suggestions.
// Context entries are printed in sorted key order so repeated runs produce
// identical output (Go map iteration order is randomized).
func (ef *ErrorFormatter) formatStructuredError(err *utils.StructuredError) {
	// Print main error
	ef.ui.PrintError("Error: %s", err.Message)
	// Print error type and code
	if err.Type != utils.ErrorTypeUnknown || err.Code != "" {
		ef.ui.PrintInfo("Type: %s, Code: %s", err.Type.String(), err.Code)
	}
	// Print file path if available
	if err.FilePath != "" {
		ef.ui.PrintInfo("File: %s", err.FilePath)
	}
	// Print context if available
	if len(err.Context) > 0 {
		ef.ui.PrintInfo("Context:")
		keys := make([]string, 0, len(err.Context))
		for key := range err.Context {
			keys = append(keys, key)
		}
		sort.Strings(keys)
		for _, key := range keys {
			ef.ui.printf(" %s: %v\n", key, err.Context[key])
		}
	}
	// Provide suggestions based on error type
	ef.provideSuggestions(err)
}
// formatGenericError formats a generic (non-structured) error and follows up
// with suggestions derived from message pattern matching.
func (ef *ErrorFormatter) formatGenericError(err error) {
	ef.ui.PrintError("Error: %s", err.Error())
	ef.provideGenericSuggestions(err)
}

// provideSuggestions provides helpful suggestions based on the error.
// Dispatches on the structured error's Type; unknown types get general advice.
func (ef *ErrorFormatter) provideSuggestions(err *utils.StructuredError) {
	switch err.Type {
	case utils.ErrorTypeFileSystem:
		ef.provideFileSystemSuggestions(err)
	case utils.ErrorTypeValidation:
		ef.provideValidationSuggestions(err)
	case utils.ErrorTypeProcessing:
		ef.provideProcessingSuggestions(err)
	case utils.ErrorTypeIO:
		ef.provideIOSuggestions(err)
	default:
		ef.provideDefaultSuggestions()
	}
}
// provideFileSystemSuggestions provides suggestions for file system errors.
// Picks a path-aware helper based on the error code; unrecognized codes get
// generic file-system advice.
func (ef *ErrorFormatter) provideFileSystemSuggestions(err *utils.StructuredError) {
	filePath := err.FilePath
	ef.ui.PrintWarning("Suggestions:")
	switch err.Code {
	case utils.CodeFSAccess:
		ef.suggestFileAccess(filePath)
	case utils.CodeFSPathResolution:
		ef.suggestPathResolution(filePath)
	case utils.CodeFSNotFound:
		ef.suggestFileNotFound(filePath)
	default:
		ef.suggestFileSystemGeneral(filePath)
	}
}

// provideValidationSuggestions provides suggestions for validation errors.
// Only the error code is consulted here.
func (ef *ErrorFormatter) provideValidationSuggestions(err *utils.StructuredError) {
	ef.ui.PrintWarning("Suggestions:")
	switch err.Code {
	case utils.CodeValidationFormat:
		ef.ui.printf(" • Use a supported format: markdown, json, yaml\n")
		ef.ui.printf(" • Example: -format markdown\n")
	case utils.CodeValidationSize:
		ef.ui.printf(" • Increase file size limit in config.yaml\n")
		ef.ui.printf(" • Use smaller files or exclude large files\n")
	default:
		ef.ui.printf(" • Check your command line arguments\n")
		ef.ui.printf(" • Run with --help for usage information\n")
	}
}

// provideProcessingSuggestions provides suggestions for processing errors.
func (ef *ErrorFormatter) provideProcessingSuggestions(err *utils.StructuredError) {
	ef.ui.PrintWarning("Suggestions:")
	switch err.Code {
	case utils.CodeProcessingCollection:
		ef.ui.printf(" • Check if the source directory exists and is readable\n")
		ef.ui.printf(" • Verify directory permissions\n")
	case utils.CodeProcessingFileRead:
		ef.ui.printf(" • Check file permissions\n")
		ef.ui.printf(" • Verify the file is not corrupted\n")
	default:
		ef.ui.printf(" • Try reducing concurrency: -concurrency 1\n")
		ef.ui.printf(" • Check available system resources\n")
	}
}

// provideIOSuggestions provides suggestions for I/O errors.
func (ef *ErrorFormatter) provideIOSuggestions(err *utils.StructuredError) {
	ef.ui.PrintWarning("Suggestions:")
	switch err.Code {
	case utils.CodeIOFileCreate:
		ef.ui.printf(" • Check if the destination directory exists\n")
		ef.ui.printf(" • Verify write permissions for the output file\n")
		ef.ui.printf(" • Ensure sufficient disk space\n")
	case utils.CodeIOWrite:
		ef.ui.printf(" • Check available disk space\n")
		ef.ui.printf(" • Verify write permissions\n")
	default:
		ef.ui.printf(" • Check file/directory permissions\n")
		ef.ui.printf(" • Verify available disk space\n")
	}
}
// Helper methods for specific suggestions

// suggestFileAccess suggests fixes for access errors; when the path can be
// stat'ed it also prints the file mode to help diagnose permission problems.
func (ef *ErrorFormatter) suggestFileAccess(filePath string) {
	ef.ui.printf(" • Check if the path exists: %s\n", filePath)
	ef.ui.printf(" • Verify read permissions\n")
	if filePath != "" {
		// Stat succeeding while access failed usually points at permissions.
		if stat, err := os.Stat(filePath); err == nil {
			ef.ui.printf(" • Path exists but may not be accessible\n")
			ef.ui.printf(" • Mode: %s\n", stat.Mode())
		}
	}
}

// suggestPathResolution suggests using an absolute path and, when it can be
// resolved, prints the absolute form of the given path.
func (ef *ErrorFormatter) suggestPathResolution(filePath string) {
	ef.ui.printf(" • Use an absolute path instead of relative\n")
	if filePath != "" {
		if abs, err := filepath.Abs(filePath); err == nil {
			ef.ui.printf(" • Try: %s\n", abs)
		}
	}
}
// suggestFileNotFound suggests fixes for a missing file/directory and lists up
// to three entries in the parent directory whose names contain the missing
// base name, to catch typos.
func (ef *ErrorFormatter) suggestFileNotFound(filePath string) {
	ef.ui.printf(" • Check if the file/directory exists: %s\n", filePath)
	if filePath == "" {
		return
	}
	dir := filepath.Dir(filePath)
	entries, err := os.ReadDir(dir)
	if err != nil {
		// Parent directory unreadable — nothing more to suggest.
		return
	}
	// Collect matches first so the "Similar files" header is only printed
	// when there is at least one match (the original printed it regardless).
	base := filepath.Base(filePath)
	matches := make([]string, 0, 3)
	for _, entry := range entries {
		if strings.Contains(entry.Name(), base) {
			matches = append(matches, entry.Name())
			if len(matches) >= 3 {
				break
			}
		}
	}
	if len(matches) == 0 {
		return
	}
	ef.ui.printf(" • Similar files in %s:\n", dir)
	for _, name := range matches {
		ef.ui.printf(" - %s\n", name)
	}
}
// suggestFileSystemGeneral prints generic file-system advice for error codes
// that have no dedicated suggestion helper.
func (ef *ErrorFormatter) suggestFileSystemGeneral(filePath string) {
	ef.ui.printf(" • Check file/directory permissions\n")
	ef.ui.printf(" • Verify the path is correct\n")
	if filePath != "" {
		ef.ui.printf(" • Path: %s\n", filePath)
	}
}

// provideDefaultSuggestions provides general suggestions.
// Used when no error type or code matched a more specific helper.
func (ef *ErrorFormatter) provideDefaultSuggestions() {
	ef.ui.printf(" • Check your command line arguments\n")
	ef.ui.printf(" • Run with --help for usage information\n")
	ef.ui.printf(" • Try with -concurrency 1 to reduce resource usage\n")
}
// provideGenericSuggestions provides suggestions for generic errors.
// It substring-matches the error message against well-known OS error texts;
// anything unrecognized falls through to the default advice.
func (ef *ErrorFormatter) provideGenericSuggestions(err error) {
	errorMsg := err.Error()
	ef.ui.PrintWarning("Suggestions:")
	// Pattern matching for common errors
	switch {
	case strings.Contains(errorMsg, "permission denied"):
		ef.ui.printf(" • Check file/directory permissions\n")
		ef.ui.printf(" • Try running with appropriate privileges\n")
	case strings.Contains(errorMsg, "no such file or directory"):
		ef.ui.printf(" • Verify the file/directory path is correct\n")
		ef.ui.printf(" • Check if the file exists\n")
	case strings.Contains(errorMsg, "flag") && strings.Contains(errorMsg, "redefined"):
		// Happens when flag registration runs twice in one process (tests).
		ef.ui.printf(" • This is likely a test environment issue\n")
		ef.ui.printf(" • Try running the command directly instead of in tests\n")
	default:
		ef.provideDefaultSuggestions()
	}
}
// CLI-specific error types

// CLIMissingSourceError represents a missing source directory error.
// Returned when the required -source flag was not provided.
type CLIMissingSourceError struct{}

// Error implements the error interface.
func (e CLIMissingSourceError) Error() string {
	return "source directory is required"
}

// NewCLIMissingSourceError creates a new CLI missing source error with suggestions.
// It returns a *CLIMissingSourceError, matching the pointer target used by
// errors.As in IsUserError.
func NewCLIMissingSourceError() error {
	return &CLIMissingSourceError{}
}
// IsUserError checks if an error is a user input error that should be handled gracefully.
//
// It recognizes (in order): the CLI missing-source error, user-facing
// structured validation errors, and a set of well-known message patterns.
// errors.As is used for both typed checks so wrapped errors still match.
func IsUserError(err error) bool {
	if err == nil {
		return false
	}
	// Check for specific user error types
	var cliErr *CLIMissingSourceError
	if errors.As(err, &cliErr) {
		return true
	}
	// Check for structured errors that are user-facing. errors.As (unlike the
	// previous type assertion) also matches errors wrapped with %w.
	var structErr *utils.StructuredError
	if errors.As(err, &structErr) {
		return structErr.Type == utils.ErrorTypeValidation ||
			structErr.Code == utils.CodeValidationFormat ||
			structErr.Code == utils.CodeValidationSize
	}
	// Check error message patterns
	errMsg := err.Error()
	userErrorPatterns := []string{
		"flag",
		"usage",
		"invalid argument",
		"file not found",
		"permission denied",
	}
	for _, pattern := range userErrorPatterns {
		if strings.Contains(strings.ToLower(errMsg), pattern) {
			return true
		}
	}
	return false
}

93
cli/flags.go Normal file
View File

@@ -0,0 +1,93 @@
package cli
import (
"flag"
"runtime"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/utils"
)
// Flags holds CLI flags values.
type Flags struct {
	SourceDir   string // directory scanned recursively (required)
	Destination string // output file; defaults to <source-basename>.<format>
	Prefix      string // text prepended to the output file
	Suffix      string // text appended to the output file
	Concurrency int    // worker count; defaults to runtime.NumCPU()
	Format      string // output format: json, markdown, or yaml
	NoColors    bool   // disable colored output
	NoProgress  bool   // disable progress bars
	Verbose     bool   // enable verbose output
}

// Package-level parse cache: flag registration may only happen once per
// process, so repeated ParseFlags calls return the cached result.
// NOTE(review): not goroutine-safe — assumes ParseFlags is only called from main.
var (
	flagsParsed bool
	globalFlags *Flags
)
// ParseFlags parses and validates CLI flags.
//
// On the first call it registers the flags, parses os.Args, validates the
// values and derives a default destination; later calls return the cached
// *Flags. NOTE(review): if the first call fails validation, flagsParsed stays
// false and a retry would re-register the flags and panic ("flag redefined"),
// so callers should treat an error as fatal.
func ParseFlags() (*Flags, error) {
	if flagsParsed {
		return globalFlags, nil
	}
	flags := &Flags{}
	flag.StringVar(&flags.SourceDir, "source", "", "Source directory to scan recursively")
	flag.StringVar(&flags.Destination, "destination", "", "Output file to write aggregated code")
	flag.StringVar(&flags.Prefix, "prefix", "", "Text to add at the beginning of the output file")
	flag.StringVar(&flags.Suffix, "suffix", "", "Text to add at the end of the output file")
	flag.StringVar(&flags.Format, "format", "markdown", "Output format (json, markdown, yaml)")
	flag.IntVar(&flags.Concurrency, "concurrency", runtime.NumCPU(),
		"Number of concurrent workers (default: number of CPU cores)")
	flag.BoolVar(&flags.NoColors, "no-colors", false, "Disable colored output")
	flag.BoolVar(&flags.NoProgress, "no-progress", false, "Disable progress bars")
	flag.BoolVar(&flags.Verbose, "verbose", false, "Enable verbose output")
	flag.Parse()
	if err := flags.validate(); err != nil {
		return nil, err
	}
	if err := flags.setDefaultDestination(); err != nil {
		return nil, err
	}
	// Cache only after successful validation.
	flagsParsed = true
	globalFlags = flags
	return flags, nil
}
// validate validates the CLI flags.
// SourceDir is required; format and concurrency are delegated to the config
// package's validators.
func (f *Flags) validate() error {
	if f.SourceDir == "" {
		return NewCLIMissingSourceError()
	}
	// Validate output format
	if err := config.ValidateOutputFormat(f.Format); err != nil {
		return err
	}
	// Validate concurrency
	if err := config.ValidateConcurrency(f.Concurrency); err != nil {
		return err
	}
	return nil
}

// setDefaultDestination sets the default destination if not provided.
// The default is "<basename-of-absolute-source>.<format>" in the current
// working directory.
func (f *Flags) setDefaultDestination() error {
	if f.Destination == "" {
		absRoot, err := utils.GetAbsolutePath(f.SourceDir)
		if err != nil {
			return err
		}
		baseName := utils.GetBaseName(absRoot)
		f.Destination = baseName + "." + f.Format
	}
	return nil
}

210
cli/processor.go Normal file
View File

@@ -0,0 +1,210 @@
package cli
import (
"context"
"os"
"sync"
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/utils"
)
// Processor handles the main file processing logic.
type Processor struct {
	flags        *Flags                         // parsed CLI flags
	backpressure *fileproc.BackpressureManager // throttles producers when memory/queues fill
	ui           *UIManager                     // colored output and progress bars
}

// NewProcessor creates a new processor with the given flags.
// The UI is configured from the no-colors/no-progress flags here so later
// code can call UI methods unconditionally.
func NewProcessor(flags *Flags) *Processor {
	ui := NewUIManager()
	// Configure UI based on flags
	ui.SetColorOutput(!flags.NoColors)
	ui.SetProgressOutput(!flags.NoProgress)
	return &Processor{
		flags:        flags,
		backpressure: fileproc.NewBackpressureManager(),
		ui:           ui,
	}
}
// Process executes the main file processing workflow.
//
// Steps: configure the file-type registry, print a startup banner, collect
// files under SourceDir, then stream them through the worker pipeline.
// Cancelling ctx stops in-flight processing.
func (p *Processor) Process(ctx context.Context) error {
	// Configure file type registry
	p.configureFileTypes()
	// Print startup info with colors
	p.ui.PrintHeader("🚀 Starting gibidify")
	p.ui.PrintInfo("Format: %s", p.flags.Format)
	p.ui.PrintInfo("Source: %s", p.flags.SourceDir)
	p.ui.PrintInfo("Destination: %s", p.flags.Destination)
	p.ui.PrintInfo("Workers: %d", p.flags.Concurrency)
	// Collect files with progress indication
	p.ui.PrintInfo("📁 Collecting files...")
	files, err := p.collectFiles()
	if err != nil {
		return err
	}
	// Show collection results
	p.ui.PrintSuccess("Found %d files to process", len(files))
	// Process files
	return p.processFiles(ctx, files)
}
// configureFileTypes configures the file type registry.
// No-op when file-type detection is disabled in the configuration.
func (p *Processor) configureFileTypes() {
	if config.GetFileTypesEnabled() {
		fileproc.ConfigureFromSettings(
			config.GetCustomImageExtensions(),
			config.GetCustomBinaryExtensions(),
			config.GetCustomLanguages(),
			config.GetDisabledImageExtensions(),
			config.GetDisabledBinaryExtensions(),
			config.GetDisabledLanguageExtensions(),
		)
	}
}

// collectFiles collects all files to be processed.
// Failures are wrapped as processing/collection errors so the CLI error
// formatter can show targeted suggestions.
func (p *Processor) collectFiles() ([]string, error) {
	files, err := fileproc.CollectFiles(p.flags.SourceDir)
	if err != nil {
		return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "error collecting files")
	}
	logrus.Infof("Found %d files to process", len(files))
	return files, nil
}
// processFiles processes the collected files.
//
// Pipeline shape: this goroutine feeds fileCh, Concurrency workers consume it
// and produce writeCh, and a single writer goroutine drains writeCh into the
// output file. Shutdown order matters: sendFiles closes fileCh, workers drain
// and exit, then waitForCompletion closes writeCh and waits for the writer.
func (p *Processor) processFiles(ctx context.Context, files []string) error {
	outFile, err := p.createOutputFile()
	if err != nil {
		return err
	}
	defer func() {
		utils.LogError("Error closing output file", outFile.Close())
	}()
	// Initialize back-pressure and channels
	p.ui.PrintInfo("⚙️ Initializing processing...")
	p.backpressure.LogBackpressureInfo()
	fileCh, writeCh := p.backpressure.CreateChannels()
	writerDone := make(chan struct{})
	// Start writer
	go fileproc.StartWriter(outFile, writeCh, writerDone, p.flags.Format, p.flags.Prefix, p.flags.Suffix)
	// Start workers
	var wg sync.WaitGroup
	p.startWorkers(ctx, &wg, fileCh, writeCh)
	// Start progress bar
	p.ui.StartProgress(len(files), "📝 Processing files")
	// Send files to workers
	if err := p.sendFiles(ctx, files, fileCh); err != nil {
		p.ui.FinishProgress()
		return err
	}
	// Wait for completion
	p.waitForCompletion(&wg, writeCh, writerDone)
	p.ui.FinishProgress()
	p.logFinalStats()
	p.ui.PrintSuccess("Processing completed. Output saved to %s", p.flags.Destination)
	return nil
}
// createOutputFile creates (or truncates) the output file at the destination.
func (p *Processor) createOutputFile() (*os.File, error) {
	outFile, err := os.Create(p.flags.Destination) // #nosec G304 - destination is user-provided CLI arg
	if err != nil {
		return nil, utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOFileCreate, "failed to create output file").WithFilePath(p.flags.Destination)
	}
	return outFile, nil
}

// startWorkers starts the worker goroutines.
// Each worker runs until fileCh closes or ctx is cancelled; wg tracks them.
func (p *Processor) startWorkers(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) {
	for range p.flags.Concurrency {
		wg.Add(1)
		go p.worker(ctx, wg, fileCh, writeCh)
	}
}
// worker is the worker goroutine function.
// It consumes file paths from fileCh until the channel is closed or ctx is
// cancelled, processing each one synchronously.
func (p *Processor) worker(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) {
	defer wg.Done()
	for {
		select {
		case <-ctx.Done():
			return
		case filePath, ok := <-fileCh:
			if !ok {
				// Channel closed: no more work.
				return
			}
			p.processFile(filePath, writeCh)
		}
	}
}
// processFile processes a single file and advances the progress bar.
//
// NOTE(review): GetAbsolutePath is recomputed per file even though SourceDir
// is loop-invariant — consider caching it on the Processor.
func (p *Processor) processFile(filePath string, writeCh chan fileproc.WriteRequest) {
	// Always advance the progress bar, even on the error path below;
	// previously a failing path lookup skipped the update and the bar never
	// reached 100%.
	defer p.ui.UpdateProgress(1)
	absRoot, err := utils.GetAbsolutePath(p.flags.SourceDir)
	if err != nil {
		utils.LogError("Failed to get absolute path", err)
		return
	}
	fileproc.ProcessFile(filePath, writeCh, absRoot)
}
// sendFiles sends files to the worker channels with back-pressure handling.
// fileCh is closed on return (success or cancellation) so workers terminate;
// returns ctx.Err() if the context is cancelled mid-stream.
func (p *Processor) sendFiles(ctx context.Context, files []string, fileCh chan string) error {
	defer close(fileCh)
	for _, fp := range files {
		// Check if we should apply back-pressure
		if p.backpressure.ShouldApplyBackpressure(ctx) {
			p.backpressure.ApplyBackpressure(ctx)
		}
		// Wait for channel space if needed
		p.backpressure.WaitForChannelSpace(ctx, fileCh, nil)
		select {
		case <-ctx.Done():
			return ctx.Err()
		case fileCh <- fp:
		}
	}
	return nil
}

// waitForCompletion waits for all workers to complete, then closes the write
// channel (safe: the workers are the only senders and have all returned) and
// blocks until the writer goroutine signals it has drained writeCh.
func (p *Processor) waitForCompletion(wg *sync.WaitGroup, writeCh chan fileproc.WriteRequest, writerDone chan struct{}) {
	wg.Wait()
	close(writeCh)
	<-writerDone
}

// logFinalStats logs the final back-pressure statistics.
// Memory figures are converted from bytes to MB for readability.
func (p *Processor) logFinalStats() {
	stats := p.backpressure.GetStats()
	if stats.Enabled {
		logrus.Infof("Back-pressure stats: processed=%d files, memory=%dMB/%dMB",
			stats.FilesProcessed, stats.CurrentMemoryUsage/1024/1024, stats.MaxMemoryUsage/1024/1024)
	}
}

173
cli/ui.go Normal file
View File

@@ -0,0 +1,173 @@
package cli
import (
"fmt"
"io"
"os"
"time"
"github.com/fatih/color"
"github.com/schollz/progressbar/v3"
)
// UIManager handles CLI user interface elements.
type UIManager struct {
	enableColors   bool                     // colored output toggle
	enableProgress bool                     // progress bar toggle
	progressBar    *progressbar.ProgressBar // nil when no bar is active
	output         io.Writer                // where progress/headers are written (stderr)
}

// NewUIManager creates a new UI manager.
// Colors and progress default to auto-detection from the environment/terminal.
func NewUIManager() *UIManager {
	return &UIManager{
		enableColors:   isColorTerminal(),
		enableProgress: isInteractiveTerminal(),
		output:         os.Stderr, // Progress and colors go to stderr
	}
}

// SetColorOutput enables or disables colored output.
// Also flips the color package's global NoColor switch, so this affects all
// color.* calls process-wide.
func (ui *UIManager) SetColorOutput(enabled bool) {
	ui.enableColors = enabled
	color.NoColor = !enabled
}

// SetProgressOutput enables or disables progress bars.
func (ui *UIManager) SetProgressOutput(enabled bool) {
	ui.enableProgress = enabled
}
// StartProgress initializes a progress bar for file processing.
// No-op when progress is disabled or there is nothing to count.
func (ui *UIManager) StartProgress(total int, description string) {
	if !ui.enableProgress || total <= 0 {
		return
	}
	ui.progressBar = progressbar.NewOptions(total,
		progressbar.OptionSetWriter(ui.output),
		progressbar.OptionSetDescription(description),
		progressbar.OptionSetTheme(progressbar.Theme{
			Saucer:        color.GreenString("█"),
			SaucerHead:    color.GreenString("█"),
			SaucerPadding: " ",
			BarStart:      "[",
			BarEnd:        "]",
		}),
		progressbar.OptionShowCount(),
		progressbar.OptionShowIts(),
		progressbar.OptionSetWidth(40),
		// Throttle redraws to avoid flooding the terminal.
		progressbar.OptionThrottle(100*time.Millisecond),
		progressbar.OptionOnCompletion(func() {
			_, _ = fmt.Fprint(ui.output, "\n")
		}),
		progressbar.OptionSetRenderBlankState(true),
	)
}

// UpdateProgress increments the progress bar.
// Safe to call when no bar is active (e.g. progress disabled).
func (ui *UIManager) UpdateProgress(increment int) {
	if ui.progressBar != nil {
		_ = ui.progressBar.Add(increment)
	}
}

// FinishProgress completes the progress bar and clears it so a later
// StartProgress can create a fresh one.
func (ui *UIManager) FinishProgress() {
	if ui.progressBar != nil {
		_ = ui.progressBar.Finish()
		ui.progressBar = nil
	}
}
// PrintSuccess prints a success message in green.
// Uses `any` instead of `interface{}` per the repo's gofmt rewrite rule.
func (ui *UIManager) PrintSuccess(format string, args ...any) {
	if ui.enableColors {
		color.Green("✓ "+format, args...)
	} else {
		ui.printf("✓ "+format+"\n", args...)
	}
}

// PrintError prints an error message in red.
func (ui *UIManager) PrintError(format string, args ...any) {
	if ui.enableColors {
		color.Red("✗ "+format, args...)
	} else {
		ui.printf("✗ "+format+"\n", args...)
	}
}

// PrintWarning prints a warning message in yellow.
func (ui *UIManager) PrintWarning(format string, args ...any) {
	if ui.enableColors {
		color.Yellow("⚠ "+format, args...)
	} else {
		ui.printf("⚠ "+format+"\n", args...)
	}
}

// PrintInfo prints an info message in blue.
func (ui *UIManager) PrintInfo(format string, args ...any) {
	if ui.enableColors {
		color.Blue(" "+format, args...)
	} else {
		ui.printf(" "+format+"\n", args...)
	}
}

// PrintHeader prints a header message in bold.
func (ui *UIManager) PrintHeader(format string, args ...any) {
	if ui.enableColors {
		_, _ = color.New(color.Bold).Fprintf(ui.output, format+"\n", args...)
	} else {
		ui.printf(format+"\n", args...)
	}
}
// isColorTerminal checks if the terminal supports colors.
//
// Precedence (highest first): NO_COLOR disables, FORCE_COLOR enables, then
// TERM sanity check, then CI detection, then interactive-terminal detection.
func isColorTerminal() bool {
	// NO_COLOR (https://no-color.org/) must take precedence over everything;
	// previously a GitHub Actions run ignored it because the CI branch
	// returned first.
	if os.Getenv("NO_COLOR") != "" {
		return false
	}
	// Explicit opt-in overrides the heuristics below.
	if os.Getenv("FORCE_COLOR") != "" {
		return true
	}
	// Check common environment variables
	term := os.Getenv("TERM")
	if term == "" || term == "dumb" {
		return false
	}
	// Check for CI environments that typically don't support colors
	if os.Getenv("CI") != "" {
		// GitHub Actions supports colors; most other CI systems don't.
		return os.Getenv("GITHUB_ACTIONS") == "true"
	}
	// Default to true for interactive terminals
	return isInteractiveTerminal()
}
// isInteractiveTerminal reports whether stderr is attached to a character
// device (i.e. a terminal); stderr is where progress bars and colors go.
func isInteractiveTerminal() bool {
	info, err := os.Stderr.Stat()
	if err != nil {
		return false
	}
	return info.Mode()&os.ModeCharDevice != 0
}
// printf is a helper that ignores printf errors (for UI output).
// Failures writing UI text to stderr are deliberately non-fatal.
// Uses `any` instead of `interface{}` per the repo's gofmt rewrite rule.
func (ui *UIManager) printf(format string, args ...any) {
	_, _ = fmt.Fprintf(ui.output, format, args...)
}

145
cmd/benchmark/main.go Normal file
View File

@@ -0,0 +1,145 @@
// Package main provides a CLI for running gibidify benchmarks.
package main
import (
	"flag"
	"fmt"
	"os"
	"runtime"
	"strconv"
	"strings"

	"github.com/ivuorinen/gibidify/benchmark"
	"github.com/ivuorinen/gibidify/utils"
)
// Command-line flags for the benchmark runner.
var (
	sourceDir       = flag.String("source", "", "Source directory to benchmark (uses temp files if empty)")
	benchmarkType   = flag.String("type", "all", "Benchmark type: all, collection, processing, concurrency, format")
	format          = flag.String("format", "json", "Output format for processing benchmarks")
	concurrency     = flag.Int("concurrency", runtime.NumCPU(), "Concurrency level for processing benchmarks")
	concurrencyList = flag.String("concurrency-list", "1,2,4,8", "Comma-separated list of concurrency levels")
	formatList      = flag.String("format-list", "json,yaml,markdown", "Comma-separated list of formats")
	numFiles        = flag.Int("files", 100, "Number of files to create for benchmarks")
)

// main parses flags, runs the selected benchmarks, and exits non-zero on failure.
func main() {
	flag.Parse()
	if err := runBenchmarks(); err != nil {
		fmt.Fprintf(os.Stderr, "Benchmark failed: %v\n", err)
		os.Exit(1)
	}
}

// runBenchmarks prints a run header and dispatches to the benchmark selected
// by the -type flag; unknown types yield a validation error.
func runBenchmarks() error {
	fmt.Printf("Running gibidify benchmarks...\n")
	fmt.Printf("Source: %s\n", getSourceDescription())
	fmt.Printf("Type: %s\n", *benchmarkType)
	fmt.Printf("CPU cores: %d\n", runtime.NumCPU())
	fmt.Println()
	switch *benchmarkType {
	case "all":
		return benchmark.RunAllBenchmarks(*sourceDir)
	case "collection":
		return runCollectionBenchmark()
	case "processing":
		return runProcessingBenchmark()
	case "concurrency":
		return runConcurrencyBenchmark()
	case "format":
		return runFormatBenchmark()
	default:
		return utils.NewValidationError(utils.CodeValidationFormat, "invalid benchmark type: "+*benchmarkType)
	}
}
// runCollectionBenchmark runs the file-collection benchmark and prints its result.
func runCollectionBenchmark() error {
	fmt.Println("Running file collection benchmark...")
	result, err := benchmark.FileCollectionBenchmark(*sourceDir, *numFiles)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "file collection benchmark failed")
	}
	benchmark.PrintBenchmarkResult(result)
	return nil
}

// runProcessingBenchmark runs the processing benchmark for a single
// format/concurrency pair and prints its result.
func runProcessingBenchmark() error {
	fmt.Printf("Running file processing benchmark (format: %s, concurrency: %d)...\n", *format, *concurrency)
	result, err := benchmark.FileProcessingBenchmark(*sourceDir, *format, *concurrency)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "file processing benchmark failed")
	}
	benchmark.PrintBenchmarkResult(result)
	return nil
}

// runConcurrencyBenchmark runs the processing benchmark across every level
// parsed from -concurrency-list and prints the suite.
func runConcurrencyBenchmark() error {
	concurrencyLevels, err := parseConcurrencyList(*concurrencyList)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeValidation, utils.CodeValidationFormat, "invalid concurrency list")
	}
	fmt.Printf("Running concurrency benchmark (format: %s, levels: %v)...\n", *format, concurrencyLevels)
	suite, err := benchmark.ConcurrencyBenchmark(*sourceDir, *format, concurrencyLevels)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "concurrency benchmark failed")
	}
	benchmark.PrintBenchmarkSuite(suite)
	return nil
}

// runFormatBenchmark runs the format benchmark across every entry parsed from
// -format-list and prints the suite.
func runFormatBenchmark() error {
	formats := parseFormatList(*formatList)
	fmt.Printf("Running format benchmark (formats: %v)...\n", formats)
	suite, err := benchmark.FormatBenchmark(*sourceDir, formats)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "format benchmark failed")
	}
	benchmark.PrintBenchmarkSuite(suite)
	return nil
}
// getSourceDescription describes the benchmark input for the run header:
// either the user-supplied directory or the count of generated temp files.
func getSourceDescription() string {
	if *sourceDir == "" {
		return fmt.Sprintf("temporary files (%d files)", *numFiles)
	}
	return *sourceDir
}
func parseConcurrencyList(list string) ([]int, error) {
parts := strings.Split(list, ",")
levels := make([]int, 0, len(parts))
for _, part := range parts {
part = strings.TrimSpace(part)
var level int
if _, err := fmt.Sscanf(part, "%d", &level); err != nil {
return nil, utils.WrapErrorf(err, utils.ErrorTypeValidation, utils.CodeValidationFormat, "invalid concurrency level: %s", part)
}
if level <= 0 {
return nil, utils.NewValidationError(utils.CodeValidationFormat, "concurrency level must be positive: "+part)
}
levels = append(levels, level)
}
if len(levels) == 0 {
return nil, utils.NewValidationError(utils.CodeValidationFormat, "no valid concurrency levels found")
}
return levels, nil
}
// parseFormatList splits a comma-separated format list into a slice of
// format names, trimming surrounding whitespace and dropping empty entries.
func parseFormatList(list string) []string {
	entries := strings.Split(list, ",")
	formats := make([]string, 0, len(entries))
	for _, entry := range entries {
		trimmed := strings.TrimSpace(entry)
		if trimmed == "" {
			continue
		}
		formats = append(formats, trimmed)
	}
	return formats
}

84
config.example.yaml Normal file
View File

@@ -0,0 +1,84 @@
# gibidify configuration example
# Place this file in one of these locations:
# - $XDG_CONFIG_HOME/gibidify/config.yaml
# - $HOME/.config/gibidify/config.yaml
# - Current directory (if no gibidify.yaml output file exists)
# File size limit in bytes (default: 5MB)
fileSizeLimit: 5242880
# Directories to ignore during scanning
ignoreDirectories:
- vendor
- node_modules
- .git
- dist
- build
- target
- bower_components
- cache
- tmp
- .next
- .nuxt
# FileType registry configuration
fileTypes:
# Enable/disable file type detection entirely (default: true)
enabled: true
# Add custom image extensions
customImageExtensions:
- .webp
- .avif
- .heic
- .jxl
# Add custom binary extensions
customBinaryExtensions:
- .custom
- .proprietary
- .blob
# Add custom language mappings
customLanguages:
.zig: zig
.odin: odin
.v: vlang
.grain: grain
.gleam: gleam
.roc: roc
.janet: janet
.fennel: fennel
.wast: wast
.wat: wat
# Disable specific default image extensions
disabledImageExtensions:
- .bmp # Disable bitmap support
- .tif # Disable TIFF support
# Disable specific default binary extensions
disabledBinaryExtensions:
- .exe # Don't treat executables as binary
- .dll # Don't treat DLL files as binary
# Disable specific default language extensions
disabledLanguageExtensions:
- .bat # Don't detect batch files
- .cmd # Don't detect command files
# Maximum concurrency (optional)
maxConcurrency: 16
# Supported output formats (optional validation)
supportedFormats:
- json
- yaml
- markdown
# File patterns for filtering (optional)
filePatterns:
- "*.go"
- "*.py"
- "*.js"
- "*.ts"

View File

@@ -2,11 +2,24 @@
package config package config
import ( import (
"fmt"
"os" "os"
"path/filepath" "path/filepath"
"strings"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/spf13/viper" "github.com/spf13/viper"
"github.com/ivuorinen/gibidify/utils"
)
const (
// DefaultFileSizeLimit is the default maximum file size (5MB).
DefaultFileSizeLimit = 5242880
// MinFileSizeLimit is the minimum allowed file size limit (1KB).
MinFileSizeLimit = 1024
// MaxFileSizeLimit is the maximum allowed file size limit (100MB).
MaxFileSizeLimit = 104857600
) )
// LoadConfig reads configuration from a YAML file. // LoadConfig reads configuration from a YAML file.
@@ -23,23 +36,51 @@ func LoadConfig() {
} else if home, err := os.UserHomeDir(); err == nil { } else if home, err := os.UserHomeDir(); err == nil {
viper.AddConfigPath(filepath.Join(home, ".config", "gibidify")) viper.AddConfigPath(filepath.Join(home, ".config", "gibidify"))
} }
viper.AddConfigPath(".") // Only add current directory if no config file named gibidify.yaml exists
// to avoid conflicts with the project's output file
if _, err := os.Stat("gibidify.yaml"); os.IsNotExist(err) {
viper.AddConfigPath(".")
}
if err := viper.ReadInConfig(); err != nil { if err := viper.ReadInConfig(); err != nil {
logrus.Infof("Config file not found, using default values: %v", err) logrus.Infof("Config file not found, using default values: %v", err)
setDefaultConfig() setDefaultConfig()
} else { } else {
logrus.Infof("Using config file: %s", viper.ConfigFileUsed()) logrus.Infof("Using config file: %s", viper.ConfigFileUsed())
// Validate configuration after loading
if err := ValidateConfig(); err != nil {
logrus.Warnf("Configuration validation failed: %v", err)
logrus.Info("Falling back to default configuration")
// Reset viper and set defaults when validation fails
viper.Reset()
setDefaultConfig()
}
} }
} }
// setDefaultConfig sets default configuration values. // setDefaultConfig sets default configuration values.
func setDefaultConfig() { func setDefaultConfig() {
viper.SetDefault("fileSizeLimit", 5242880) // 5 MB viper.SetDefault("fileSizeLimit", DefaultFileSizeLimit)
// Default ignored directories. // Default ignored directories.
viper.SetDefault("ignoreDirectories", []string{ viper.SetDefault("ignoreDirectories", []string{
"vendor", "node_modules", ".git", "dist", "build", "target", "bower_components", "cache", "tmp", "vendor", "node_modules", ".git", "dist", "build", "target", "bower_components", "cache", "tmp",
}) })
// FileTypeRegistry defaults
viper.SetDefault("fileTypes.enabled", true)
viper.SetDefault("fileTypes.customImageExtensions", []string{})
viper.SetDefault("fileTypes.customBinaryExtensions", []string{})
viper.SetDefault("fileTypes.customLanguages", map[string]string{})
viper.SetDefault("fileTypes.disabledImageExtensions", []string{})
viper.SetDefault("fileTypes.disabledBinaryExtensions", []string{})
viper.SetDefault("fileTypes.disabledLanguageExtensions", []string{})
// Back-pressure and memory management defaults
viper.SetDefault("backpressure.enabled", true)
viper.SetDefault("backpressure.maxPendingFiles", 1000) // Max files in file channel buffer
viper.SetDefault("backpressure.maxPendingWrites", 100) // Max writes in write channel buffer
viper.SetDefault("backpressure.maxMemoryUsage", 104857600) // 100MB max memory usage
viper.SetDefault("backpressure.memoryCheckInterval", 1000) // Check memory every 1000 files
} }
// GetFileSizeLimit returns the file size limit from configuration. // GetFileSizeLimit returns the file size limit from configuration.
@@ -51,3 +92,303 @@ func GetFileSizeLimit() int64 {
func GetIgnoredDirectories() []string { func GetIgnoredDirectories() []string {
return viper.GetStringSlice("ignoreDirectories") return viper.GetStringSlice("ignoreDirectories")
} }
// ValidateConfig validates the loaded configuration.
func ValidateConfig() error {
var validationErrors []string
// Validate file size limit
fileSizeLimit := viper.GetInt64("fileSizeLimit")
if fileSizeLimit < MinFileSizeLimit {
validationErrors = append(validationErrors, fmt.Sprintf("fileSizeLimit (%d) is below minimum (%d)", fileSizeLimit, MinFileSizeLimit))
}
if fileSizeLimit > MaxFileSizeLimit {
validationErrors = append(validationErrors, fmt.Sprintf("fileSizeLimit (%d) exceeds maximum (%d)", fileSizeLimit, MaxFileSizeLimit))
}
// Validate ignore directories
ignoreDirectories := viper.GetStringSlice("ignoreDirectories")
for i, dir := range ignoreDirectories {
dir = strings.TrimSpace(dir)
if dir == "" {
validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] is empty", i))
continue
}
if strings.Contains(dir, "/") {
validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] (%s) contains path separator - only directory names are allowed", i, dir))
}
if strings.HasPrefix(dir, ".") && dir != ".git" && dir != ".vscode" && dir != ".idea" {
validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] (%s) starts with dot - this may cause unexpected behavior", i, dir))
}
}
// Validate supported output formats if configured
if viper.IsSet("supportedFormats") {
supportedFormats := viper.GetStringSlice("supportedFormats")
validFormats := map[string]bool{"json": true, "yaml": true, "markdown": true}
for i, format := range supportedFormats {
format = strings.ToLower(strings.TrimSpace(format))
if !validFormats[format] {
validationErrors = append(validationErrors, fmt.Sprintf("supportedFormats[%d] (%s) is not a valid format (json, yaml, markdown)", i, format))
}
}
}
// Validate concurrency settings if configured
if viper.IsSet("maxConcurrency") {
maxConcurrency := viper.GetInt("maxConcurrency")
if maxConcurrency < 1 {
validationErrors = append(validationErrors, fmt.Sprintf("maxConcurrency (%d) must be at least 1", maxConcurrency))
}
if maxConcurrency > 100 {
validationErrors = append(validationErrors, fmt.Sprintf("maxConcurrency (%d) is unreasonably high (max 100)", maxConcurrency))
}
}
// Validate file patterns if configured
if viper.IsSet("filePatterns") {
filePatterns := viper.GetStringSlice("filePatterns")
for i, pattern := range filePatterns {
pattern = strings.TrimSpace(pattern)
if pattern == "" {
validationErrors = append(validationErrors, fmt.Sprintf("filePatterns[%d] is empty", i))
continue
}
// Basic validation - patterns should contain at least one alphanumeric character
if !strings.ContainsAny(pattern, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") {
validationErrors = append(validationErrors, fmt.Sprintf("filePatterns[%d] (%s) appears to be invalid", i, pattern))
}
}
}
// Validate FileTypeRegistry configuration
if viper.IsSet("fileTypes.customImageExtensions") {
customImages := viper.GetStringSlice("fileTypes.customImageExtensions")
for i, ext := range customImages {
ext = strings.TrimSpace(ext)
if ext == "" {
validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customImageExtensions[%d] is empty", i))
continue
}
if !strings.HasPrefix(ext, ".") {
validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customImageExtensions[%d] (%s) must start with a dot", i, ext))
}
}
}
if viper.IsSet("fileTypes.customBinaryExtensions") {
customBinary := viper.GetStringSlice("fileTypes.customBinaryExtensions")
for i, ext := range customBinary {
ext = strings.TrimSpace(ext)
if ext == "" {
validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customBinaryExtensions[%d] is empty", i))
continue
}
if !strings.HasPrefix(ext, ".") {
validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customBinaryExtensions[%d] (%s) must start with a dot", i, ext))
}
}
}
if viper.IsSet("fileTypes.customLanguages") {
customLangs := viper.GetStringMapString("fileTypes.customLanguages")
for ext, lang := range customLangs {
ext = strings.TrimSpace(ext)
lang = strings.TrimSpace(lang)
if ext == "" {
validationErrors = append(validationErrors, "fileTypes.customLanguages contains empty extension key")
continue
}
if !strings.HasPrefix(ext, ".") {
validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customLanguages extension (%s) must start with a dot", ext))
}
if lang == "" {
validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customLanguages[%s] has empty language value", ext))
}
}
}
// Validate back-pressure configuration
if viper.IsSet("backpressure.maxPendingFiles") {
maxPendingFiles := viper.GetInt("backpressure.maxPendingFiles")
if maxPendingFiles < 1 {
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingFiles (%d) must be at least 1", maxPendingFiles))
}
if maxPendingFiles > 100000 {
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingFiles (%d) is unreasonably high (max 100000)", maxPendingFiles))
}
}
if viper.IsSet("backpressure.maxPendingWrites") {
maxPendingWrites := viper.GetInt("backpressure.maxPendingWrites")
if maxPendingWrites < 1 {
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingWrites (%d) must be at least 1", maxPendingWrites))
}
if maxPendingWrites > 10000 {
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingWrites (%d) is unreasonably high (max 10000)", maxPendingWrites))
}
}
if viper.IsSet("backpressure.maxMemoryUsage") {
maxMemoryUsage := viper.GetInt64("backpressure.maxMemoryUsage")
if maxMemoryUsage < 1048576 { // 1MB minimum
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxMemoryUsage (%d) must be at least 1MB (1048576 bytes)", maxMemoryUsage))
}
if maxMemoryUsage > 10737418240 { // 10GB maximum
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxMemoryUsage (%d) is unreasonably high (max 10GB)", maxMemoryUsage))
}
}
if viper.IsSet("backpressure.memoryCheckInterval") {
interval := viper.GetInt("backpressure.memoryCheckInterval")
if interval < 1 {
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.memoryCheckInterval (%d) must be at least 1", interval))
}
if interval > 100000 {
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.memoryCheckInterval (%d) is unreasonably high (max 100000)", interval))
}
}
if len(validationErrors) > 0 {
return utils.NewStructuredError(
utils.ErrorTypeConfiguration,
utils.CodeConfigValidation,
"configuration validation failed: "+strings.Join(validationErrors, "; "),
).WithContext("validation_errors", validationErrors)
}
return nil
}
// GetMaxConcurrency returns the maximum concurrency limit from configuration.
// Returns 0 (viper's zero value) when the key is unset.
func GetMaxConcurrency() int {
	return viper.GetInt("maxConcurrency")
}

// GetSupportedFormats returns the supported output formats from configuration.
// Returns nil when the key is unset.
func GetSupportedFormats() []string {
	return viper.GetStringSlice("supportedFormats")
}

// GetFilePatterns returns the file patterns from configuration.
// Returns nil when the key is unset.
func GetFilePatterns() []string {
	return viper.GetStringSlice("filePatterns")
}
// IsValidFormat checks if a format is supported.
// The comparison is case-insensitive and ignores surrounding whitespace.
func IsValidFormat(format string) bool {
	switch strings.ToLower(strings.TrimSpace(format)) {
	case "json", "yaml", "markdown":
		return true
	default:
		return false
	}
}
// ValidateFileSize checks if a file size is within the configured limit.
// A size exactly at the limit is accepted; only sizes strictly above it fail.
func ValidateFileSize(size int64) error {
	limit := GetFileSizeLimit()
	if size <= limit {
		return nil
	}
	return utils.NewStructuredError(
		utils.ErrorTypeValidation,
		utils.CodeValidationSize,
		fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", size, limit),
	).WithContext("file_size", size).WithContext("size_limit", limit)
}
// ValidateOutputFormat checks if an output format is valid, returning a
// structured validation error for anything other than json, yaml, or markdown.
func ValidateOutputFormat(format string) error {
	if IsValidFormat(format) {
		return nil
	}
	return utils.NewStructuredError(
		utils.ErrorTypeValidation,
		utils.CodeValidationFormat,
		fmt.Sprintf("unsupported output format: %s (supported: json, yaml, markdown)", format),
	).WithContext("format", format)
}
// ValidateConcurrency checks if a concurrency level is valid.
// The level must be at least 1; when maxConcurrency is configured it must
// also not exceed that limit.
func ValidateConcurrency(concurrency int) error {
	if concurrency < 1 {
		return utils.NewStructuredError(
			utils.ErrorTypeValidation,
			utils.CodeValidationFormat,
			fmt.Sprintf("concurrency (%d) must be at least 1", concurrency),
		).WithContext("concurrency", concurrency)
	}
	// The upper bound is only enforced when explicitly configured.
	if !viper.IsSet("maxConcurrency") {
		return nil
	}
	maxConcurrency := GetMaxConcurrency()
	if concurrency <= maxConcurrency {
		return nil
	}
	return utils.NewStructuredError(
		utils.ErrorTypeValidation,
		utils.CodeValidationFormat,
		fmt.Sprintf("concurrency (%d) exceeds maximum (%d)", concurrency, maxConcurrency),
	).WithContext("concurrency", concurrency).WithContext("max_concurrency", maxConcurrency)
}
// FileTypeRegistry configuration getters.

// GetFileTypesEnabled returns whether file type detection is enabled.
func GetFileTypesEnabled() bool {
	return viper.GetBool("fileTypes.enabled")
}

// GetCustomImageExtensions returns custom image extensions from configuration.
// Entries are expected to start with a dot (enforced by ValidateConfig).
func GetCustomImageExtensions() []string {
	return viper.GetStringSlice("fileTypes.customImageExtensions")
}

// GetCustomBinaryExtensions returns custom binary extensions from configuration.
// Entries are expected to start with a dot (enforced by ValidateConfig).
func GetCustomBinaryExtensions() []string {
	return viper.GetStringSlice("fileTypes.customBinaryExtensions")
}

// GetCustomLanguages returns custom language mappings (extension -> language
// name) from configuration.
func GetCustomLanguages() map[string]string {
	return viper.GetStringMapString("fileTypes.customLanguages")
}

// GetDisabledImageExtensions returns disabled image extensions from configuration.
func GetDisabledImageExtensions() []string {
	return viper.GetStringSlice("fileTypes.disabledImageExtensions")
}

// GetDisabledBinaryExtensions returns disabled binary extensions from configuration.
func GetDisabledBinaryExtensions() []string {
	return viper.GetStringSlice("fileTypes.disabledBinaryExtensions")
}

// GetDisabledLanguageExtensions returns disabled language extensions from configuration.
func GetDisabledLanguageExtensions() []string {
	return viper.GetStringSlice("fileTypes.disabledLanguageExtensions")
}
// Back-pressure configuration getters.
// Defaults for all backpressure.* keys are established in setDefaultConfig.

// GetBackpressureEnabled returns whether back-pressure management is enabled.
func GetBackpressureEnabled() bool {
	return viper.GetBool("backpressure.enabled")
}

// GetMaxPendingFiles returns the maximum number of files that can be pending in the file channel.
func GetMaxPendingFiles() int {
	return viper.GetInt("backpressure.maxPendingFiles")
}

// GetMaxPendingWrites returns the maximum number of writes that can be pending in the write channel.
func GetMaxPendingWrites() int {
	return viper.GetInt("backpressure.maxPendingWrites")
}

// GetMaxMemoryUsage returns the maximum memory usage in bytes before back-pressure kicks in.
func GetMaxMemoryUsage() int64 {
	return viper.GetInt64("backpressure.maxMemoryUsage")
}

// GetMemoryCheckInterval returns how often to check memory usage (in number of files processed).
func GetMemoryCheckInterval() int {
	return viper.GetInt("backpressure.memoryCheckInterval")
}

View File

@@ -0,0 +1,174 @@
package config
import (
"testing"
"github.com/spf13/viper"
)
// TestFileTypeRegistryConfig tests the FileTypeRegistry configuration functionality.
// It covers defaults, round-tripping values through viper, and validation of
// the fileTypes.* keys. Every subtest resets viper first so state cannot leak
// between cases.
func TestFileTypeRegistryConfig(t *testing.T) {
	// Test default values
	t.Run("DefaultValues", func(t *testing.T) {
		viper.Reset()
		setDefaultConfig()
		// Detection defaults to enabled; all custom/disabled lists default to empty.
		if !GetFileTypesEnabled() {
			t.Error("Expected file types to be enabled by default")
		}
		if len(GetCustomImageExtensions()) != 0 {
			t.Error("Expected custom image extensions to be empty by default")
		}
		if len(GetCustomBinaryExtensions()) != 0 {
			t.Error("Expected custom binary extensions to be empty by default")
		}
		if len(GetCustomLanguages()) != 0 {
			t.Error("Expected custom languages to be empty by default")
		}
		if len(GetDisabledImageExtensions()) != 0 {
			t.Error("Expected disabled image extensions to be empty by default")
		}
		if len(GetDisabledBinaryExtensions()) != 0 {
			t.Error("Expected disabled binary extensions to be empty by default")
		}
		if len(GetDisabledLanguageExtensions()) != 0 {
			t.Error("Expected disabled language extensions to be empty by default")
		}
	})
	// Test configuration setting and getting
	t.Run("ConfigurationSetGet", func(t *testing.T) {
		viper.Reset()
		// Set test values
		viper.Set("fileTypes.enabled", false)
		viper.Set("fileTypes.customImageExtensions", []string{".webp", ".avif"})
		viper.Set("fileTypes.customBinaryExtensions", []string{".custom", ".mybin"})
		viper.Set("fileTypes.customLanguages", map[string]string{
			".zig": "zig",
			".v":   "vlang",
		})
		viper.Set("fileTypes.disabledImageExtensions", []string{".gif", ".bmp"})
		viper.Set("fileTypes.disabledBinaryExtensions", []string{".exe", ".dll"})
		viper.Set("fileTypes.disabledLanguageExtensions", []string{".rb", ".pl"})
		// Test getter functions
		if GetFileTypesEnabled() {
			t.Error("Expected file types to be disabled")
		}
		// Slice getters must preserve both length and element order.
		customImages := GetCustomImageExtensions()
		expectedImages := []string{".webp", ".avif"}
		if len(customImages) != len(expectedImages) {
			t.Errorf("Expected %d custom image extensions, got %d", len(expectedImages), len(customImages))
		}
		for i, ext := range expectedImages {
			if customImages[i] != ext {
				t.Errorf("Expected custom image extension %s, got %s", ext, customImages[i])
			}
		}
		customBinary := GetCustomBinaryExtensions()
		expectedBinary := []string{".custom", ".mybin"}
		if len(customBinary) != len(expectedBinary) {
			t.Errorf("Expected %d custom binary extensions, got %d", len(expectedBinary), len(customBinary))
		}
		for i, ext := range expectedBinary {
			if customBinary[i] != ext {
				t.Errorf("Expected custom binary extension %s, got %s", ext, customBinary[i])
			}
		}
		customLangs := GetCustomLanguages()
		expectedLangs := map[string]string{
			".zig": "zig",
			".v":   "vlang",
		}
		if len(customLangs) != len(expectedLangs) {
			t.Errorf("Expected %d custom languages, got %d", len(expectedLangs), len(customLangs))
		}
		for ext, lang := range expectedLangs {
			if customLangs[ext] != lang {
				t.Errorf("Expected custom language %s -> %s, got %s", ext, lang, customLangs[ext])
			}
		}
		// Disabled lists are only length-checked; element order is not asserted here.
		disabledImages := GetDisabledImageExtensions()
		expectedDisabledImages := []string{".gif", ".bmp"}
		if len(disabledImages) != len(expectedDisabledImages) {
			t.Errorf("Expected %d disabled image extensions, got %d", len(expectedDisabledImages), len(disabledImages))
		}
		disabledBinary := GetDisabledBinaryExtensions()
		expectedDisabledBinary := []string{".exe", ".dll"}
		if len(disabledBinary) != len(expectedDisabledBinary) {
			t.Errorf("Expected %d disabled binary extensions, got %d", len(expectedDisabledBinary), len(disabledBinary))
		}
		disabledLangs := GetDisabledLanguageExtensions()
		expectedDisabledLangs := []string{".rb", ".pl"}
		if len(disabledLangs) != len(expectedDisabledLangs) {
			t.Errorf("Expected %d disabled language extensions, got %d", len(expectedDisabledLangs), len(disabledLangs))
		}
	})
	// Test validation
	t.Run("ValidationSuccess", func(t *testing.T) {
		viper.Reset()
		setDefaultConfig()
		// Set valid configuration
		viper.Set("fileTypes.customImageExtensions", []string{".webp", ".avif"})
		viper.Set("fileTypes.customBinaryExtensions", []string{".custom"})
		viper.Set("fileTypes.customLanguages", map[string]string{
			".zig": "zig",
			".v":   "vlang",
		})
		err := ValidateConfig()
		if err != nil {
			t.Errorf("Expected validation to pass with valid config, got error: %v", err)
		}
	})
	t.Run("ValidationFailure", func(t *testing.T) {
		// Test invalid custom image extensions
		viper.Reset()
		setDefaultConfig()
		viper.Set("fileTypes.customImageExtensions", []string{"", "webp"}) // Empty and missing dot
		err := ValidateConfig()
		if err == nil {
			t.Error("Expected validation to fail with invalid custom image extensions")
		}
		// Test invalid custom binary extensions
		viper.Reset()
		setDefaultConfig()
		viper.Set("fileTypes.customBinaryExtensions", []string{"custom"}) // Missing dot
		err = ValidateConfig()
		if err == nil {
			t.Error("Expected validation to fail with invalid custom binary extensions")
		}
		// Test invalid custom languages
		viper.Reset()
		setDefaultConfig()
		viper.Set("fileTypes.customLanguages", map[string]string{
			"zig": "zig", // Missing dot in extension
			".v":  "",    // Empty language
		})
		err = ValidateConfig()
		if err == nil {
			t.Error("Expected validation to fail with invalid custom languages")
		}
	})
}

View File

@@ -2,40 +2,38 @@ package config_test
import ( import (
"os" "os"
"path/filepath" "strings"
"testing" "testing"
configpkg "github.com/ivuorinen/gibidify/config"
"github.com/spf13/viper" "github.com/spf13/viper"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/testutil"
"github.com/ivuorinen/gibidify/utils"
)
const (
defaultFileSizeLimit = 5242880
testFileSizeLimit = 123456
) )
// TestDefaultConfig verifies that if no config file is found, // TestDefaultConfig verifies that if no config file is found,
// the default configuration values are correctly set. // the default configuration values are correctly set.
func TestDefaultConfig(t *testing.T) { func TestDefaultConfig(t *testing.T) {
// Create a temporary directory to ensure no config file is present. // Create a temporary directory to ensure no config file is present.
tmpDir, err := os.MkdirTemp("", "gibidify_config_test_default") tmpDir := t.TempDir()
if err != nil {
t.Fatalf("Failed to create temp directory: %v", err)
}
defer func() {
if err := os.RemoveAll(tmpDir); err != nil {
t.Fatalf("cleanup failed: %v", err)
}
}()
// Point Viper to the temp directory with no config file. // Point Viper to the temp directory with no config file.
originalConfigPaths := viper.ConfigFileUsed() originalConfigPaths := viper.ConfigFileUsed()
viper.Reset() testutil.ResetViperConfig(t, tmpDir)
viper.AddConfigPath(tmpDir)
configpkg.LoadConfig()
// Check defaults // Check defaults
defaultSizeLimit := configpkg.GetFileSizeLimit() defaultSizeLimit := config.GetFileSizeLimit()
if defaultSizeLimit != 5242880 { if defaultSizeLimit != defaultFileSizeLimit {
t.Errorf("Expected default file size limit of 5242880, got %d", defaultSizeLimit) t.Errorf("Expected default file size limit of 5242880, got %d", defaultSizeLimit)
} }
ignoredDirs := configpkg.GetIgnoredDirectories() ignoredDirs := config.GetIgnoredDirectories()
if len(ignoredDirs) == 0 { if len(ignoredDirs) == 0 {
t.Errorf("Expected some default ignored directories, got none") t.Errorf("Expected some default ignored directories, got none")
} }
@@ -47,15 +45,7 @@ func TestDefaultConfig(t *testing.T) {
// TestLoadConfigFile verifies that when a valid config file is present, // TestLoadConfigFile verifies that when a valid config file is present,
// viper loads the specified values correctly. // viper loads the specified values correctly.
func TestLoadConfigFile(t *testing.T) { func TestLoadConfigFile(t *testing.T) {
tmpDir, err := os.MkdirTemp("", "gibidify_config_test_file") tmpDir := t.TempDir()
if err != nil {
t.Fatalf("Failed to create temp directory: %v", err)
}
defer func() {
if err := os.RemoveAll(tmpDir); err != nil {
t.Fatalf("cleanup failed: %v", err)
}
}()
// Prepare a minimal config file // Prepare a minimal config file
configContent := []byte(`--- configContent := []byte(`---
@@ -65,22 +55,17 @@ ignoreDirectories:
- "testdir2" - "testdir2"
`) `)
configPath := filepath.Join(tmpDir, "config.yaml") testutil.CreateTestFile(t, tmpDir, "config.yaml", configContent)
if err := os.WriteFile(configPath, configContent, 0644); err != nil {
t.Fatalf("Failed to write config file: %v", err)
}
// Reset viper and point to the new config path // Reset viper and point to the new config path
viper.Reset() viper.Reset()
viper.AddConfigPath(tmpDir) viper.AddConfigPath(tmpDir)
// Force Viper to read our config file // Force Viper to read our config file
if err := viper.ReadInConfig(); err != nil { testutil.MustSucceed(t, viper.ReadInConfig(), "reading config file")
t.Fatalf("Could not read config file: %v", err)
}
// Validate loaded data // Validate loaded data
if got := viper.GetInt64("fileSizeLimit"); got != 123456 { if got := viper.GetInt64("fileSizeLimit"); got != testFileSizeLimit {
t.Errorf("Expected fileSizeLimit=123456, got %d", got) t.Errorf("Expected fileSizeLimit=123456, got %d", got)
} }
@@ -89,3 +74,283 @@ ignoreDirectories:
t.Errorf("Expected [\"testdir1\", \"testdir2\"], got %v", ignored) t.Errorf("Expected [\"testdir1\", \"testdir2\"], got %v", ignored)
} }
} }
// TestValidateConfig tests the configuration validation functionality.
// Each case seeds viper with a partial configuration, lets LoadConfig fill in
// defaults, and asserts whether ValidateConfig accepts or rejects the result,
// including the error's substring, type, and code for the failing cases.
func TestValidateConfig(t *testing.T) {
	tests := []struct {
		name        string
		config      map[string]interface{}
		wantErr     bool
		errContains string // substring expected in the validation error, if any
	}{
		{
			name: "valid default config",
			config: map[string]interface{}{
				"fileSizeLimit":     config.DefaultFileSizeLimit,
				"ignoreDirectories": []string{"node_modules", ".git"},
			},
			wantErr: false,
		},
		{
			name: "file size limit too small",
			config: map[string]interface{}{
				"fileSizeLimit": config.MinFileSizeLimit - 1,
			},
			wantErr:     true,
			errContains: "fileSizeLimit",
		},
		{
			name: "file size limit too large",
			config: map[string]interface{}{
				"fileSizeLimit": config.MaxFileSizeLimit + 1,
			},
			wantErr:     true,
			errContains: "fileSizeLimit",
		},
		{
			name: "empty ignore directory",
			config: map[string]interface{}{
				"ignoreDirectories": []string{"node_modules", "", ".git"},
			},
			wantErr:     true,
			errContains: "ignoreDirectories",
		},
		{
			name: "ignore directory with path separator",
			config: map[string]interface{}{
				"ignoreDirectories": []string{"node_modules", "src/build", ".git"},
			},
			wantErr:     true,
			errContains: "path separator",
		},
		{
			name: "invalid supported format",
			config: map[string]interface{}{
				"supportedFormats": []string{"json", "xml", "yaml"},
			},
			wantErr:     true,
			errContains: "not a valid format",
		},
		{
			name: "invalid max concurrency",
			config: map[string]interface{}{
				"maxConcurrency": 0,
			},
			wantErr:     true,
			errContains: "maxConcurrency",
		},
		{
			name: "valid comprehensive config",
			config: map[string]interface{}{
				"fileSizeLimit":     config.DefaultFileSizeLimit,
				"ignoreDirectories": []string{"node_modules", ".git", ".vscode"},
				"supportedFormats":  []string{"json", "yaml", "markdown"},
				"maxConcurrency":    8,
				"filePatterns":      []string{"*.go", "*.js", "*.py"},
			},
			wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Reset viper for each test
			viper.Reset()
			// Set test configuration
			for key, value := range tt.config {
				viper.Set(key, value)
			}
			// Load defaults for missing values
			config.LoadConfig()
			err := config.ValidateConfig()
			if tt.wantErr {
				if err == nil {
					t.Errorf("Expected error but got none")
					return
				}
				if tt.errContains != "" && !strings.Contains(err.Error(), tt.errContains) {
					t.Errorf("Expected error to contain %q, got %q", tt.errContains, err.Error())
				}
				// Check that it's a structured error carrying the configuration
				// error type and validation code.
				var structErr *utils.StructuredError
				if !errorAs(err, &structErr) {
					t.Errorf("Expected structured error, got %T", err)
					return
				}
				if structErr.Type != utils.ErrorTypeConfiguration {
					t.Errorf("Expected error type %v, got %v", utils.ErrorTypeConfiguration, structErr.Type)
				}
				if structErr.Code != utils.CodeConfigValidation {
					t.Errorf("Expected error code %v, got %v", utils.CodeConfigValidation, structErr.Code)
				}
			} else {
				if err != nil {
					t.Errorf("Expected no error but got: %v", err)
				}
			}
		})
	}
}
// TestValidationFunctions tests individual validation functions.
// Subtests exercise IsValidFormat, ValidateFileSize, ValidateOutputFormat,
// and ValidateConcurrency with table-driven boundary cases.
func TestValidationFunctions(t *testing.T) {
	t.Run("IsValidFormat", func(t *testing.T) {
		tests := []struct {
			format string
			valid  bool
		}{
			{"json", true},
			{"yaml", true},
			{"markdown", true},
			{"JSON", true}, // matching is case-insensitive
			{"xml", false},
			{"txt", false},
			{"", false},
			{" json ", true}, // surrounding whitespace is trimmed
		}
		for _, tt := range tests {
			result := config.IsValidFormat(tt.format)
			if result != tt.valid {
				t.Errorf("IsValidFormat(%q) = %v, want %v", tt.format, result, tt.valid)
			}
		}
	})
	t.Run("ValidateFileSize", func(t *testing.T) {
		viper.Reset()
		viper.Set("fileSizeLimit", config.DefaultFileSizeLimit)
		tests := []struct {
			name    string
			size    int64
			wantErr bool
		}{
			// A size exactly at the limit is accepted; only strictly above fails.
			{"size within limit", config.DefaultFileSizeLimit - 1, false},
			{"size at limit", config.DefaultFileSizeLimit, false},
			{"size exceeds limit", config.DefaultFileSizeLimit + 1, true},
			{"zero size", 0, false},
		}
		for _, tt := range tests {
			err := config.ValidateFileSize(tt.size)
			if (err != nil) != tt.wantErr {
				t.Errorf("%s: ValidateFileSize(%d) error = %v, wantErr %v", tt.name, tt.size, err, tt.wantErr)
			}
		}
	})
	t.Run("ValidateOutputFormat", func(t *testing.T) {
		tests := []struct {
			format  string
			wantErr bool
		}{
			{"json", false},
			{"yaml", false},
			{"markdown", false},
			{"xml", true},
			{"txt", true},
			{"", true},
		}
		for _, tt := range tests {
			err := config.ValidateOutputFormat(tt.format)
			if (err != nil) != tt.wantErr {
				t.Errorf("ValidateOutputFormat(%q) error = %v, wantErr %v", tt.format, err, tt.wantErr)
			}
		}
	})
	t.Run("ValidateConcurrency", func(t *testing.T) {
		tests := []struct {
			name           string
			concurrency    int
			maxConcurrency int
			setMax         bool // whether to set maxConcurrency in viper
			wantErr        bool
		}{
			{"valid concurrency", 4, 0, false, false},
			{"minimum concurrency", 1, 0, false, false},
			{"zero concurrency", 0, 0, false, true},
			{"negative concurrency", -1, 0, false, true},
			{"concurrency within max", 4, 8, true, false},
			{"concurrency exceeds max", 16, 8, true, true},
		}
		for _, tt := range tests {
			// Reset so the maxConcurrency key is only present when setMax is true.
			viper.Reset()
			if tt.setMax {
				viper.Set("maxConcurrency", tt.maxConcurrency)
			}
			err := config.ValidateConcurrency(tt.concurrency)
			if (err != nil) != tt.wantErr {
				t.Errorf("%s: ValidateConcurrency(%d) error = %v, wantErr %v", tt.name, tt.concurrency, err, tt.wantErr)
			}
		}
	})
}
// TestLoadConfigWithValidation tests that invalid config files fall back to defaults.
// The config file below is intentionally invalid twice over: fileSizeLimit is
// below the minimum and ignoreDirectories contains an empty entry.
func TestLoadConfigWithValidation(t *testing.T) {
	// Create a temporary config file with invalid content
	configContent := `
fileSizeLimit: 100
ignoreDirectories:
  - node_modules
  - ""
  - .git
`
	tempDir := t.TempDir()
	configFile := tempDir + "/config.yaml"
	err := os.WriteFile(configFile, []byte(configContent), 0o644)
	if err != nil {
		t.Fatalf("Failed to write config file: %v", err)
	}
	// Reset viper and set config path
	viper.Reset()
	viper.AddConfigPath(tempDir)
	// This should load the config but validation should fail and fall back to defaults
	config.LoadConfig()
	// Should have fallen back to defaults due to validation failure
	if config.GetFileSizeLimit() != int64(config.DefaultFileSizeLimit) {
		t.Errorf("Expected default file size limit after validation failure, got %d", config.GetFileSizeLimit())
	}
	// The invalid empty directory entry must not survive the fallback.
	if containsString(config.GetIgnoredDirectories(), "") {
		t.Errorf("Expected ignored directories not to contain empty string after validation failure, got %v", config.GetIgnoredDirectories())
	}
}
// Helper functions
// containsString reports whether item occurs anywhere in slice.
func containsString(slice []string, item string) bool {
	for i := range slice {
		if slice[i] == item {
			return true
		}
	}
	return false
}
// errorAs is a minimal stand-in for errors.As limited to *utils.StructuredError
// targets. Unlike errors.As it does not unwrap wrapped errors: only a direct
// *utils.StructuredError value matches.
func errorAs(err error, target interface{}) bool {
	structErr, ok := err.(*utils.StructuredError)
	if !ok {
		return false
	}
	ptr, ok := target.(**utils.StructuredError)
	if !ok {
		return false
	}
	*ptr = structErr
	return true
}

196
fileproc/backpressure.go Normal file
View File

@@ -0,0 +1,196 @@
// Package fileproc provides back-pressure management for memory optimization.
package fileproc
import (
"context"
"runtime"
"sync"
"sync/atomic"
"time"
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/config"
)
// BackpressureManager manages memory usage and applies back-pressure when needed.
// All configuration fields are populated once by NewBackpressureManager from
// the backpressure.* settings and are read-only afterwards.
type BackpressureManager struct {
	enabled             bool  // mirrors backpressure.enabled
	maxMemoryUsage      int64 // byte threshold above which back-pressure kicks in
	memoryCheckInterval int   // sample memory every N processed files
	maxPendingFiles     int   // buffer capacity for the file channel
	maxPendingWrites    int   // buffer capacity for the write channel
	filesProcessed      int64 // accessed atomically; count of ShouldApplyBackpressure calls
	mu                  sync.RWMutex // guards memoryWarningLogged and lastMemoryCheck
	memoryWarningLogged bool      // true while usage is over the limit (log-once latch)
	lastMemoryCheck     time.Time // time of the most recent memory sample
}
// NewBackpressureManager creates a new back-pressure manager populated from
// the application's backpressure.* configuration settings.
func NewBackpressureManager() *BackpressureManager {
	mgr := &BackpressureManager{lastMemoryCheck: time.Now()}
	mgr.enabled = config.GetBackpressureEnabled()
	mgr.maxMemoryUsage = config.GetMaxMemoryUsage()
	mgr.memoryCheckInterval = config.GetMemoryCheckInterval()
	mgr.maxPendingFiles = config.GetMaxPendingFiles()
	mgr.maxPendingWrites = config.GetMaxPendingWrites()
	return mgr
}
// CreateChannels creates properly sized channels based on back-pressure
// configuration: buffered with the configured limits when back-pressure is
// enabled, unbuffered otherwise.
func (bp *BackpressureManager) CreateChannels() (chan string, chan WriteRequest) {
	if !bp.enabled {
		// Unbuffered channels preserve the default (lock-step) behavior.
		fileCh := make(chan string)
		writeCh := make(chan WriteRequest)
		logrus.Debug("Created unbuffered channels (back-pressure disabled)")
		return fileCh, writeCh
	}
	fileCh := make(chan string, bp.maxPendingFiles)
	writeCh := make(chan WriteRequest, bp.maxPendingWrites)
	logrus.Debugf("Created buffered channels: files=%d, writes=%d", bp.maxPendingFiles, bp.maxPendingWrites)
	return fileCh, writeCh
}
// ShouldApplyBackpressure checks if back-pressure should be applied.
//
// Every call increments the processed-file counter; actual memory sampling
// only happens every memoryCheckInterval files, so most calls return false
// cheaply. The warning log is latched so it fires once per over-limit episode,
// with hysteresis: it resets only after usage drops below 80% of the limit.
// The ctx parameter is currently unused; it is kept for interface stability.
func (bp *BackpressureManager) ShouldApplyBackpressure(ctx context.Context) bool {
	if !bp.enabled {
		return false
	}
	_ = ctx // reserved for future cancellation support

	// Check if we should evaluate memory usage. Guard against a non-positive
	// interval to avoid a division-by-zero panic when configuration
	// validation was bypassed. Keep the modulo in int64 so the counter does
	// not truncate on 32-bit platforms.
	filesProcessed := atomic.AddInt64(&bp.filesProcessed, 1)
	interval := int64(bp.memoryCheckInterval)
	if interval <= 0 || filesProcessed%interval != 0 {
		return false
	}

	// Sample current heap usage.
	var m runtime.MemStats
	runtime.ReadMemStats(&m)
	currentMemory := int64(m.Alloc)

	bp.mu.Lock()
	defer bp.mu.Unlock()
	bp.lastMemoryCheck = time.Now()

	// Over the limit: log once per episode and request back-pressure.
	if currentMemory > bp.maxMemoryUsage {
		if !bp.memoryWarningLogged {
			logrus.Warnf("Memory usage (%d bytes) exceeds limit (%d bytes), applying back-pressure",
				currentMemory, bp.maxMemoryUsage)
			bp.memoryWarningLogged = true
		}
		return true
	}

	// Hysteresis: only clear the warning latch once usage is below 80% of the limit.
	if bp.memoryWarningLogged && currentMemory < bp.maxMemoryUsage*8/10 {
		logrus.Infof("Memory usage normalized (%d bytes), removing back-pressure", currentMemory)
		bp.memoryWarningLogged = false
	}
	return false
}
// ApplyBackpressure applies back-pressure by triggering garbage collection
// and pausing briefly so freed memory can be reclaimed. It returns early if
// ctx is cancelled during the pause, and is a no-op when back-pressure is
// disabled.
func (bp *BackpressureManager) ApplyBackpressure(ctx context.Context) {
	if !bp.enabled {
		return
	}

	// Force a collection, then give it a moment to complete.
	runtime.GC()
	select {
	case <-ctx.Done():
		return
	case <-time.After(10 * time.Millisecond):
	}

	// Report post-GC heap usage for debugging.
	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)
	logrus.Debugf("Applied back-pressure: memory after GC = %d bytes", ms.Alloc)
}
// GetStats returns a snapshot of current back-pressure statistics, including
// live heap usage sampled via runtime.ReadMemStats.
func (bp *BackpressureManager) GetStats() BackpressureStats {
	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)

	bp.mu.RLock()
	defer bp.mu.RUnlock()
	return BackpressureStats{
		Enabled:             bp.enabled,
		FilesProcessed:      atomic.LoadInt64(&bp.filesProcessed),
		CurrentMemoryUsage:  int64(ms.Alloc),
		MaxMemoryUsage:      bp.maxMemoryUsage,
		MemoryWarningActive: bp.memoryWarningLogged,
		LastMemoryCheck:     bp.lastMemoryCheck,
		MaxPendingFiles:     bp.maxPendingFiles,
		MaxPendingWrites:    bp.maxPendingWrites,
	}
}
// BackpressureStats represents back-pressure manager statistics.
// It is a point-in-time snapshot produced by GetStats and is safe to copy.
type BackpressureStats struct {
	Enabled             bool      `json:"enabled"`
	FilesProcessed      int64     `json:"files_processed"`
	CurrentMemoryUsage  int64     `json:"current_memory_usage"` // bytes (runtime heap Alloc at snapshot time)
	MaxMemoryUsage      int64     `json:"max_memory_usage"`     // configured byte threshold
	MemoryWarningActive bool      `json:"memory_warning_active"`
	LastMemoryCheck     time.Time `json:"last_memory_check"`
	MaxPendingFiles     int       `json:"max_pending_files"`
	MaxPendingWrites    int       `json:"max_pending_writes"`
}
// WaitForChannelSpace briefly pauses when either channel is over 90% of its
// configured capacity, giving consumers time to drain. It is a no-op when
// back-pressure is disabled and returns immediately if ctx is cancelled
// during a pause.
func (bp *BackpressureManager) WaitForChannelSpace(ctx context.Context, fileCh chan string, writeCh chan WriteRequest) {
	if !bp.enabled {
		return
	}

	const drainPause = 5 * time.Millisecond

	// File channel over 90% capacity: pause briefly so it can drain.
	if pending := len(fileCh); pending > bp.maxPendingFiles*9/10 {
		logrus.Debugf("File channel is %d%% full, waiting for space", pending*100/bp.maxPendingFiles)
		select {
		case <-ctx.Done():
			return
		case <-time.After(drainPause):
		}
	}

	// Write channel over 90% capacity: same brief pause.
	if pending := len(writeCh); pending > bp.maxPendingWrites*9/10 {
		logrus.Debugf("Write channel is %d%% full, waiting for space", pending*100/bp.maxPendingWrites)
		select {
		case <-ctx.Done():
			return
		case <-time.After(drainPause):
		}
	}
}
// LogBackpressureInfo logs the back-pressure configuration and whether the
// feature is active.
func (bp *BackpressureManager) LogBackpressureInfo() {
	if !bp.enabled {
		logrus.Info("Back-pressure disabled")
		return
	}
	logrus.Infof("Back-pressure enabled: maxMemory=%dMB, fileBuffer=%d, writeBuffer=%d, checkInterval=%d",
		bp.maxMemoryUsage/1024/1024, bp.maxPendingFiles, bp.maxPendingWrites, bp.memoryCheckInterval)
}

127
fileproc/cache.go Normal file
View File

@@ -0,0 +1,127 @@
package fileproc
// getNormalizedExtension efficiently extracts and normalizes the file extension with caching.
// The cache is keyed by the full filename and trimmed once it reaches twice
// maxCacheSize (see clearExtCache).
func (r *FileTypeRegistry) getNormalizedExtension(filename string) string {
	// Fast path: look the filename up under a read lock.
	r.cacheMutex.RLock()
	if ext, exists := r.extCache[filename]; exists {
		r.cacheMutex.RUnlock()
		return ext
	}
	r.cacheMutex.RUnlock()

	// Cache miss: compute outside any lock. Between the RUnlock above and the
	// Lock below another goroutine may insert the same key; the duplicate
	// write is harmless because the value is deterministic.
	ext := normalizeExtension(filename)

	// Cache the result (write lock).
	r.cacheMutex.Lock()
	// Trim when the cache reaches twice the configured limit.
	if len(r.extCache) >= r.maxCacheSize*2 {
		r.clearExtCache()
		// NOTE(review): this counter is incremented under cacheMutex, while
		// other stats go through r.updateStats — confirm both paths use the
		// same lock or this is a data race.
		r.stats.CacheEvictions++
	}
	r.extCache[filename] = ext
	r.cacheMutex.Unlock()
	return ext
}
// getFileTypeResult gets cached file type detection result or computes it.
func (r *FileTypeRegistry) getFileTypeResult(filename string) FileTypeResult {
ext := r.getNormalizedExtension(filename)
// Update statistics
r.updateStats(func() {
r.stats.TotalLookups++
})
// Try cache first (read lock)
r.cacheMutex.RLock()
if result, exists := r.resultCache[ext]; exists {
r.cacheMutex.RUnlock()
r.updateStats(func() {
r.stats.CacheHits++
})
return result
}
r.cacheMutex.RUnlock()
// Cache miss
r.updateStats(func() {
r.stats.CacheMisses++
})
// Compute result
result := FileTypeResult{
Extension: ext,
IsImage: r.imageExts[ext],
IsBinary: r.binaryExts[ext],
Language: r.languageMap[ext],
}
// Handle special cases for binary detection (like .DS_Store)
if !result.IsBinary && isSpecialFile(filename, r.binaryExts) {
result.IsBinary = true
}
// Cache the result (write lock)
r.cacheMutex.Lock()
if len(r.resultCache) >= r.maxCacheSize {
r.clearResultCache()
r.stats.CacheEvictions++
}
r.resultCache[ext] = result
r.cacheMutex.Unlock()
return result
}
// clearExtCache clears half of the extension cache (LRU-like behavior).
func (r *FileTypeRegistry) clearExtCache() {
r.clearCache(&r.extCache, r.maxCacheSize)
}
// clearResultCache clears half of the result cache.
func (r *FileTypeRegistry) clearResultCache() {
newCache := make(map[string]FileTypeResult, r.maxCacheSize)
count := 0
for k, v := range r.resultCache {
if count >= r.maxCacheSize/2 {
break
}
newCache[k] = v
count++
}
r.resultCache = newCache
}
// clearCache is a generic cache clearing function.
func (r *FileTypeRegistry) clearCache(cache *map[string]string, maxSize int) {
newCache := make(map[string]string, maxSize)
count := 0
for k, v := range *cache {
if count >= maxSize/2 {
break
}
newCache[k] = v
count++
}
*cache = newCache
}
// invalidateCache clears both caches when the registry is modified.
func (r *FileTypeRegistry) invalidateCache() {
r.cacheMutex.Lock()
defer r.cacheMutex.Unlock()
r.extCache = make(map[string]string, r.maxCacheSize)
r.resultCache = make(map[string]FileTypeResult, r.maxCacheSize)
r.stats.CacheEvictions++
}
// updateStats safely updates statistics.
func (r *FileTypeRegistry) updateStats(fn func()) {
r.cacheMutex.Lock()
fn()
r.cacheMutex.Unlock()
}

View File

@@ -4,6 +4,6 @@ package fileproc
// CollectFiles scans the given root directory using the default walker (ProdWalker)
// and returns a slice of file paths.
func CollectFiles(root string) ([]string, error) {
	w := NewProdWalker()
	return w.Walk(root)
}

View File

@@ -4,7 +4,7 @@ import (
	"os"
	"testing"

	"github.com/ivuorinen/gibidify/fileproc"
)

func TestCollectFilesWithFakeWalker(t *testing.T) {

40
fileproc/config.go Normal file
View File

@@ -0,0 +1,40 @@
package fileproc
import "strings"
// ApplyCustomExtensions applies custom extensions from configuration.
func (r *FileTypeRegistry) ApplyCustomExtensions(customImages, customBinary []string, customLanguages map[string]string) {
// Add custom image extensions
r.addExtensions(customImages, r.AddImageExtension)
// Add custom binary extensions
r.addExtensions(customBinary, r.AddBinaryExtension)
// Add custom language mappings
for ext, lang := range customLanguages {
if ext != "" && lang != "" {
r.AddLanguageMapping(strings.ToLower(ext), lang)
}
}
}
// addExtensions is a helper to add multiple extensions.
func (r *FileTypeRegistry) addExtensions(extensions []string, adder func(string)) {
for _, ext := range extensions {
if ext != "" {
adder(strings.ToLower(ext))
}
}
}
// ConfigureFromSettings applies configuration settings to the registry.
// This function is called from main.go after config is loaded to avoid circular imports.
func ConfigureFromSettings(
customImages, customBinary []string,
customLanguages map[string]string,
disabledImages, disabledBinary, disabledLanguages []string,
) {
registry := GetDefaultRegistry()
registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages)
}

99
fileproc/detection.go Normal file
View File

@@ -0,0 +1,99 @@
package fileproc
import "strings"
// Package-level detection functions
// IsImage checks if the file extension indicates an image file.
// IsImage reports whether filename's extension denotes an image file,
// consulting the package's default registry.
func IsImage(filename string) bool {
	reg := getRegistry()
	return reg.IsImage(filename)
}
// IsBinary checks if the file extension indicates a binary file.
// IsBinary reports whether filename's extension denotes a binary file,
// consulting the package's default registry.
func IsBinary(filename string) bool {
	reg := getRegistry()
	return reg.IsBinary(filename)
}
// GetLanguage returns the language identifier for the given filename based on its extension.
// GetLanguage returns the language identifier for filename's extension,
// consulting the package's default registry.
func GetLanguage(filename string) string {
	reg := getRegistry()
	return reg.GetLanguage(filename)
}
// Registry methods for detection
// IsImage checks if the file extension indicates an image file.
// IsImage reports whether filename's extension denotes an image file.
func (r *FileTypeRegistry) IsImage(filename string) bool {
	return r.getFileTypeResult(filename).IsImage
}
// IsBinary checks if the file extension indicates a binary file.
// IsBinary reports whether filename's extension denotes a binary file.
func (r *FileTypeRegistry) IsBinary(filename string) bool {
	return r.getFileTypeResult(filename).IsBinary
}
// GetLanguage returns the language identifier for the given filename based on its extension.
// GetLanguage returns the language identifier for filename's extension.
// It returns "" when the filename is shorter than minExtensionLength or the
// extension has no known mapping.
func (r *FileTypeRegistry) GetLanguage(filename string) string {
	if len(filename) >= minExtensionLength {
		return r.getFileTypeResult(filename).Language
	}
	return ""
}
// Extension management methods
// AddImageExtension adds a new image extension to the registry.
// AddImageExtension registers ext (case-insensitive, including the leading
// dot) as an image extension and invalidates cached detection results.
func (r *FileTypeRegistry) AddImageExtension(ext string) {
	r.addExtension(ext, r.imageExts)
}
// AddBinaryExtension adds a new binary extension to the registry.
// AddBinaryExtension registers ext (case-insensitive, including the leading
// dot) as a binary extension and invalidates cached detection results.
func (r *FileTypeRegistry) AddBinaryExtension(ext string) {
	r.addExtension(ext, r.binaryExts)
}
// AddLanguageMapping adds a new language mapping to the registry.
// AddLanguageMapping maps ext (case-insensitive) to language and invalidates
// cached detection results.
func (r *FileTypeRegistry) AddLanguageMapping(ext, language string) {
	key := strings.ToLower(ext)
	r.languageMap[key] = language
	r.invalidateCache()
}
// addExtension is a helper to add extensions to a map.
// addExtension marks ext (case-insensitive) as present in target and
// invalidates cached detection results.
func (r *FileTypeRegistry) addExtension(ext string, target map[string]bool) {
	key := strings.ToLower(ext)
	target[key] = true
	r.invalidateCache()
}
// removeExtension is a helper to remove extensions from a map.
// removeExtension deletes ext (case-insensitive) from target.
// NOTE(review): unlike addExtension, this does not invalidate the caches;
// the only caller (DisableExtensions) invalidates once after all removals.
// Any new caller must do the same or stale cached results will be served.
func (r *FileTypeRegistry) removeExtension(ext string, target map[string]bool) {
	delete(target, strings.ToLower(ext))
}
// DisableExtensions removes specified extensions from the registry.
func (r *FileTypeRegistry) DisableExtensions(disabledImages, disabledBinary, disabledLanguages []string) {
// Disable image extensions
for _, ext := range disabledImages {
if ext != "" {
r.removeExtension(ext, r.imageExts)
}
}
// Disable binary extensions
for _, ext := range disabledBinary {
if ext != "" {
r.removeExtension(ext, r.binaryExts)
}
}
// Disable language extensions
for _, ext := range disabledLanguages {
if ext != "" {
delete(r.languageMap, strings.ToLower(ext))
}
}
// Invalidate cache after all modifications
r.invalidateCache()
}

161
fileproc/extensions.go Normal file
View File

@@ -0,0 +1,161 @@
package fileproc
// getImageExtensions returns the default image file extensions.
func getImageExtensions() map[string]bool {
return map[string]bool{
".png": true,
".jpg": true,
".jpeg": true,
".gif": true,
".bmp": true,
".tiff": true,
".tif": true,
".svg": true,
".webp": true,
".ico": true,
}
}
// getBinaryExtensions returns the default binary file extensions.
func getBinaryExtensions() map[string]bool {
return map[string]bool{
// Executables and libraries
".exe": true,
".dll": true,
".so": true,
".dylib": true,
".bin": true,
".o": true,
".a": true,
".lib": true,
// Compiled bytecode
".jar": true,
".class": true,
".pyc": true,
".pyo": true,
// Data files
".dat": true,
".db": true,
".sqlite": true,
".ds_store": true,
// Documents
".pdf": true,
// Archives
".zip": true,
".tar": true,
".gz": true,
".bz2": true,
".xz": true,
".7z": true,
".rar": true,
// Fonts
".ttf": true,
".otf": true,
".woff": true,
".woff2": true,
// Media files
".mp3": true,
".mp4": true,
".avi": true,
".mov": true,
".wmv": true,
".flv": true,
".webm": true,
".ogg": true,
".wav": true,
".flac": true,
}
}
// getLanguageMap returns the default language mappings.
func getLanguageMap() map[string]string {
return map[string]string{
// Systems programming
".go": "go",
".c": "c",
".cpp": "cpp",
".h": "c",
".hpp": "cpp",
".rs": "rust",
// Scripting languages
".py": "python",
".rb": "ruby",
".pl": "perl",
".lua": "lua",
".php": "php",
// Web technologies
".js": "javascript",
".ts": "typescript",
".jsx": "javascript",
".tsx": "typescript",
".html": "html",
".htm": "html",
".css": "css",
".scss": "scss",
".sass": "sass",
".less": "less",
".vue": "vue",
// JVM languages
".java": "java",
".scala": "scala",
".kt": "kotlin",
".clj": "clojure",
// .NET languages
".cs": "csharp",
".vb": "vbnet",
".fs": "fsharp",
// Apple platforms
".swift": "swift",
".m": "objc",
".mm": "objcpp",
// Shell scripts
".sh": "bash",
".bash": "bash",
".zsh": "zsh",
".fish": "fish",
".ps1": "powershell",
".bat": "batch",
".cmd": "batch",
// Data formats
".json": "json",
".yaml": "yaml",
".yml": "yaml",
".toml": "toml",
".xml": "xml",
".sql": "sql",
// Documentation
".md": "markdown",
".rst": "rst",
".tex": "latex",
// Functional languages
".hs": "haskell",
".ml": "ocaml",
".mli": "ocaml",
".elm": "elm",
".ex": "elixir",
".exs": "elixir",
".erl": "erlang",
".hrl": "erlang",
// Other languages
".r": "r",
".dart": "dart",
".nim": "nim",
".nims": "nim",
}
}

View File

@@ -3,8 +3,8 @@ package fileproc
// FakeWalker implements Walker for testing purposes.
type FakeWalker struct {
	Err   error
	Files []string
}
// Walk returns predetermined file paths or an error, depending on FakeWalker's configuration. // Walk returns predetermined file paths or an error, depending on FakeWalker's configuration.

55
fileproc/file_filters.go Normal file
View File

@@ -0,0 +1,55 @@
package fileproc
import (
"os"
"github.com/ivuorinen/gibidify/config"
)
// FileFilter defines filtering criteria for files and directories.
// FileFilter defines filtering criteria for files and directories.
type FileFilter struct {
	// ignoredDirs lists directory names that are skipped entirely.
	ignoredDirs []string
	// sizeLimit is the maximum file size in bytes; larger files are skipped.
	sizeLimit int64
}
// NewFileFilter creates a new file filter with current configuration.
func NewFileFilter() *FileFilter {
return &FileFilter{
ignoredDirs: config.GetIgnoredDirectories(),
sizeLimit: config.GetFileSizeLimit(),
}
}
// shouldSkipEntry determines if an entry should be skipped based on ignore rules and filters.
func (f *FileFilter) shouldSkipEntry(entry os.DirEntry, fullPath string, rules []ignoreRule) bool {
if entry.IsDir() {
return f.shouldSkipDirectory(entry)
}
if f.shouldSkipFile(entry, fullPath) {
return true
}
return matchesIgnoreRules(fullPath, rules)
}
// shouldSkipDirectory checks if a directory should be skipped based on the ignored directories list.
func (f *FileFilter) shouldSkipDirectory(entry os.DirEntry) bool {
for _, d := range f.ignoredDirs {
if entry.Name() == d {
return true
}
}
return false
}
// shouldSkipFile checks if a file should be skipped based on size limit and file type.
func (f *FileFilter) shouldSkipFile(entry os.DirEntry, fullPath string) bool {
// Check if file exceeds the configured size limit.
if info, err := entry.Info(); err == nil && info.Size() > f.sizeLimit {
return true
}
// Apply the default filter to ignore binary and image files.
return IsBinary(fullPath) || IsImage(fullPath)
}

827
fileproc/filetypes_test.go Normal file
View File

@@ -0,0 +1,827 @@
package fileproc
import (
"fmt"
"sync"
"testing"
)
// TestFileTypeRegistry_ModificationMethods tests the modification methods of FileTypeRegistry.
func TestFileTypeRegistry_ModificationMethods(t *testing.T) {
// Create a new registry instance for testing
registry := &FileTypeRegistry{
imageExts: make(map[string]bool),
binaryExts: make(map[string]bool),
languageMap: make(map[string]string),
}
// Test AddImageExtension
t.Run("AddImageExtension", func(t *testing.T) {
// Add a new image extension
registry.AddImageExtension(".webp")
if !registry.IsImage("test.webp") {
t.Errorf("Expected .webp to be recognized as image after adding")
}
// Test case insensitive addition
registry.AddImageExtension(".AVIF")
if !registry.IsImage("test.avif") {
t.Errorf("Expected .avif to be recognized as image after adding .AVIF")
}
if !registry.IsImage("test.AVIF") {
t.Errorf("Expected .AVIF to be recognized as image")
}
// Test with dot prefix
registry.AddImageExtension("heic")
if registry.IsImage("test.heic") {
t.Errorf("Expected extension without dot to not work")
}
// Test with proper dot prefix
registry.AddImageExtension(".heic")
if !registry.IsImage("test.heic") {
t.Errorf("Expected .heic to be recognized as image")
}
})
// Test AddBinaryExtension
t.Run("AddBinaryExtension", func(t *testing.T) {
// Add a new binary extension
registry.AddBinaryExtension(".custom")
if !registry.IsBinary("test.custom") {
t.Errorf("Expected .custom to be recognized as binary after adding")
}
// Test case insensitive addition
registry.AddBinaryExtension(".NEWBIN")
if !registry.IsBinary("test.newbin") {
t.Errorf("Expected .newbin to be recognized as binary after adding .NEWBIN")
}
if !registry.IsBinary("test.NEWBIN") {
t.Errorf("Expected .NEWBIN to be recognized as binary")
}
// Test overwriting existing extension
registry.AddBinaryExtension(".custom")
if !registry.IsBinary("test.custom") {
t.Errorf("Expected .custom to still be recognized as binary after re-adding")
}
})
// Test AddLanguageMapping
t.Run("AddLanguageMapping", func(t *testing.T) {
// Add a new language mapping
registry.AddLanguageMapping(".zig", "zig")
if registry.GetLanguage("test.zig") != "zig" {
t.Errorf("Expected .zig to map to 'zig', got '%s'", registry.GetLanguage("test.zig"))
}
// Test case insensitive addition
registry.AddLanguageMapping(".V", "vlang")
if registry.GetLanguage("test.v") != "vlang" {
t.Errorf("Expected .v to map to 'vlang' after adding .V, got '%s'", registry.GetLanguage("test.v"))
}
if registry.GetLanguage("test.V") != "vlang" {
t.Errorf("Expected .V to map to 'vlang', got '%s'", registry.GetLanguage("test.V"))
}
// Test overwriting existing mapping
registry.AddLanguageMapping(".zig", "ziglang")
if registry.GetLanguage("test.zig") != "ziglang" {
t.Errorf("Expected .zig to map to 'ziglang' after update, got '%s'", registry.GetLanguage("test.zig"))
}
// Test empty language
registry.AddLanguageMapping(".empty", "")
if registry.GetLanguage("test.empty") != "" {
t.Errorf("Expected .empty to map to empty string, got '%s'", registry.GetLanguage("test.empty"))
}
})
}
// TestFileTypeRegistry_LanguageDetection tests the language detection functionality.
func TestFileTypeRegistry_LanguageDetection(t *testing.T) {
registry := GetDefaultRegistry()
tests := []struct {
filename string
expected string
}{
// Programming languages
{"main.go", "go"},
{"script.py", "python"},
{"app.js", "javascript"},
{"component.tsx", "typescript"},
{"service.ts", "typescript"},
{"App.java", "java"},
{"program.c", "c"},
{"program.cpp", "cpp"},
{"header.h", "c"},
{"header.hpp", "cpp"},
{"main.rs", "rust"},
{"script.rb", "ruby"},
{"index.php", "php"},
{"app.swift", "swift"},
{"MainActivity.kt", "kotlin"},
{"Main.scala", "scala"},
{"analysis.r", "r"},
{"ViewController.m", "objc"},
{"ViewController.mm", "objcpp"},
{"Program.cs", "csharp"},
{"Module.vb", "vbnet"},
{"program.fs", "fsharp"},
{"script.lua", "lua"},
{"script.pl", "perl"},
// Shell scripts
{"script.sh", "bash"},
{"script.bash", "bash"},
{"script.zsh", "zsh"},
{"script.fish", "fish"},
{"script.ps1", "powershell"},
{"script.bat", "batch"},
{"script.cmd", "batch"},
// Data and markup
{"query.sql", "sql"},
{"index.html", "html"},
{"page.htm", "html"},
{"data.xml", "xml"},
{"style.css", "css"},
{"style.scss", "scss"},
{"style.sass", "sass"},
{"style.less", "less"},
{"data.json", "json"},
{"config.yaml", "yaml"},
{"config.yml", "yaml"},
{"config.toml", "toml"},
{"README.md", "markdown"},
{"doc.rst", "rst"},
{"paper.tex", "latex"},
// Modern languages
{"main.dart", "dart"},
{"Main.elm", "elm"},
{"core.clj", "clojure"},
{"server.ex", "elixir"},
{"test.exs", "elixir"},
{"server.erl", "erlang"},
{"header.hrl", "erlang"},
{"main.hs", "haskell"},
{"module.ml", "ocaml"},
{"interface.mli", "ocaml"},
{"main.nim", "nim"},
{"config.nims", "nim"},
// Web frameworks
{"Component.vue", "vue"},
{"Component.jsx", "javascript"},
// Case sensitivity tests
{"MAIN.GO", "go"},
{"Script.PY", "python"},
{"APP.JS", "javascript"},
// Edge cases
{"", ""}, // Empty filename
{"a", ""}, // Too short (less than minExtensionLength)
{"noext", ""}, // No extension
{".hidden", ""}, // Hidden file with no name
{"file.", ""}, // Extension is just a dot
{"file.unknown", ""}, // Unknown extension
{"file.123", ""}, // Numeric extension
{"a.b", ""}, // Very short filename and extension
}
for _, tt := range tests {
t.Run(tt.filename, func(t *testing.T) {
result := registry.GetLanguage(tt.filename)
if result != tt.expected {
t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected)
}
})
}
}
// TestFileTypeRegistry_ImageDetection tests the image detection functionality.
func TestFileTypeRegistry_ImageDetection(t *testing.T) {
registry := GetDefaultRegistry()
tests := []struct {
filename string
expected bool
}{
// Common image formats
{"photo.png", true},
{"image.jpg", true},
{"picture.jpeg", true},
{"animation.gif", true},
{"bitmap.bmp", true},
{"image.tiff", true},
{"scan.tif", true},
{"vector.svg", true},
{"modern.webp", true},
{"favicon.ico", true},
// Case sensitivity tests
{"PHOTO.PNG", true},
{"IMAGE.JPG", true},
{"PICTURE.JPEG", true},
// Non-image files
{"document.txt", false},
{"script.js", false},
{"data.json", false},
{"archive.zip", false},
{"executable.exe", false},
// Edge cases
{"", false}, // Empty filename
{"image", false}, // No extension
{".png", true}, // Just extension
{"file.png.bak", false}, // Multiple extensions
{"image.unknown", false}, // Unknown extension
}
for _, tt := range tests {
t.Run(tt.filename, func(t *testing.T) {
result := registry.IsImage(tt.filename)
if result != tt.expected {
t.Errorf("IsImage(%q) = %t, expected %t", tt.filename, result, tt.expected)
}
})
}
}
// TestFileTypeRegistry_BinaryDetection tests the binary detection functionality.
func TestFileTypeRegistry_BinaryDetection(t *testing.T) {
registry := GetDefaultRegistry()
tests := []struct {
filename string
expected bool
}{
// Executable files
{"program.exe", true},
{"library.dll", true},
{"libfoo.so", true},
{"framework.dylib", true},
{"data.bin", true},
// Object and library files
{"object.o", true},
{"archive.a", true},
{"library.lib", true},
{"application.jar", true},
{"bytecode.class", true},
{"compiled.pyc", true},
{"optimized.pyo", true},
// System files
{".DS_Store", true},
// Document files (treated as binary)
{"document.pdf", true},
// Archive files
{"archive.zip", true},
{"backup.tar", true},
{"compressed.gz", true},
{"data.bz2", true},
{"package.xz", true},
{"archive.7z", true},
{"backup.rar", true},
// Font files
{"font.ttf", true},
{"font.otf", true},
{"font.woff", true},
{"font.woff2", true},
// Media files
{"song.mp3", true},
{"video.mp4", true},
{"movie.avi", true},
{"clip.mov", true},
{"video.wmv", true},
{"animation.flv", true},
{"modern.webm", true},
{"audio.ogg", true},
{"sound.wav", true},
{"music.flac", true},
// Database files
{"data.dat", true},
{"database.db", true},
{"app.sqlite", true},
// Case sensitivity tests
{"PROGRAM.EXE", true},
{"LIBRARY.DLL", true},
// Non-binary files
{"document.txt", false},
{"script.js", false},
{"data.json", false},
{"style.css", false},
{"page.html", false},
// Edge cases
{"", false}, // Empty filename
{"binary", false}, // No extension
{".exe", true}, // Just extension
{"file.exe.bak", false}, // Multiple extensions
{"file.unknown", false}, // Unknown extension
}
for _, tt := range tests {
t.Run(tt.filename, func(t *testing.T) {
result := registry.IsBinary(tt.filename)
if result != tt.expected {
t.Errorf("IsBinary(%q) = %t, expected %t", tt.filename, result, tt.expected)
}
})
}
}
// TestFileTypeRegistry_DefaultRegistryConsistency tests that the default registry is consistent.
func TestFileTypeRegistry_DefaultRegistryConsistency(t *testing.T) {
// Get registry multiple times and ensure it's the same instance
registry1 := GetDefaultRegistry()
registry2 := GetDefaultRegistry()
registry3 := getRegistry()
if registry1 != registry2 {
t.Error("GetDefaultRegistry() should return the same instance")
}
if registry1 != registry3 {
t.Error("getRegistry() should return the same instance as GetDefaultRegistry()")
}
// Test that global functions use the same registry
filename := "test.go"
if IsImage(filename) != registry1.IsImage(filename) {
t.Error("IsImage() global function should match registry method")
}
if IsBinary(filename) != registry1.IsBinary(filename) {
t.Error("IsBinary() global function should match registry method")
}
if GetLanguage(filename) != registry1.GetLanguage(filename) {
t.Error("GetLanguage() global function should match registry method")
}
}
// TestFileTypeRegistry_ThreadSafety tests the thread safety of the FileTypeRegistry.
func TestFileTypeRegistry_ThreadSafety(t *testing.T) {
const numGoroutines = 100
const numOperationsPerGoroutine = 100
var wg sync.WaitGroup
// Test concurrent read operations
t.Run("ConcurrentReads", func(t *testing.T) {
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
registry := GetDefaultRegistry()
for j := 0; j < numOperationsPerGoroutine; j++ {
// Test various file detection operations
_ = registry.IsImage("test.png")
_ = registry.IsBinary("test.exe")
_ = registry.GetLanguage("test.go")
// Test global functions too
_ = IsImage("image.jpg")
_ = IsBinary("binary.dll")
_ = GetLanguage("script.py")
}
}(i)
}
wg.Wait()
})
// Test concurrent registry access (singleton creation)
t.Run("ConcurrentRegistryAccess", func(t *testing.T) {
// Reset the registry to test concurrent initialization
// Note: This is not safe in a real application, but needed for testing
registryOnce = sync.Once{}
registry = nil
registries := make([]*FileTypeRegistry, numGoroutines)
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
registries[id] = GetDefaultRegistry()
}(i)
}
wg.Wait()
// Verify all goroutines got the same registry instance
firstRegistry := registries[0]
for i := 1; i < numGoroutines; i++ {
if registries[i] != firstRegistry {
t.Errorf("Registry %d is different from registry 0", i)
}
}
})
// Test concurrent modifications on separate registry instances
t.Run("ConcurrentModifications", func(t *testing.T) {
// Create separate registry instances for each goroutine to test modification thread safety
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
// Create a new registry instance for this goroutine
registry := &FileTypeRegistry{
imageExts: make(map[string]bool),
binaryExts: make(map[string]bool),
languageMap: make(map[string]string),
}
for j := 0; j < numOperationsPerGoroutine; j++ {
// Add unique extensions for this goroutine
extSuffix := fmt.Sprintf("_%d_%d", id, j)
registry.AddImageExtension(".img" + extSuffix)
registry.AddBinaryExtension(".bin" + extSuffix)
registry.AddLanguageMapping(".lang"+extSuffix, "lang"+extSuffix)
// Verify the additions worked
if !registry.IsImage("test.img" + extSuffix) {
t.Errorf("Failed to add image extension .img%s", extSuffix)
}
if !registry.IsBinary("test.bin" + extSuffix) {
t.Errorf("Failed to add binary extension .bin%s", extSuffix)
}
if registry.GetLanguage("test.lang"+extSuffix) != "lang"+extSuffix {
t.Errorf("Failed to add language mapping .lang%s", extSuffix)
}
}
}(i)
}
wg.Wait()
})
}
// TestFileTypeRegistry_EdgeCases tests edge cases and boundary conditions.
func TestFileTypeRegistry_EdgeCases(t *testing.T) {
registry := GetDefaultRegistry()
// Test various edge cases for filename handling
edgeCases := []struct {
name string
filename string
desc string
}{
{"empty", "", "empty filename"},
{"single_char", "a", "single character filename"},
{"just_dot", ".", "just a dot"},
{"double_dot", "..", "double dot"},
{"hidden_file", ".hidden", "hidden file"},
{"hidden_with_ext", ".hidden.txt", "hidden file with extension"},
{"multiple_dots", "file.tar.gz", "multiple extensions"},
{"trailing_dot", "file.", "trailing dot"},
{"unicode", "файл.txt", "unicode filename"},
{"spaces", "my file.txt", "filename with spaces"},
{"special_chars", "file@#$.txt", "filename with special characters"},
{"very_long", "very_long_filename_with_many_characters_in_it.extension", "very long filename"},
{"no_basename", ".gitignore", "dotfile with no basename"},
{"case_mixed", "FiLe.ExT", "mixed case"},
}
for _, tc := range edgeCases {
t.Run(tc.name, func(t *testing.T) {
// These should not panic
_ = registry.IsImage(tc.filename)
_ = registry.IsBinary(tc.filename)
_ = registry.GetLanguage(tc.filename)
// Global functions should also not panic
_ = IsImage(tc.filename)
_ = IsBinary(tc.filename)
_ = GetLanguage(tc.filename)
})
}
}
// TestFileTypeRegistry_MinimumExtensionLength tests the minimum extension length requirement.
func TestFileTypeRegistry_MinimumExtensionLength(t *testing.T) {
registry := GetDefaultRegistry()
tests := []struct {
filename string
expected string
}{
{"", ""}, // Empty filename
{"a", ""}, // Single character (less than minExtensionLength)
{"ab", ""}, // Two characters, no extension
{"a.b", ""}, // Extension too short, but filename too short anyway
{"ab.c", "c"}, // Valid: filename >= minExtensionLength and .c is valid extension
{"a.go", "go"}, // Valid extension
{"ab.py", "python"}, // Valid extension
{"a.unknown", ""}, // Valid length but unknown extension
}
for _, tt := range tests {
t.Run(tt.filename, func(t *testing.T) {
result := registry.GetLanguage(tt.filename)
if result != tt.expected {
t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected)
}
})
}
}
// BenchmarkFileTypeRegistry tests performance of the registry operations.
func BenchmarkFileTypeRegistry_IsImage(b *testing.B) {
registry := GetDefaultRegistry()
filename := "test.png"
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = registry.IsImage(filename)
}
}
// BenchmarkFileTypeRegistry_IsBinary measures repeated binary detection for a
// single cached filename on the default registry.
func BenchmarkFileTypeRegistry_IsBinary(b *testing.B) {
	registry := GetDefaultRegistry()
	filename := "test.exe"
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = registry.IsBinary(filename)
	}
}
// BenchmarkFileTypeRegistry_GetLanguage measures repeated language lookup for
// a single cached filename on the default registry.
func BenchmarkFileTypeRegistry_GetLanguage(b *testing.B) {
	registry := GetDefaultRegistry()
	filename := "test.go"
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = registry.GetLanguage(filename)
	}
}
// BenchmarkFileTypeRegistry_GlobalFunctions measures the package-level
// wrappers (IsImage, IsBinary, GetLanguage) together in a single loop.
func BenchmarkFileTypeRegistry_GlobalFunctions(b *testing.B) {
	filename := "test.go"
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = IsImage(filename)
		_ = IsBinary(filename)
		_ = GetLanguage(filename)
	}
}
// BenchmarkFileTypeRegistry_ConcurrentAccess measures the package-level
// wrappers under parallel load, exercising the registry's read-lock path.
func BenchmarkFileTypeRegistry_ConcurrentAccess(b *testing.B) {
	filename := "test.go"
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			_ = IsImage(filename)
			_ = IsBinary(filename)
			_ = GetLanguage(filename)
		}
	})
}
// TestFileTypeRegistry_Configuration tests the configuration functionality.
func TestFileTypeRegistry_Configuration(t *testing.T) {
// Create a new registry instance for testing
registry := &FileTypeRegistry{
imageExts: make(map[string]bool),
binaryExts: make(map[string]bool),
languageMap: make(map[string]string),
}
// Test ApplyCustomExtensions
t.Run("ApplyCustomExtensions", func(t *testing.T) {
customImages := []string{".webp", ".avif", ".heic"}
customBinary := []string{".custom", ".mybin"}
customLanguages := map[string]string{
".zig": "zig",
".odin": "odin",
".v": "vlang",
}
registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
// Test custom image extensions
for _, ext := range customImages {
if !registry.IsImage("test" + ext) {
t.Errorf("Expected %s to be recognized as image", ext)
}
}
// Test custom binary extensions
for _, ext := range customBinary {
if !registry.IsBinary("test" + ext) {
t.Errorf("Expected %s to be recognized as binary", ext)
}
}
// Test custom language mappings
for ext, expectedLang := range customLanguages {
if lang := registry.GetLanguage("test" + ext); lang != expectedLang {
t.Errorf("Expected %s to map to %s, got %s", ext, expectedLang, lang)
}
}
})
// Test DisableExtensions
t.Run("DisableExtensions", func(t *testing.T) {
// Add some extensions first
registry.AddImageExtension(".png")
registry.AddImageExtension(".jpg")
registry.AddBinaryExtension(".exe")
registry.AddBinaryExtension(".dll")
registry.AddLanguageMapping(".go", "go")
registry.AddLanguageMapping(".py", "python")
// Verify they work
if !registry.IsImage("test.png") {
t.Error("Expected .png to be image before disabling")
}
if !registry.IsBinary("test.exe") {
t.Error("Expected .exe to be binary before disabling")
}
if registry.GetLanguage("test.go") != "go" {
t.Error("Expected .go to map to go before disabling")
}
// Disable some extensions
disabledImages := []string{".png"}
disabledBinary := []string{".exe"}
disabledLanguages := []string{".go"}
registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages)
// Test that disabled extensions no longer work
if registry.IsImage("test.png") {
t.Error("Expected .png to not be image after disabling")
}
if registry.IsBinary("test.exe") {
t.Error("Expected .exe to not be binary after disabling")
}
if registry.GetLanguage("test.go") != "" {
t.Error("Expected .go to not map to language after disabling")
}
// Test that non-disabled extensions still work
if !registry.IsImage("test.jpg") {
t.Error("Expected .jpg to still be image after disabling .png")
}
if !registry.IsBinary("test.dll") {
t.Error("Expected .dll to still be binary after disabling .exe")
}
if registry.GetLanguage("test.py") != "python" {
t.Error("Expected .py to still map to python after disabling .go")
}
})
// Test empty values handling
t.Run("EmptyValuesHandling", func(t *testing.T) {
registry := &FileTypeRegistry{
imageExts: make(map[string]bool),
binaryExts: make(map[string]bool),
languageMap: make(map[string]string),
}
// Test with empty values
customImages := []string{"", ".valid", ""}
customBinary := []string{"", ".valid"}
customLanguages := map[string]string{
"": "invalid",
".valid": "",
".good": "good",
}
registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
// Only valid entries should be added
if registry.IsImage("test.") {
t.Error("Expected empty extension to not be added as image")
}
if !registry.IsImage("test.valid") {
t.Error("Expected .valid to be added as image")
}
if registry.IsBinary("test.") {
t.Error("Expected empty extension to not be added as binary")
}
if !registry.IsBinary("test.valid") {
t.Error("Expected .valid to be added as binary")
}
if registry.GetLanguage("test.") != "" {
t.Error("Expected empty extension to not be added as language")
}
if registry.GetLanguage("test.valid") != "" {
t.Error("Expected .valid with empty language to not be added")
}
if registry.GetLanguage("test.good") != "good" {
t.Error("Expected .good to map to good")
}
})
// Test case insensitive handling
t.Run("CaseInsensitiveHandling", func(t *testing.T) {
registry := &FileTypeRegistry{
imageExts: make(map[string]bool),
binaryExts: make(map[string]bool),
languageMap: make(map[string]string),
}
customImages := []string{".WEBP", ".Avif"}
customBinary := []string{".CUSTOM", ".MyBin"}
customLanguages := map[string]string{
".ZIG": "zig",
".Odin": "odin",
}
registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
// Test that both upper and lower case work
if !registry.IsImage("test.webp") {
t.Error("Expected .webp (lowercase) to work after adding .WEBP")
}
if !registry.IsImage("test.WEBP") {
t.Error("Expected .WEBP (uppercase) to work")
}
if !registry.IsBinary("test.custom") {
t.Error("Expected .custom (lowercase) to work after adding .CUSTOM")
}
if !registry.IsBinary("test.CUSTOM") {
t.Error("Expected .CUSTOM (uppercase) to work")
}
if registry.GetLanguage("test.zig") != "zig" {
t.Error("Expected .zig (lowercase) to work after adding .ZIG")
}
if registry.GetLanguage("test.ZIG") != "zig" {
t.Error("Expected .ZIG (uppercase) to work")
}
})
}
// TestConfigureFromSettings tests the global configuration function.
func TestConfigureFromSettings(t *testing.T) {
// Reset registry to ensure clean state
registryOnce = sync.Once{}
registry = nil
// Test configuration application
customImages := []string{".webp", ".avif"}
customBinary := []string{".custom"}
customLanguages := map[string]string{".zig": "zig"}
disabledImages := []string{".gif"} // Disable default extension
disabledBinary := []string{".exe"} // Disable default extension
disabledLanguages := []string{".rb"} // Disable default extension
ConfigureFromSettings(
customImages,
customBinary,
customLanguages,
disabledImages,
disabledBinary,
disabledLanguages,
)
// Test that custom extensions work
if !IsImage("test.webp") {
t.Error("Expected custom image extension .webp to work")
}
if !IsBinary("test.custom") {
t.Error("Expected custom binary extension .custom to work")
}
if GetLanguage("test.zig") != "zig" {
t.Error("Expected custom language .zig to work")
}
// Test that disabled extensions don't work
if IsImage("test.gif") {
t.Error("Expected disabled image extension .gif to not work")
}
if IsBinary("test.exe") {
t.Error("Expected disabled binary extension .exe to not work")
}
if GetLanguage("test.rb") != "" {
t.Error("Expected disabled language extension .rb to not work")
}
// Test that non-disabled defaults still work
if !IsImage("test.png") {
t.Error("Expected non-disabled image extension .png to still work")
}
if !IsBinary("test.dll") {
t.Error("Expected non-disabled binary extension .dll to still work")
}
if GetLanguage("test.go") != "go" {
t.Error("Expected non-disabled language extension .go to still work")
}
}

28
fileproc/formats.go Normal file
View File

@@ -0,0 +1,28 @@
package fileproc
// FileData represents a single file's path and content.
type FileData struct {
	// Path is the file's path as recorded by the processor
	// (presumably relative to the scan root — confirm against caller).
	Path string `json:"path" yaml:"path"`
	// Content is the file's full text content.
	Content string `json:"content" yaml:"content"`
	// Language is the detected language identifier; empty when unknown.
	Language string `json:"language" yaml:"language"`
}
// OutputData represents the full output structure.
type OutputData struct {
	// Prefix is optional leading text for the generated document.
	Prefix string `json:"prefix,omitempty" yaml:"prefix,omitempty"`
	// Suffix is optional trailing text for the generated document.
	Suffix string `json:"suffix,omitempty" yaml:"suffix,omitempty"`
	// Files holds one entry per processed file, in emission order.
	Files []FileData `json:"files" yaml:"files"`
}
// FormatWriter defines the interface for format-specific writers.
//
// Lifecycle: Start is called once, WriteFile once per file, Close once.
// NOTE(review): MarkdownWriter.Close takes a suffix argument and therefore
// does not satisfy this interface as written — confirm which is intended.
type FormatWriter interface {
	Start(prefix, suffix string) error
	WriteFile(req WriteRequest) error
	Close() error
}
// detectLanguage infers the code block language for a file path by
// consulting the shared file type registry's extension-to-language map.
func detectLanguage(filePath string) string {
	return GetDefaultRegistry().GetLanguage(filePath)
}

66
fileproc/ignore_rules.go Normal file
View File

@@ -0,0 +1,66 @@
package fileproc
import (
"os"
"path/filepath"
ignore "github.com/sabhiram/go-gitignore"
)
// ignoreRule holds an ignore matcher along with the base directory where it was loaded.
type ignoreRule struct {
	// gi is the compiled .gitignore/.ignore pattern matcher.
	gi *ignore.GitIgnore
	// base is the directory the ignore file was loaded from; candidate
	// paths are made relative to it before matching.
	base string
}
// loadIgnoreRules merges the rules inherited from parent directories with
// any .gitignore/.ignore files found directly in currentDir.
func loadIgnoreRules(currentDir string, parentRules []ignoreRule) []ignoreRule {
	// At most two ignore files can be discovered per directory.
	const expectedIgnoreFiles = 2
	combined := make([]ignoreRule, len(parentRules), len(parentRules)+expectedIgnoreFiles)
	copy(combined, parentRules)
	for _, name := range []string{".gitignore", ".ignore"} {
		rule := tryLoadIgnoreFile(currentDir, name)
		if rule != nil {
			combined = append(combined, *rule)
		}
	}
	return combined
}
// tryLoadIgnoreFile compiles the ignore file at dir/fileName, returning nil
// when the file is absent, is a directory, or fails to compile.
func tryLoadIgnoreFile(dir, fileName string) *ignoreRule {
	ignorePath := filepath.Join(dir, fileName)
	info, statErr := os.Stat(ignorePath)
	if statErr != nil || info.IsDir() {
		return nil
	}
	gi, compileErr := ignore.CompileIgnoreFile(ignorePath)
	if compileErr != nil {
		// A malformed ignore file is silently skipped, matching Stat failures.
		return nil
	}
	return &ignoreRule{base: dir, gi: gi}
}
// matchesIgnoreRules reports whether fullPath is excluded by any of the
// accumulated ignore rules.
func matchesIgnoreRules(fullPath string, rules []ignoreRule) bool {
	for i := range rules {
		if matchesRule(fullPath, rules[i]) {
			return true
		}
	}
	return false
}
// matchesRule reports whether fullPath, rebased onto the directory the rule
// was loaded from, matches that rule's gitignore patterns.
func matchesRule(fullPath string, rule ignoreRule) bool {
	rel, relErr := filepath.Rel(rule.base, fullPath)
	if relErr != nil {
		// A path that cannot be expressed relative to the rule's base
		// (e.g. different volume) can never match it.
		return false
	}
	return rule.gi.MatchesPath(rel)
}

188
fileproc/json_writer.go Normal file
View File

@@ -0,0 +1,188 @@
package fileproc
import (
	"encoding/json"
	"fmt"
	"io"
	"os"
	"unicode/utf8"

	"github.com/ivuorinen/gibidify/utils"
)
// JSONWriter handles JSON format output with streaming support.
//
// The document is built incrementally as raw text (not via a single
// json.Marshal of OutputData) so large file contents can be streamed
// without buffering them in memory.
type JSONWriter struct {
	// outFile is the destination for all output.
	outFile *os.File
	// firstFile is true until the first files-array element has been
	// written; it controls comma separators between array elements.
	firstFile bool
}
// NewJSONWriter returns a JSONWriter that writes its document to outFile.
func NewJSONWriter(outFile *os.File) *JSONWriter {
	w := &JSONWriter{outFile: outFile}
	w.firstFile = true
	return w
}
// Start emits the opening of the JSON document: the escaped prefix and
// suffix fields followed by the opening bracket of the files array.
func (w *JSONWriter) Start(prefix, suffix string) error {
	// write emits one fragment, wrapping any failure with failMsg.
	write := func(s, failMsg string) error {
		if _, err := w.outFile.WriteString(s); err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, failMsg)
		}
		return nil
	}
	if err := write(`{"prefix":"`, "failed to write JSON start"); err != nil {
		return err
	}
	if err := write(escapeJSONString(prefix), "failed to write JSON prefix"); err != nil {
		return err
	}
	if err := write(`","suffix":"`, "failed to write JSON middle"); err != nil {
		return err
	}
	if err := write(escapeJSONString(suffix), "failed to write JSON suffix"); err != nil {
		return err
	}
	return write(`","files":[`, "failed to write JSON files start")
}
// WriteFile appends one file entry to the JSON files array, emitting a
// comma separator before every entry except the first.
func (w *JSONWriter) WriteFile(req WriteRequest) error {
	if w.firstFile {
		w.firstFile = false
	} else if _, err := w.outFile.WriteString(","); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON separator")
	}
	if req.IsStream {
		return w.writeStreaming(req)
	}
	return w.writeInline(req)
}
// Close terminates the document by closing the files array and the
// top-level JSON object.
func (w *JSONWriter) Close() error {
	_, err := w.outFile.WriteString("]}")
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON end")
	}
	return nil
}
// writeStreaming emits one file object whose content field is produced by
// streaming from req.Reader with on-the-fly escaping. The reader is always
// closed, even on error.
func (w *JSONWriter) writeStreaming(req WriteRequest) error {
	defer w.closeReader(req.Reader, req.Path)
	lang := detectLanguage(req.Path)
	head := fmt.Sprintf(`{"path":"%s","language":"%s","content":"`, escapeJSONString(req.Path), lang)
	if _, err := w.outFile.WriteString(head); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file start").WithFilePath(req.Path)
	}
	if err := w.streamJSONContent(req.Reader, req.Path); err != nil {
		return err
	}
	if _, err := w.outFile.WriteString(`"}`); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file end").WithFilePath(req.Path)
	}
	return nil
}
// writeInline marshals a small, fully-buffered file as a single JSON object.
func (w *JSONWriter) writeInline(req WriteRequest) error {
	entry := FileData{
		Path:     req.Path,
		Language: detectLanguage(req.Path),
		Content:  req.Content,
	}
	encoded, marshalErr := json.Marshal(entry)
	if marshalErr != nil {
		return utils.WrapError(marshalErr, utils.ErrorTypeProcessing, utils.CodeProcessingEncode, "failed to marshal JSON").WithFilePath(req.Path)
	}
	if _, writeErr := w.outFile.Write(encoded); writeErr != nil {
		return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file").WithFilePath(req.Path)
	}
	return nil
}
// streamJSONContent copies the reader's content into the output as the body
// of a JSON string, escaping each chunk.
//
// Fix: chunks are read on fixed byte boundaries, so a multi-byte UTF-8 rune
// can be split between two reads. Escaping such a fragment in isolation
// corrupts it into U+FFFD replacement characters (escapeJSONString goes
// through json.Marshal, which coerces invalid UTF-8). Any incomplete
// trailing sequence is therefore held back and prepended to the next chunk
// before escaping.
func (w *JSONWriter) streamJSONContent(reader io.Reader, path string) error {
	buf := make([]byte, StreamChunkSize)
	var pending []byte // incomplete UTF-8 sequence carried over from the previous chunk
	for {
		n, err := reader.Read(buf)
		if n > 0 {
			chunk := append(pending, buf[:n]...)
			hold := trailingPartialRuneLen(chunk)
			emit := chunk[:len(chunk)-hold]
			pending = append([]byte(nil), chunk[len(chunk)-hold:]...)
			if len(emit) > 0 {
				escaped := escapeJSONString(string(emit))
				if _, writeErr := w.outFile.WriteString(escaped); writeErr != nil {
					return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON chunk").WithFilePath(path)
				}
			}
		}
		if err == io.EOF {
			// Flush held-back bytes; a truncated final sequence was genuinely
			// invalid UTF-8 in the source and is escaped as-is.
			if len(pending) > 0 {
				escaped := escapeJSONString(string(pending))
				if _, writeErr := w.outFile.WriteString(escaped); writeErr != nil {
					return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON chunk").WithFilePath(path)
				}
			}
			break
		}
		if err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to read JSON chunk").WithFilePath(path)
		}
	}
	return nil
}

// trailingPartialRuneLen returns how many bytes at the end of p belong to a
// UTF-8 sequence whose continuation bytes have not arrived yet. It returns 0
// when p ends on a complete rune or on bytes that no continuation could make
// valid (those are emitted as-is).
func trailingPartialRuneLen(p []byte) int {
	for i := 1; i <= utf8.UTFMax && i <= len(p); i++ {
		b := p[len(p)-i]
		if utf8.RuneStart(b) {
			if utf8.FullRune(p[len(p)-i:]) {
				return 0
			}
			return i
		}
	}
	return 0
}
// closeReader closes reader when it implements io.Closer. Close failures are
// logged rather than returned, since the output has already been produced.
func (w *JSONWriter) closeReader(reader io.Reader, path string) {
	closer, ok := reader.(io.Closer)
	if !ok {
		return
	}
	if err := closer.Close(); err != nil {
		utils.LogError(
			"Failed to close file reader",
			utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
		)
	}
}
// escapeJSONString returns s encoded as the interior of a JSON string: the
// result of json.Marshal with the surrounding double quotes stripped.
func escapeJSONString(s string) string {
	// Marshaling a plain string value cannot fail, so the error is ignored.
	quoted, _ := json.Marshal(s)
	inner := quoted[1 : len(quoted)-1]
	return string(inner)
}
// startJSONWriter drains writeCh into a JSON document on outFile, signalling
// completion by closing done. Per-file write failures are logged so that one
// bad entry does not abort the remaining output.
func startJSONWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
	defer close(done)
	jw := NewJSONWriter(outFile)
	if err := jw.Start(prefix, suffix); err != nil {
		utils.LogError("Failed to write JSON start", err)
		return
	}
	for req := range writeCh {
		if err := jw.WriteFile(req); err != nil {
			utils.LogError("Failed to write JSON file", err)
		}
	}
	if err := jw.Close(); err != nil {
		utils.LogError("Failed to write JSON end", err)
	}
}

139
fileproc/markdown_writer.go Normal file
View File

@@ -0,0 +1,139 @@
package fileproc
import (
"fmt"
"io"
"os"
"github.com/ivuorinen/gibidify/utils"
)
// MarkdownWriter handles markdown format output with streaming support.
//
// Each file is rendered as a "## File:" heading followed by a fenced code
// block whose language tag comes from the file type registry.
type MarkdownWriter struct {
	// outFile is the destination for all output.
	outFile *os.File
}
// NewMarkdownWriter creates a new markdown writer that emits to outFile.
func NewMarkdownWriter(outFile *os.File) *MarkdownWriter {
	return &MarkdownWriter{outFile: outFile}
}
// Start writes the optional document prefix as a top-level heading. The
// suffix argument mirrors other writers' signatures but is not written
// here; it is emitted by Close.
func (w *MarkdownWriter) Start(prefix, suffix string) error {
	if prefix == "" {
		return nil
	}
	if _, err := fmt.Fprintf(w.outFile, "# %s\n\n", prefix); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write prefix")
	}
	return nil
}
// WriteFile renders one file as a fenced markdown code block, using the
// streaming path for large files and the inline path for buffered content.
func (w *MarkdownWriter) WriteFile(req WriteRequest) error {
	if !req.IsStream {
		return w.writeInline(req)
	}
	return w.writeStreaming(req)
}
// Close writes the markdown footer: the suffix, when non-empty, is emitted
// as a trailing top-level heading.
//
// NOTE(review): this method takes a suffix argument, so *MarkdownWriter does
// not satisfy the FormatWriter interface (whose Close takes no arguments);
// startMarkdownWriter calls it directly. Confirm whether the interface or
// this signature should change.
func (w *MarkdownWriter) Close(suffix string) error {
	if suffix != "" {
		if _, err := fmt.Fprintf(w.outFile, "\n# %s\n", suffix); err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write suffix")
		}
	}
	return nil
}
// writeStreaming emits the file heading and opening fence, streams the body
// from req.Reader in chunks, then closes the fence. The reader is always
// closed, even on error.
func (w *MarkdownWriter) writeStreaming(req WriteRequest) error {
	defer w.closeReader(req.Reader, req.Path)
	lang := detectLanguage(req.Path)
	if _, err := fmt.Fprintf(w.outFile, "## File: `%s`\n```%s\n", req.Path, lang); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file header").WithFilePath(req.Path)
	}
	if err := w.streamContent(req.Reader, req.Path); err != nil {
		return err
	}
	if _, err := w.outFile.WriteString("\n```\n\n"); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file footer").WithFilePath(req.Path)
	}
	return nil
}
// writeInline renders a fully-buffered file as one heading plus fenced block.
func (w *MarkdownWriter) writeInline(req WriteRequest) error {
	lang := detectLanguage(req.Path)
	_, err := fmt.Fprintf(w.outFile, "## File: `%s`\n```%s\n%s\n```\n\n", req.Path, lang, req.Content)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write inline content").WithFilePath(req.Path)
	}
	return nil
}
// streamContent copies the reader to the output file in StreamChunkSize
// pieces, wrapping read and write failures separately so the cause is
// identifiable from the error.
func (w *MarkdownWriter) streamContent(reader io.Reader, path string) error {
	chunk := make([]byte, StreamChunkSize)
	for {
		n, readErr := reader.Read(chunk)
		if n > 0 {
			if _, writeErr := w.outFile.Write(chunk[:n]); writeErr != nil {
				return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write chunk").WithFilePath(path)
			}
		}
		switch {
		case readErr == io.EOF:
			return nil
		case readErr != nil:
			return utils.WrapError(readErr, utils.ErrorTypeIO, utils.CodeIORead, "failed to read chunk").WithFilePath(path)
		}
	}
}
// closeReader closes reader when it implements io.Closer. Close failures are
// logged rather than returned, since the output has already been produced.
func (w *MarkdownWriter) closeReader(reader io.Reader, path string) {
	closer, ok := reader.(io.Closer)
	if !ok {
		return
	}
	if err := closer.Close(); err != nil {
		utils.LogError(
			"Failed to close file reader",
			utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
		)
	}
}
// startMarkdownWriter drains writeCh into a markdown document on outFile,
// signalling completion by closing done. Per-file write failures are logged
// so one bad entry does not abort the remaining output.
func startMarkdownWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
	defer close(done)
	mw := NewMarkdownWriter(outFile)
	if err := mw.Start(prefix, suffix); err != nil {
		utils.LogError("Failed to write markdown prefix", err)
		return
	}
	for req := range writeCh {
		if err := mw.WriteFile(req); err != nil {
			utils.LogError("Failed to write markdown file", err)
		}
	}
	if err := mw.Close(suffix); err != nil {
		utils.LogError("Failed to write markdown suffix", err)
	}
}

View File

@@ -3,34 +3,157 @@ package fileproc
import ( import (
"fmt" "fmt"
"io"
"os" "os"
"path/filepath" "path/filepath"
"strings"
"github.com/sirupsen/logrus" "github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/utils"
)
const (
// StreamChunkSize is the size of chunks when streaming large files (64KB).
StreamChunkSize = 65536
// StreamThreshold is the file size above which we use streaming (1MB).
StreamThreshold = 1048576
// MaxMemoryBuffer is the maximum memory to use for buffering content (10MB).
MaxMemoryBuffer = 10485760
) )
// WriteRequest represents the content to be written. // WriteRequest represents the content to be written.
type WriteRequest struct { type WriteRequest struct {
Path string Path string
Content string Content string
IsStream bool
Reader io.Reader
}
// FileProcessor handles file processing operations.
type FileProcessor struct {
rootPath string
sizeLimit int64
}
// NewFileProcessor creates a new file processor.
func NewFileProcessor(rootPath string) *FileProcessor {
return &FileProcessor{
rootPath: rootPath,
sizeLimit: config.GetFileSizeLimit(),
}
} }
// ProcessFile reads the file at filePath and sends a formatted output to outCh. // ProcessFile reads the file at filePath and sends a formatted output to outCh.
// It automatically chooses between loading the entire file or streaming based on file size.
func ProcessFile(filePath string, outCh chan<- WriteRequest, rootPath string) { func ProcessFile(filePath string, outCh chan<- WriteRequest, rootPath string) {
content, err := os.ReadFile(filePath) processor := NewFileProcessor(rootPath)
processor.Process(filePath, outCh)
}
// Process handles file processing with the configured settings.
func (p *FileProcessor) Process(filePath string, outCh chan<- WriteRequest) {
// Validate file
fileInfo, err := p.validateFile(filePath)
if err != nil { if err != nil {
logrus.Errorf("Failed to read file %s: %v", filePath, err) return // Error already logged
}
// Get relative path
relPath := p.getRelativePath(filePath)
// Choose processing strategy based on file size
if fileInfo.Size() <= StreamThreshold {
p.processInMemory(filePath, relPath, outCh)
} else {
p.processStreaming(filePath, relPath, outCh)
}
}
// validateFile checks if the file can be processed.
func (p *FileProcessor) validateFile(filePath string) (os.FileInfo, error) {
fileInfo, err := os.Stat(filePath)
if err != nil {
structErr := utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to stat file").WithFilePath(filePath)
utils.LogErrorf(structErr, "Failed to stat file %s", filePath)
return nil, err
}
// Check size limit
if fileInfo.Size() > p.sizeLimit {
utils.LogErrorf(
utils.NewStructuredError(
utils.ErrorTypeValidation,
utils.CodeValidationSize,
fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", fileInfo.Size(), p.sizeLimit),
).WithFilePath(filePath).WithContext("file_size", fileInfo.Size()).WithContext("size_limit", p.sizeLimit),
"Skipping large file %s", filePath,
)
return nil, fmt.Errorf("file too large")
}
return fileInfo, nil
}
// getRelativePath computes the path relative to rootPath.
func (p *FileProcessor) getRelativePath(filePath string) string {
relPath, err := filepath.Rel(p.rootPath, filePath)
if err != nil {
return filePath // Fallback
}
return relPath
}
// processInMemory loads the entire file into memory (for small files).
func (p *FileProcessor) processInMemory(filePath, relPath string, outCh chan<- WriteRequest) {
content, err := os.ReadFile(filePath) // #nosec G304 - filePath is validated by walker
if err != nil {
structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to read file").WithFilePath(filePath)
utils.LogErrorf(structErr, "Failed to read file %s", filePath)
return return
} }
// Compute path relative to rootPath, so /a/b/c/d.c becomes c/d.c outCh <- WriteRequest{
relPath, err := filepath.Rel(rootPath, filePath) Path: relPath,
if err != nil { Content: p.formatContent(relPath, string(content)),
// Fallback if something unexpected happens IsStream: false,
relPath = filePath }
}
// processStreaming creates a streaming reader for large files.
func (p *FileProcessor) processStreaming(filePath, relPath string, outCh chan<- WriteRequest) {
reader := p.createStreamReader(filePath, relPath)
if reader == nil {
return // Error already logged
} }
// Format: separator, then relative path, then content outCh <- WriteRequest{
formatted := fmt.Sprintf("\n---\n%s\n%s\n", relPath, string(content)) Path: relPath,
outCh <- WriteRequest{Path: relPath, Content: formatted} Content: "", // Empty since content is in Reader
IsStream: true,
Reader: reader,
}
}
// createStreamReader creates a reader that combines header and file content.
func (p *FileProcessor) createStreamReader(filePath, relPath string) io.Reader {
file, err := os.Open(filePath) // #nosec G304 - filePath is validated by walker
if err != nil {
structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to open file for streaming").WithFilePath(filePath)
utils.LogErrorf(structErr, "Failed to open file for streaming %s", filePath)
return nil
}
// Note: file will be closed by the writer
header := p.formatHeader(relPath)
return io.MultiReader(header, file)
}
// formatContent formats the file content with header.
func (p *FileProcessor) formatContent(relPath, content string) string {
return fmt.Sprintf("\n---\n%s\n%s\n", relPath, content)
}
// formatHeader creates a reader for the file header.
func (p *FileProcessor) formatHeader(relPath string) io.Reader {
return strings.NewReader(fmt.Sprintf("\n---\n%s\n", relPath))
} }

View File

@@ -6,12 +6,15 @@ import (
"sync" "sync"
"testing" "testing"
fileproc "github.com/ivuorinen/gibidify/fileproc" "github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/testutil"
) )
func TestProcessFile(t *testing.T) { func TestProcessFile(t *testing.T) {
// Reset and load default config to ensure proper file size limits
testutil.ResetViperConfig(t, "")
// Create a temporary file with known content. // Create a temporary file with known content.
tmpFile, err := os.CreateTemp("", "testfile") tmpFile, err := os.CreateTemp(t.TempDir(), "testfile")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }

107
fileproc/registry.go Normal file
View File

@@ -0,0 +1,107 @@
// Package fileproc provides file processing utilities.
package fileproc
import (
"path/filepath"
"strings"
"sync"
)
// minExtensionLength is the shortest meaningful extension (a dot plus one
// character). Presumably consumed by lookup/validation code elsewhere in
// the package — not referenced in this file's visible scope; confirm usage.
const minExtensionLength = 2

var (
	// registry is the lazily-created, process-wide FileTypeRegistry singleton.
	registry *FileTypeRegistry
	// registryOnce guards the one-time initialization of registry.
	registryOnce sync.Once
)
// FileTypeRegistry manages file type detection and classification.
//
// Lookup tables map lowercase extensions (including the leading dot) to
// their classification; the caches memoize hot-path lookups.
type FileTypeRegistry struct {
	// imageExts marks extensions treated as images.
	imageExts map[string]bool
	// binaryExts marks extensions treated as binary files.
	binaryExts map[string]bool
	// languageMap maps extensions to code-block language identifiers.
	languageMap map[string]string
	// Cache for frequent lookups to avoid repeated string operations
	extCache map[string]string // filename -> normalized extension
	resultCache map[string]FileTypeResult // extension -> cached result
	// cacheMutex guards both caches and stats for concurrent access.
	cacheMutex sync.RWMutex
	// maxCacheSize bounds cache growth; eviction is handled elsewhere —
	// assumed to apply to resultCache (it is sized to match) — TODO confirm.
	maxCacheSize int
	// Performance statistics
	stats RegistryStats
}
// RegistryStats tracks performance metrics for the registry.
type RegistryStats struct {
	// TotalLookups counts all file type lookups performed.
	TotalLookups uint64
	// CacheHits counts lookups answered from the result cache.
	CacheHits uint64
	// CacheMisses counts lookups that had to consult the tables.
	CacheMisses uint64
	// CacheEvictions counts entries removed to respect maxCacheSize.
	CacheEvictions uint64
}
// FileTypeResult represents cached file type detection results.
type FileTypeResult struct {
	// IsImage reports whether the extension is classified as an image.
	IsImage bool
	// IsBinary reports whether the extension is classified as binary.
	IsBinary bool
	// Language is the mapped language identifier; empty when unknown.
	Language string
	// Extension is the normalized (lowercase, dotted) extension this
	// result was computed for.
	Extension string
}
// initRegistry builds a registry pre-populated with the package's default
// image, binary, and language extension tables, plus empty lookup caches.
func initRegistry() *FileTypeRegistry {
	const (
		extCacheCapacity    = 1000 // pre-sized cache for extension normalization
		resultCacheCapacity = 500  // pre-sized cache for file type results
	)
	return &FileTypeRegistry{
		imageExts:    getImageExtensions(),
		binaryExts:   getBinaryExtensions(),
		languageMap:  getLanguageMap(),
		extCache:     make(map[string]string, extCacheCapacity),
		resultCache:  make(map[string]FileTypeResult, resultCacheCapacity),
		maxCacheSize: resultCacheCapacity,
	}
}
// getRegistry returns the singleton file type registry, creating it if necessary.
// sync.Once makes the lazy initialization safe under concurrent callers.
func getRegistry() *FileTypeRegistry {
	registryOnce.Do(func() {
		registry = initRegistry()
	})
	return registry
}
// GetDefaultRegistry returns the default file type registry.
// It is the exported accessor for the lazily-initialized package singleton.
func GetDefaultRegistry() *FileTypeRegistry {
	return getRegistry()
}
// GetStats returns a snapshot of the registry's performance counters,
// taken under the read lock.
func (r *FileTypeRegistry) GetStats() RegistryStats {
	r.cacheMutex.RLock()
	snapshot := r.stats
	r.cacheMutex.RUnlock()
	return snapshot
}
// GetCacheInfo reports the current entry counts of both lookup caches and
// the configured maximum cache size, read under the read lock.
func (r *FileTypeRegistry) GetCacheInfo() (extCacheSize, resultCacheSize, maxCacheSize int) {
	r.cacheMutex.RLock()
	extCacheSize = len(r.extCache)
	resultCacheSize = len(r.resultCache)
	maxCacheSize = r.maxCacheSize
	r.cacheMutex.RUnlock()
	return extCacheSize, resultCacheSize, maxCacheSize
}
// ResetRegistryForTesting resets the registry to its initial state.
// This function should only be used in tests.
//
// Not safe for concurrent use: it reassigns the package-level sync.Once
// without synchronization, so no other goroutine may be touching the
// registry while it runs.
func ResetRegistryForTesting() {
	registryOnce = sync.Once{}
	registry = nil
}
// normalizeExtension returns filename's extension lowered to canonical form
// (including the leading dot), or "" when the name has no extension.
func normalizeExtension(filename string) string {
	ext := filepath.Ext(filename)
	return strings.ToLower(ext)
}
// isSpecialFile reports whether filename should be classified by its full
// base name rather than a trailing extension. Two cases qualify:
//   - extensionless names such as "Makefile"
//   - dotfiles whose entire name is the "extension" (e.g. ".DS_Store"),
//     for which filepath.Ext returns the whole base name
//
// The lowercased base name is looked up in the provided extension set.
//
// Fix: the previous version only handled filepath.Ext(filename) == "", so
// ".DS_Store"-style dotfiles (the comment's own example) were never matched,
// because Ext(".DS_Store") is the non-empty ".DS_Store".
func isSpecialFile(filename string, extensions map[string]bool) bool {
	base := filepath.Base(filename)
	ext := filepath.Ext(filename)
	if ext != "" && ext != base {
		// Ordinary name with a real extension: handled by extension lookup.
		return false
	}
	return extensions[strings.ToLower(base)]
}

View File

@@ -4,10 +4,8 @@ package fileproc
import ( import (
"os" "os"
"path/filepath" "path/filepath"
"strings"
"github.com/ivuorinen/gibidify/config" "github.com/ivuorinen/gibidify/utils"
ignore "github.com/sabhiram/go-gitignore"
) )
// Walker defines an interface for scanning directories. // Walker defines an interface for scanning directories.
@@ -18,22 +16,25 @@ type Walker interface {
// ProdWalker implements Walker using a custom directory walker that // ProdWalker implements Walker using a custom directory walker that
// respects .gitignore and .ignore files, configuration-defined ignore directories, // respects .gitignore and .ignore files, configuration-defined ignore directories,
// and ignores binary and image files by default. // and ignores binary and image files by default.
type ProdWalker struct{} type ProdWalker struct {
filter *FileFilter
}
// ignoreRule holds an ignore matcher along with the base directory where it was loaded. // NewProdWalker creates a new production walker with current configuration.
type ignoreRule struct { func NewProdWalker() *ProdWalker {
base string return &ProdWalker{
gi *ignore.GitIgnore filter: NewFileFilter(),
}
} }
// Walk scans the given root directory recursively and returns a slice of file paths // Walk scans the given root directory recursively and returns a slice of file paths
// that are not ignored based on .gitignore/.ignore files, the configuration, or the default binary/image filter. // that are not ignored based on .gitignore/.ignore files, the configuration, or the default binary/image filter.
func (pw ProdWalker) Walk(root string) ([]string, error) { func (w *ProdWalker) Walk(root string) ([]string, error) {
absRoot, err := filepath.Abs(root) absRoot, err := utils.GetAbsolutePath(root)
if err != nil { if err != nil {
return nil, err return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSPathResolution, "failed to resolve root path").WithFilePath(root)
} }
return walkDir(absRoot, absRoot, []ignoreRule{}) return w.walkDir(absRoot, []ignoreRule{})
} }
// walkDir recursively walks the directory tree starting at currentDir. // walkDir recursively walks the directory tree starting at currentDir.
@@ -41,122 +42,34 @@ func (pw ProdWalker) Walk(root string) ([]string, error) {
// appends the corresponding rules to the inherited list. Each file/directory is // appends the corresponding rules to the inherited list. Each file/directory is
// then checked against the accumulated ignore rules, the configuration's list of ignored directories, // then checked against the accumulated ignore rules, the configuration's list of ignored directories,
// and a default filter that ignores binary and image files. // and a default filter that ignores binary and image files.
func walkDir(root string, currentDir string, parentRules []ignoreRule) ([]string, error) { func (w *ProdWalker) walkDir(currentDir string, parentRules []ignoreRule) ([]string, error) {
var results []string var results []string
entries, err := os.ReadDir(currentDir) entries, err := os.ReadDir(currentDir)
if err != nil { if err != nil {
return nil, err return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to read directory").WithFilePath(currentDir)
} }
// Start with the parent's ignore rules. rules := loadIgnoreRules(currentDir, parentRules)
rules := make([]ignoreRule, len(parentRules))
copy(rules, parentRules)
// Check for .gitignore and .ignore files in the current directory.
for _, fileName := range []string{".gitignore", ".ignore"} {
ignorePath := filepath.Join(currentDir, fileName)
if info, err := os.Stat(ignorePath); err == nil && !info.IsDir() {
gi, err := ignore.CompileIgnoreFile(ignorePath)
if err == nil {
rules = append(rules, ignoreRule{
base: currentDir,
gi: gi,
})
}
}
}
// Get the list of directories to ignore from configuration.
ignoredDirs := config.GetIgnoredDirectories()
sizeLimit := config.GetFileSizeLimit() // e.g., 5242880 for 5 MB
for _, entry := range entries { for _, entry := range entries {
fullPath := filepath.Join(currentDir, entry.Name()) fullPath := filepath.Join(currentDir, entry.Name())
// For directories, check if its name is in the config ignore list. if w.filter.shouldSkipEntry(entry, fullPath, rules) {
if entry.IsDir() { continue
for _, d := range ignoredDirs {
if entry.Name() == d {
// Skip this directory entirely.
goto SkipEntry
}
}
} else {
// Check if file exceeds the configured size limit.
info, err := entry.Info()
if err == nil && info.Size() > sizeLimit {
goto SkipEntry
}
// For files, apply the default filter to ignore binary and image files.
if isBinaryOrImage(fullPath) {
goto SkipEntry
}
} }
// Check accumulated ignore rules. // Process entry
for _, rule := range rules {
// Compute the path relative to the base where the ignore rule was defined.
rel, err := filepath.Rel(rule.base, fullPath)
if err != nil {
continue
}
// If the rule matches, skip this entry.
if rule.gi.MatchesPath(rel) {
goto SkipEntry
}
}
// If not ignored, then process the entry.
if entry.IsDir() { if entry.IsDir() {
subFiles, err := walkDir(root, fullPath, rules) subFiles, err := w.walkDir(fullPath, rules)
if err != nil { if err != nil {
return nil, err return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingTraversal, "failed to traverse subdirectory").WithFilePath(fullPath)
} }
results = append(results, subFiles...) results = append(results, subFiles...)
} else { } else {
results = append(results, fullPath) results = append(results, fullPath)
} }
SkipEntry:
continue
} }
return results, nil return results, nil
} }
// isBinaryOrImage checks if a file should be considered binary or an image based on its extension.
// The check is case-insensitive.
func isBinaryOrImage(filePath string) bool {
ext := strings.ToLower(filepath.Ext(filePath))
// Common image file extensions.
imageExtensions := map[string]bool{
".png": true,
".jpg": true,
".jpeg": true,
".gif": true,
".bmp": true,
".tiff": true,
".ico": true,
".svg": true,
".webp": true,
}
// Common binary file extensions.
binaryExtensions := map[string]bool{
".exe": true,
".dll": true,
".so": true,
".bin": true,
".dat": true,
".zip": true,
".tar": true,
".gz": true,
".7z": true,
".rar": true,
".DS_Store": true,
}
if imageExtensions[ext] || binaryExtensions[ext] {
return true
}
return false
}

View File

@@ -1,64 +1,42 @@
package fileproc_test package fileproc_test
import ( import (
"os"
"path/filepath" "path/filepath"
"testing" "testing"
"github.com/ivuorinen/gibidify/config"
fileproc "github.com/ivuorinen/gibidify/fileproc"
"github.com/spf13/viper" "github.com/spf13/viper"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/testutil"
) )
func TestProdWalkerWithIgnore(t *testing.T) { func TestProdWalkerWithIgnore(t *testing.T) {
// Create a temporary directory structure. // Create a temporary directory structure.
rootDir, err := os.MkdirTemp("", "walker_test_root") rootDir := t.TempDir()
if err != nil {
t.Fatalf("Failed to create temp root directory: %v", err)
}
defer func() {
if err := os.RemoveAll(rootDir); err != nil {
t.Fatalf("cleanup failed: %v", err)
}
}()
subDir := filepath.Join(rootDir, "vendor") subDir := testutil.CreateTestDirectory(t, rootDir, "vendor")
if err := os.Mkdir(subDir, 0755); err != nil {
t.Fatalf("Failed to create subDir: %v", err)
}
// Write sample files // Write sample files
filePaths := []string{ testutil.CreateTestFiles(t, rootDir, []testutil.FileSpec{
filepath.Join(rootDir, "file1.go"), {Name: "file1.go", Content: "content"},
filepath.Join(rootDir, "file2.txt"), {Name: "file2.txt", Content: "content"},
filepath.Join(subDir, "file_in_vendor.txt"), // should be ignored })
} testutil.CreateTestFile(t, subDir, "file_in_vendor.txt", []byte("content")) // should be ignored
for _, fp := range filePaths {
if err := os.WriteFile(fp, []byte("content"), 0644); err != nil {
t.Fatalf("Failed to write file %s: %v", fp, err)
}
}
// .gitignore that ignores *.txt and itself // .gitignore that ignores *.txt and itself
gitignoreContent := `*.txt gitignoreContent := `*.txt
.gitignore .gitignore
` `
gitignorePath := filepath.Join(rootDir, ".gitignore") testutil.CreateTestFile(t, rootDir, ".gitignore", []byte(gitignoreContent))
if err := os.WriteFile(gitignorePath, []byte(gitignoreContent), 0644); err != nil {
t.Fatalf("Failed to write .gitignore: %v", err)
}
// Initialize config to ignore "vendor" directory // Initialize config to ignore "vendor" directory
viper.Reset() testutil.ResetViperConfig(t, "")
config.LoadConfig()
viper.Set("ignoreDirectories", []string{"vendor"}) viper.Set("ignoreDirectories", []string{"vendor"})
// Run walker // Run walker
var w fileproc.Walker = fileproc.ProdWalker{} w := fileproc.NewProdWalker()
found, err := w.Walk(rootDir) found, err := w.Walk(rootDir)
if err != nil { testutil.MustSucceed(t, err, "walking directory")
t.Fatalf("Walk returned error: %v", err)
}
// We expect only file1.go to appear // We expect only file1.go to appear
if len(found) != 1 { if len(found) != 1 {
@@ -70,38 +48,24 @@ func TestProdWalkerWithIgnore(t *testing.T) {
} }
func TestProdWalkerBinaryCheck(t *testing.T) { func TestProdWalkerBinaryCheck(t *testing.T) {
rootDir, err := os.MkdirTemp("", "walker_test_bincheck") rootDir := t.TempDir()
if err != nil {
t.Fatalf("Failed to create temp root directory: %v", err)
}
defer func() {
if err := os.RemoveAll(rootDir); err != nil {
t.Fatalf("cleanup failed: %v", err)
}
}()
// Create a mock binary file // Create test files
binFile := filepath.Join(rootDir, "somefile.exe") testutil.CreateTestFiles(t, rootDir, []testutil.FileSpec{
if err := os.WriteFile(binFile, []byte("fake-binary-content"), 0644); err != nil { {Name: "somefile.exe", Content: "fake-binary-content"},
t.Fatalf("Failed to write file %s: %v", binFile, err) {Name: "keep.go", Content: "package main"},
} })
// Create a normal file
normalFile := filepath.Join(rootDir, "keep.go")
if err := os.WriteFile(normalFile, []byte("package main"), 0644); err != nil {
t.Fatalf("Failed to write file %s: %v", normalFile, err)
}
// Reset and load default config // Reset and load default config
viper.Reset() testutil.ResetViperConfig(t, "")
config.LoadConfig()
// Reset FileTypeRegistry to ensure clean state
fileproc.ResetRegistryForTesting()
// Run walker // Run walker
var w fileproc.Walker = fileproc.ProdWalker{} w := fileproc.NewProdWalker()
found, err := w.Walk(rootDir) found, err := w.Walk(rootDir)
if err != nil { testutil.MustSucceed(t, err, "walking directory")
t.Fatalf("Walk returned error: %v", err)
}
// Only "keep.go" should be returned // Only "keep.go" should be returned
if len(found) != 1 { if len(found) != 1 {
@@ -113,34 +77,17 @@ func TestProdWalkerBinaryCheck(t *testing.T) {
} }
func TestProdWalkerSizeLimit(t *testing.T) { func TestProdWalkerSizeLimit(t *testing.T) {
rootDir, err := os.MkdirTemp("", "walker_test_sizelimit") rootDir := t.TempDir()
if err != nil {
t.Fatalf("Failed to create temp root directory: %v", err)
}
defer func() {
if err := os.RemoveAll(rootDir); err != nil {
t.Fatalf("cleanup failed: %v", err)
}
}()
// Create a file exceeding the size limit // Create test files
largeFilePath := filepath.Join(rootDir, "largefile.txt")
largeFileData := make([]byte, 6*1024*1024) // 6 MB largeFileData := make([]byte, 6*1024*1024) // 6 MB
if err := os.WriteFile(largeFilePath, largeFileData, 0644); err != nil { testutil.CreateTestFile(t, rootDir, "largefile.txt", largeFileData)
t.Fatalf("Failed to write large file: %v", err) testutil.CreateTestFile(t, rootDir, "smallfile.go", []byte("package main"))
}
// Create a small file
smallFilePath := filepath.Join(rootDir, "smallfile.go")
if err := os.WriteFile(smallFilePath, []byte("package main"), 0644); err != nil {
t.Fatalf("Failed to write small file: %v", err)
}
// Reset and load default config, which sets size limit to 5 MB // Reset and load default config, which sets size limit to 5 MB
viper.Reset() testutil.ResetViperConfig(t, "")
config.LoadConfig()
var w fileproc.Walker = fileproc.ProdWalker{} w := fileproc.NewProdWalker()
found, err := w.Walk(rootDir) found, err := w.Walk(rootDir)
if err != nil { if err != nil {
t.Fatalf("Walk returned error: %v", err) t.Fatalf("Walk returned error: %v", err)

View File

@@ -1,101 +1,29 @@
// Package fileproc provides a writer for the output of the file processor. // Package fileproc provides a writer for the output of the file processor.
//
// The StartWriter function writes the output in the specified format.
// The formatMarkdown function formats the output in Markdown format.
// The detectLanguage function tries to infer the code block language from the file extension.
// The OutputData struct represents the full output structure.
// The FileData struct represents a single file's path and content.
package fileproc package fileproc
import ( import (
"encoding/json"
"fmt" "fmt"
"os" "os"
"github.com/sirupsen/logrus" "github.com/ivuorinen/gibidify/utils"
"gopkg.in/yaml.v3"
) )
// FileData represents a single file's path and content. // StartWriter writes the output in the specified format with memory optimization.
type FileData struct { func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format, prefix, suffix string) {
Path string `json:"path" yaml:"path"`
Content string `json:"content" yaml:"content"`
}
// OutputData represents the full output structure.
type OutputData struct {
Prefix string `json:"prefix,omitempty" yaml:"prefix,omitempty"`
Files []FileData `json:"files" yaml:"files"`
Suffix string `json:"suffix,omitempty" yaml:"suffix,omitempty"`
}
// StartWriter writes the output in the specified format.
func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format string, prefix, suffix string) {
var files []FileData
// Read from channel until closed
for req := range writeCh {
files = append(files, FileData(req))
}
// Create output struct
output := OutputData{Prefix: prefix, Files: files, Suffix: suffix}
// Serialize based on format
var outputData []byte
var err error
switch format { switch format {
case "json":
outputData, err = json.MarshalIndent(output, "", " ")
case "yaml":
outputData, err = yaml.Marshal(output)
case "markdown": case "markdown":
outputData = []byte(formatMarkdown(output)) startMarkdownWriter(outFile, writeCh, done, prefix, suffix)
case "json":
startJSONWriter(outFile, writeCh, done, prefix, suffix)
case "yaml":
startYAMLWriter(outFile, writeCh, done, prefix, suffix)
default: default:
err = fmt.Errorf("unsupported format: %s", format) err := utils.NewStructuredError(
} utils.ErrorTypeValidation,
utils.CodeValidationFormat,
if err != nil { fmt.Sprintf("unsupported format: %s", format),
logrus.Errorf("Error encoding output: %v", err) ).WithContext("format", format)
utils.LogError("Failed to encode output", err)
close(done) close(done)
return
}
// Write to file
if _, err := outFile.Write(outputData); err != nil {
logrus.Errorf("Error writing to file: %v", err)
}
close(done)
}
func formatMarkdown(output OutputData) string {
markdown := "# " + output.Prefix + "\n\n"
for _, file := range output.Files {
markdown += fmt.Sprintf("## File: `%s`\n```%s\n%s\n```\n\n", file.Path, detectLanguage(file.Path), file.Content)
}
markdown += "# " + output.Suffix
return markdown
}
// detectLanguage tries to infer code block language from file extension.
func detectLanguage(filename string) string {
if len(filename) < 3 {
return ""
}
switch {
case len(filename) >= 3 && filename[len(filename)-3:] == ".go":
return "go"
case len(filename) >= 3 && filename[len(filename)-3:] == ".py":
return "python"
case len(filename) >= 2 && filename[len(filename)-2:] == ".c":
return "c"
case len(filename) >= 3 && filename[len(filename)-3:] == ".js":
return "javascript"
default:
return ""
} }
} }

View File

@@ -7,8 +7,9 @@ import (
"sync" "sync"
"testing" "testing"
fileproc "github.com/ivuorinen/gibidify/fileproc"
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
"github.com/ivuorinen/gibidify/fileproc"
) )
func TestStartWriter_Formats(t *testing.T) { func TestStartWriter_Formats(t *testing.T) {
@@ -18,107 +19,109 @@ func TestStartWriter_Formats(t *testing.T) {
format string format string
expectError bool expectError bool
}{ }{
{ {"JSON format", "json", false},
name: "JSON format", {"YAML format", "yaml", false},
format: "json", {"Markdown format", "markdown", false},
expectError: false, {"Invalid format", "invalid", true},
},
{
name: "YAML format",
format: "yaml",
expectError: false,
},
{
name: "Markdown format",
format: "markdown",
expectError: false,
},
{
name: "Invalid format",
format: "invalid",
expectError: true,
},
} }
for _, tc := range tests { for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) { t.Run(tc.name, func(t *testing.T) {
outFile, err := os.CreateTemp("", "gibidify_test_output") data := runWriterTest(t, tc.format)
if err != nil {
t.Fatalf("Failed to create temp file: %v", err)
}
defer func() {
if err := outFile.Close(); err != nil {
t.Errorf("close temp file: %v", err)
}
if err := os.Remove(outFile.Name()); err != nil {
t.Errorf("remove temp file: %v", err)
}
}()
// Prepare channels
writeCh := make(chan fileproc.WriteRequest, 2)
doneCh := make(chan struct{})
// Write a couple of sample requests
writeCh <- fileproc.WriteRequest{Path: "sample.go", Content: "package main"}
writeCh <- fileproc.WriteRequest{Path: "example.py", Content: "def foo(): pass"}
close(writeCh)
// Start the writer
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
fileproc.StartWriter(outFile, writeCh, doneCh, tc.format, "PREFIX", "SUFFIX")
}()
// Wait until writer signals completion
wg.Wait()
<-doneCh // make sure all writes finished
// Read output
data, err := os.ReadFile(outFile.Name())
if err != nil {
t.Fatalf("Error reading output file: %v", err)
}
if tc.expectError { if tc.expectError {
// For an invalid format, we expect StartWriter to log an error verifyErrorOutput(t, data)
// and produce no content or minimal content. There's no official
// error returned, so check if it's empty or obviously incorrect.
if len(data) != 0 {
t.Errorf("Expected no output for invalid format, got:\n%s", data)
}
} else { } else {
// Valid format: check basic properties in the output verifyValidOutput(t, data, tc.format)
content := string(data) verifyPrefixSuffix(t, data)
switch tc.format {
case "json":
// Quick parse check
var outStruct fileproc.OutputData
if err := json.Unmarshal(data, &outStruct); err != nil {
t.Errorf("JSON unmarshal failed: %v", err)
}
case "yaml":
var outStruct fileproc.OutputData
if err := yaml.Unmarshal(data, &outStruct); err != nil {
t.Errorf("YAML unmarshal failed: %v", err)
}
case "markdown":
// Check presence of code fences or "## File: ..."
if !strings.Contains(content, "```") {
t.Error("Expected markdown code fences not found")
}
}
// Prefix and suffix checks (common to JSON, YAML, markdown)
if !strings.Contains(string(data), "PREFIX") {
t.Errorf("Missing prefix in output: %s", data)
}
if !strings.Contains(string(data), "SUFFIX") {
t.Errorf("Missing suffix in output: %s", data)
}
} }
}) })
} }
} }
// runWriterTest executes the writer with the given format and returns the output data.
func runWriterTest(t *testing.T, format string) []byte {
	t.Helper()

	outFile, err := os.CreateTemp(t.TempDir(), "gibidify_test_output")
	if err != nil {
		t.Fatalf("Failed to create temp file: %v", err)
	}
	outPath := outFile.Name()
	defer func() {
		if closeErr := outFile.Close(); closeErr != nil {
			t.Errorf("close temp file: %v", closeErr)
		}
		if removeErr := os.Remove(outPath); removeErr != nil {
			t.Errorf("remove temp file: %v", removeErr)
		}
	}()

	// Queue two sample files up front; closing the channel lets the
	// writer drain it and terminate.
	writeCh := make(chan fileproc.WriteRequest, 2)
	writeCh <- fileproc.WriteRequest{Path: "sample.go", Content: "package main"}
	writeCh <- fileproc.WriteRequest{Path: "example.py", Content: "def foo(): pass"}
	close(writeCh)

	// Run the writer in its own goroutine and wait for both the goroutine
	// and the writer's own done signal before reading the file back.
	doneCh := make(chan struct{})
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		fileproc.StartWriter(outFile, writeCh, doneCh, format, "PREFIX", "SUFFIX")
	}()
	wg.Wait()
	<-doneCh

	data, readErr := os.ReadFile(outPath)
	if readErr != nil {
		t.Fatalf("Error reading output file: %v", readErr)
	}
	return data
}
// verifyErrorOutput checks that error cases produce no output.
func verifyErrorOutput(t *testing.T, data []byte) {
	t.Helper()
	if len(data) == 0 {
		return
	}
	t.Errorf("Expected no output for invalid format, got:\n%s", data)
}
// verifyValidOutput checks format-specific output validity: JSON and YAML
// must round-trip into fileproc.OutputData, markdown must contain code fences.
func verifyValidOutput(t *testing.T, data []byte, format string) {
	t.Helper()
	switch format {
	case "json":
		var parsed fileproc.OutputData
		if err := json.Unmarshal(data, &parsed); err != nil {
			t.Errorf("JSON unmarshal failed: %v", err)
		}
	case "yaml":
		var parsed fileproc.OutputData
		if err := yaml.Unmarshal(data, &parsed); err != nil {
			t.Errorf("YAML unmarshal failed: %v", err)
		}
	case "markdown":
		if !strings.Contains(string(data), "```") {
			t.Error("Expected markdown code fences not found")
		}
	}
}
// verifyPrefixSuffix checks that output contains expected prefix and suffix.
func verifyPrefixSuffix(t *testing.T, data []byte) {
	t.Helper()
	content := string(data)
	// The writer is always invoked with "PREFIX"/"SUFFIX" markers in these
	// tests; both must survive into the serialized output.
	for _, marker := range []string{"PREFIX", "SUFFIX"} {
		if !strings.Contains(content, marker) {
			t.Errorf("Missing %s in output: %s", strings.ToLower(marker), data)
		}
	}
}

148
fileproc/yaml_writer.go Normal file
View File

@@ -0,0 +1,148 @@
package fileproc
import (
"bufio"
"fmt"
"io"
"os"
"strings"
"github.com/ivuorinen/gibidify/utils"
)
// YAMLWriter handles YAML format output with streaming support.
// It emits a document with top-level "prefix", "suffix", and "files" keys
// (written by Start), then appends one sequence entry per WriteFile call.
type YAMLWriter struct {
	outFile *os.File // destination for all YAML output; never closed by this type
}
// NewYAMLWriter creates a new YAML writer that emits to outFile.
// The writer never closes outFile; that remains the caller's responsibility.
func NewYAMLWriter(outFile *os.File) *YAMLWriter {
	writer := &YAMLWriter{outFile: outFile}
	return writer
}
// Start writes the YAML document header: the top-level "prefix" and
// "suffix" scalars (quoted as needed) followed by the "files:" key that
// subsequent WriteFile calls append sequence entries to.
// Returns a wrapped I/O error if the header cannot be written.
func (w *YAMLWriter) Start(prefix, suffix string) error {
	// Write YAML header
	if _, err := fmt.Fprintf(w.outFile, "prefix: %s\nsuffix: %s\nfiles:\n", yamlQuoteString(prefix), yamlQuoteString(suffix)); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML header")
	}
	return nil
}
// WriteFile writes a file entry in YAML format, dispatching on whether the
// request carries streamed content (large files) or inline content.
func (w *YAMLWriter) WriteFile(req WriteRequest) error {
	if !req.IsStream {
		return w.writeInline(req)
	}
	return w.writeStreaming(req)
}
// Close finalizes the output. YAML needs no closing footer, so there is
// nothing to write here; the method exists to complete the writer lifecycle.
func (w *YAMLWriter) Close() error {
	return nil
}
// writeStreaming writes a large file as YAML in streaming chunks: first the
// entry header (path, language, literal block-scalar marker), then the
// content streamed line by line. The request's reader is always closed via
// the deferred closeReader, even when writing the header fails.
//
// NOTE(review): the leading spaces in the format string below appear
// collapsed in this view — confirm the entry indentation nests correctly
// under the top-level "files:" sequence in the actual file.
func (w *YAMLWriter) writeStreaming(req WriteRequest) error {
	defer w.closeReader(req.Reader, req.Path)

	language := detectLanguage(req.Path)

	// Write YAML file entry start
	if _, err := fmt.Fprintf(w.outFile, " - path: %s\n language: %s\n content: |\n", yamlQuoteString(req.Path), language); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML file start").WithFilePath(req.Path)
	}

	// Stream content with YAML indentation
	return w.streamYAMLContent(req.Reader, req.Path)
}
// writeInline writes a small file directly as YAML: an entry header
// (path, language, literal block-scalar marker) followed by the content
// already held in memory, emitted line by line with block-scalar indent.
//
// NOTE(review): the leading spaces in the format strings below appear
// collapsed in this view — confirm the indentation in the actual file.
func (w *YAMLWriter) writeInline(req WriteRequest) error {
	language := detectLanguage(req.Path)

	fileData := FileData{
		Path:     req.Path,
		Content:  req.Content,
		Language: language,
	}

	// Write YAML entry header for this file.
	if _, err := fmt.Fprintf(w.outFile, " - path: %s\n language: %s\n content: |\n", yamlQuoteString(fileData.Path), fileData.Language); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML entry start").WithFilePath(req.Path)
	}

	// Indent every content line so it stays inside the block scalar.
	lines := strings.Split(fileData.Content, "\n")
	for _, line := range lines {
		if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML content line").WithFilePath(req.Path)
		}
	}
	return nil
}
// streamYAMLContent streams content from reader into the output file line
// by line, indenting each line so it remains inside the entry's YAML block
// scalar. Returns a wrapped I/O error on the first failed write or scan.
//
// The scanner's buffer is raised above bufio's default 64 KiB maximum token
// size: this path handles large source files, and a single minified or
// generated line longer than the default cap would otherwise abort the
// whole stream with bufio.ErrTooLong.
func (w *YAMLWriter) streamYAMLContent(reader io.Reader, path string) error {
	scanner := bufio.NewScanner(reader)
	// Allow individual lines up to 1 MiB (initial buffer 64 KiB).
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for scanner.Scan() {
		line := scanner.Text()
		if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML line").WithFilePath(path)
		}
	}
	if err := scanner.Err(); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to scan YAML content").WithFilePath(path)
	}
	return nil
}
// closeReader safely closes a reader when it implements io.Closer. Close
// failures are logged rather than returned, because by this point the
// entry has already been written and the error is not actionable upstream.
func (w *YAMLWriter) closeReader(reader io.Reader, path string) {
	closer, ok := reader.(io.Closer)
	if !ok {
		return
	}
	err := closer.Close()
	if err == nil {
		return
	}
	wrapped := utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path)
	utils.LogError("Failed to close file reader", wrapped)
}
// yamlQuoteString renders s as a YAML scalar, quoting only when needed.
//
// Strings free of YAML-significant characters are emitted as plain
// (unquoted) scalars. Anything containing newlines, carriage returns,
// tabs, colons, quotes, or backslashes is emitted as a double-quoted
// scalar. In a YAML double-quoted scalar, backslash is an escape
// character and literal control characters would change the parsed
// value, so backslashes and control characters must be escaped too —
// escaping only '"' (as the previous version did) produced invalid or
// value-changing YAML for such content.
func yamlQuoteString(s string) string {
	if s == "" {
		return `""`
	}
	if !strings.ContainsAny(s, "\n\r\t:\"'\\") {
		// Plain scalar: nothing special, safe to write unquoted.
		return s
	}
	// strings.Replacer performs a single left-to-right pass over s, so
	// the escape sequences it emits are never themselves re-escaped.
	replacer := strings.NewReplacer(
		`\`, `\\`,
		`"`, `\"`,
		"\n", `\n`,
		"\r", `\r`,
		"\t", `\t`,
	)
	return `"` + replacer.Replace(s) + `"`
}
// startYAMLWriter handles YAML format output with streaming support.
// It writes the document header, drains writeCh into per-file YAML entries,
// finalizes the writer, and always signals completion by closing done.
// Per-file write failures are logged and do not stop the drain.
func startYAMLWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
	defer close(done)

	writer := NewYAMLWriter(outFile)

	if err := writer.Start(prefix, suffix); err != nil {
		utils.LogError("Failed to write YAML header", err)
		return
	}

	for req := range writeCh {
		if writeErr := writer.WriteFile(req); writeErr != nil {
			utils.LogError("Failed to write YAML file", writeErr)
		}
	}

	if err := writer.Close(); err != nil {
		utils.LogError("Failed to write YAML end", err)
	}
}

7
go.mod
View File

@@ -3,7 +3,9 @@ module github.com/ivuorinen/gibidify
go 1.24.1 go 1.24.1
require ( require (
github.com/fatih/color v1.18.0
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
github.com/schollz/progressbar/v3 v3.18.0
github.com/sirupsen/logrus v1.9.3 github.com/sirupsen/logrus v1.9.3
github.com/spf13/viper v1.20.0 github.com/spf13/viper v1.20.0
gopkg.in/yaml.v3 v3.0.1 gopkg.in/yaml.v3 v3.0.1
@@ -12,7 +14,11 @@ require (
require ( require (
github.com/fsnotify/fsnotify v1.8.0 // indirect github.com/fsnotify/fsnotify v1.8.0 // indirect
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/pelletier/go-toml/v2 v2.2.3 // indirect github.com/pelletier/go-toml/v2 v2.2.3 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/sagikazarmark/locafero v0.8.0 // indirect github.com/sagikazarmark/locafero v0.8.0 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect
github.com/spf13/afero v1.14.0 // indirect github.com/spf13/afero v1.14.0 // indirect
@@ -21,5 +27,6 @@ require (
github.com/subosito/gotenv v1.6.0 // indirect github.com/subosito/gotenv v1.6.0 // indirect
go.uber.org/multierr v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect
golang.org/x/sys v0.31.0 // indirect golang.org/x/sys v0.31.0 // indirect
golang.org/x/term v0.28.0 // indirect
golang.org/x/text v0.23.0 // indirect golang.org/x/text v0.23.0 // indirect
) )

21
go.sum
View File

@@ -1,6 +1,10 @@
github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM=
github.com/chengxilo/virtualterm v1.0.4/go.mod h1:DyxxBZz/x1iqJjFxTFcr6/x+jSpqN0iwWCOK1q10rlY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
@@ -13,16 +17,29 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M=
github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI=
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs=
github.com/sagikazarmark/locafero v0.8.0 h1:mXaMVw7IqxNBxfv3LdWt9MDmcWDQ1fagDH918lOdVaQ= github.com/sagikazarmark/locafero v0.8.0 h1:mXaMVw7IqxNBxfv3LdWt9MDmcWDQ1fagDH918lOdVaQ=
github.com/sagikazarmark/locafero v0.8.0/go.mod h1:UBUyz37V+EdMS3hDF3QWIiVr/2dPrx49OMO0Bn0hJqk= github.com/sagikazarmark/locafero v0.8.0/go.mod h1:UBUyz37V+EdMS3hDF3QWIiVr/2dPrx49OMO0Bn0hJqk=
github.com/schollz/progressbar/v3 v3.18.0 h1:uXdoHABRFmNIjUfte/Ex7WtuyVslrw2wVPQmCN62HpA=
github.com/schollz/progressbar/v3 v3.18.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
@@ -45,8 +62,12 @@ github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSW
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg=
golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek=
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

152
main.go
View File

@@ -4,152 +4,46 @@ package main
import ( import (
"context" "context"
"flag"
"fmt"
"os" "os"
"path/filepath"
"runtime"
"sync"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
)
var ( "github.com/ivuorinen/gibidify/cli"
sourceDir string "github.com/ivuorinen/gibidify/config"
destination string
prefix string
suffix string
concurrency int
format string
) )
func init() {
flag.StringVar(&sourceDir, "source", "", "Source directory to scan recursively")
flag.StringVar(&destination, "destination", "", "Output file to write aggregated code")
flag.StringVar(&prefix, "prefix", "", "Text to add at the beginning of the output file")
flag.StringVar(&suffix, "suffix", "", "Text to add at the end of the output file")
flag.StringVar(&format, "format", "markdown", "Output format (json, markdown, yaml)")
flag.IntVar(&concurrency, "concurrency", runtime.NumCPU(), "Number of concurrent workers (default: number of CPU cores)")
}
func main() { func main() {
// Initialize UI for error handling
ui := cli.NewUIManager()
errorFormatter := cli.NewErrorFormatter(ui)
// In production, use a background context. // In production, use a background context.
if err := run(context.Background()); err != nil { if err := run(context.Background()); err != nil {
fmt.Println("Error:", err) // Handle errors with better formatting and suggestions
os.Exit(1) if cli.IsUserError(err) {
errorFormatter.FormatError(err)
os.Exit(1)
} else {
// System errors still go to logrus for debugging
logrus.Errorf("System error: %v", err)
ui.PrintError("An unexpected error occurred. Please check the logs.")
os.Exit(2)
}
} }
} }
// Run executes the main logic of the CLI application using the provided context. // Run executes the main logic of the CLI application using the provided context.
func run(ctx context.Context) error { func run(ctx context.Context) error {
flag.Parse() // Parse CLI flags
flags, err := cli.ParseFlags()
if err := validateFlags(); err != nil { if err != nil {
return err
}
if err := setDestination(); err != nil {
return err return err
} }
// Load configuration
config.LoadConfig() config.LoadConfig()
logrus.Infof( // Create and run processor
"Starting gibidify. Format: %s, Source: %s, Destination: %s, Workers: %d", processor := cli.NewProcessor(flags)
format, return processor.Process(ctx)
sourceDir,
destination,
concurrency,
)
files, err := fileproc.CollectFiles(sourceDir)
if err != nil {
return fmt.Errorf("error collecting files: %w", err)
}
logrus.Infof("Found %d files to process", len(files))
outFile, err := os.Create(destination)
if err != nil {
return fmt.Errorf("failed to create output file %s: %w", destination, err)
}
defer func(outFile *os.File) {
if err := outFile.Close(); err != nil {
logrus.Errorf("Error closing output file: %v", err)
}
}(outFile)
fileCh := make(chan string)
writeCh := make(chan fileproc.WriteRequest)
writerDone := make(chan struct{})
go fileproc.StartWriter(outFile, writeCh, writerDone, format, prefix, suffix)
var wg sync.WaitGroup
startWorkers(ctx, &wg, fileCh, writeCh)
for _, fp := range files {
select {
case <-ctx.Done():
close(fileCh)
return ctx.Err()
case fileCh <- fp:
}
}
close(fileCh)
wg.Wait()
close(writeCh)
<-writerDone
logrus.Infof("Processing completed. Output saved to %s", destination)
return nil
}
func validateFlags() error {
if sourceDir == "" {
return fmt.Errorf("usage: gibidify -source <source_directory> [--destination <output_file>] [--format=json|yaml|markdown] ")
}
return nil
}
func setDestination() error {
if destination == "" {
absRoot, err := filepath.Abs(sourceDir)
if err != nil {
return fmt.Errorf("failed to get absolute path for %s: %w", sourceDir, err)
}
baseName := filepath.Base(absRoot)
if baseName == "." || baseName == "" {
baseName = "output"
}
destination = baseName + "." + format
}
return nil
}
func startWorkers(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) {
for i := 0; i < concurrency; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for {
select {
case <-ctx.Done():
return
case filePath, ok := <-fileCh:
if !ok {
return
}
absRoot, err := filepath.Abs(sourceDir)
if err != nil {
logrus.Errorf("Failed to get absolute path for %s: %v", sourceDir, err)
return
}
fileproc.ProcessFile(filePath, writeCh, absRoot)
}
}
}()
}
} }

View File

@@ -4,139 +4,103 @@ import (
"context" "context"
"fmt" "fmt"
"os" "os"
"path/filepath"
"strings"
"testing" "testing"
"time" "time"
"github.com/ivuorinen/gibidify/testutil"
)
const (
testFileCount = 1000
) )
// TestIntegrationFullCLI simulates a full run of the CLI application using adaptive concurrency. // TestIntegrationFullCLI simulates a full run of the CLI application using adaptive concurrency.
func TestIntegrationFullCLI(t *testing.T) { func TestIntegrationFullCLI(t *testing.T) {
// Create a temporary source directory and populate it with test files. srcDir := setupTestFiles(t)
srcDir, err := os.MkdirTemp("", "gibidify_src") outFilePath := setupOutputFile(t)
if err != nil { setupCLIArgs(srcDir, outFilePath)
t.Fatalf("Failed to create temp source directory: %v", err)
}
defer func() {
if err := os.RemoveAll(srcDir); err != nil {
t.Fatalf("cleanup failed: %v", err)
}
}()
// Create two test files.
file1 := filepath.Join(srcDir, "file1.txt")
if err := os.WriteFile(file1, []byte("Hello World"), 0644); err != nil {
t.Fatalf("Failed to write file1: %v", err)
}
file2 := filepath.Join(srcDir, "file2.go")
if err := os.WriteFile(file2, []byte("package main\nfunc main() {}"), 0644); err != nil {
t.Fatalf("Failed to write file2: %v", err)
}
// Create a temporary output file.
outFile, err := os.CreateTemp("", "gibidify_output.txt")
if err != nil {
t.Fatalf("Failed to create temp output file: %v", err)
}
outFilePath := outFile.Name()
if err := outFile.Close(); err != nil {
t.Fatalf("close temp file: %v", err)
}
defer func() {
if err := os.Remove(outFilePath); err != nil {
t.Fatalf("cleanup output file: %v", err)
}
}()
// Set up CLI arguments.
os.Args = []string{
"gibidify",
"-source", srcDir,
"-destination", outFilePath,
"-prefix", "PREFIX",
"-suffix", "SUFFIX",
"-concurrency", "2", // For testing, set concurrency to 2.
}
// Run the application with a background context. // Run the application with a background context.
ctx := context.Background() ctx := t.Context()
if err := run(ctx); err != nil { if runErr := run(ctx); runErr != nil {
t.Fatalf("Run failed: %v", err) t.Fatalf("Run failed: %v", runErr)
} }
// Verify the output file contains the expected prefix, file contents, and suffix. verifyOutput(t, outFilePath)
}
// setupTestFiles creates test files and returns the source directory.
func setupTestFiles(t *testing.T) string {
t.Helper()
srcDir := t.TempDir()
// Create two test files.
testutil.CreateTestFiles(t, srcDir, []testutil.FileSpec{
{Name: "file1.txt", Content: "Hello World"},
{Name: "file2.go", Content: "package main\nfunc main() {}"},
})
return srcDir
}
// setupOutputFile creates a temporary output file and returns its path.
func setupOutputFile(t *testing.T) string {
t.Helper()
outFile, outFilePath := testutil.CreateTempOutputFile(t, "gibidify_output.txt")
testutil.CloseFile(t, outFile)
return outFilePath
}
// setupCLIArgs configures the CLI arguments for testing.
func setupCLIArgs(srcDir, outFilePath string) {
testutil.SetupCLIArgs(srcDir, outFilePath, "PREFIX", "SUFFIX", 2)
}
// verifyOutput checks that the output file contains expected content.
func verifyOutput(t *testing.T, outFilePath string) {
t.Helper()
data, err := os.ReadFile(outFilePath) data, err := os.ReadFile(outFilePath)
if err != nil { if err != nil {
t.Fatalf("Failed to read output file: %v", err) t.Fatalf("Failed to read output file: %v", err)
} }
output := string(data) output := string(data)
if !strings.Contains(output, "PREFIX") { testutil.VerifyContentContains(t, output, []string{"PREFIX", "Hello World", "SUFFIX"})
t.Error("Output missing prefix")
}
if !strings.Contains(output, "Hello World") {
t.Error("Output missing content from file1.txt")
}
if !strings.Contains(output, "SUFFIX") {
t.Error("Output missing suffix")
}
} }
// TestIntegrationCancellation verifies that the application correctly cancels processing when the context times out. // TestIntegrationCancellation verifies that the application correctly cancels processing when the context times out.
func TestIntegrationCancellation(t *testing.T) { func TestIntegrationCancellation(t *testing.T) {
// Create a temporary source directory with many files to simulate a long-running process. // Create a temporary source directory with many files to simulate a long-running process.
srcDir, err := os.MkdirTemp("", "gibidify_src_long") srcDir := t.TempDir()
if err != nil {
t.Fatalf("Failed to create temp source directory: %v", err)
}
defer func() {
if err := os.RemoveAll(srcDir); err != nil {
t.Fatalf("cleanup failed: %v", err)
}
}()
// Create a large number of small files. // Create a large number of small files.
for i := 0; i < 1000; i++ { for i := range testFileCount {
filePath := filepath.Join(srcDir, fmt.Sprintf("file%d.txt", i)) fileName := fmt.Sprintf("file%d.txt", i)
if err := os.WriteFile(filePath, []byte("Content"), 0644); err != nil { testutil.CreateTestFile(t, srcDir, fileName, []byte("Content"))
t.Fatalf("Failed to write %s: %v", filePath, err)
}
} }
// Create a temporary output file. // Create a temporary output file.
outFile, err := os.CreateTemp("", "gibidify_output.txt") outFile, outFilePath := testutil.CreateTempOutputFile(t, "gibidify_output.txt")
if err != nil { testutil.CloseFile(t, outFile)
t.Fatalf("Failed to create temp output file: %v", err)
}
outFilePath := outFile.Name()
if err := outFile.Close(); err != nil {
t.Fatalf("close temp file: %v", err)
}
defer func() { defer func() {
if err := os.Remove(outFilePath); err != nil { if removeErr := os.Remove(outFilePath); removeErr != nil {
t.Fatalf("cleanup output file: %v", err) t.Fatalf("cleanup output file: %v", removeErr)
} }
}() }()
// Set up CLI arguments. // Set up CLI arguments.
os.Args = []string{ testutil.SetupCLIArgs(srcDir, outFilePath, "PREFIX", "SUFFIX", 2)
"gibidify",
"-source", srcDir,
"-destination", outFilePath,
"-prefix", "PREFIX",
"-suffix", "SUFFIX",
"-concurrency", "2",
}
// Create a context with a very short timeout to force cancellation. // Create a context with a very short timeout to force cancellation.
ctx, cancel := context.WithTimeout( ctx, cancel := context.WithTimeout(
context.Background(), t.Context(),
10*time.Millisecond, 1*time.Millisecond,
) )
defer cancel() defer cancel()
// Run the application; we expect an error due to cancellation. // Run the application; we expect an error due to cancellation.
err = run(ctx) runErr := run(ctx)
if err == nil { if runErr == nil {
t.Error("Expected Run to fail due to cancellation, but it succeeded") t.Error("Expected Run to fail due to cancellation, but it succeeded")
} }
} }

117
testutil/testutil.go Normal file
View File

@@ -0,0 +1,117 @@
// Package testutil provides common testing utilities and helper functions.
package testutil
import (
"os"
"path/filepath"
"strconv"
"strings"
"testing"
"github.com/spf13/viper"
"github.com/ivuorinen/gibidify/config"
)
const (
// FilePermission is the default file permission for test files.
FilePermission = 0o644
// DirPermission is the default directory permission for test directories.
DirPermission = 0o755
)
// CreateTestFile creates a test file with the given content and returns its path.
// The surrounding test is aborted via t.Fatalf if the write fails.
func CreateTestFile(t *testing.T, dir, filename string, content []byte) string {
	t.Helper()
	target := filepath.Join(dir, filename)
	writeErr := os.WriteFile(target, content, FilePermission)
	if writeErr != nil {
		t.Fatalf("Failed to write file %s: %v", target, writeErr)
	}
	return target
}
// CreateTempOutputFile creates a temporary output file and returns the file handle and path.
func CreateTempOutputFile(t *testing.T, pattern string) (file *os.File, path string) {
t.Helper()
outFile, err := os.CreateTemp(t.TempDir(), pattern)
if err != nil {
t.Fatalf("Failed to create temp output file: %v", err)
}
path = outFile.Name()
return outFile, path
}
// CreateTestDirectory creates a test directory and returns its path.
// The surrounding test is aborted via t.Fatalf if creation fails.
func CreateTestDirectory(t *testing.T, parent, name string) string {
	t.Helper()
	target := filepath.Join(parent, name)
	if mkErr := os.Mkdir(target, DirPermission); mkErr != nil {
		t.Fatalf("Failed to create directory %s: %v", target, mkErr)
	}
	return target
}
// FileSpec represents a file specification for creating test files.
type FileSpec struct {
	Name    string // file name, relative to the root directory
	Content string // file body, written verbatim
}

// CreateTestFiles creates multiple test files from specifications and returns
// the created paths in specification order.
func CreateTestFiles(t *testing.T, rootDir string, fileSpecs []FileSpec) []string {
	t.Helper()
	paths := make([]string, len(fileSpecs))
	for i, spec := range fileSpecs {
		paths[i] = CreateTestFile(t, rootDir, spec.Name, []byte(spec.Content))
	}
	return paths
}
// ResetViperConfig resets Viper configuration and optionally sets a config path.
// It clears all global Viper state, registers configPath as an additional
// search path when non-empty, and then reloads the application configuration.
// NOTE(review): assumes config.LoadConfig tolerates a missing config file in
// the given path — confirm against the config package.
func ResetViperConfig(t *testing.T, configPath string) {
	t.Helper()
	viper.Reset()
	if configPath != "" {
		viper.AddConfigPath(configPath)
	}
	config.LoadConfig()
}
// SetupCLIArgs configures os.Args for CLI testing.
func SetupCLIArgs(srcDir, outFilePath, prefix, suffix string, concurrency int) {
os.Args = []string{
"gibidify",
"-source", srcDir,
"-destination", outFilePath,
"-prefix", prefix,
"-suffix", suffix,
"-concurrency", strconv.Itoa(concurrency),
}
}
// VerifyContentContains checks that content contains all expected substrings.
// Each missing substring is reported as a separate test error via t.Errorf,
// so the test keeps running and reports every miss at once.
func VerifyContentContains(t *testing.T, content string, expectedSubstrings []string) {
	t.Helper()
	for _, want := range expectedSubstrings {
		if strings.Contains(content, want) {
			continue
		}
		t.Errorf("Content missing expected substring: %s", want)
	}
}
// MustSucceed fails the test if the error is not nil.
// The operation string identifies what was being attempted in the failure message.
func MustSucceed(t *testing.T, err error, operation string) {
	t.Helper()
	if err == nil {
		return
	}
	t.Fatalf("Operation %s failed: %v", operation, err)
}
// CloseFile closes a file and reports errors to the test.
func CloseFile(t *testing.T, file *os.File) {
t.Helper()
if err := file.Close(); err != nil {
t.Errorf("Failed to close file: %v", err)
}
}

591
testutil/testutil_test.go Normal file
View File

@@ -0,0 +1,591 @@
package testutil
import (
	"errors"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"testing"

	"github.com/spf13/viper"
)
func TestCreateTestFile(t *testing.T) {
tests := []struct {
name string
dir string
filename string
content []byte
wantErr bool
}{
{
name: "create simple test file",
filename: "test.txt",
content: []byte("hello world"),
wantErr: false,
},
{
name: "create file with empty content",
filename: "empty.txt",
content: []byte{},
wantErr: false,
},
{
name: "create file with binary content",
filename: "binary.bin",
content: []byte{0x00, 0xFF, 0x42},
wantErr: false,
},
{
name: "create file with subdirectory",
filename: "subdir/test.txt",
content: []byte("nested file"),
wantErr: false,
},
{
name: "create file with special characters",
filename: "special-file_123.go",
content: []byte("package main"),
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Use a temporary directory for each test
tempDir := t.TempDir()
if tt.dir == "" {
tt.dir = tempDir
}
// Create subdirectory if needed
if strings.Contains(tt.filename, "/") {
subdir := filepath.Join(tt.dir, filepath.Dir(tt.filename))
if err := os.MkdirAll(subdir, DirPermission); err != nil {
t.Fatalf("Failed to create subdirectory: %v", err)
}
}
// Test CreateTestFile
filePath := CreateTestFile(t, tt.dir, tt.filename, tt.content)
// Verify file exists
info, err := os.Stat(filePath)
if err != nil {
t.Fatalf("Created file does not exist: %v", err)
}
// Verify it's a regular file
if !info.Mode().IsRegular() {
t.Errorf("Created path is not a regular file")
}
// Verify permissions
if info.Mode().Perm() != FilePermission {
t.Errorf("File permissions = %v, want %v", info.Mode().Perm(), FilePermission)
}
// Verify content
readContent, err := os.ReadFile(filePath)
if err != nil {
t.Fatalf("Failed to read created file: %v", err)
}
if string(readContent) != string(tt.content) {
t.Errorf("File content = %q, want %q", readContent, tt.content)
}
})
}
}
func TestCreateTempOutputFile(t *testing.T) {
tests := []struct {
name string
pattern string
}{
{
name: "simple pattern",
pattern: "output-*.txt",
},
{
name: "pattern with prefix only",
pattern: "test-",
},
{
name: "pattern with suffix only",
pattern: "*.json",
},
{
name: "empty pattern",
pattern: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
file, path := CreateTempOutputFile(t, tt.pattern)
defer CloseFile(t, file)
// Verify file exists
info, err := os.Stat(path)
if err != nil {
t.Fatalf("Temp file does not exist: %v", err)
}
// Verify it's a regular file
if !info.Mode().IsRegular() {
t.Errorf("Created path is not a regular file")
}
// Verify we can write to it
testContent := []byte("test content")
if _, err := file.Write(testContent); err != nil {
t.Errorf("Failed to write to temp file: %v", err)
}
// Verify the path is in a temp directory (any temp directory)
if !strings.Contains(path, os.TempDir()) {
t.Errorf("Temp file not in temp directory: %s", path)
}
})
}
}
func TestCreateTestDirectory(t *testing.T) {
tests := []struct {
name string
parent string
dir string
}{
{
name: "simple directory",
dir: "testdir",
},
{
name: "directory with special characters",
dir: "test-dir_123",
},
{
name: "nested directory name",
dir: "nested/dir",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tempDir := t.TempDir()
if tt.parent == "" {
tt.parent = tempDir
}
// For nested directories, create parent first
if strings.Contains(tt.dir, "/") {
parentPath := filepath.Join(tt.parent, filepath.Dir(tt.dir))
if err := os.MkdirAll(parentPath, DirPermission); err != nil {
t.Fatalf("Failed to create parent directory: %v", err)
}
tt.dir = filepath.Base(tt.dir)
tt.parent = parentPath
}
dirPath := CreateTestDirectory(t, tt.parent, tt.dir)
// Verify directory exists
info, err := os.Stat(dirPath)
if err != nil {
t.Fatalf("Created directory does not exist: %v", err)
}
// Verify it's a directory
if !info.IsDir() {
t.Errorf("Created path is not a directory")
}
// Verify permissions
if info.Mode().Perm() != DirPermission {
t.Errorf("Directory permissions = %v, want %v", info.Mode().Perm(), DirPermission)
}
// Verify we can create files in it
testFile := filepath.Join(dirPath, "test.txt")
if err := os.WriteFile(testFile, []byte("test"), FilePermission); err != nil {
t.Errorf("Cannot create file in directory: %v", err)
}
})
}
}
func TestCreateTestFiles(t *testing.T) {
tests := []struct {
name string
fileSpecs []FileSpec
wantCount int
}{
{
name: "create multiple files",
fileSpecs: []FileSpec{
{Name: "file1.txt", Content: "content1"},
{Name: "file2.go", Content: "package main"},
{Name: "file3.json", Content: `{"key": "value"}`},
},
wantCount: 3,
},
{
name: "create files with subdirectories",
fileSpecs: []FileSpec{
{Name: "src/main.go", Content: "package main"},
{Name: "test/test.go", Content: "package test"},
},
wantCount: 2,
},
{
name: "empty file specs",
fileSpecs: []FileSpec{},
wantCount: 0,
},
{
name: "files with empty content",
fileSpecs: []FileSpec{
{Name: "empty1.txt", Content: ""},
{Name: "empty2.txt", Content: ""},
},
wantCount: 2,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rootDir := t.TempDir()
// Create necessary subdirectories
for _, spec := range tt.fileSpecs {
if strings.Contains(spec.Name, "/") {
subdir := filepath.Join(rootDir, filepath.Dir(spec.Name))
if err := os.MkdirAll(subdir, DirPermission); err != nil {
t.Fatalf("Failed to create subdirectory: %v", err)
}
}
}
createdFiles := CreateTestFiles(t, rootDir, tt.fileSpecs)
// Verify count
if len(createdFiles) != tt.wantCount {
t.Errorf("Created %d files, want %d", len(createdFiles), tt.wantCount)
}
// Verify each file
for i, filePath := range createdFiles {
content, err := os.ReadFile(filePath)
if err != nil {
t.Errorf("Failed to read file %s: %v", filePath, err)
continue
}
if string(content) != tt.fileSpecs[i].Content {
t.Errorf("File %s content = %q, want %q", filePath, content, tt.fileSpecs[i].Content)
}
}
})
}
}
func TestResetViperConfig(t *testing.T) {
tests := []struct {
name string
configPath string
preSetup func()
verify func(t *testing.T)
}{
{
name: "reset with empty config path",
configPath: "",
preSetup: func() {
viper.Set("test.key", "value")
},
verify: func(t *testing.T) {
if viper.IsSet("test.key") {
t.Error("Viper config not reset properly")
}
},
},
{
name: "reset with config path",
configPath: t.TempDir(),
preSetup: func() {
viper.Set("test.key", "value")
},
verify: func(t *testing.T) {
if viper.IsSet("test.key") {
t.Error("Viper config not reset properly")
}
// Verify config path was added
paths := viper.ConfigFileUsed()
if paths == "" {
// This is expected as no config file exists
return
}
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tt.preSetup()
ResetViperConfig(t, tt.configPath)
tt.verify(t)
})
}
}
// TestSetupCLIArgs verifies that SetupCLIArgs writes the expected flag vector
// into os.Args, restoring the original arguments afterwards.
func TestSetupCLIArgs(t *testing.T) {
	// Save original args so other tests see an untouched os.Args.
	originalArgs := os.Args
	defer func() {
		os.Args = originalArgs
	}()
	tests := []struct {
		name        string
		srcDir      string
		outFile     string
		prefix      string
		suffix      string
		concurrency int
		wantLen     int
	}{
		{
			name:        "basic CLI args",
			srcDir:      "/src",
			outFile:     "/out.txt",
			prefix:      "PREFIX",
			suffix:      "SUFFIX",
			concurrency: 4,
			wantLen:     11,
		},
		{
			name:        "empty strings",
			srcDir:      "",
			outFile:     "",
			prefix:      "",
			suffix:      "",
			concurrency: 1,
			wantLen:     11,
		},
		{
			name:        "special characters in args",
			srcDir:      "/path with spaces/src",
			outFile:     "/path/to/output file.txt",
			prefix:      "Prefix with\nnewline",
			suffix:      "Suffix with\ttab",
			concurrency: 8,
			wantLen:     11,
		},
		{
			// Regression case: the old assertion compared against
			// string(rune(n+'0')), which is only correct for 0-9.
			name:        "multi-digit concurrency",
			srcDir:      "/src",
			outFile:     "/out.txt",
			prefix:      "P",
			suffix:      "S",
			concurrency: 12,
			wantLen:     11,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			SetupCLIArgs(tt.srcDir, tt.outFile, tt.prefix, tt.suffix, tt.concurrency)
			if len(os.Args) != tt.wantLen {
				t.Errorf("os.Args length = %d, want %d", len(os.Args), tt.wantLen)
			}
			// Verify each value at its fixed position in the flag vector.
			if os.Args[0] != "gibidify" {
				t.Errorf("Program name = %s, want gibidify", os.Args[0])
			}
			if os.Args[2] != tt.srcDir {
				t.Errorf("Source dir = %s, want %s", os.Args[2], tt.srcDir)
			}
			if os.Args[4] != tt.outFile {
				t.Errorf("Output file = %s, want %s", os.Args[4], tt.outFile)
			}
			if os.Args[6] != tt.prefix {
				t.Errorf("Prefix = %s, want %s", os.Args[6], tt.prefix)
			}
			if os.Args[8] != tt.suffix {
				t.Errorf("Suffix = %s, want %s", os.Args[8], tt.suffix)
			}
			// strconv.Itoa matches SetupCLIArgs's own encoding and handles any
			// integer, unlike the previous single-rune conversion.
			if os.Args[10] != strconv.Itoa(tt.concurrency) {
				t.Errorf("Concurrency = %s, want %d", os.Args[10], tt.concurrency)
			}
		})
	}
}
func TestVerifyContentContains(t *testing.T) {
// Test successful verification
t.Run("all substrings present", func(t *testing.T) {
content := "This is a test file with multiple lines"
VerifyContentContains(t, content, []string{"test file", "multiple lines"})
// If we get here, the test passed
})
// Test empty expected substrings
t.Run("empty expected substrings", func(t *testing.T) {
content := "Any content"
VerifyContentContains(t, content, []string{})
// Should pass with no expected strings
})
// For failure cases, we'll test indirectly by verifying behavior
t.Run("verify error reporting", func(t *testing.T) {
// We can't easily test the failure case directly since it calls t.Errorf
// But we can at least verify the function doesn't panic
defer func() {
if r := recover(); r != nil {
t.Errorf("VerifyContentContains panicked: %v", r)
}
}()
// This would normally fail but we're just checking it doesn't panic
content := "test"
expected := []string{"not found"}
// Create a sub-test that we expect to fail
t.Run("expected_failure", func(t *testing.T) {
t.Skip("Skipping actual failure test")
VerifyContentContains(t, content, expected)
})
})
}
func TestMustSucceed(t *testing.T) {
// Test with nil error (should succeed)
t.Run("nil error", func(t *testing.T) {
MustSucceed(t, nil, "successful operation")
// If we get here, the test passed
})
// Test error behavior without causing test failure
t.Run("verify error handling", func(t *testing.T) {
// We can't test the failure case directly since it calls t.Fatalf
// But we can verify the function exists and is callable
defer func() {
if r := recover(); r != nil {
t.Errorf("MustSucceed panicked: %v", r)
}
}()
// Create a sub-test that we expect to fail
t.Run("expected_failure", func(t *testing.T) {
t.Skip("Skipping actual failure test")
MustSucceed(t, errors.New("test error"), "failed operation")
})
})
}
func TestCloseFile(t *testing.T) {
// Test closing a normal file
t.Run("close normal file", func(t *testing.T) {
file, err := os.CreateTemp(t.TempDir(), "test")
if err != nil {
t.Fatalf("Failed to create test file: %v", err)
}
CloseFile(t, file)
// Verify file is closed by trying to write to it
_, writeErr := file.Write([]byte("test"))
if writeErr == nil {
t.Error("Expected write to fail on closed file")
}
})
// Test that CloseFile doesn't panic on already closed files
// Note: We can't easily test the error case without causing test failure
// since CloseFile calls t.Errorf, which is the expected behavior
t.Run("verify CloseFile function exists and is callable", func(t *testing.T) {
// This test just verifies the function signature and basic functionality
// The error case is tested in integration tests where failures are expected
file, err := os.CreateTemp(t.TempDir(), "test")
if err != nil {
t.Fatalf("Failed to create test file: %v", err)
}
// Test normal case - file should close successfully
CloseFile(t, file)
// Verify file is closed
_, writeErr := file.Write([]byte("test"))
if writeErr == nil {
t.Error("Expected write to fail on closed file")
}
})
}
// TestConcurrentOperations verifies the helpers behave correctly when tests
// run concurrently. Parallel subtests are used instead of bare goroutines:
// CreateTestFile and CreateTestDirectory call t.Fatalf on failure, and the
// testing package requires Fatal to be called only from the goroutine that
// owns the testing.T — calling it from a spawned goroutine is invalid.
func TestConcurrentOperations(t *testing.T) {
	tempDir := t.TempDir()
	// Concurrent file creation.
	for i := 0; i < 5; i++ {
		name := string(rune('a'+i)) + ".txt"
		t.Run("file_"+name, func(t *testing.T) {
			t.Parallel()
			CreateTestFile(t, tempDir, name, []byte("content"))
		})
	}
	// Concurrent directory creation.
	for i := 0; i < 5; i++ {
		dirName := "dir" + string(rune('0'+i))
		t.Run(dirName, func(t *testing.T) {
			t.Parallel()
			CreateTestDirectory(t, tempDir, dirName)
		})
	}
}
// Benchmarks
func BenchmarkCreateTestFile(b *testing.B) {
tempDir := b.TempDir()
content := []byte("benchmark content")
b.ResetTimer()
for i := 0; i < b.N; i++ {
// Use a unique filename for each iteration to avoid conflicts
filename := "bench" + string(rune(i%26+'a')) + ".txt"
filePath := filepath.Join(tempDir, filename)
if err := os.WriteFile(filePath, content, FilePermission); err != nil {
b.Fatalf("Failed to write file: %v", err)
}
}
}
func BenchmarkCreateTestFiles(b *testing.B) {
tempDir := b.TempDir()
b.ResetTimer()
for i := 0; i < b.N; i++ {
// Create specs with unique names for each iteration
specs := []FileSpec{
{Name: "file1_" + string(rune(i%26+'a')) + ".txt", Content: "content1"},
{Name: "file2_" + string(rune(i%26+'a')) + ".txt", Content: "content2"},
{Name: "file3_" + string(rune(i%26+'a')) + ".txt", Content: "content3"},
}
for _, spec := range specs {
filePath := filepath.Join(tempDir, spec.Name)
if err := os.WriteFile(filePath, []byte(spec.Content), FilePermission); err != nil {
b.Fatalf("Failed to write file: %v", err)
}
}
}
}
func BenchmarkVerifyContentContains(b *testing.B) {
content := strings.Repeat("test content with various words ", 100)
expected := []string{"test", "content", "various", "words"}
b.ResetTimer()
for i := 0; i < b.N; i++ {
// We can't use the actual function in benchmark since it needs testing.T
// So we'll benchmark the core logic
for _, exp := range expected {
_ = strings.Contains(content, exp)
}
}
}

228
utils/errors.go Normal file
View File

@@ -0,0 +1,228 @@
// Package utils provides common utility functions.
package utils
import (
	"errors"
	"fmt"

	"github.com/sirupsen/logrus"
)
// ErrorType represents the category of error.
type ErrorType int

const (
	// ErrorTypeUnknown represents an unknown error type.
	ErrorTypeUnknown ErrorType = iota
	// ErrorTypeCLI represents command-line interface errors.
	ErrorTypeCLI
	// ErrorTypeFileSystem represents file system operation errors.
	ErrorTypeFileSystem
	// ErrorTypeProcessing represents file processing errors.
	ErrorTypeProcessing
	// ErrorTypeConfiguration represents configuration errors.
	ErrorTypeConfiguration
	// ErrorTypeIO represents input/output errors.
	ErrorTypeIO
	// ErrorTypeValidation represents validation errors.
	ErrorTypeValidation
)

// errorTypeNames maps each ErrorType to its display name, indexed by value.
var errorTypeNames = [...]string{
	ErrorTypeUnknown:       "Unknown",
	ErrorTypeCLI:           "CLI",
	ErrorTypeFileSystem:    "FileSystem",
	ErrorTypeProcessing:    "Processing",
	ErrorTypeConfiguration: "Configuration",
	ErrorTypeIO:            "IO",
	ErrorTypeValidation:    "Validation",
}

// String returns the string representation of the error type.
// Values outside the known range fall back to "Unknown".
func (e ErrorType) String() string {
	if e < 0 || int(e) >= len(errorTypeNames) {
		return "Unknown"
	}
	return errorTypeNames[e]
}
// StructuredError represents a structured error with type, code, and context.
// It carries an optional underlying cause plus free-form context values and
// file location details for structured logging.
type StructuredError struct {
	Type     ErrorType      // category of the error
	Code     string         // machine-readable error code (see Code* constants)
	Message  string         // human-readable description
	Cause    error          // wrapped underlying error; may be nil
	Context  map[string]any // extra key/value details; lazily allocated
	FilePath string         // file the error relates to, if any
	Line     int            // line number within FilePath, if any
}

// Error implements the error interface, appending the cause when present.
func (e *StructuredError) Error() string {
	base := fmt.Sprintf("%s [%s]: %s", e.Type, e.Code, e.Message)
	if e.Cause == nil {
		return base
	}
	return fmt.Sprintf("%s: %v", base, e.Cause)
}

// Unwrap returns the underlying cause error.
func (e *StructuredError) Unwrap() error {
	return e.Cause
}

// WithContext adds context information to the error and returns it for chaining.
func (e *StructuredError) WithContext(key string, value any) *StructuredError {
	if e.Context == nil {
		e.Context = map[string]any{}
	}
	e.Context[key] = value
	return e
}

// WithFilePath adds file path information to the error and returns it for chaining.
func (e *StructuredError) WithFilePath(filePath string) *StructuredError {
	e.FilePath = filePath
	return e
}

// WithLine adds line number information to the error and returns it for chaining.
func (e *StructuredError) WithLine(line int) *StructuredError {
	e.Line = line
	return e
}
// NewStructuredError creates a new structured error with the given type,
// code, and message.
func NewStructuredError(errorType ErrorType, code, message string) *StructuredError {
	return &StructuredError{Type: errorType, Code: code, Message: message}
}

// NewStructuredErrorf creates a new structured error with a formatted message.
func NewStructuredErrorf(errorType ErrorType, code, format string, args ...any) *StructuredError {
	return NewStructuredError(errorType, code, fmt.Sprintf(format, args...))
}

// WrapError wraps an existing error with structured error information.
func WrapError(err error, errorType ErrorType, code, message string) *StructuredError {
	wrapped := NewStructuredError(errorType, code, message)
	wrapped.Cause = err
	return wrapped
}

// WrapErrorf wraps an existing error with a formatted message.
func WrapErrorf(err error, errorType ErrorType, code, format string, args ...any) *StructuredError {
	return WrapError(err, errorType, code, fmt.Sprintf(format, args...))
}
// Common error codes for each type
const (
	// CLI Error Codes
	CodeCLIMissingSource = "MISSING_SOURCE" // required -source flag was not provided
	CodeCLIInvalidArgs   = "INVALID_ARGS"   // command-line arguments failed validation
	// FileSystem Error Codes
	CodeFSPathResolution = "PATH_RESOLUTION"   // a path could not be resolved (e.g. to absolute form)
	CodeFSPermission     = "PERMISSION_DENIED" // insufficient permissions for the operation
	CodeFSNotFound       = "NOT_FOUND"         // the path does not exist
	CodeFSAccess         = "ACCESS_DENIED"     // the path exists but cannot be accessed
	// Processing Error Codes
	CodeProcessingFileRead   = "FILE_READ"  // reading a source file failed
	CodeProcessingCollection = "COLLECTION" // collecting files to process failed
	CodeProcessingTraversal  = "TRAVERSAL"  // walking the directory tree failed
	CodeProcessingEncode     = "ENCODE"     // encoding processed output failed
	// Configuration Error Codes
	CodeConfigValidation = "VALIDATION" // configuration values failed validation
	CodeConfigMissing    = "MISSING"    // required configuration is absent
	// IO Error Codes
	CodeIOFileCreate = "FILE_CREATE" // creating an output file failed
	CodeIOFileWrite  = "FILE_WRITE"  // writing to an output file failed
	CodeIOEncoding   = "ENCODING"    // serializing data for output failed
	CodeIOWrite      = "WRITE"       // a generic write operation failed
	CodeIORead       = "READ"        // a generic read operation failed
	CodeIOClose      = "CLOSE"       // closing a resource failed
	// Validation Error Codes
	CodeValidationFormat   = "FORMAT"     // unsupported or malformed output format
	CodeValidationFileType = "FILE_TYPE"  // file type is not allowed
	CodeValidationSize     = "SIZE_LIMIT" // a size limit was exceeded
)
// Predefined error constructors for common error scenarios

// NewCLIMissingSourceError creates a CLI error for missing source argument.
func NewCLIMissingSourceError() *StructuredError {
	// The usage string is split across lines to respect the repository's
	// 120-character line limit; the concatenated value is unchanged.
	return NewStructuredError(
		ErrorTypeCLI,
		CodeCLIMissingSource,
		"usage: gibidify -source <source_directory> "+
			"[--destination <output_file>] [--format=json|yaml|markdown]",
	)
}

// NewFileSystemError creates a file system error.
func NewFileSystemError(code, message string) *StructuredError {
	return NewStructuredError(ErrorTypeFileSystem, code, message)
}

// NewProcessingError creates a processing error.
func NewProcessingError(code, message string) *StructuredError {
	return NewStructuredError(ErrorTypeProcessing, code, message)
}

// NewIOError creates an IO error.
func NewIOError(code, message string) *StructuredError {
	return NewStructuredError(ErrorTypeIO, code, message)
}

// NewValidationError creates a validation error.
func NewValidationError(code, message string) *StructuredError {
	return NewStructuredError(ErrorTypeValidation, code, message)
}
// LogError logs an error with a consistent format if the error is not nil.
// The operation parameter describes what was being attempted; when args are
// supplied, operation is treated as a printf-style format string.
// A StructuredError anywhere in the error chain is logged with its type,
// code, and context as structured fields.
func LogError(operation string, err error, args ...any) {
	if err == nil {
		return
	}
	msg := operation
	if len(args) > 0 {
		// Format the operation string with the provided arguments.
		msg = fmt.Sprintf(operation, args...)
	}
	// errors.As (rather than a direct type assertion) also finds a
	// StructuredError that has been wrapped with fmt.Errorf("...: %w", ...).
	var structErr *StructuredError
	if errors.As(err, &structErr) {
		logrus.WithFields(logrus.Fields{
			"error_type": structErr.Type.String(),
			"error_code": structErr.Code,
			"context":    structErr.Context,
			"file_path":  structErr.FilePath,
			"line":       structErr.Line,
		}).Errorf("%s: %v", msg, err)
		return
	}
	logrus.Errorf("%s: %v", msg, err)
}
// LogErrorf logs an error with a formatted message if the error is not nil.
// It is a convenience wrapper around LogError for callers that need formatting.
func LogErrorf(err error, format string, args ...any) {
	if err == nil {
		return
	}
	LogError(format, err, args...)
}

242
utils/errors_test.go Normal file
View File

@@ -0,0 +1,242 @@
package utils
import (
"bytes"
"errors"
"fmt"
"strings"
"testing"
"github.com/sirupsen/logrus"
)
// captureLogOutput redirects logrus output to a buffer while f runs and
// returns everything that was logged. The previous output destination is
// restored before returning.
func captureLogOutput(f func()) string {
	var buf bytes.Buffer
	// Record the current destination BEFORE redirecting. The old code used
	// `defer logrus.SetOutput(logrus.StandardLogger().Out)` after SetOutput:
	// defer arguments are evaluated at the defer statement, so it captured
	// &buf and "restored" the buffer instead of the original writer.
	orig := logrus.StandardLogger().Out
	logrus.SetOutput(&buf)
	defer logrus.SetOutput(orig)
	f()
	return buf.String()
}
func TestLogError(t *testing.T) {
tests := []struct {
name string
operation string
err error
args []any
wantLog string
wantEmpty bool
}{
{
name: "nil error should not log",
operation: "test operation",
err: nil,
args: nil,
wantEmpty: true,
},
{
name: "basic error logging",
operation: "failed to read file",
err: errors.New("permission denied"),
args: nil,
wantLog: "failed to read file: permission denied",
},
{
name: "error with formatting args",
operation: "failed to process file %s",
err: errors.New("file too large"),
args: []any{"test.txt"},
wantLog: "failed to process file test.txt: file too large",
},
{
name: "error with multiple formatting args",
operation: "failed to copy from %s to %s",
err: errors.New("disk full"),
args: []any{"source.txt", "dest.txt"},
wantLog: "failed to copy from source.txt to dest.txt: disk full",
},
{
name: "wrapped error",
operation: "database operation failed",
err: fmt.Errorf("connection error: %w", errors.New("timeout")),
args: nil,
wantLog: "database operation failed: connection error: timeout",
},
{
name: "empty operation string",
operation: "",
err: errors.New("some error"),
args: nil,
wantLog: ": some error",
},
{
name: "operation with percentage sign",
operation: "processing 50% complete",
err: errors.New("interrupted"),
args: nil,
wantLog: "processing 50% complete: interrupted",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
output := captureLogOutput(func() {
LogError(tt.operation, tt.err, tt.args...)
})
if tt.wantEmpty {
if output != "" {
t.Errorf("LogError() logged output when error was nil: %q", output)
}
return
}
if !strings.Contains(output, tt.wantLog) {
t.Errorf("LogError() output = %q, want to contain %q", output, tt.wantLog)
}
// Verify it's logged at ERROR level
if !strings.Contains(output, "level=error") {
t.Errorf("LogError() should log at ERROR level, got: %q", output)
}
})
}
}
// TestLogErrorf drives LogErrorf through a table of formatted-error cases and
// checks both the message content and the ERROR severity of the output.
func TestLogErrorf(t *testing.T) {
	cases := []struct {
		name       string
		err        error
		format     string
		args       []any
		wantSubstr string
		wantNoLog  bool
	}{
		{
			name:      "nil error should not log",
			err:       nil,
			format:    "operation %s failed",
			args:      []any{"test"},
			wantNoLog: true,
		},
		{
			name:       "basic formatted error",
			err:        errors.New("not found"),
			format:     "file %s not found",
			args:       []any{"config.yaml"},
			wantSubstr: "file config.yaml not found: not found",
		},
		{
			name:       "multiple format arguments",
			err:        errors.New("invalid range"),
			format:     "value %d is not between %d and %d",
			args:       []any{150, 0, 100},
			wantSubstr: "value 150 is not between 0 and 100: invalid range",
		},
		{
			name:       "no format arguments",
			err:        errors.New("generic error"),
			format:     "operation failed",
			args:       nil,
			wantSubstr: "operation failed: generic error",
		},
		{
			name:       "format with different types",
			err:        errors.New("type mismatch"),
			format:     "expected %s but got %d",
			args:       []any{"string", 42},
			wantSubstr: "expected string but got 42: type mismatch",
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			out := captureLogOutput(func() {
				LogErrorf(tc.err, tc.format, tc.args...)
			})
			// A nil error must produce no output at all.
			if tc.wantNoLog {
				if out != "" {
					t.Errorf("LogErrorf() logged output when error was nil: %q", out)
				}
				return
			}
			if !strings.Contains(out, tc.wantSubstr) {
				t.Errorf("LogErrorf() output = %q, want to contain %q", out, tc.wantSubstr)
			}
			// Entries must be emitted at ERROR severity.
			if !strings.Contains(out, "level=error") {
				t.Errorf("LogErrorf() should log at ERROR level, got: %q", out)
			}
		})
	}
}
// TestLogErrorConcurrency checks that LogError can be called from many
// goroutines at once; run with -race to surface data races.
func TestLogErrorConcurrency(t *testing.T) {
	const workers = 10
	// Buffered signal channel: each goroutine reports completion exactly once.
	finished := make(chan struct{}, workers)
	for i := 0; i < workers; i++ {
		go func(id int) {
			LogError("concurrent operation", fmt.Errorf("error %d", id))
			finished <- struct{}{}
		}(i)
	}
	// Block until every worker has signaled.
	for i := 0; i < workers; i++ {
		<-finished
	}
}
// TestLogErrorfConcurrency checks that LogErrorf can be called from many
// goroutines at once; run with -race to surface data races.
func TestLogErrorfConcurrency(t *testing.T) {
	const workers = 10
	// Buffered signal channel: each goroutine reports completion exactly once.
	finished := make(chan struct{}, workers)
	for i := 0; i < workers; i++ {
		go func(id int) {
			LogErrorf(fmt.Errorf("error %d", id), "concurrent operation %d", id)
			finished <- struct{}{}
		}(i)
	}
	// Block until every worker has signaled.
	for i := 0; i < workers; i++ {
		<-finished
	}
}
// BenchmarkLogError benchmarks the LogError function.
func BenchmarkLogError(b *testing.B) {
	err := errors.New("benchmark error")
	// Save the real output BEFORE swapping it out. Deferring
	// logrus.SetOutput(logrus.StandardLogger().Out) after the SetOutput call
	// does not work: defer evaluates its arguments immediately, so it would
	// capture the discard buffer and never restore the original output.
	origOut := logrus.StandardLogger().Out
	// Disable output during benchmark
	logrus.SetOutput(bytes.NewBuffer(nil))
	defer logrus.SetOutput(origOut)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		LogError("benchmark operation", err)
	}
}
// BenchmarkLogErrorf benchmarks the LogErrorf function.
func BenchmarkLogErrorf(b *testing.B) {
	err := errors.New("benchmark error")
	// Save the real output BEFORE swapping it out. Deferring
	// logrus.SetOutput(logrus.StandardLogger().Out) after the SetOutput call
	// does not work: defer evaluates its arguments immediately, so it would
	// capture the discard buffer and never restore the original output.
	origOut := logrus.StandardLogger().Out
	// Disable output during benchmark
	logrus.SetOutput(bytes.NewBuffer(nil))
	defer logrus.SetOutput(origOut)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		LogErrorf(err, "benchmark operation %d", i)
	}
}
// BenchmarkLogErrorNil benchmarks LogError with nil error (no-op case).
func BenchmarkLogErrorNil(b *testing.B) {
	// A nil error means nothing is logged, so this measures the cost of the
	// early-exit path; no output redirection is needed here.
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		LogError("benchmark operation", nil)
	}
}

26
utils/paths.go Normal file
View File

@@ -0,0 +1,26 @@
// Package utils provides common utility functions.
package utils
import (
"fmt"
"path/filepath"
)
// GetAbsolutePath returns the absolute path for the given path.
// It wraps filepath.Abs with consistent error handling, so callers get a
// uniformly formatted, wrapped error on failure.
func GetAbsolutePath(path string) (string, error) {
	resolved, err := filepath.Abs(path)
	if err == nil {
		return resolved, nil
	}
	return "", fmt.Errorf("failed to get absolute path for %s: %w", path, err)
}
// GetBaseName returns the base name for the given path, handling special cases.
// It falls back to "output" when the path has no usable base component: the
// current directory ("."), an empty result, or a bare root separator —
// filepath.Base("/") returns "/", which is not a usable file name.
func GetBaseName(absPath string) string {
	baseName := filepath.Base(absPath)
	switch baseName {
	case ".", "", string(filepath.Separator):
		return "output"
	}
	return baseName
}

262
utils/paths_test.go Normal file
View File

@@ -0,0 +1,262 @@
package utils
import (
"os"
"path/filepath"
"runtime"
"strings"
"testing"
)
// TestGetAbsolutePath verifies GetAbsolutePath against a table of relative and
// absolute inputs, comparing the cleaned result to the exact expected path.
func TestGetAbsolutePath(t *testing.T) {
	// Get current working directory for tests
	cwd, err := os.Getwd()
	if err != nil {
		t.Fatalf("Failed to get current directory: %v", err)
	}
	tests := []struct {
		name string
		path string
		// want is the exact expected path, compared after filepath.Clean —
		// it is a full-path equality check, not a prefix match.
		want        string
		wantErr     bool
		wantErrMsg  string
		skipWindows bool
	}{
		{
			name: "absolute path unchanged",
			path: cwd,
			want: cwd,
		},
		{
			name: "relative path current directory",
			path: ".",
			want: cwd,
		},
		{
			name: "relative path parent directory",
			path: "..",
			want: filepath.Dir(cwd),
		},
		{
			name: "relative path with file",
			path: "test.txt",
			want: filepath.Join(cwd, "test.txt"),
		},
		{
			name: "relative path with subdirectory",
			path: "subdir/file.go",
			want: filepath.Join(cwd, "subdir", "file.go"),
		},
		{
			name: "empty path",
			path: "",
			want: cwd,
		},
		{
			// "~" is not expanded by filepath.Abs; it is treated as a
			// literal directory name relative to the working directory.
			name: "path with tilde",
			path: "~/test",
			want: filepath.Join(cwd, "~", "test"),
		},
		{
			name: "path with multiple separators",
			path: "path//to///file",
			want: filepath.Join(cwd, "path", "to", "file"),
		},
		{
			name: "path with trailing separator",
			path: "path/",
			want: filepath.Join(cwd, "path"),
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if tt.skipWindows && runtime.GOOS == "windows" {
				t.Skip("Skipping test on Windows")
			}
			got, err := GetAbsolutePath(tt.path)
			if tt.wantErr {
				if err == nil {
					t.Errorf("GetAbsolutePath() error = nil, wantErr %v", tt.wantErr)
					return
				}
				if tt.wantErrMsg != "" && !strings.Contains(err.Error(), tt.wantErrMsg) {
					t.Errorf("GetAbsolutePath() error = %v, want error containing %v", err, tt.wantErrMsg)
				}
				return
			}
			if err != nil {
				t.Errorf("GetAbsolutePath() unexpected error = %v", err)
				return
			}
			// Clean the expected path for comparison
			wantClean := filepath.Clean(tt.want)
			gotClean := filepath.Clean(got)
			if gotClean != wantClean {
				t.Errorf("GetAbsolutePath() = %v, want %v", gotClean, wantClean)
			}
			// Verify the result is actually absolute
			if !filepath.IsAbs(got) {
				t.Errorf("GetAbsolutePath() returned non-absolute path: %v", got)
			}
		})
	}
}
// TestGetAbsolutePathSpecialCases exercises GetAbsolutePath on filesystem
// fixtures (symlinks) that only exist on Unix-like systems.
func TestGetAbsolutePathSpecialCases(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("Skipping Unix-specific tests on Windows")
	}
	tests := []struct {
		name string
		// setup builds the fixture and returns the path to test.
		// t.TempDir removes the fixture automatically, so no explicit
		// cleanup function is needed (the old per-case cleanups were no-ops).
		setup   func() string
		wantErr bool
	}{
		{
			name: "symlink to directory",
			setup: func() string {
				tmpDir := t.TempDir()
				target := filepath.Join(tmpDir, "target")
				link := filepath.Join(tmpDir, "link")
				if err := os.Mkdir(target, 0o755); err != nil {
					t.Fatalf("Failed to create target directory: %v", err)
				}
				if err := os.Symlink(target, link); err != nil {
					t.Fatalf("Failed to create symlink: %v", err)
				}
				return link
			},
			wantErr: false,
		},
		{
			name: "broken symlink",
			setup: func() string {
				tmpDir := t.TempDir()
				link := filepath.Join(tmpDir, "broken_link")
				if err := os.Symlink("/nonexistent/path", link); err != nil {
					t.Fatalf("Failed to create broken symlink: %v", err)
				}
				return link
			},
			wantErr: false, // filepath.Abs still works with broken symlinks
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := GetAbsolutePath(tt.setup())
			if (err != nil) != tt.wantErr {
				t.Errorf("GetAbsolutePath() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if err == nil && !filepath.IsAbs(got) {
				t.Errorf("GetAbsolutePath() returned non-absolute path: %v", got)
			}
		})
	}
}
// TestGetAbsolutePathConcurrency checks that GetAbsolutePath can be called
// from several goroutines at once; run with -race to surface data races.
func TestGetAbsolutePathConcurrency(t *testing.T) {
	inputs := []string{".", "..", "test.go", "subdir/file.txt", "/tmp/test"}
	// Buffered signal channel: each goroutine reports completion exactly once.
	finished := make(chan struct{}, len(inputs))
	for _, in := range inputs {
		go func(p string) {
			_, _ = GetAbsolutePath(p)
			finished <- struct{}{}
		}(in)
	}
	// Block until every goroutine has signaled.
	for range inputs {
		<-finished
	}
}
// TestGetAbsolutePathErrorFormatting verifies error-message formatting.
// filepath.Abs essentially never fails on Unix, so the error branch here is
// defensive: if an error does occur, its wrapping must be well-formed;
// otherwise the happy path must yield an absolute path.
func TestGetAbsolutePathErrorFormatting(t *testing.T) {
	path := "test/path"
	got, err := GetAbsolutePath(path)
	if err == nil {
		// Normal case: just confirm a valid absolute result.
		if !filepath.IsAbs(got) {
			t.Errorf("Expected absolute path, got: %v", got)
		}
		return
	}
	// Error case: the message must carry the standard prefix and the input.
	if !strings.Contains(err.Error(), "failed to get absolute path for") {
		t.Errorf("Error message format incorrect: %v", err)
	}
	if !strings.Contains(err.Error(), path) {
		t.Errorf("Error message should contain original path: %v", err)
	}
}
// BenchmarkGetAbsolutePath benchmarks GetAbsolutePath on a relative path,
// the common case that requires joining with the working directory.
func BenchmarkGetAbsolutePath(b *testing.B) {
	const input = "test/path/file.go"
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_, _ = GetAbsolutePath(input)
	}
}
// BenchmarkGetAbsolutePathAbs benchmarks GetAbsolutePath with an input that
// is already absolute, using a platform-appropriate path.
func BenchmarkGetAbsolutePathAbs(b *testing.B) {
	input := "/home/user/test/file.go"
	if runtime.GOOS == "windows" {
		input = "C:\\Users\\test\\file.go"
	}
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_, _ = GetAbsolutePath(input)
	}
}
// BenchmarkGetAbsolutePathCurrent benchmarks GetAbsolutePath with the
// current-directory shorthand ".".
func BenchmarkGetAbsolutePathCurrent(b *testing.B) {
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_, _ = GetAbsolutePath(".")
	}
}