// Package benchmark provides benchmarking infrastructure for gibidify. package benchmark import ( "context" "fmt" "os" "path/filepath" "runtime" "sync" "time" "github.com/ivuorinen/gibidify/config" "github.com/ivuorinen/gibidify/fileproc" "github.com/ivuorinen/gibidify/shared" ) // Result represents the results of a benchmark run. type Result struct { Name string Duration time.Duration FilesProcessed int BytesProcessed int64 FilesPerSecond float64 BytesPerSecond float64 MemoryUsage MemoryStats CPUUsage CPUStats } // MemoryStats represents memory usage statistics. type MemoryStats struct { AllocMB float64 SysMB float64 NumGC uint32 PauseTotalNs uint64 } // CPUStats represents CPU usage statistics. type CPUStats struct { UserTime time.Duration SystemTime time.Duration Goroutines int } // Suite represents a collection of benchmarks. type Suite struct { Name string Results []Result } // buildBenchmarkResult constructs a Result with all metrics calculated. // This eliminates code duplication across benchmark functions. func buildBenchmarkResult( name string, files []string, totalBytes int64, duration time.Duration, memBefore, memAfter runtime.MemStats, ) *Result { result := &Result{ Name: name, Duration: duration, FilesProcessed: len(files), BytesProcessed: totalBytes, } // Calculate rates with zero-division guard secs := duration.Seconds() if secs == 0 { result.FilesPerSecond = 0 result.BytesPerSecond = 0 } else { result.FilesPerSecond = float64(len(files)) / secs result.BytesPerSecond = float64(totalBytes) / secs } result.MemoryUsage = MemoryStats{ AllocMB: shared.SafeMemoryDiffMB(memAfter.Alloc, memBefore.Alloc), SysMB: shared.SafeMemoryDiffMB(memAfter.Sys, memBefore.Sys), NumGC: memAfter.NumGC - memBefore.NumGC, PauseTotalNs: memAfter.PauseTotalNs - memBefore.PauseTotalNs, } result.CPUUsage = CPUStats{ Goroutines: runtime.NumGoroutine(), } return result } // FileCollectionBenchmark benchmarks file collection operations. func FileCollectionBenchmark(sourceDir string, numFiles int) (*Result, error) { // Load configuration to ensure proper file filtering config.LoadConfig() // Create temporary directory with test files if no source is provided var cleanup func() if sourceDir == "" { tempDir, cleanupFunc, err := createBenchmarkFiles(numFiles) if err != nil { return nil, shared.WrapError( err, shared.ErrorTypeFileSystem, shared.CodeFSAccess, shared.BenchmarkMsgFailedToCreateFiles, ) } cleanup = cleanupFunc //nolint:errcheck // Benchmark output, errors don't affect results defer cleanup() sourceDir = tempDir } // Measure memory before var memBefore runtime.MemStats runtime.ReadMemStats(&memBefore) startTime := time.Now() // Run the file collection benchmark files, err := fileproc.CollectFiles(sourceDir) if err != nil { return nil, shared.WrapError( err, shared.ErrorTypeProcessing, shared.CodeProcessingCollection, shared.BenchmarkMsgCollectionFailed, ) } duration := time.Since(startTime) // Measure memory after var memAfter runtime.MemStats runtime.ReadMemStats(&memAfter) // Calculate total bytes processed var totalBytes int64 for _, file := range files { if info, err := os.Stat(file); err == nil { totalBytes += info.Size() } } result := buildBenchmarkResult("FileCollection", files, totalBytes, duration, memBefore, memAfter) return result, nil } // FileProcessingBenchmark benchmarks full file processing pipeline. func FileProcessingBenchmark(sourceDir string, format string, concurrency int) (*Result, error) { // Load configuration to ensure proper file filtering config.LoadConfig() var cleanup func() if sourceDir == "" { // Create temporary directory with test files tempDir, cleanupFunc, err := createBenchmarkFiles(shared.BenchmarkDefaultFileCount) if err != nil { return nil, shared.WrapError( err, shared.ErrorTypeFileSystem, shared.CodeFSAccess, shared.BenchmarkMsgFailedToCreateFiles, ) } cleanup = cleanupFunc //nolint:errcheck // Benchmark output, errors don't affect results defer cleanup() sourceDir = tempDir } // Create temporary output file outputFile, err := os.CreateTemp("", "benchmark_output_*."+format) if err != nil { return nil, shared.WrapError( err, shared.ErrorTypeIO, shared.CodeIOFileCreate, "failed to create benchmark output file", ) } defer func() { if err := outputFile.Close(); err != nil { //nolint:errcheck // Warning message in defer, failure doesn't affect benchmark _, _ = fmt.Printf("Warning: failed to close benchmark output file: %v\n", err) } if err := os.Remove(outputFile.Name()); err != nil { //nolint:errcheck // Warning message in defer, failure doesn't affect benchmark _, _ = fmt.Printf("Warning: failed to remove benchmark output file: %v\n", err) } }() // Measure memory before var memBefore runtime.MemStats runtime.ReadMemStats(&memBefore) startTime := time.Now() // Run the full processing pipeline files, err := fileproc.CollectFiles(sourceDir) if err != nil { return nil, shared.WrapError( err, shared.ErrorTypeProcessing, shared.CodeProcessingCollection, shared.BenchmarkMsgCollectionFailed, ) } // Process files with concurrency err = runProcessingPipeline(context.Background(), files, outputFile, format, concurrency, sourceDir) if err != nil { return nil, shared.WrapError( err, shared.ErrorTypeProcessing, shared.CodeProcessingFileRead, "benchmark processing pipeline failed", ) } duration := time.Since(startTime) // Measure memory after var memAfter runtime.MemStats runtime.ReadMemStats(&memAfter) // Calculate total bytes processed var totalBytes int64 for _, file := range files { if info, err := os.Stat(file); err == nil { totalBytes += info.Size() } } benchmarkName := fmt.Sprintf("FileProcessing_%s_c%d", format, concurrency) result := buildBenchmarkResult(benchmarkName, files, totalBytes, duration, memBefore, memAfter) return result, nil } // ConcurrencyBenchmark benchmarks different concurrency levels. func ConcurrencyBenchmark(sourceDir string, format string, concurrencyLevels []int) (*Suite, error) { suite := &Suite{ Name: "ConcurrencyBenchmark", Results: make([]Result, 0, len(concurrencyLevels)), } for _, concurrency := range concurrencyLevels { result, err := FileProcessingBenchmark(sourceDir, format, concurrency) if err != nil { return nil, shared.WrapErrorf( err, shared.ErrorTypeProcessing, shared.CodeProcessingCollection, "concurrency benchmark failed for level %d", concurrency, ) } suite.Results = append(suite.Results, *result) } return suite, nil } // FormatBenchmark benchmarks different output formats. func FormatBenchmark(sourceDir string, formats []string) (*Suite, error) { suite := &Suite{ Name: "FormatBenchmark", Results: make([]Result, 0, len(formats)), } for _, format := range formats { result, err := FileProcessingBenchmark(sourceDir, format, runtime.NumCPU()) if err != nil { return nil, shared.WrapErrorf( err, shared.ErrorTypeProcessing, shared.CodeProcessingCollection, "format benchmark failed for format %s", format, ) } suite.Results = append(suite.Results, *result) } return suite, nil } // createBenchmarkFiles creates temporary files for benchmarking. func createBenchmarkFiles(numFiles int) (string, func(), error) { tempDir, err := os.MkdirTemp("", "gibidify_benchmark_*") if err != nil { return "", nil, shared.WrapError( err, shared.ErrorTypeFileSystem, shared.CodeFSAccess, "failed to create temp directory", ) } cleanup := func() { if err := os.RemoveAll(tempDir); err != nil { //nolint:errcheck // Warning message in cleanup, failure doesn't affect benchmark _, _ = fmt.Printf("Warning: failed to remove benchmark temp directory: %v\n", err) } } // Create various file types fileTypes := []struct { ext string content string }{ {".go", "package main\n\nfunc main() {\n\tprintln(\"Hello, World!\")\n}"}, {".js", "console.log('Hello, World!');"}, {".py", "print('Hello, World!')"}, { ".java", "public class Hello {\n\tpublic static void main(String[] args) {\n\t" + "\tSystem.out.println(\"Hello, World!\");\n\t}\n}", }, { ".cpp", "#include \n\n" + "int main() {\n\tstd::cout << \"Hello, World!\" << std::endl;\n\treturn 0;\n}", }, {".rs", "fn main() {\n\tprintln!(\"Hello, World!\");\n}"}, {".rb", "puts 'Hello, World!'"}, {".php", ""}, {".sh", "#!/bin/bash\necho 'Hello, World!'"}, {".md", "# Hello, World!\n\nThis is a markdown file."}, } for i := 0; i < numFiles; i++ { fileType := fileTypes[i%len(fileTypes)] filename := fmt.Sprintf("file_%d%s", i, fileType.ext) // Create subdirectories for some files if i%10 == 0 { subdir := filepath.Join(tempDir, fmt.Sprintf("subdir_%d", i/10)) if err := os.MkdirAll(subdir, 0o750); err != nil { cleanup() return "", nil, shared.WrapError( err, shared.ErrorTypeFileSystem, shared.CodeFSAccess, "failed to create subdirectory", ) } filename = filepath.Join(subdir, filename) } else { filename = filepath.Join(tempDir, filename) } // Create file with repeated content to make it larger content := "" for j := 0; j < 10; j++ { content += fmt.Sprintf("// Line %d\n%s\n", j, fileType.content) } if err := os.WriteFile(filename, []byte(content), 0o600); err != nil { cleanup() return "", nil, shared.WrapError( err, shared.ErrorTypeIO, shared.CodeIOFileWrite, "failed to write benchmark file", ) } } return tempDir, cleanup, nil } // runProcessingPipeline runs the processing pipeline similar to main.go. func runProcessingPipeline( ctx context.Context, files []string, outputFile *os.File, format string, concurrency int, sourceDir string, ) error { // Guard against invalid concurrency to prevent deadlocks if concurrency < 1 { concurrency = 1 } fileCh := make(chan string, concurrency) writeCh := make(chan fileproc.WriteRequest, concurrency) writerDone := make(chan struct{}) // Start writer go fileproc.StartWriter(outputFile, writeCh, writerDone, format, "", "") // Get absolute path once absRoot, err := shared.AbsolutePath(sourceDir) if err != nil { return shared.WrapError( err, shared.ErrorTypeFileSystem, shared.CodeFSPathResolution, "failed to get absolute path for source directory", ) } // Start workers with proper synchronization var workersDone sync.WaitGroup for i := 0; i < concurrency; i++ { workersDone.Add(1) go func() { defer workersDone.Done() for filePath := range fileCh { fileproc.ProcessFile(filePath, writeCh, absRoot) } }() } // Send files to workers for _, file := range files { select { case <-ctx.Done(): close(fileCh) workersDone.Wait() // Wait for workers to finish close(writeCh) <-writerDone return fmt.Errorf("context canceled: %w", ctx.Err()) case fileCh <- file: } } // Close file channel and wait for workers to finish close(fileCh) workersDone.Wait() // Now it's safe to close the write channel close(writeCh) <-writerDone return nil } // PrintResult prints a formatted benchmark result. func PrintResult(result *Result) { printBenchmarkLine := func(format string, args ...any) { if _, err := fmt.Printf(format, args...); err != nil { // Stdout write errors are rare (broken pipe, etc.) - log but continue shared.LogError("failed to write benchmark output", err) } } printBenchmarkLine(shared.BenchmarkFmtSectionHeader, result.Name) printBenchmarkLine("Duration: %v\n", result.Duration) printBenchmarkLine("Files Processed: %d\n", result.FilesProcessed) printBenchmarkLine("Bytes Processed: %d (%.2f MB)\n", result.BytesProcessed, float64(result.BytesProcessed)/float64(shared.BytesPerMB)) printBenchmarkLine("Files/sec: %.2f\n", result.FilesPerSecond) printBenchmarkLine("Bytes/sec: %.2f MB/sec\n", result.BytesPerSecond/float64(shared.BytesPerMB)) printBenchmarkLine( "Memory Usage: +%.2f MB (Sys: +%.2f MB)\n", result.MemoryUsage.AllocMB, result.MemoryUsage.SysMB, ) //nolint:errcheck // Overflow unlikely for pause duration, result output only pauseDuration, _ := shared.SafeUint64ToInt64(result.MemoryUsage.PauseTotalNs) printBenchmarkLine("GC Runs: %d (Pause: %v)\n", result.MemoryUsage.NumGC, time.Duration(pauseDuration)) printBenchmarkLine("Goroutines: %d\n", result.CPUUsage.Goroutines) printBenchmarkLine("\n") } // PrintSuite prints all results in a benchmark suite. func PrintSuite(suite *Suite) { if _, err := fmt.Printf(shared.BenchmarkFmtSectionHeader, suite.Name); err != nil { shared.LogError("failed to write benchmark suite header", err) } // Iterate by index to avoid taking address of range variable for i := range suite.Results { PrintResult(&suite.Results[i]) } } // RunAllBenchmarks runs a comprehensive benchmark suite. func RunAllBenchmarks(sourceDir string) error { printBenchmark := func(msg string) { if _, err := fmt.Println(msg); err != nil { shared.LogError("failed to write benchmark message", err) } } printBenchmark("Running gibidify benchmark suite...") // Load configuration config.LoadConfig() // File collection benchmark printBenchmark(shared.BenchmarkMsgRunningCollection) result, err := FileCollectionBenchmark(sourceDir, shared.BenchmarkDefaultFileCount) if err != nil { return shared.WrapError( err, shared.ErrorTypeProcessing, shared.CodeProcessingCollection, shared.BenchmarkMsgFileCollectionFailed, ) } PrintResult(result) // Format benchmarks printBenchmark("Running format benchmarks...") formats := []string{shared.FormatJSON, shared.FormatYAML, shared.FormatMarkdown} formatSuite, err := FormatBenchmark(sourceDir, formats) if err != nil { return shared.WrapError( err, shared.ErrorTypeProcessing, shared.CodeProcessingCollection, shared.BenchmarkMsgFormatFailed, ) } PrintSuite(formatSuite) // Concurrency benchmarks printBenchmark("Running concurrency benchmarks...") concurrencyLevels := []int{1, 2, 4, 8, runtime.NumCPU()} concurrencySuite, err := ConcurrencyBenchmark(sourceDir, shared.FormatJSON, concurrencyLevels) if err != nil { return shared.WrapError( err, shared.ErrorTypeProcessing, shared.CodeProcessingCollection, shared.BenchmarkMsgConcurrencyFailed, ) } PrintSuite(concurrencySuite) return nil }