Files
gibidify/benchmark/benchmark.go
Ismo Vuorinen 95b7ef6dd3 chore: modernize workflows, security scanning, and linting configuration (#50)
* build: update Go 1.25, CI workflows, and build tooling

- Upgrade to Go 1.25
- Add benchmark targets to Makefile
- Implement parallel gosec execution
- Lock tool versions for reproducibility
- Add shellcheck directives to scripts
- Update CI workflows with improved caching

* refactor: migrate from golangci-lint to revive

- Replace golangci-lint with revive for linting
- Configure comprehensive revive rules
- Fix all EditorConfig violations
- Add yamllint and yamlfmt support
- Remove deprecated .golangci.yml

* refactor: rename utils to shared and deduplicate code

- Rename utils package to shared
- Add shared constants package
- Deduplicate constants across packages
- Address CodeRabbit review feedback

* fix: resolve SonarQube issues and add safety guards

- Fix all 73 SonarQube OPEN issues
- Add nil guards for resourceMonitor, backpressure, metricsCollector
- Implement io.Closer for headerFileReader
- Propagate errors from processing helpers
- Add metrics and templates packages
- Improve error handling across codebase

* test: improve test infrastructure and coverage

- Add benchmarks for cli, fileproc, metrics
- Improve test coverage for cli, fileproc, config
- Refactor tests with helper functions
- Add shared test constants
- Fix test function naming conventions
- Reduce cognitive complexity in benchmark tests

* docs: update documentation and configuration examples

- Update CLAUDE.md with current project state
- Refresh README with new features
- Add usage and configuration examples
- Add SonarQube project configuration
- Consolidate config.example.yaml

* fix: resolve shellcheck warnings in scripts

- Use ./*.go instead of *.go to prevent dash-prefixed filenames
  from being interpreted as options (SC2035)
- Remove unreachable return statement after exit (SC2317)
- Remove obsolete gibidiutils/ directory reference

* chore(deps): upgrade go dependencies

* chore(lint): megalinter fixes

* fix: improve test coverage and fix file descriptor leaks

- Add defer r.Close() to fix pipe file descriptor leaks in benchmark tests
- Refactor TestProcessorConfigureFileTypes with helper functions and assertions
- Refactor TestProcessorLogFinalStats with output capture and keyword verification
- Use shared constants instead of literal strings (TestFilePNG, FormatMarkdown, etc.)
- Reduce cognitive complexity by extracting helper functions

* fix: align test comments with function names

Remove underscores from test comments to match actual function names:
- benchmark/benchmark_test.go (2 fixes)
- fileproc/filetypes_config_test.go (4 fixes)
- fileproc/filetypes_registry_test.go (6 fixes)
- fileproc/processor_test.go (6 fixes)
- fileproc/resource_monitor_types_test.go (4 fixes)
- fileproc/writer_test.go (3 fixes)

* fix: various test improvements and bug fixes

- Remove duplicate maxCacheSize check in filetypes_registry_test.go
- Shorten long comment in processor_test.go to stay under 120 chars
- Remove flaky time.Sleep in collector_test.go, use >= 0 assertion
- Close pipe reader in benchmark_test.go to fix file descriptor leak
- Use ContinueOnError in flags_test.go to match ResetFlags behavior
- Add nil check for p.ui in processor_workers.go before UpdateProgress
- Fix resource_monitor_validation_test.go by setting hardMemoryLimitBytes directly

* chore(yaml): add missing document start markers

Add --- document start to YAML files to satisfy yamllint:
- .github/workflows/codeql.yml
- .github/workflows/build-test-publish.yml
- .github/workflows/security.yml
- .github/actions/setup/action.yml

* fix: guard nil resourceMonitor and fix test deadlock

- Guard resourceMonitor before CreateFileProcessingContext call
- Add ui.UpdateProgress on emergency stop and path error returns
- Fix potential deadlock in TestProcessFile using wg.Go with defer close
2025-12-10 19:07:11 +02:00

536 lines
14 KiB
Go

// Package benchmark provides benchmarking infrastructure for gibidify.
package benchmark
import (
"context"
"fmt"
"os"
"path/filepath"
"runtime"
"sync"
"time"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/shared"
)
// Result represents the results of a benchmark run.
// Values are assembled by buildBenchmarkResult from the raw measurements
// taken by the individual benchmark functions.
type Result struct {
// Name identifies the benchmark that produced this result.
Name string
// Duration is the elapsed wall-clock time measured around the benchmarked work.
Duration time.Duration
// FilesProcessed is the number of files returned by file collection.
FilesProcessed int
// BytesProcessed is the summed on-disk size of those files; files that
// could not be stat'ed are not counted.
BytesProcessed int64
// FilesPerSecond is FilesProcessed divided by Duration in seconds
// (0 when Duration is zero).
FilesPerSecond float64
// BytesPerSecond is BytesProcessed divided by Duration in seconds
// (0 when Duration is zero).
BytesPerSecond float64
// MemoryUsage holds the memory and GC deltas observed across the run.
MemoryUsage MemoryStats
// CPUUsage holds CPU-related figures captured when the result was built.
CPUUsage CPUStats
}
// MemoryStats represents memory usage statistics.
// Each field is a difference between two runtime.MemStats snapshots taken
// before and after the benchmarked work.
type MemoryStats struct {
// AllocMB is the change in runtime.MemStats.Alloc as computed by
// shared.SafeMemoryDiffMB (presumably in megabytes, per the helper's name).
AllocMB float64
// SysMB is the change in runtime.MemStats.Sys, computed the same way.
SysMB float64
// NumGC is the number of garbage collections completed during the run.
NumGC uint32
// PauseTotalNs is the GC pause time accumulated during the run, in nanoseconds.
PauseTotalNs uint64
}
// CPUStats represents CPU usage statistics.
type CPUStats struct {
// UserTime is intended to hold user-mode CPU time.
// NOTE(review): buildBenchmarkResult never sets it, so it stays zero in
// results produced by this file — confirm whether another producer fills it.
UserTime time.Duration
// SystemTime is intended to hold kernel-mode CPU time.
// NOTE(review): likewise left at zero by buildBenchmarkResult.
SystemTime time.Duration
// Goroutines is the value of runtime.NumGoroutine() at the moment the
// result was constructed.
Goroutines int
}
// Suite represents a collection of benchmarks.
type Suite struct {
// Name is the suite title printed as a section header by PrintSuite.
Name string
// Results holds one entry per benchmark run, in execution order.
Results []Result
}
// buildBenchmarkResult assembles a Result from raw benchmark measurements.
//
// Throughput rates are derived from the file count, the byte total and the
// duration; both stay at zero when the duration is exactly zero so no division
// by zero can occur. Memory figures are the differences between the two
// runtime.MemStats snapshots, and the goroutine count is sampled here, at
// construction time. Keeping this in one place spares every benchmark
// function from repeating the same bookkeeping.
func buildBenchmarkResult(
	name string,
	files []string,
	totalBytes int64,
	duration time.Duration,
	memBefore, memAfter runtime.MemStats,
) *Result {
	fileCount := len(files)

	// Rates default to zero and are only computed for a non-zero duration.
	var filesPerSec, bytesPerSec float64
	if elapsed := duration.Seconds(); elapsed != 0 {
		filesPerSec = float64(fileCount) / elapsed
		bytesPerSec = float64(totalBytes) / elapsed
	}

	return &Result{
		Name:           name,
		Duration:       duration,
		FilesProcessed: fileCount,
		BytesProcessed: totalBytes,
		FilesPerSecond: filesPerSec,
		BytesPerSecond: bytesPerSec,
		MemoryUsage: MemoryStats{
			AllocMB:      shared.SafeMemoryDiffMB(memAfter.Alloc, memBefore.Alloc),
			SysMB:        shared.SafeMemoryDiffMB(memAfter.Sys, memBefore.Sys),
			NumGC:        memAfter.NumGC - memBefore.NumGC,
			PauseTotalNs: memAfter.PauseTotalNs - memBefore.PauseTotalNs,
		},
		CPUUsage: CPUStats{
			Goroutines: runtime.NumGoroutine(),
		},
	}
}
// FileCollectionBenchmark measures how long fileproc.CollectFiles takes to
// walk sourceDir.
//
// When sourceDir is empty, numFiles temporary files are generated first and
// removed again when the function returns. The returned Result is named
// "FileCollection" and reports the collected file count, their summed size,
// the elapsed time and the memory deltas around the collection call.
func FileCollectionBenchmark(sourceDir string, numFiles int) (*Result, error) {
	// Configuration must be loaded so file filtering behaves as in a real run.
	config.LoadConfig()

	// Fall back to a generated fixture tree when the caller gave no directory.
	if sourceDir == "" {
		generatedDir, removeGenerated, genErr := createBenchmarkFiles(numFiles)
		if genErr != nil {
			return nil, shared.WrapError(
				genErr,
				shared.ErrorTypeFileSystem,
				shared.CodeFSAccess,
				shared.BenchmarkMsgFailedToCreateFiles,
			)
		}
		defer removeGenerated()
		sourceDir = generatedDir
	}

	// Snapshot memory, then time the collection itself.
	var memBefore runtime.MemStats
	runtime.ReadMemStats(&memBefore)
	startTime := time.Now()

	files, collectErr := fileproc.CollectFiles(sourceDir)
	if collectErr != nil {
		return nil, shared.WrapError(
			collectErr,
			shared.ErrorTypeProcessing,
			shared.CodeProcessingCollection,
			shared.BenchmarkMsgCollectionFailed,
		)
	}
	duration := time.Since(startTime)

	var memAfter runtime.MemStats
	runtime.ReadMemStats(&memAfter)

	// Sum file sizes outside the timed region; unreadable entries are skipped.
	var totalBytes int64
	for _, path := range files {
		info, statErr := os.Stat(path)
		if statErr != nil {
			continue
		}
		totalBytes += info.Size()
	}

	return buildBenchmarkResult("FileCollection", files, totalBytes, duration, memBefore, memAfter), nil
}
// FileProcessingBenchmark times the complete pipeline: collecting the files
// under sourceDir and pushing them through runProcessingPipeline into a
// throwaway output file of the given format with the given concurrency.
//
// When sourceDir is empty, shared.BenchmarkDefaultFileCount temporary files
// are generated first and removed on return. The temporary output file is
// closed and deleted on return as well; failures of either step are only
// reported as warnings on stdout. The returned Result is named
// "FileProcessing_<format>_c<concurrency>".
func FileProcessingBenchmark(sourceDir string, format string, concurrency int) (*Result, error) {
	// Configuration must be loaded so file filtering behaves as in a real run.
	config.LoadConfig()

	// Fall back to a generated fixture tree when the caller gave no directory.
	if sourceDir == "" {
		generatedDir, removeGenerated, genErr := createBenchmarkFiles(shared.BenchmarkDefaultFileCount)
		if genErr != nil {
			return nil, shared.WrapError(
				genErr,
				shared.ErrorTypeFileSystem,
				shared.CodeFSAccess,
				shared.BenchmarkMsgFailedToCreateFiles,
			)
		}
		defer removeGenerated()
		sourceDir = generatedDir
	}

	// The pipeline needs a real file to write to; it is discarded afterwards.
	outputFile, createErr := os.CreateTemp("", "benchmark_output_*."+format)
	if createErr != nil {
		return nil, shared.WrapError(
			createErr,
			shared.ErrorTypeIO,
			shared.CodeIOFileCreate,
			"failed to create benchmark output file",
		)
	}
	defer func() {
		if closeErr := outputFile.Close(); closeErr != nil {
			//nolint:errcheck // Warning message in defer, failure doesn't affect benchmark
			_, _ = fmt.Printf("Warning: failed to close benchmark output file: %v\n", closeErr)
		}
		if removeErr := os.Remove(outputFile.Name()); removeErr != nil {
			//nolint:errcheck // Warning message in defer, failure doesn't affect benchmark
			_, _ = fmt.Printf("Warning: failed to remove benchmark output file: %v\n", removeErr)
		}
	}()

	// Snapshot memory, then time collection plus processing as one unit.
	var memBefore runtime.MemStats
	runtime.ReadMemStats(&memBefore)
	startTime := time.Now()

	files, collectErr := fileproc.CollectFiles(sourceDir)
	if collectErr != nil {
		return nil, shared.WrapError(
			collectErr,
			shared.ErrorTypeProcessing,
			shared.CodeProcessingCollection,
			shared.BenchmarkMsgCollectionFailed,
		)
	}

	pipelineErr := runProcessingPipeline(context.Background(), files, outputFile, format, concurrency, sourceDir)
	if pipelineErr != nil {
		return nil, shared.WrapError(
			pipelineErr,
			shared.ErrorTypeProcessing,
			shared.CodeProcessingFileRead,
			"benchmark processing pipeline failed",
		)
	}
	duration := time.Since(startTime)

	var memAfter runtime.MemStats
	runtime.ReadMemStats(&memAfter)

	// Sum file sizes outside the timed region; unreadable entries are skipped.
	var totalBytes int64
	for _, path := range files {
		info, statErr := os.Stat(path)
		if statErr != nil {
			continue
		}
		totalBytes += info.Size()
	}

	label := fmt.Sprintf("FileProcessing_%s_c%d", format, concurrency)

	return buildBenchmarkResult(label, files, totalBytes, duration, memBefore, memAfter), nil
}
// ConcurrencyBenchmark runs FileProcessingBenchmark once per entry of
// concurrencyLevels, keeping sourceDir and format fixed, and gathers the
// results in a Suite named "ConcurrencyBenchmark".
//
// The first failing level aborts the whole suite: nil is returned together
// with an error that names the level.
func ConcurrencyBenchmark(sourceDir string, format string, concurrencyLevels []int) (*Suite, error) {
	collected := make([]Result, 0, len(concurrencyLevels))

	for _, level := range concurrencyLevels {
		run, runErr := FileProcessingBenchmark(sourceDir, format, level)
		if runErr != nil {
			return nil, shared.WrapErrorf(
				runErr,
				shared.ErrorTypeProcessing,
				shared.CodeProcessingCollection,
				"concurrency benchmark failed for level %d",
				level,
			)
		}
		collected = append(collected, *run)
	}

	return &Suite{
		Name:    "ConcurrencyBenchmark",
		Results: collected,
	}, nil
}
// FormatBenchmark runs FileProcessingBenchmark once per entry of formats,
// always with runtime.NumCPU() as the concurrency, and gathers the results in
// a Suite named "FormatBenchmark".
//
// The first failing format aborts the whole suite: nil is returned together
// with an error that names the format.
func FormatBenchmark(sourceDir string, formats []string) (*Suite, error) {
	collected := make([]Result, 0, len(formats))

	for _, outputFormat := range formats {
		run, runErr := FileProcessingBenchmark(sourceDir, outputFormat, runtime.NumCPU())
		if runErr != nil {
			return nil, shared.WrapErrorf(
				runErr,
				shared.ErrorTypeProcessing,
				shared.CodeProcessingCollection,
				"format benchmark failed for format %s",
				outputFormat,
			)
		}
		collected = append(collected, *run)
	}

	return &Suite{
		Name:    "FormatBenchmark",
		Results: collected,
	}, nil
}
// createBenchmarkFiles populates a fresh temporary directory with numFiles
// small source files for benchmarking.
//
// Files cycle through a fixed list of ten language templates. A file whose
// index is divisible by 10 is placed in its own "subdir_<index/10>"
// subdirectory; all others sit directly in the temporary directory. Each file
// contains its template repeated ten times, every copy preceded by a
// "// Line <j>" marker so the files are not trivially small.
//
// It returns the directory path and a cleanup function that removes the whole
// tree (a removal failure is only reported as a warning on stdout). On any
// error the partially built tree is removed and the returned path and cleanup
// function are "" and nil. A numFiles of zero or less yields an empty
// directory.
func createBenchmarkFiles(numFiles int) (string, func(), error) {
	tempDir, err := os.MkdirTemp("", "gibidify_benchmark_*")
	if err != nil {
		return "", nil, shared.WrapError(
			err,
			shared.ErrorTypeFileSystem,
			shared.CodeFSAccess,
			"failed to create temp directory",
		)
	}

	cleanup := func() {
		if err := os.RemoveAll(tempDir); err != nil {
			//nolint:errcheck // Warning message in cleanup, failure doesn't affect benchmark
			_, _ = fmt.Printf("Warning: failed to remove benchmark temp directory: %v\n", err)
		}
	}

	// Create various file types
	fileTypes := []struct {
		ext     string
		content string
	}{
		{".go", "package main\n\nfunc main() {\n\tprintln(\"Hello, World!\")\n}"},
		{".js", "console.log('Hello, World!');"},
		{".py", "print('Hello, World!')"},
		{
			".java",
			"public class Hello {\n\tpublic static void main(String[] args) {\n\t" +
				"\tSystem.out.println(\"Hello, World!\");\n\t}\n}",
		},
		{
			".cpp",
			"#include <iostream>\n\n" +
				"int main() {\n\tstd::cout << \"Hello, World!\" << std::endl;\n\treturn 0;\n}",
		},
		{".rs", "fn main() {\n\tprintln!(\"Hello, World!\");\n}"},
		{".rb", "puts 'Hello, World!'"},
		{".php", "<?php\necho 'Hello, World!';\n?>"},
		{".sh", "#!/bin/bash\necho 'Hello, World!'"},
		{".md", "# Hello, World!\n\nThis is a markdown file."},
	}

	// The payload depends only on the file type, so build each one exactly
	// once instead of re-concatenating strings for every generated file.
	const repeatsPerFile = 10
	payloads := make([][]byte, len(fileTypes))
	for k, ft := range fileTypes {
		var buf []byte
		for j := 0; j < repeatsPerFile; j++ {
			buf = fmt.Appendf(buf, "// Line %d\n%s\n", j, ft.content)
		}
		payloads[k] = buf
	}

	for i := 0; i < numFiles; i++ {
		typeIdx := i % len(fileTypes)
		filename := fmt.Sprintf("file_%d%s", i, fileTypes[typeIdx].ext)

		// Create subdirectories for some files
		if i%10 == 0 {
			subdir := filepath.Join(tempDir, fmt.Sprintf("subdir_%d", i/10))
			if err := os.MkdirAll(subdir, 0o750); err != nil {
				cleanup()

				return "", nil, shared.WrapError(
					err,
					shared.ErrorTypeFileSystem,
					shared.CodeFSAccess,
					"failed to create subdirectory",
				)
			}
			filename = filepath.Join(subdir, filename)
		} else {
			filename = filepath.Join(tempDir, filename)
		}

		if err := os.WriteFile(filename, payloads[typeIdx], 0o600); err != nil {
			cleanup()

			return "", nil, shared.WrapError(
				err, shared.ErrorTypeIO, shared.CodeIOFileWrite, "failed to write benchmark file",
			)
		}
	}

	return tempDir, cleanup, nil
}
// runProcessingPipeline runs the processing pipeline similar to main.go.
func runProcessingPipeline(
ctx context.Context,
files []string,
outputFile *os.File,
format string,
concurrency int,
sourceDir string,
) error {
// Guard against invalid concurrency to prevent deadlocks
if concurrency < 1 {
concurrency = 1
}
fileCh := make(chan string, concurrency)
writeCh := make(chan fileproc.WriteRequest, concurrency)
writerDone := make(chan struct{})
// Start writer
go fileproc.StartWriter(outputFile, writeCh, writerDone, format, "", "")
// Get absolute path once
absRoot, err := shared.AbsolutePath(sourceDir)
if err != nil {
return shared.WrapError(
err,
shared.ErrorTypeFileSystem,
shared.CodeFSPathResolution,
"failed to get absolute path for source directory",
)
}
// Start workers with proper synchronization
var workersDone sync.WaitGroup
for i := 0; i < concurrency; i++ {
workersDone.Add(1)
go func() {
defer workersDone.Done()
for filePath := range fileCh {
fileproc.ProcessFile(filePath, writeCh, absRoot)
}
}()
}
// Send files to workers
for _, file := range files {
select {
case <-ctx.Done():
close(fileCh)
workersDone.Wait() // Wait for workers to finish
close(writeCh)
<-writerDone
return fmt.Errorf("context canceled: %w", ctx.Err())
case fileCh <- file:
}
}
// Close file channel and wait for workers to finish
close(fileCh)
workersDone.Wait()
// Now it's safe to close the write channel
close(writeCh)
<-writerDone
return nil
}
// PrintResult writes a human-readable report of one benchmark result to
// stdout: a section header with the name, followed by duration, file and byte
// counts, throughput rates, memory deltas, GC activity and goroutine count,
// and a trailing blank line.
//
// A failed write to stdout is logged through shared.LogError and does not
// stop the remaining lines from being attempted.
func PrintResult(result *Result) {
	// emit prints one formatted line and logs, rather than returns, write errors
	// (broken pipe and similar are rare and not worth aborting the report for).
	emit := func(format string, args ...any) {
		_, writeErr := fmt.Printf(format, args...)
		if writeErr == nil {
			return
		}
		shared.LogError("failed to write benchmark output", writeErr)
	}

	bytesPerMB := float64(shared.BytesPerMB)

	emit(shared.BenchmarkFmtSectionHeader, result.Name)
	emit("Duration: %v\n", result.Duration)
	emit("Files Processed: %d\n", result.FilesProcessed)

	processedMB := float64(result.BytesProcessed) / bytesPerMB
	emit("Bytes Processed: %d (%.2f MB)\n", result.BytesProcessed, processedMB)

	emit("Files/sec: %.2f\n", result.FilesPerSecond)
	emit("Bytes/sec: %.2f MB/sec\n", result.BytesPerSecond/bytesPerMB)
	emit("Memory Usage: +%.2f MB (Sys: +%.2f MB)\n", result.MemoryUsage.AllocMB, result.MemoryUsage.SysMB)

	// The conversion error is deliberately ignored: this value is display-only.
	//nolint:errcheck // Overflow unlikely for pause duration, result output only
	pauseNs, _ := shared.SafeUint64ToInt64(result.MemoryUsage.PauseTotalNs)
	emit("GC Runs: %d (Pause: %v)\n", result.MemoryUsage.NumGC, time.Duration(pauseNs))

	emit("Goroutines: %d\n", result.CPUUsage.Goroutines)
	emit("\n")
}
// PrintSuite writes the suite name as a section header to stdout and then
// prints every contained result with PrintResult, in order.
//
// A failure to write the header is logged through shared.LogError; the
// results are printed regardless.
func PrintSuite(suite *Suite) {
	_, headerErr := fmt.Printf(shared.BenchmarkFmtSectionHeader, suite.Name)
	if headerErr != nil {
		shared.LogError("failed to write benchmark suite header", headerErr)
	}

	// Pass a pointer to each slice element itself rather than to a copy.
	for idx := 0; idx < len(suite.Results); idx++ {
		PrintResult(&suite.Results[idx])
	}
}
// RunAllBenchmarks executes the full benchmark sequence against sourceDir and
// prints every result to stdout as it becomes available:
//
//  1. file collection with shared.BenchmarkDefaultFileCount files,
//  2. the format suite over JSON, YAML and Markdown,
//  3. the concurrency suite over 1, 2, 4, 8 and runtime.NumCPU() using JSON.
//
// The sequence stops at the first failing stage and returns its wrapped
// error; nil is returned when all stages succeed.
func RunAllBenchmarks(sourceDir string) error {
	// announce prints a progress line; a failed write is logged, not fatal.
	announce := func(msg string) {
		_, writeErr := fmt.Println(msg)
		if writeErr != nil {
			shared.LogError("failed to write benchmark message", writeErr)
		}
	}
	// stageFailed wraps a stage error with the classification shared by all stages.
	stageFailed := func(cause error, msg string) error {
		return shared.WrapError(
			cause,
			shared.ErrorTypeProcessing,
			shared.CodeProcessingCollection,
			msg,
		)
	}

	announce("Running gibidify benchmark suite...")

	// Load configuration
	config.LoadConfig()

	// Stage 1: file collection.
	announce(shared.BenchmarkMsgRunningCollection)
	collection, collectionErr := FileCollectionBenchmark(sourceDir, shared.BenchmarkDefaultFileCount)
	if collectionErr != nil {
		return stageFailed(collectionErr, shared.BenchmarkMsgFileCollectionFailed)
	}
	PrintResult(collection)

	// Stage 2: output formats.
	announce("Running format benchmarks...")
	formatSuite, formatErr := FormatBenchmark(
		sourceDir,
		[]string{shared.FormatJSON, shared.FormatYAML, shared.FormatMarkdown},
	)
	if formatErr != nil {
		return stageFailed(formatErr, shared.BenchmarkMsgFormatFailed)
	}
	PrintSuite(formatSuite)

	// Stage 3: concurrency levels.
	announce("Running concurrency benchmarks...")
	levels := []int{1, 2, 4, 8, runtime.NumCPU()}
	concurrencySuite, concurrencyErr := ConcurrencyBenchmark(sourceDir, shared.FormatJSON, levels)
	if concurrencyErr != nil {
		return stageFailed(concurrencyErr, shared.BenchmarkMsgConcurrencyFailed)
	}
	PrintSuite(concurrencySuite)

	return nil
}