mirror of https://github.com/ivuorinen/gibidify.git
synced 2026-01-26 11:34:03 +00:00
* build: update Go 1.25, CI workflows, and build tooling
  - Upgrade to Go 1.25
  - Add benchmark targets to Makefile
  - Implement parallel gosec execution
  - Lock tool versions for reproducibility
  - Add shellcheck directives to scripts
  - Update CI workflows with improved caching
* refactor: migrate from golangci-lint to revive
  - Replace golangci-lint with revive for linting
  - Configure comprehensive revive rules
  - Fix all EditorConfig violations
  - Add yamllint and yamlfmt support
  - Remove deprecated .golangci.yml
* refactor: rename utils to shared and deduplicate code
  - Rename utils package to shared
  - Add shared constants package
  - Deduplicate constants across packages
  - Address CodeRabbit review feedback
* fix: resolve SonarQube issues and add safety guards
  - Fix all 73 SonarQube OPEN issues
  - Add nil guards for resourceMonitor, backpressure, metricsCollector
  - Implement io.Closer for headerFileReader
  - Propagate errors from processing helpers
  - Add metrics and templates packages
  - Improve error handling across codebase
* test: improve test infrastructure and coverage
  - Add benchmarks for cli, fileproc, metrics
  - Improve test coverage for cli, fileproc, config
  - Refactor tests with helper functions
  - Add shared test constants
  - Fix test function naming conventions
  - Reduce cognitive complexity in benchmark tests
* docs: update documentation and configuration examples
  - Update CLAUDE.md with current project state
  - Refresh README with new features
  - Add usage and configuration examples
  - Add SonarQube project configuration
  - Consolidate config.example.yaml
* fix: resolve shellcheck warnings in scripts
  - Use ./*.go instead of *.go to prevent dash-prefixed filenames from being interpreted as options (SC2035)
  - Remove unreachable return statement after exit (SC2317)
  - Remove obsolete gibidiutils/ directory reference
* chore(deps): upgrade go dependencies
* chore(lint): megalinter fixes
* fix: improve test coverage and fix file descriptor leaks
  - Add defer r.Close() to fix pipe file descriptor leaks in benchmark tests
  - Refactor TestProcessorConfigureFileTypes with helper functions and assertions
  - Refactor TestProcessorLogFinalStats with output capture and keyword verification
  - Use shared constants instead of literal strings (TestFilePNG, FormatMarkdown, etc.)
  - Reduce cognitive complexity by extracting helper functions
* fix: align test comments with function names
  Remove underscores from test comments to match actual function names:
  - benchmark/benchmark_test.go (2 fixes)
  - fileproc/filetypes_config_test.go (4 fixes)
  - fileproc/filetypes_registry_test.go (6 fixes)
  - fileproc/processor_test.go (6 fixes)
  - fileproc/resource_monitor_types_test.go (4 fixes)
  - fileproc/writer_test.go (3 fixes)
* fix: various test improvements and bug fixes
  - Remove duplicate maxCacheSize check in filetypes_registry_test.go
  - Shorten long comment in processor_test.go to stay under 120 chars
  - Remove flaky time.Sleep in collector_test.go, use >= 0 assertion
  - Close pipe reader in benchmark_test.go to fix file descriptor leak
  - Use ContinueOnError in flags_test.go to match ResetFlags behavior
  - Add nil check for p.ui in processor_workers.go before UpdateProgress
  - Fix resource_monitor_validation_test.go by setting hardMemoryLimitBytes directly
* chore(yaml): add missing document start markers
  Add --- document start to YAML files to satisfy yamllint:
  - .github/workflows/codeql.yml
  - .github/workflows/build-test-publish.yml
  - .github/workflows/security.yml
  - .github/actions/setup/action.yml
* fix: guard nil resourceMonitor and fix test deadlock
  - Guard resourceMonitor before CreateFileProcessingContext call
  - Add ui.UpdateProgress on emergency stop and path error returns
  - Fix potential deadlock in TestProcessFile using wg.Go with defer close
536 lines
14 KiB
Go
// Package benchmark provides benchmarking infrastructure for gibidify.
package benchmark

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"runtime"
	"sync"
	"time"

	"github.com/ivuorinen/gibidify/config"
	"github.com/ivuorinen/gibidify/fileproc"
	"github.com/ivuorinen/gibidify/shared"
)

// Result represents the results of a benchmark run.
type Result struct {
	Name           string
	Duration       time.Duration
	FilesProcessed int
	BytesProcessed int64
	FilesPerSecond float64
	BytesPerSecond float64
	MemoryUsage    MemoryStats
	CPUUsage       CPUStats
}

// MemoryStats represents memory usage statistics.
type MemoryStats struct {
	AllocMB      float64
	SysMB        float64
	NumGC        uint32
	PauseTotalNs uint64
}

// CPUStats represents CPU usage statistics.
type CPUStats struct {
	UserTime   time.Duration
	SystemTime time.Duration
	Goroutines int
}

// Suite represents a collection of benchmarks.
type Suite struct {
	Name    string
	Results []Result
}
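
// A minimal sketch of how these types fit together (values are
// illustrative; FormatBenchmark and PrintSuite are defined below):
//
//	suite, err := FormatBenchmark("", []string{shared.FormatJSON})
//	if err != nil {
//		// handle error
//	}
//	PrintSuite(suite) // one Result per requested format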

// buildBenchmarkResult constructs a Result with all metrics calculated.
// This eliminates code duplication across benchmark functions.
func buildBenchmarkResult(
	name string,
	files []string,
	totalBytes int64,
	duration time.Duration,
	memBefore, memAfter runtime.MemStats,
) *Result {
	result := &Result{
		Name:           name,
		Duration:       duration,
		FilesProcessed: len(files),
		BytesProcessed: totalBytes,
	}

	// Calculate rates with zero-division guard
	secs := duration.Seconds()
	if secs == 0 {
		result.FilesPerSecond = 0
		result.BytesPerSecond = 0
	} else {
		result.FilesPerSecond = float64(len(files)) / secs
		result.BytesPerSecond = float64(totalBytes) / secs
	}

	result.MemoryUsage = MemoryStats{
		AllocMB:      shared.SafeMemoryDiffMB(memAfter.Alloc, memBefore.Alloc),
		SysMB:        shared.SafeMemoryDiffMB(memAfter.Sys, memBefore.Sys),
		NumGC:        memAfter.NumGC - memBefore.NumGC,
		PauseTotalNs: memAfter.PauseTotalNs - memBefore.PauseTotalNs,
	}

	result.CPUUsage = CPUStats{
		Goroutines: runtime.NumGoroutine(),
	}

	return result
}
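
// For example, 100 files totaling 1 MiB (1048576 bytes) processed in 2s
// yield FilesPerSecond == 50 and BytesPerSecond == 524288; a zero duration
// reports both rates as 0 instead of dividing by zero.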

// FileCollectionBenchmark benchmarks file collection operations.
func FileCollectionBenchmark(sourceDir string, numFiles int) (*Result, error) {
	// Load configuration to ensure proper file filtering
	config.LoadConfig()

	// Create temporary directory with test files if no source is provided
	var cleanup func()
	if sourceDir == "" {
		tempDir, cleanupFunc, err := createBenchmarkFiles(numFiles)
		if err != nil {
			return nil, shared.WrapError(
				err,
				shared.ErrorTypeFileSystem,
				shared.CodeFSAccess,
				shared.BenchmarkMsgFailedToCreateFiles,
			)
		}
		cleanup = cleanupFunc
		//nolint:errcheck // Benchmark output, errors don't affect results
		defer cleanup()
		sourceDir = tempDir
	}

	// Measure memory before
	var memBefore runtime.MemStats
	runtime.ReadMemStats(&memBefore)

	startTime := time.Now()

	// Run the file collection benchmark
	files, err := fileproc.CollectFiles(sourceDir)
	if err != nil {
		return nil, shared.WrapError(
			err,
			shared.ErrorTypeProcessing,
			shared.CodeProcessingCollection,
			shared.BenchmarkMsgCollectionFailed,
		)
	}

	duration := time.Since(startTime)

	// Measure memory after
	var memAfter runtime.MemStats
	runtime.ReadMemStats(&memAfter)

	// Calculate total bytes processed
	var totalBytes int64
	for _, file := range files {
		if info, err := os.Stat(file); err == nil {
			totalBytes += info.Size()
		}
	}

	result := buildBenchmarkResult("FileCollection", files, totalBytes, duration, memBefore, memAfter)

	return result, nil
}
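
// Example invocation (a minimal sketch; with an empty sourceDir the
// benchmark generates its own temporary files):
//
//	res, err := FileCollectionBenchmark("", shared.BenchmarkDefaultFileCount)
//	if err != nil {
//		// handle error
//	}
//	PrintResult(res)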

// FileProcessingBenchmark benchmarks the full file processing pipeline.
func FileProcessingBenchmark(sourceDir string, format string, concurrency int) (*Result, error) {
	// Load configuration to ensure proper file filtering
	config.LoadConfig()

	var cleanup func()
	if sourceDir == "" {
		// Create temporary directory with test files
		tempDir, cleanupFunc, err := createBenchmarkFiles(shared.BenchmarkDefaultFileCount)
		if err != nil {
			return nil, shared.WrapError(
				err,
				shared.ErrorTypeFileSystem,
				shared.CodeFSAccess,
				shared.BenchmarkMsgFailedToCreateFiles,
			)
		}
		cleanup = cleanupFunc
		//nolint:errcheck // Benchmark output, errors don't affect results
		defer cleanup()
		sourceDir = tempDir
	}

	// Create temporary output file
	outputFile, err := os.CreateTemp("", "benchmark_output_*."+format)
	if err != nil {
		return nil, shared.WrapError(
			err,
			shared.ErrorTypeIO,
			shared.CodeIOFileCreate,
			"failed to create benchmark output file",
		)
	}
	defer func() {
		if err := outputFile.Close(); err != nil {
			//nolint:errcheck // Warning message in defer, failure doesn't affect benchmark
			_, _ = fmt.Printf("Warning: failed to close benchmark output file: %v\n", err)
		}
		if err := os.Remove(outputFile.Name()); err != nil {
			//nolint:errcheck // Warning message in defer, failure doesn't affect benchmark
			_, _ = fmt.Printf("Warning: failed to remove benchmark output file: %v\n", err)
		}
	}()

	// Measure memory before
	var memBefore runtime.MemStats
	runtime.ReadMemStats(&memBefore)

	startTime := time.Now()

	// Run the full processing pipeline
	files, err := fileproc.CollectFiles(sourceDir)
	if err != nil {
		return nil, shared.WrapError(
			err,
			shared.ErrorTypeProcessing,
			shared.CodeProcessingCollection,
			shared.BenchmarkMsgCollectionFailed,
		)
	}

	// Process files with concurrency
	err = runProcessingPipeline(context.Background(), files, outputFile, format, concurrency, sourceDir)
	if err != nil {
		return nil, shared.WrapError(
			err,
			shared.ErrorTypeProcessing,
			shared.CodeProcessingFileRead,
			"benchmark processing pipeline failed",
		)
	}

	duration := time.Since(startTime)

	// Measure memory after
	var memAfter runtime.MemStats
	runtime.ReadMemStats(&memAfter)

	// Calculate total bytes processed
	var totalBytes int64
	for _, file := range files {
		if info, err := os.Stat(file); err == nil {
			totalBytes += info.Size()
		}
	}

	benchmarkName := fmt.Sprintf("FileProcessing_%s_c%d", format, concurrency)
	result := buildBenchmarkResult(benchmarkName, files, totalBytes, duration, memBefore, memAfter)

	return result, nil
}
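
// Example invocation (a minimal sketch; the format constant and concurrency
// value are illustrative):
//
//	res, err := FileProcessingBenchmark("", shared.FormatJSON, runtime.NumCPU())
//	if err != nil {
//		// handle error
//	}
//	PrintResult(res)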

// ConcurrencyBenchmark benchmarks different concurrency levels.
func ConcurrencyBenchmark(sourceDir string, format string, concurrencyLevels []int) (*Suite, error) {
	suite := &Suite{
		Name:    "ConcurrencyBenchmark",
		Results: make([]Result, 0, len(concurrencyLevels)),
	}

	for _, concurrency := range concurrencyLevels {
		result, err := FileProcessingBenchmark(sourceDir, format, concurrency)
		if err != nil {
			return nil, shared.WrapErrorf(
				err,
				shared.ErrorTypeProcessing,
				shared.CodeProcessingCollection,
				"concurrency benchmark failed for level %d",
				concurrency,
			)
		}
		suite.Results = append(suite.Results, *result)
	}

	return suite, nil
}

// FormatBenchmark benchmarks different output formats.
func FormatBenchmark(sourceDir string, formats []string) (*Suite, error) {
	suite := &Suite{
		Name:    "FormatBenchmark",
		Results: make([]Result, 0, len(formats)),
	}

	for _, format := range formats {
		result, err := FileProcessingBenchmark(sourceDir, format, runtime.NumCPU())
		if err != nil {
			return nil, shared.WrapErrorf(
				err,
				shared.ErrorTypeProcessing,
				shared.CodeProcessingCollection,
				"format benchmark failed for format %s",
				format,
			)
		}
		suite.Results = append(suite.Results, *result)
	}

	return suite, nil
}

// createBenchmarkFiles creates temporary files for benchmarking.
func createBenchmarkFiles(numFiles int) (string, func(), error) {
	tempDir, err := os.MkdirTemp("", "gibidify_benchmark_*")
	if err != nil {
		return "", nil, shared.WrapError(
			err,
			shared.ErrorTypeFileSystem,
			shared.CodeFSAccess,
			"failed to create temp directory",
		)
	}

	cleanup := func() {
		if err := os.RemoveAll(tempDir); err != nil {
			//nolint:errcheck // Warning message in cleanup, failure doesn't affect benchmark
			_, _ = fmt.Printf("Warning: failed to remove benchmark temp directory: %v\n", err)
		}
	}

	// Create various file types
	fileTypes := []struct {
		ext     string
		content string
	}{
		{".go", "package main\n\nfunc main() {\n\tprintln(\"Hello, World!\")\n}"},
		{".js", "console.log('Hello, World!');"},
		{".py", "print('Hello, World!')"},
		{
			".java",
			"public class Hello {\n\tpublic static void main(String[] args) {\n\t" +
				"\tSystem.out.println(\"Hello, World!\");\n\t}\n}",
		},
		{
			".cpp",
			"#include <iostream>\n\n" +
				"int main() {\n\tstd::cout << \"Hello, World!\" << std::endl;\n\treturn 0;\n}",
		},
		{".rs", "fn main() {\n\tprintln!(\"Hello, World!\");\n}"},
		{".rb", "puts 'Hello, World!'"},
		{".php", "<?php\necho 'Hello, World!';\n?>"},
		{".sh", "#!/bin/bash\necho 'Hello, World!'"},
		{".md", "# Hello, World!\n\nThis is a markdown file."},
	}

	for i := 0; i < numFiles; i++ {
		fileType := fileTypes[i%len(fileTypes)]
		filename := fmt.Sprintf("file_%d%s", i, fileType.ext)

		// Create subdirectories for some files
		if i%10 == 0 {
			subdir := filepath.Join(tempDir, fmt.Sprintf("subdir_%d", i/10))
			if err := os.MkdirAll(subdir, 0o750); err != nil {
				cleanup()

				return "", nil, shared.WrapError(
					err,
					shared.ErrorTypeFileSystem,
					shared.CodeFSAccess,
					"failed to create subdirectory",
				)
			}
			filename = filepath.Join(subdir, filename)
		} else {
			filename = filepath.Join(tempDir, filename)
		}

		// Create file with repeated content to make it larger
		content := ""
		for j := 0; j < 10; j++ {
			content += fmt.Sprintf("// Line %d\n%s\n", j, fileType.content)
		}

		if err := os.WriteFile(filename, []byte(content), 0o600); err != nil {
			cleanup()

			return "", nil, shared.WrapError(
				err, shared.ErrorTypeIO, shared.CodeIOFileWrite, "failed to write benchmark file",
			)
		}
	}

	return tempDir, cleanup, nil
}
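
// For example, createBenchmarkFiles(25) cycles through the ten extensions
// above, repeats each snippet ten times per file, and places files 0, 10,
// and 20 in subdir_0, subdir_1, and subdir_2 while the rest stay flat.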

// runProcessingPipeline runs the processing pipeline similar to main.go.
func runProcessingPipeline(
	ctx context.Context,
	files []string,
	outputFile *os.File,
	format string,
	concurrency int,
	sourceDir string,
) error {
	// Guard against invalid concurrency to prevent deadlocks
	if concurrency < 1 {
		concurrency = 1
	}

	fileCh := make(chan string, concurrency)
	writeCh := make(chan fileproc.WriteRequest, concurrency)
	writerDone := make(chan struct{})

	// Start writer
	go fileproc.StartWriter(outputFile, writeCh, writerDone, format, "", "")

	// Get absolute path once
	absRoot, err := shared.AbsolutePath(sourceDir)
	if err != nil {
		return shared.WrapError(
			err,
			shared.ErrorTypeFileSystem,
			shared.CodeFSPathResolution,
			"failed to get absolute path for source directory",
		)
	}

	// Start workers with proper synchronization
	var workersDone sync.WaitGroup
	for i := 0; i < concurrency; i++ {
		workersDone.Add(1)
		go func() {
			defer workersDone.Done()
			for filePath := range fileCh {
				fileproc.ProcessFile(filePath, writeCh, absRoot)
			}
		}()
	}

	// Send files to workers
	for _, file := range files {
		select {
		case <-ctx.Done():
			close(fileCh)
			workersDone.Wait() // Wait for workers to finish
			close(writeCh)
			<-writerDone

			return fmt.Errorf("context canceled: %w", ctx.Err())
		case fileCh <- file:
		}
	}

	// Close file channel and wait for workers to finish
	close(fileCh)
	workersDone.Wait()

	// Now it's safe to close the write channel
	close(writeCh)
	<-writerDone

	return nil
}
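
// The pipeline above fans work out and back in through channels:
//
//	files -> fileCh -> N workers -> writeCh -> StartWriter -> outputFile
//
// The shutdown order is deliberate: fileCh is closed and workersDone.Wait()
// drains the workers before writeCh is closed, so no worker can send on a
// closed channel; <-writerDone then confirms the writer has finished.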

// PrintResult prints a formatted benchmark result.
func PrintResult(result *Result) {
	printBenchmarkLine := func(format string, args ...any) {
		if _, err := fmt.Printf(format, args...); err != nil {
			// Stdout write errors are rare (broken pipe, etc.) - log but continue
			shared.LogError("failed to write benchmark output", err)
		}
	}

	printBenchmarkLine(shared.BenchmarkFmtSectionHeader, result.Name)
	printBenchmarkLine("Duration: %v\n", result.Duration)
	printBenchmarkLine("Files Processed: %d\n", result.FilesProcessed)
	printBenchmarkLine("Bytes Processed: %d (%.2f MB)\n", result.BytesProcessed,
		float64(result.BytesProcessed)/float64(shared.BytesPerMB))
	printBenchmarkLine("Files/sec: %.2f\n", result.FilesPerSecond)
	printBenchmarkLine("Bytes/sec: %.2f MB/sec\n", result.BytesPerSecond/float64(shared.BytesPerMB))
	printBenchmarkLine(
		"Memory Usage: +%.2f MB (Sys: +%.2f MB)\n",
		result.MemoryUsage.AllocMB,
		result.MemoryUsage.SysMB,
	)
	//nolint:errcheck // Overflow unlikely for pause duration, result output only
	pauseDuration, _ := shared.SafeUint64ToInt64(result.MemoryUsage.PauseTotalNs)
	printBenchmarkLine("GC Runs: %d (Pause: %v)\n", result.MemoryUsage.NumGC, time.Duration(pauseDuration))
	printBenchmarkLine("Goroutines: %d\n", result.CPUUsage.Goroutines)
	printBenchmarkLine("\n")
}

// PrintSuite prints all results in a benchmark suite.
func PrintSuite(suite *Suite) {
	if _, err := fmt.Printf(shared.BenchmarkFmtSectionHeader, suite.Name); err != nil {
		shared.LogError("failed to write benchmark suite header", err)
	}
	// Iterate by index to avoid taking address of range variable
	for i := range suite.Results {
		PrintResult(&suite.Results[i])
	}
}

// RunAllBenchmarks runs a comprehensive benchmark suite.
func RunAllBenchmarks(sourceDir string) error {
	printBenchmark := func(msg string) {
		if _, err := fmt.Println(msg); err != nil {
			shared.LogError("failed to write benchmark message", err)
		}
	}

	printBenchmark("Running gibidify benchmark suite...")

	// Load configuration
	config.LoadConfig()

	// File collection benchmark
	printBenchmark(shared.BenchmarkMsgRunningCollection)
	result, err := FileCollectionBenchmark(sourceDir, shared.BenchmarkDefaultFileCount)
	if err != nil {
		return shared.WrapError(
			err,
			shared.ErrorTypeProcessing,
			shared.CodeProcessingCollection,
			shared.BenchmarkMsgFileCollectionFailed,
		)
	}
	PrintResult(result)

	// Format benchmarks
	printBenchmark("Running format benchmarks...")
	formats := []string{shared.FormatJSON, shared.FormatYAML, shared.FormatMarkdown}
	formatSuite, err := FormatBenchmark(sourceDir, formats)
	if err != nil {
		return shared.WrapError(
			err,
			shared.ErrorTypeProcessing,
			shared.CodeProcessingCollection,
			shared.BenchmarkMsgFormatFailed,
		)
	}
	PrintSuite(formatSuite)

	// Concurrency benchmarks
	printBenchmark("Running concurrency benchmarks...")
	concurrencyLevels := []int{1, 2, 4, 8, runtime.NumCPU()}
	concurrencySuite, err := ConcurrencyBenchmark(sourceDir, shared.FormatJSON, concurrencyLevels)
	if err != nil {
		return shared.WrapError(
			err,
			shared.ErrorTypeProcessing,
			shared.CodeProcessingCollection,
			shared.BenchmarkMsgConcurrencyFailed,
		)
	}
	PrintSuite(concurrencySuite)

	return nil
}
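
// Typical entry-point usage (a minimal sketch; the caller and its source
// directory variable are hypothetical):
//
//	if err := benchmark.RunAllBenchmarks(srcDir); err != nil {
//		log.Fatalf("benchmark suite failed: %v", err)
//	}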