chore: modernize workflows, security scanning, and linting configuration (#50)

* build: update Go 1.25, CI workflows, and build tooling - Upgrade to Go 1.25 - Add benchmark targets to Makefile - Implement parallel gosec execution - Lock tool versions for reproducibility - Add shellcheck directives to scripts - Update CI workflows with improved caching * refactor: migrate from golangci-lint to revive - Replace golangci-lint with revive for linting - Configure comprehensive revive rules - Fix all EditorConfig violations - Add yamllint and yamlfmt support - Remove deprecated .golangci.yml * refactor: rename utils to shared and deduplicate code - Rename utils package to shared - Add shared constants package - Deduplicate constants across packages - Address CodeRabbit review feedback * fix: resolve SonarQube issues and add safety guards - Fix all 73 SonarQube OPEN issues - Add nil guards for resourceMonitor, backpressure, metricsCollector - Implement io.Closer for headerFileReader - Propagate errors from processing helpers - Add metrics and templates packages - Improve error handling across codebase * test: improve test infrastructure and coverage - Add benchmarks for cli, fileproc, metrics - Improve test coverage for cli, fileproc, config - Refactor tests with helper functions - Add shared test constants - Fix test function naming conventions - Reduce cognitive complexity in benchmark tests * docs: update documentation and configuration examples - Update CLAUDE.md with current project state - Refresh README with new features - Add usage and configuration examples - Add SonarQube project configuration - Consolidate config.example.yaml * fix: resolve shellcheck warnings in scripts - Use ./*.go instead of *.go to prevent dash-prefixed filenames from being interpreted as options (SC2035) - Remove unreachable return statement after exit (SC2317) - Remove obsolete gibidiutils/ directory reference * chore(deps): upgrade go dependencies * chore(lint): megalinter fixes * fix: improve test coverage and fix file descriptor leaks - Add defer r.Close() to fix 
pipe file descriptor leaks in benchmark tests - Refactor TestProcessorConfigureFileTypes with helper functions and assertions - Refactor TestProcessorLogFinalStats with output capture and keyword verification - Use shared constants instead of literal strings (TestFilePNG, FormatMarkdown, etc.) - Reduce cognitive complexity by extracting helper functions * fix: align test comments with function names Remove underscores from test comments to match actual function names: - benchmark/benchmark_test.go (2 fixes) - fileproc/filetypes_config_test.go (4 fixes) - fileproc/filetypes_registry_test.go (6 fixes) - fileproc/processor_test.go (6 fixes) - fileproc/resource_monitor_types_test.go (4 fixes) - fileproc/writer_test.go (3 fixes) * fix: various test improvements and bug fixes - Remove duplicate maxCacheSize check in filetypes_registry_test.go - Shorten long comment in processor_test.go to stay under 120 chars - Remove flaky time.Sleep in collector_test.go, use >= 0 assertion - Close pipe reader in benchmark_test.go to fix file descriptor leak - Use ContinueOnError in flags_test.go to match ResetFlags behavior - Add nil check for p.ui in processor_workers.go before UpdateProgress - Fix resource_monitor_validation_test.go by setting hardMemoryLimitBytes directly * chore(yaml): add missing document start markers Add --- document start to YAML files to satisfy yamllint: - .github/workflows/codeql.yml - .github/workflows/build-test-publish.yml - .github/workflows/security.yml - .github/actions/setup/action.yml * fix: guard nil resourceMonitor and fix test deadlock - Guard resourceMonitor before CreateFileProcessingContext call - Add ui.UpdateProgress on emergency stop and path error returns - Fix potential deadlock in TestProcessFile using wg.Go with defer close
2026-03-13 01:00:25 +00:00 · 2025-12-10 19:07:11 +02:00
parent ea4a39a360
commit 95b7ef6dd3
149 changed files with 22990 additions and 8976 deletions
--- a/metrics/collector.go
+++ b/metrics/collector.go
@@ -0,0 +1,355 @@
+// Package metrics provides performance monitoring and reporting capabilities.
+package metrics
+
+import (
+	"math"
+	"runtime"
+	"sync/atomic"
+	"time"
+
+	"github.com/ivuorinen/gibidify/shared"
+)
+
+// NewCollector returns a Collector ready for use.
+//
+// The start and last-update timestamps are both set to the moment of
+// construction and the format, error and phase maps are allocated empty.
+// smallestFile starts at math.MaxInt64 so that any smaller recorded size
+// replaces it when the minimum is tracked.
+func NewCollector() *Collector {
+	started := time.Now()
+
+	c := &Collector{
+		startTime:    started,
+		lastUpdate:   started,
+		smallestFile: math.MaxInt64,
+	}
+	c.formatCounts = make(map[string]int64)
+	c.errorCounts = make(map[string]int64)
+	c.phaseTimings = make(map[string]time.Duration)
+
+	return c
+}
+
+// RecordFileProcessed records the outcome of handling a single file.
+//
+// Every call increments totalFiles and adds result.FileSize to totalSize,
+// whatever the outcome was. The per-status counters are then updated by
+// updateFileStatusCounters and the format/error tallies by
+// updateFormatAndErrorCounts.
+func (c *Collector) RecordFileProcessed(result FileProcessingResult) {
+	atomic.AddInt64(&c.totalFiles, 1)
+
+	c.updateFileStatusCounters(result)
+	atomic.AddInt64(&c.totalSize, result.FileSize)
+	c.updateFormatAndErrorCounts(result)
+}
+
+// updateFileStatusCounters bumps exactly one of the processed, skipped or
+// error counters for the given result.
+//
+// Success takes precedence over Skipped; a result that is neither counts as
+// an error. Only successful results contribute to processedSize and to the
+// largest/smallest file tracking.
+func (c *Collector) updateFileStatusCounters(result FileProcessingResult) {
+	if result.Success {
+		atomic.AddInt64(&c.processedFiles, 1)
+		atomic.AddInt64(&c.processedSize, result.FileSize)
+		c.updateFileSizeExtremes(result.FileSize)
+
+		return
+	}
+
+	if result.Skipped {
+		atomic.AddInt64(&c.skippedFiles, 1)
+
+		return
+	}
+
+	atomic.AddInt64(&c.errorFiles, 1)
+}
+
+// updateFileSizeExtremes folds fileSize into the running maximum
+// (largestFile) and minimum (smallestFile).
+//
+// Each bound is updated with a compare-and-swap retry loop: the loop ends as
+// soon as the stored value is already at least as extreme as fileSize, or as
+// soon as the swap succeeds.
+func (c *Collector) updateFileSizeExtremes(fileSize int64) {
+	// Raise the maximum while fileSize is strictly larger than what is stored.
+	for cur := atomic.LoadInt64(&c.largestFile); fileSize > cur; cur = atomic.LoadInt64(&c.largestFile) {
+		if atomic.CompareAndSwapInt64(&c.largestFile, cur, fileSize) {
+			break
+		}
+	}
+
+	// Lower the minimum while fileSize is strictly smaller than what is stored.
+	for cur := atomic.LoadInt64(&c.smallestFile); fileSize < cur; cur = atomic.LoadInt64(&c.smallestFile) {
+		if atomic.CompareAndSwapInt64(&c.smallestFile, cur, fileSize) {
+			break
+		}
+	}
+}
+
+// updateFormatAndErrorCounts tallies the result's format and error under the
+// collector's write lock and refreshes lastUpdate.
+//
+// An empty Format is not counted. A non-nil Error is counted under the key
+// produced by simplifyErrorType.
+func (c *Collector) updateFormatAndErrorCounts(result FileProcessingResult) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	if format := result.Format; format != "" {
+		c.formatCounts[format]++
+	}
+
+	if err := result.Error; err != nil {
+		c.errorCounts[c.simplifyErrorType(err)]++
+	}
+
+	c.lastUpdate = time.Now()
+}
+
+// simplifyErrorType simplifies error messages for better aggregation.
+func (c *Collector) simplifyErrorType(err error) string {
+	errorType := err.Error()
+	// Simplify error types for better aggregation
+	if len(errorType) > 50 {
+		errorType = errorType[:50] + "..."
+	}
+
+	return errorType
+}
+
+// RecordPhaseTime adds duration to the accumulated time of the named phase.
+// Repeated calls for the same phase sum up; the update is made under the
+// collector's write lock.
+func (c *Collector) RecordPhaseTime(phase string, duration time.Duration) {
+	c.mu.Lock()
+	accumulated := c.phaseTimings[phase] + duration
+	c.phaseTimings[phase] = accumulated
+	c.mu.Unlock()
+}
+
+// IncrementConcurrency raises the current concurrency counter by one and, if
+// the new value exceeds the recorded peak, raises the peak to match.
+func (c *Collector) IncrementConcurrency() {
+	active := atomic.AddInt32(&c.concurrency, 1)
+
+	// Retry until either the peak already covers this value or our swap wins.
+	for {
+		observedPeak := atomic.LoadInt32(&c.peakConcurrency)
+		if active <= observedPeak {
+			return
+		}
+
+		if atomic.CompareAndSwapInt32(&c.peakConcurrency, observedPeak, active) {
+			return
+		}
+	}
+}
+
+// DecrementConcurrency lowers the current concurrency counter by one.
+// A counter that is already zero is left untouched, so unbalanced calls
+// cannot drive it negative.
+func (c *Collector) DecrementConcurrency() {
+	for active := atomic.LoadInt32(&c.concurrency); active != 0; active = atomic.LoadInt32(&c.concurrency) {
+		if atomic.CompareAndSwapInt32(&c.concurrency, active, active-1) {
+			return
+		}
+	}
+}
+
+// CurrentMetrics returns a point-in-time snapshot of everything collected so far.
+//
+// Counters are read atomically. The format, error and phase maps are copied
+// under the read lock, so the returned value does not alias the collector's
+// internal maps. Averages and per-second rates are derived from the processed
+// (successful) file count and size; they are zero when nothing has been
+// processed or no time has elapsed. SmallestFile is reported as 0 while the
+// internal minimum still holds its math.MaxInt64 sentinel.
+//
+// PeakMemoryMB is taken from runtime.MemStats.Sys and CurrentMemoryMB from
+// runtime.MemStats.Alloc.
+func (c *Collector) CurrentMetrics() ProcessingMetrics {
+	// ReadMemStats stops the world; do it before taking the lock so writers
+	// are not held up behind the read lock while it runs.
+	var m runtime.MemStats
+	runtime.ReadMemStats(&m)
+
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	processingTime := time.Since(c.startTime)
+
+	totalFiles := atomic.LoadInt64(&c.totalFiles)
+	processedFiles := atomic.LoadInt64(&c.processedFiles)
+	processedSize := atomic.LoadInt64(&c.processedSize)
+
+	var avgFileSize float64
+	if processedFiles > 0 {
+		avgFileSize = float64(processedSize) / float64(processedFiles)
+	}
+
+	var filesPerSec, bytesPerSec float64
+	if seconds := processingTime.Seconds(); seconds > 0 {
+		filesPerSec = float64(processedFiles) / seconds
+		bytesPerSec = float64(processedSize) / seconds
+	}
+
+	smallestFile := atomic.LoadInt64(&c.smallestFile)
+	if smallestFile == math.MaxInt64 {
+		smallestFile = 0 // No files processed yet
+	}
+
+	// Copy maps so the snapshot stays valid after the lock is released.
+	formatCounts := make(map[string]int64, len(c.formatCounts))
+	for k, v := range c.formatCounts {
+		formatCounts[k] = v
+	}
+
+	errorCounts := make(map[string]int64, len(c.errorCounts))
+	for k, v := range c.errorCounts {
+		errorCounts[k] = v
+	}
+
+	phaseTimings := make(map[string]time.Duration, len(c.phaseTimings))
+	for k, v := range c.phaseTimings {
+		phaseTimings[k] = v
+	}
+
+	return ProcessingMetrics{
+		TotalFiles:         totalFiles,
+		ProcessedFiles:     processedFiles,
+		SkippedFiles:       atomic.LoadInt64(&c.skippedFiles),
+		ErrorFiles:         atomic.LoadInt64(&c.errorFiles),
+		LastUpdated:        c.lastUpdate,
+		TotalSize:          atomic.LoadInt64(&c.totalSize),
+		ProcessedSize:      processedSize,
+		AverageFileSize:    avgFileSize,
+		LargestFile:        atomic.LoadInt64(&c.largestFile),
+		SmallestFile:       smallestFile,
+		StartTime:          c.startTime,
+		ProcessingTime:     processingTime,
+		FilesPerSecond:     filesPerSec,
+		BytesPerSecond:     bytesPerSec,
+		PeakMemoryMB:       shared.BytesToMB(m.Sys),
+		CurrentMemoryMB:    shared.BytesToMB(m.Alloc),
+		GoroutineCount:     runtime.NumGoroutine(),
+		FormatCounts:       formatCounts,
+		ErrorCounts:        errorCounts,
+		MaxConcurrency:     int(atomic.LoadInt32(&c.peakConcurrency)),
+		CurrentConcurrency: atomic.LoadInt32(&c.concurrency),
+		PhaseTimings:       phaseTimings,
+	}
+}
+
+// Finish freezes the current metrics as the final snapshot.
+//
+// The snapshot is taken via CurrentMetrics, then stamped with an EndTime of
+// "now" and a ProcessingTime measured from startTime to that EndTime, and
+// stored for FinalMetrics to return.
+func (c *Collector) Finish() {
+	// CurrentMetrics takes the read lock itself, so it must run before the
+	// write lock is acquired below.
+	snapshot := c.CurrentMetrics()
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	finishedAt := time.Now()
+	snapshot.EndTime = finishedAt
+	snapshot.ProcessingTime = finishedAt.Sub(c.startTime)
+	c.metrics = snapshot
+}
+
+// FinalMetrics returns the snapshot stored by the most recent Finish call.
+// The stored value is read under the collector's read lock.
+func (c *Collector) FinalMetrics() ProcessingMetrics {
+	c.mu.RLock()
+	final := c.metrics
+	c.mu.RUnlock()
+
+	return final
+}
+
+// GenerateReport generates a comprehensive profiling report.
+func (c *Collector) GenerateReport() ProfileReport {
+	metrics := c.CurrentMetrics()
+
+	// Generate format breakdown
+	formatBreakdown := make(map[string]FormatMetrics)
+	for format, count := range metrics.FormatCounts {
+		// For now, we don't have detailed per-format timing data
+		// This could be enhanced in the future
+		formatBreakdown[format] = FormatMetrics{
+			Count:                 count,
+			TotalSize:             0, // Would need to track this separately
+			AverageSize:           0,
+			TotalProcessingTime:   0,
+			AverageProcessingTime: 0,
+		}
+	}
+
+	// Generate phase breakdown
+	phaseBreakdown := make(map[string]PhaseMetrics)
+	totalPhaseTime := time.Duration(0)
+	for _, duration := range metrics.PhaseTimings {
+		totalPhaseTime += duration
+	}
+
+	for phase, duration := range metrics.PhaseTimings {
+		percentage := float64(0)
+		if totalPhaseTime > 0 {
+			percentage = float64(duration) / float64(totalPhaseTime) * 100
+		}
+
+		phaseBreakdown[phase] = PhaseMetrics{
+			TotalTime:   duration,
+			Count:       1, // For now, we track total time per phase
+			AverageTime: duration,
+			Percentage:  percentage,
+		}
+	}
+
+	// Calculate performance index (files per second normalized)
+	performanceIndex := metrics.FilesPerSecond
+	if performanceIndex > shared.MetricsPerformanceIndexCap {
+		performanceIndex = shared.MetricsPerformanceIndexCap // Cap for reasonable indexing
+	}
+
+	// Generate recommendations
+	recommendations := c.generateRecommendations(metrics)
+
+	return ProfileReport{
+		Summary:          metrics,
+		TopLargestFiles:  []FileInfo{}, // Would need separate tracking
+		TopSlowestFiles:  []FileInfo{}, // Would need separate tracking
+		FormatBreakdown:  formatBreakdown,
+		ErrorBreakdown:   metrics.ErrorCounts,
+		PhaseBreakdown:   phaseBreakdown,
+		PerformanceIndex: performanceIndex,
+		Recommendations:  recommendations,
+	}
+}
+
+// generateRecommendations derives human-readable tuning hints from a metrics
+// snapshot. It returns nil when none of the thresholds below is crossed.
+//
+// Checks, in order: current memory above 500 MB; throughput below 10 files/sec
+// once more than 100 files were processed; error rate above 5% of all files;
+// current concurrency below half of the peak; largest file above 50 MB.
+func (c *Collector) generateRecommendations(metrics ProcessingMetrics) []string {
+	var advice []string
+	suggest := func(msg string) {
+		advice = append(advice, msg)
+	}
+
+	// Memory usage
+	if metrics.CurrentMemoryMB > 500 {
+		suggest("Consider reducing memory usage - current usage is high (>500MB)")
+	}
+
+	// Processing rate
+	if metrics.ProcessedFiles > 100 && metrics.FilesPerSecond < 10 {
+		suggest("Processing rate is low (<10 files/sec) - consider optimizing file I/O")
+	}
+
+	// Error rate, as a percentage of all files seen
+	if metrics.TotalFiles > 0 && float64(metrics.ErrorFiles)/float64(metrics.TotalFiles)*100 > 5 {
+		suggest("High error rate (>5%) detected - review error logs")
+	}
+
+	// Concurrency utilisation relative to the observed peak
+	halfPeak := shared.SafeIntToInt32WithDefault(metrics.MaxConcurrency/2, 1)
+	if halfPeak > 0 && metrics.CurrentConcurrency < halfPeak {
+		suggest("Low concurrency utilization - consider increasing concurrent processing")
+	}
+
+	// Large files
+	const largeFileBytes = 50 * shared.BytesPerMB // 50MB
+	if metrics.LargestFile > largeFileBytes {
+		suggest("Very large files detected (>50MB) - consider streaming processing for large files")
+	}
+
+	return advice
+}
+
+// Reset returns the collector to the state of a freshly constructed one.
+//
+// Under the write lock it restarts the clock (startTime and lastUpdate are
+// set to now), zeroes every counter including both the current and the peak
+// concurrency, restores the smallestFile sentinel, replaces the format,
+// error and phase maps with empty ones, and clears the snapshot stored by
+// Finish.
+func (c *Collector) Reset() {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	now := time.Now()
+	c.startTime = now
+	c.lastUpdate = now
+
+	atomic.StoreInt64(&c.totalFiles, 0)
+	atomic.StoreInt64(&c.processedFiles, 0)
+	atomic.StoreInt64(&c.skippedFiles, 0)
+	atomic.StoreInt64(&c.errorFiles, 0)
+	atomic.StoreInt64(&c.totalSize, 0)
+	atomic.StoreInt64(&c.processedSize, 0)
+	atomic.StoreInt64(&c.largestFile, 0)
+	atomic.StoreInt64(&c.smallestFile, math.MaxInt64)
+	atomic.StoreInt32(&c.concurrency, 0)
+	// The peak must be cleared too, otherwise MaxConcurrency from a previous
+	// run leaks into the metrics reported after the reset.
+	atomic.StoreInt32(&c.peakConcurrency, 0)
+
+	c.formatCounts = make(map[string]int64)
+	c.errorCounts = make(map[string]int64)
+	c.metrics = ProcessingMetrics{} // Clear final snapshot
+	c.phaseTimings = make(map[string]time.Duration)
+}
--- a/metrics/collector_test.go
+++ b/metrics/collector_test.go
@@ -0,0 +1,484 @@
+package metrics
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/ivuorinen/gibidify/shared"
+)
+
+// TestNewCollector verifies that a new collector has its maps allocated and
+// its smallestFile sentinel in place.
+func TestNewCollector(t *testing.T) {
+	c := NewCollector()
+	if c == nil {
+		t.Fatal("NewCollector returned nil")
+	}
+
+	if c.formatCounts == nil {
+		t.Error("formatCounts map not initialized")
+	}
+
+	if c.errorCounts == nil {
+		t.Error("errorCounts map not initialized")
+	}
+
+	if c.phaseTimings == nil {
+		t.Error("phaseTimings map not initialized")
+	}
+
+	if want := shared.MetricsMaxInt64; c.smallestFile != want {
+		t.Errorf("smallestFile not initialized correctly, got %d, want %d", c.smallestFile, want)
+	}
+}
+
+// TestRecordFileProcessedSuccess records one successful file and checks every
+// counter that a success is expected to touch.
+func TestRecordFileProcessedSuccess(t *testing.T) {
+	collector := NewCollector()
+	collector.RecordFileProcessed(FileProcessingResult{
+		FilePath:       shared.TestPathTestFileGo,
+		FileSize:       1024,
+		Format:         "go",
+		ProcessingTime: 10 * time.Millisecond,
+		Success:        true,
+	})
+
+	snapshot := collector.CurrentMetrics()
+
+	checks := []struct {
+		format string
+		got    int64
+		want   int64
+	}{
+		{shared.TestFmtExpectedTotalFiles, snapshot.TotalFiles, 1},
+		{"Expected ProcessedFiles=1, got %d", snapshot.ProcessedFiles, 1},
+		{"Expected ProcessedSize=1024, got %d", snapshot.ProcessedSize, 1024},
+		{"Expected go format count=1, got %d", snapshot.FormatCounts["go"], 1},
+		{"Expected LargestFile=1024, got %d", snapshot.LargestFile, 1024},
+		{"Expected SmallestFile=1024, got %d", snapshot.SmallestFile, 1024},
+	}
+
+	for _, check := range checks {
+		if check.got != check.want {
+			t.Errorf(check.format, check.got)
+		}
+	}
+}
+
+func TestRecordFileProcessedError(t *testing.T) {
+	collector := NewCollector()
+
+	result := FileProcessingResult{
+		FilePath:       "/test/error.txt",
+		FileSize:       512,
+		Format:         "txt",
+		ProcessingTime: 5 * time.Millisecond,
+		Success:        false,
+		Error:          errors.New(shared.TestErrTestErrorMsg),
+	}
+
+	collector.RecordFileProcessed(result)
+
+	metrics := collector.CurrentMetrics()
+
+	if metrics.TotalFiles != 1 {
+		t.Errorf(shared.TestFmtExpectedTotalFiles, metrics.TotalFiles)
+	}
+
+	if metrics.ErrorFiles != 1 {
+		t.Errorf("Expected ErrorFiles=1, got %d", metrics.ErrorFiles)
+	}
+
+	if metrics.ProcessedFiles != 0 {
+		t.Errorf("Expected ProcessedFiles=0, got %d", metrics.ProcessedFiles)
+	}
+
+	if metrics.ErrorCounts[shared.TestErrTestErrorMsg] != 1 {
+		t.Errorf("Expected error count=1, got %d", metrics.ErrorCounts[shared.TestErrTestErrorMsg])
+	}
+}
+
+// TestRecordFileProcessedSkipped records one skipped file and checks that it
+// is counted as skipped and not as processed.
+func TestRecordFileProcessedSkipped(t *testing.T) {
+	collector := NewCollector()
+	collector.RecordFileProcessed(FileProcessingResult{
+		FilePath:   "/test/skipped.bin",
+		FileSize:   256,
+		Success:    false,
+		Skipped:    true,
+		SkipReason: "binary file",
+	})
+
+	snapshot := collector.CurrentMetrics()
+
+	if got := snapshot.TotalFiles; got != 1 {
+		t.Errorf(shared.TestFmtExpectedTotalFiles, got)
+	}
+
+	if got := snapshot.SkippedFiles; got != 1 {
+		t.Errorf("Expected SkippedFiles=1, got %d", got)
+	}
+
+	if got := snapshot.ProcessedFiles; got != 0 {
+		t.Errorf("Expected ProcessedFiles=0, got %d", got)
+	}
+}
+
+// TestRecordPhaseTime checks that phase durations accumulate per phase name.
+func TestRecordPhaseTime(t *testing.T) {
+	collector := NewCollector()
+
+	collector.RecordPhaseTime(shared.MetricsPhaseCollection, 100*time.Millisecond)
+	collector.RecordPhaseTime(shared.MetricsPhaseProcessing, 200*time.Millisecond)
+	// A second call for the same phase must add to the existing total.
+	collector.RecordPhaseTime(shared.MetricsPhaseCollection, 50*time.Millisecond)
+
+	timings := collector.CurrentMetrics().PhaseTimings
+
+	if got := timings[shared.MetricsPhaseCollection]; got != 150*time.Millisecond {
+		t.Errorf("Expected collection phase time=150ms, got %v", got)
+	}
+
+	if got := timings[shared.MetricsPhaseProcessing]; got != 200*time.Millisecond {
+		t.Errorf("Expected processing phase time=200ms, got %v", got)
+	}
+}
+
+// TestConcurrencyTracking checks the current-concurrency counter through a
+// start value, two increments and one decrement.
+func TestConcurrencyTracking(t *testing.T) {
+	collector := NewCollector()
+	current := func() int32 {
+		return collector.CurrentMetrics().CurrentConcurrency
+	}
+
+	// A fresh collector reports no active workers.
+	if got := current(); got != 0 {
+		t.Errorf("Expected initial concurrency=0, got %d", got)
+	}
+
+	// Two increments.
+	collector.IncrementConcurrency()
+	collector.IncrementConcurrency()
+
+	if got := current(); got != 2 {
+		t.Errorf("Expected concurrency=2, got %d", got)
+	}
+
+	// One decrement.
+	collector.DecrementConcurrency()
+
+	if got := current(); got != 1 {
+		t.Errorf("Expected concurrency=1, got %d", got)
+	}
+}
+
+// TestFileSizeTracking records three successful files of different sizes and
+// checks the largest, smallest and average file size.
+func TestFileSizeTracking(t *testing.T) {
+	collector := NewCollector()
+
+	inputs := []FileProcessingResult{
+		{FilePath: "small.txt", FileSize: 100, Success: true, Format: "txt"},
+		{FilePath: "large.txt", FileSize: 5000, Success: true, Format: "txt"},
+		{FilePath: "medium.txt", FileSize: 1000, Success: true, Format: "txt"},
+	}
+	for i := range inputs {
+		collector.RecordFileProcessed(inputs[i])
+	}
+
+	snapshot := collector.CurrentMetrics()
+
+	if got := snapshot.LargestFile; got != 5000 {
+		t.Errorf("Expected LargestFile=5000, got %d", got)
+	}
+
+	if got := snapshot.SmallestFile; got != 100 {
+		t.Errorf("Expected SmallestFile=100, got %d", got)
+	}
+
+	wantAvg := float64(6100) / 3 // (100 + 5000 + 1000) / 3
+	if diff := math.Abs(snapshot.AverageFileSize - wantAvg); diff > 0.1 {
+		t.Errorf("Expected AverageFileSize=%.1f, got %.1f", wantAvg, snapshot.AverageFileSize)
+	}
+}
+
+// TestConcurrentAccess records files from several goroutines at once and
+// checks that no update is lost.
+func TestConcurrentAccess(t *testing.T) {
+	const (
+		numGoroutines     = 10
+		filesPerGoroutine = 100
+	)
+
+	collector := NewCollector()
+
+	// Each worker records its own batch of successful files.
+	worker := func(id int, done *sync.WaitGroup) {
+		defer done.Done()
+
+		for j := 0; j < filesPerGoroutine; j++ {
+			collector.RecordFileProcessed(FileProcessingResult{
+				FilePath: fmt.Sprintf("/test/file_%d_%d.go", id, j),
+				FileSize: int64(j + 1),
+				Success:  true,
+				Format:   "go",
+			})
+		}
+	}
+
+	var wg sync.WaitGroup
+	wg.Add(numGoroutines)
+	for i := 0; i < numGoroutines; i++ {
+		go worker(i, &wg)
+	}
+	wg.Wait()
+
+	snapshot := collector.CurrentMetrics()
+	want := int64(numGoroutines * filesPerGoroutine)
+
+	if got := snapshot.TotalFiles; got != want {
+		t.Errorf("Expected TotalFiles=%d, got %d", want, got)
+	}
+
+	if got := snapshot.ProcessedFiles; got != want {
+		t.Errorf("Expected ProcessedFiles=%d, got %d", want, got)
+	}
+
+	if got := snapshot.FormatCounts["go"]; got != want {
+		t.Errorf("Expected go format count=%d, got %d", want, got)
+	}
+}
+
+// TestFinishAndGetFinalMetrics checks that Finish stores a final snapshot
+// with an end time, a non-negative processing time and the recorded counts.
+func TestFinishAndGetFinalMetrics(t *testing.T) {
+	collector := NewCollector()
+
+	// Record a single successful file before finishing.
+	collector.RecordFileProcessed(FileProcessingResult{
+		FilePath: shared.TestPathTestFileGo,
+		FileSize: 1024,
+		Success:  true,
+		Format:   "go",
+	})
+	collector.Finish()
+
+	final := collector.FinalMetrics()
+
+	if final.EndTime.IsZero() {
+		t.Error("EndTime should be set after Finish()")
+	}
+
+	if final.ProcessingTime < 0 {
+		t.Error("ProcessingTime should be >= 0 after Finish()")
+	}
+
+	if got := final.ProcessedFiles; got != 1 {
+		t.Errorf("Expected ProcessedFiles=1, got %d", got)
+	}
+}
+
+// TestGenerateReport feeds two successes and one failure plus two phase
+// timings into a collector and checks the shape of the generated report.
+func TestGenerateReport(t *testing.T) {
+	collector := NewCollector()
+
+	// Two successful files with a format and one failure without one.
+	inputs := []FileProcessingResult{
+		{FilePath: "file1.go", FileSize: 1000, Success: true, Format: "go"},
+		{FilePath: "file2.js", FileSize: 2000, Success: true, Format: "js"},
+		{FilePath: "file3.go", FileSize: 500, Success: false, Error: errors.New("syntax error")},
+	}
+	for i := range inputs {
+		collector.RecordFileProcessed(inputs[i])
+	}
+
+	collector.RecordPhaseTime(shared.MetricsPhaseCollection, 100*time.Millisecond)
+	collector.RecordPhaseTime(shared.MetricsPhaseProcessing, 200*time.Millisecond)
+
+	// Mirror production usage, where the report is generated after Finish.
+	collector.Finish()
+
+	report := collector.GenerateReport()
+
+	if got := report.Summary.TotalFiles; got != 3 {
+		t.Errorf("Expected Summary.TotalFiles=3, got %d", got)
+	}
+
+	if got := report.FormatBreakdown["go"].Count; got != 1 {
+		t.Errorf("Expected go format count=1, got %d", got)
+	}
+
+	if got := report.FormatBreakdown["js"].Count; got != 1 {
+		t.Errorf("Expected js format count=1, got %d", got)
+	}
+
+	if got := len(report.ErrorBreakdown); got != 1 {
+		t.Errorf("Expected 1 error type, got %d", got)
+	}
+
+	if got := len(report.PhaseBreakdown); got != 2 {
+		t.Errorf("Expected 2 phases, got %d", got)
+	}
+
+	if len(report.Recommendations) == 0 {
+		t.Error("Expected some recommendations")
+	}
+}
+
+// TestReset records some data, resets the collector and checks that the
+// counters and maps visible through CurrentMetrics are empty again.
+func TestReset(t *testing.T) {
+	collector := NewCollector()
+
+	// Seed the collector with one file and one phase timing.
+	collector.RecordFileProcessed(FileProcessingResult{
+		FilePath: shared.TestPathTestFileGo,
+		FileSize: 1024,
+		Success:  true,
+		Format:   "go",
+	})
+	collector.RecordPhaseTime(shared.MetricsPhaseCollection, 100*time.Millisecond)
+
+	// Sanity check: there is something to reset.
+	if before := collector.CurrentMetrics(); before.TotalFiles == 0 {
+		t.Error("Expected data before reset")
+	}
+
+	collector.Reset()
+
+	after := collector.CurrentMetrics()
+
+	if got := after.TotalFiles; got != 0 {
+		t.Errorf("Expected TotalFiles=0 after reset, got %d", got)
+	}
+
+	if got := after.ProcessedFiles; got != 0 {
+		t.Errorf("Expected ProcessedFiles=0 after reset, got %d", got)
+	}
+
+	if got := len(after.FormatCounts); got != 0 {
+		t.Errorf("Expected empty FormatCounts after reset, got %d entries", got)
+	}
+
+	if got := len(after.PhaseTimings); got != 0 {
+		t.Errorf("Expected empty PhaseTimings after reset, got %d entries", got)
+	}
+}
+
+// Benchmarks for collector hot paths
+
+func BenchmarkCollectorRecordFileProcessed(b *testing.B) {
+	collector := NewCollector()
+	result := FileProcessingResult{
+		FilePath:       shared.TestPathTestFileGo,
+		FileSize:       1024,
+		Format:         "go",
+		ProcessingTime: 10 * time.Millisecond,
+		Success:        true,
+	}
+
+	for b.Loop() {
+		collector.RecordFileProcessed(result)
+	}
+}
+
+func BenchmarkCollectorRecordFileProcessedConcurrent(b *testing.B) {
+	collector := NewCollector()
+	result := FileProcessingResult{
+		FilePath:       shared.TestPathTestFileGo,
+		FileSize:       1024,
+		Format:         "go",
+		ProcessingTime: 10 * time.Millisecond,
+		Success:        true,
+	}
+
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			collector.RecordFileProcessed(result)
+		}
+	})
+}
+
+// BenchmarkCollectorCurrentMetrics measures taking a metrics snapshot from a
+// collector that already holds 100 recorded files.
+func BenchmarkCollectorCurrentMetrics(b *testing.B) {
+	collector := NewCollector()
+
+	// Add some baseline data
+	for i := range 100 {
+		collector.RecordFileProcessed(FileProcessingResult{
+			FilePath: fmt.Sprintf("/test/file%d.go", i),
+			FileSize: int64(i * 100),
+			Format:   "go",
+			Success:  true,
+		})
+	}
+
+	// b.Loop resets the timer on its first call, so the setup above is not
+	// measured and no explicit b.ResetTimer is needed.
+	for b.Loop() {
+		_ = collector.CurrentMetrics()
+	}
+}
+
+// BenchmarkCollectorGenerateReport measures report generation for collectors
+// holding 10, 100 and 1000 recorded files. Every tenth file is recorded as a
+// failure; the rest are successes spread over eight formats.
+func BenchmarkCollectorGenerateReport(b *testing.B) {
+	benchmarks := []struct {
+		name  string
+		files int
+	}{
+		{"10files", 10},
+		{"100files", 100},
+		{"1000files", 1000},
+	}
+
+	for _, bm := range benchmarks {
+		b.Run(bm.name, func(b *testing.B) {
+			collector := NewCollector()
+
+			// Add test data
+			formats := []string{"go", "js", "py", "ts", "rs", "java", "cpp", "rb"}
+			for i := range bm.files {
+				var result FileProcessingResult
+				if i%10 == 0 {
+					result = FileProcessingResult{
+						FilePath: fmt.Sprintf("/test/error%d.go", i),
+						FileSize: 500,
+						Success:  false,
+						Error:    errors.New(shared.TestErrTestErrorMsg),
+					}
+				} else {
+					result = FileProcessingResult{
+						FilePath: fmt.Sprintf("/test/file%d.go", i),
+						FileSize: int64(i * 100),
+						Format:   formats[i%len(formats)],
+						Success:  true,
+					}
+				}
+				collector.RecordFileProcessed(result)
+			}
+
+			collector.RecordPhaseTime(shared.MetricsPhaseCollection, 50*time.Millisecond)
+			collector.RecordPhaseTime(shared.MetricsPhaseProcessing, 150*time.Millisecond)
+			collector.Finish()
+
+			// b.Loop resets the timer on its first call, so the setup above
+			// is not measured and no explicit b.ResetTimer is needed.
+			for b.Loop() {
+				_ = collector.GenerateReport()
+			}
+		})
+	}
+}
+
+// BenchmarkCollectorConcurrencyTracking measures a paired increment and
+// decrement of the concurrency counter from parallel goroutines.
+func BenchmarkCollectorConcurrencyTracking(b *testing.B) {
+	collector := NewCollector()
+
+	enterLeave := func(pb *testing.PB) {
+		for pb.Next() {
+			collector.IncrementConcurrency()
+			collector.DecrementConcurrency()
+		}
+	}
+	b.RunParallel(enterLeave)
+}
--- a/metrics/reporter.go
+++ b/metrics/reporter.go
@@ -0,0 +1,418 @@
+// Package metrics provides performance monitoring and reporting capabilities.
+package metrics
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+	"time"
+
+	"golang.org/x/text/cases"
+	"golang.org/x/text/language"
+
+	"github.com/ivuorinen/gibidify/shared"
+)
+
+// reportBuilder wraps strings.Builder and keeps the first write error so callers need no per-call checks.
+type reportBuilder struct {
+	b   *strings.Builder // destination buffer for the report text
+	err error            // error from a failed write; once set, later writes are skipped
+}
+
+// newReportBuilder returns a reportBuilder backed by an empty strings.Builder.
+func newReportBuilder() *reportBuilder {
+	return &reportBuilder{b: new(strings.Builder)}
+}
+
+// writeString appends s unless an earlier write already failed.
+// The error from this write, if any, is kept in rb.err.
+func (rb *reportBuilder) writeString(s string) {
+	if rb.err == nil {
+		_, rb.err = rb.b.WriteString(s)
+	}
+}
+
+// fprintf formats args according to format and appends the result unless an
+// earlier write already failed; the error from this write is kept in rb.err.
+func (rb *reportBuilder) fprintf(format string, args ...any) {
+	if rb.err == nil {
+		_, rb.err = fmt.Fprintf(rb.b, format, args...)
+	}
+}
+
+// String returns everything written so far, or "" when any write failed.
+func (rb *reportBuilder) String() string {
+	if rb.err == nil {
+		return rb.b.String()
+	}
+	return ""
+}
+
+// Reporter formats the metrics held by a Collector into progress lines and final reports.
+type Reporter struct {
+	collector *Collector // source of the metrics being reported
+	verbose   bool       // selects the detailed formats in ReportProgress and ReportFinal
+	colors    bool       // wraps error counts in ANSI red escape codes when true
+}
+
+// NewReporter builds a Reporter that reads from collector. verbose selects the
+// detailed output formats and colors enables ANSI highlighting of error counts.
+func NewReporter(collector *Collector, verbose, colors bool) *Reporter {
+	r := new(Reporter)
+	r.collector = collector
+	r.verbose, r.colors = verbose, colors
+	return r
+}
+
+// ReportProgress renders the collector's current metrics as a progress report.
+// It returns "no metrics available" when the reporter or its collector is nil.
+func (r *Reporter) ReportProgress() string {
+	if r == nil || r.collector == nil {
+		return "no metrics available"
+	}
+
+	format := r.formatBasicProgress
+	if r.verbose {
+		format = r.formatVerboseProgress
+	}
+
+	return format(r.collector.CurrentMetrics())
+}
+
+// ReportFinal renders the collector's generated report: the multi-section form
+// when verbose, otherwise a short summary. A nil reporter or collector yields "".
+func (r *Reporter) ReportFinal() string {
+	if r == nil || r.collector == nil {
+		return ""
+	}
+
+	full := r.collector.GenerateReport()
+	if !r.verbose {
+		return r.formatBasicReport(full.Summary)
+	}
+
+	return r.formatVerboseReport(full)
+}
+
+// formatBasicProgress renders the one-line progress summary: processed count,
+// then skipped and error counts when non-zero, then the files/sec rate when positive.
+func (r *Reporter) formatBasicProgress(metrics ProcessingMetrics) string {
+	out := newReportBuilder()
+
+	out.fprintf("Processed: %d files", metrics.ProcessedFiles)
+
+	if metrics.SkippedFiles > 0 {
+		out.fprintf(", Skipped: %d", metrics.SkippedFiles)
+	}
+
+	if metrics.ErrorFiles > 0 {
+		// The error count is wrapped in red escape codes when colors are enabled.
+		errFmt := ", Errors: %d"
+		if r.colors {
+			errFmt = ", \033[31mErrors: %d\033[0m"
+		}
+		out.fprintf(errFmt, metrics.ErrorFiles)
+	}
+
+	if metrics.FilesPerSecond > 0 {
+		out.fprintf(" (%.1f files/sec)", metrics.FilesPerSecond)
+	}
+
+	return out.String()
+}
+
+// formatVerboseProgress renders the multi-line progress block: file counts,
+// sizes, throughput, memory, concurrency, per-format counts (in sorted key
+// order) and the processing time truncated to whole milliseconds.
+func (r *Reporter) formatVerboseProgress(metrics ProcessingMetrics) string {
+	out := newReportBuilder()
+
+	// Header
+	out.writeString("=== Processing Statistics ===\n")
+
+	// File counts
+	out.fprintf(
+		"Files - Total: %d, Processed: %d, Skipped: %d, Errors: %d\n",
+		metrics.TotalFiles,
+		metrics.ProcessedFiles,
+		metrics.SkippedFiles,
+		metrics.ErrorFiles,
+	)
+
+	// Size information
+	average := int64(metrics.AverageFileSize)
+	out.fprintf(
+		"Size - Processed: %s, Average: %s\n",
+		r.formatBytes(metrics.ProcessedSize),
+		r.formatBytes(average),
+	)
+
+	// The size range is only printed when LargestFile is positive.
+	if metrics.LargestFile > 0 {
+		out.fprintf(
+			"File Size Range: %s - %s\n",
+			r.formatBytes(metrics.SmallestFile),
+			r.formatBytes(metrics.LargestFile),
+		)
+	}
+
+	// Performance: byte throughput is scaled to MB/sec.
+	mbPerSec := metrics.BytesPerSecond / float64(shared.BytesPerMB)
+	out.fprintf(
+		"Performance - Files/sec: %.1f, MB/sec: %.1f\n",
+		metrics.FilesPerSecond,
+		mbPerSec,
+	)
+
+	// Memory usage
+	out.fprintf(
+		"Memory - Current: %dMB, Peak: %dMB, Goroutines: %d\n",
+		metrics.CurrentMemoryMB,
+		metrics.PeakMemoryMB,
+		metrics.GoroutineCount,
+	)
+
+	// Concurrency
+	out.fprintf(
+		"Concurrency - Current: %d, Max: %d\n",
+		metrics.CurrentConcurrency,
+		metrics.MaxConcurrency,
+	)
+
+	// Format breakdown, walked in sorted key order so the output is stable.
+	if len(metrics.FormatCounts) > 0 {
+		out.writeString("Format Breakdown:\n")
+		names := r.sortedMapKeys(metrics.FormatCounts)
+		for _, name := range names {
+			count := metrics.FormatCounts[name]
+			out.fprintf(shared.MetricsFmtFileCount, name, count)
+		}
+	}
+
+	// Processing time
+	elapsed := metrics.ProcessingTime.Truncate(time.Millisecond)
+	out.fprintf(shared.MetricsFmtProcessingTime, elapsed)
+
+	return out.String()
+}
+
+// formatBasicReport renders the short final report: a completion header,
+// file counts, processed size with the files/sec rate, and the processing time.
+func (r *Reporter) formatBasicReport(metrics ProcessingMetrics) string {
+	out := newReportBuilder()
+
+	out.writeString("=== Processing Complete ===\n")
+	out.fprintf(
+		"Total Files: %d (Processed: %d, Skipped: %d, Errors: %d)\n",
+		metrics.TotalFiles,
+		metrics.ProcessedFiles,
+		metrics.SkippedFiles,
+		metrics.ErrorFiles,
+	)
+	out.fprintf(
+		"Total Size: %s, Average Rate: %.1f files/sec\n",
+		r.formatBytes(metrics.ProcessedSize),
+		metrics.FilesPerSecond,
+	)
+	elapsed := metrics.ProcessingTime.Truncate(time.Millisecond)
+	out.fprintf(shared.MetricsFmtProcessingTime, elapsed)
+
+	return out.String()
+}
+
+// formatVerboseReport renders the full final report by writing each section in
+// a fixed order: summary, formats, phases, errors, resources, file sizes, recommendations.
+func (r *Reporter) formatVerboseReport(report ProfileReport) string {
+	out := newReportBuilder()
+	out.writeString("=== Comprehensive Processing Report ===\n\n")
+
+	sections := []func(*reportBuilder, ProfileReport){
+		r.writeSummarySection, r.writeFormatBreakdown, r.writePhaseBreakdown, r.writeErrorBreakdown,
+		r.writeResourceUsage, r.writeFileSizeStats, r.writeRecommendations,
+	}
+	for _, write := range sections {
+		write(out, report)
+	}
+
+	return out.String()
+}
+
+// writeSummarySection appends the SUMMARY block: file counts, processed and
+// average size, elapsed time with throughput, and the performance index.
+//
+//goland:noinspection ALL
+func (r *Reporter) writeSummarySection(b *reportBuilder, report ProfileReport) {
+	sum := report.Summary
+	elapsed := sum.ProcessingTime.Truncate(time.Millisecond)
+	// Byte throughput is reported in MB/sec.
+	mbPerSec := sum.BytesPerSecond / float64(shared.BytesPerMB)
+
+	b.writeString("SUMMARY:\n")
+	b.fprintf(
+		"  Files: %d total (%d processed, %d skipped, %d errors)\n",
+		sum.TotalFiles, sum.ProcessedFiles, sum.SkippedFiles, sum.ErrorFiles,
+	)
+	b.fprintf(
+		"  Size: %s processed (avg: %s per file)\n",
+		r.formatBytes(sum.ProcessedSize), r.formatBytes(int64(sum.AverageFileSize)),
+	)
+	b.fprintf("  Time: %v (%.1f files/sec, %.1f MB/sec)\n", elapsed, sum.FilesPerSecond, mbPerSec)
+	b.fprintf("  Performance Index: %.1f\n", report.PerformanceIndex)
+}
+
+// writeFormatBreakdown appends per-format file counts, sorted by format name.
+func (r *Reporter) writeFormatBreakdown(b *reportBuilder, report ProfileReport) {
+	breakdown := report.FormatBreakdown
+	if len(breakdown) == 0 {
+		return
+	}
+
+	// Collect and sort the keys first: map iteration order is not stable.
+	names := make([]string, 0, len(breakdown))
+	for name := range breakdown {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+	b.writeString("\nFORMAT BREAKDOWN:\n")
+	for _, name := range names {
+		b.fprintf(shared.MetricsFmtFileCount, name, breakdown[name].Count)
+	}
+}
+
+// writePhaseBreakdown appends the PHASE BREAKDOWN section, listing the known
+// phases in a fixed order and skipping any that are absent from the report.
+func (r *Reporter) writePhaseBreakdown(b *reportBuilder, report ProfileReport) {
+	if len(report.PhaseBreakdown) == 0 {
+		return
+	}
+
+	// A Caser can be reused for successive strings, so build it once rather than per phase.
+	title := cases.Title(language.English)
+	b.writeString("\nPHASE BREAKDOWN:\n")
+	for _, phase := range []string{
+		shared.MetricsPhaseCollection,
+		shared.MetricsPhaseProcessing,
+		shared.MetricsPhaseWriting,
+		shared.MetricsPhaseFinalize,
+	} {
+		pm, ok := report.PhaseBreakdown[phase]
+		if !ok {
+			continue
+		}
+		elapsed := pm.TotalTime.Truncate(time.Millisecond)
+		b.fprintf("  %s: %v (%.1f%%)\n", title.String(phase), elapsed, pm.Percentage)
+	}
+}
+
+// writeErrorBreakdown appends the ERROR BREAKDOWN section with one occurrence
+// count per error type, sorted by type; it writes nothing when there are none.
+func (r *Reporter) writeErrorBreakdown(b *reportBuilder, report ProfileReport) {
+	counts := report.ErrorBreakdown
+	if len(counts) == 0 {
+		return
+	}
+
+	b.writeString("\nERROR BREAKDOWN:\n")
+	for _, kind := range r.sortedMapKeys(counts) {
+		b.fprintf("  %s: %d occurrences\n", kind, counts[kind])
+	}
+}
+
+// writeResourceUsage appends the RESOURCE USAGE section: current and peak
+// memory in MB, current and maximum concurrency, and the goroutine count.
+func (r *Reporter) writeResourceUsage(b *reportBuilder, report ProfileReport) {
+	sum := report.Summary
+	b.writeString("\nRESOURCE USAGE:\n")
+	b.fprintf("  Memory: %dMB current, %dMB peak\n", sum.CurrentMemoryMB, sum.PeakMemoryMB)
+	b.fprintf(
+		"  Concurrency: %d current, %d max, %d goroutines\n",
+		sum.CurrentConcurrency,
+		sum.MaxConcurrency,
+		sum.GoroutineCount,
+	)
+}
+
+// writeFileSizeStats appends the FILE SIZE STATISTICS section (smallest-largest
+// range and average size); it is skipped when no files were processed.
+func (r *Reporter) writeFileSizeStats(b *reportBuilder, report ProfileReport) {
+	sum := report.Summary
+	if sum.ProcessedFiles == 0 {
+		return
+	}
+
+	smallest, largest := r.formatBytes(sum.SmallestFile), r.formatBytes(sum.LargestFile)
+	average := r.formatBytes(int64(sum.AverageFileSize))
+	b.writeString("\nFILE SIZE STATISTICS:\n")
+	b.fprintf("  Range: %s - %s\n", smallest, largest)
+	b.fprintf("  Average: %s\n", average)
+}
+
+// writeRecommendations appends the RECOMMENDATIONS section as a list numbered
+// from 1; it writes nothing when the report carries no recommendations.
+func (r *Reporter) writeRecommendations(b *reportBuilder, report ProfileReport) {
+	if len(report.Recommendations) == 0 {
+		return
+	}
+	b.writeString("\nRECOMMENDATIONS:\n")
+	for pos := range report.Recommendations {
+		b.fprintf("  %d. %s\n", pos+1, report.Recommendations[pos])
+	}
+}
+
+// formatBytes formats byte counts in human-readable format.
+func (r *Reporter) formatBytes(bytes int64) string {
+	if bytes == 0 {
+		return "0B"
+	}
+
+	if bytes < shared.BytesPerKB {
+		return fmt.Sprintf(shared.MetricsFmtBytesShort, bytes)
+	}
+
+	exp := 0
+	for n := bytes / shared.BytesPerKB; n >= shared.BytesPerKB; n /= shared.BytesPerKB {
+		exp++
+	}
+
+	divisor := int64(1)
+	for i := 0; i < exp+1; i++ {
+		divisor *= shared.BytesPerKB
+	}
+
+	return fmt.Sprintf(shared.MetricsFmtBytesHuman, float64(bytes)/float64(divisor), "KMGTPE"[exp])
+}
+
+// sortedMapKeys returns the keys of m in ascending order so that callers can
+// walk the map with a stable, repeatable ordering.
+func (r *Reporter) sortedMapKeys(m map[string]int64) []string {
+	names := make([]string, 0, len(m))
+	for name := range m {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+	return names
+}
+
+// QuickStats returns a one-line status for progress bars: processed/total files,
+// the rate when positive, and the error count (red when colors are enabled).
+func (r *Reporter) QuickStats() string {
+	if r == nil || r.collector == nil {
+		return "0/0 files"
+	}
+
+	m := r.collector.CurrentMetrics()
+	out := newReportBuilder()
+	out.fprintf("%d/%d files", m.ProcessedFiles, m.TotalFiles)
+	if m.FilesPerSecond > 0 {
+		out.fprintf(" (%.1f/s)", m.FilesPerSecond)
+	}
+	if m.ErrorFiles > 0 {
+		errFmt := " %d errors"
+		if r.colors {
+			errFmt = " \033[31m%d errors\033[0m"
+		}
+		out.fprintf(errFmt, m.ErrorFiles)
+	}
+
+	return out.String()
+}
--- a/metrics/reporter_test.go
+++ b/metrics/reporter_test.go
@@ -0,0 +1,518 @@
+package metrics
+
+import (
+	"errors"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/ivuorinen/gibidify/shared"
+)
+
+// TestNewReporter checks that the constructor stores the collector and both flags.
+func TestNewReporter(t *testing.T) {
+	c := NewCollector()
+	got := NewReporter(c, true, true)
+	if got == nil {
+		t.Fatal("NewReporter returned nil")
+	}
+
+	if got.collector != c {
+		t.Error("Reporter collector not set correctly")
+	}
+
+	if !got.verbose {
+		t.Error("Verbose flag not set correctly")
+	}
+
+	if !got.colors {
+		t.Error("Colors flag not set correctly")
+	}
+}
+
+// TestReportProgressBasic checks the non-verbose progress line for one processed file.
+func TestReportProgressBasic(t *testing.T) {
+	c := NewCollector()
+	rep := NewReporter(c, false, false)
+
+	// Add some test data
+	c.RecordFileProcessed(FileProcessingResult{
+		FilePath: shared.TestPathTestFileGo,
+		FileSize: 1024,
+		Success:  true,
+		Format:   "go",
+	})
+
+	// Wait so that a FilesPerSecond rate can be calculated
+	time.Sleep(10 * time.Millisecond)
+
+	got := rep.ReportProgress()
+
+	if want := "Processed: 1 files"; !strings.Contains(got, want) {
+		t.Errorf("Expected progress to contain processed files count, got: %s", got)
+	}
+
+	if want := "files/sec"; !strings.Contains(got, want) {
+		t.Errorf("Expected progress to contain files/sec, got: %s", got)
+	}
+}
+
+// TestReportProgressWithErrors checks that a failed file is reported in the
+// error count while only the successful file counts as processed.
+func TestReportProgressWithErrors(t *testing.T) {
+	c := NewCollector()
+	rep := NewReporter(c, false, false)
+
+	// Add successful file
+	c.RecordFileProcessed(FileProcessingResult{
+		FilePath: "/test/success.go",
+		FileSize: 1024,
+		Success:  true,
+		Format:   "go",
+	})
+
+	// Add error file
+	c.RecordFileProcessed(FileProcessingResult{
+		FilePath: shared.TestPathTestErrorGo,
+		FileSize: 512,
+		Success:  false,
+		Error:    errors.New(shared.TestErrSyntaxError),
+	})
+
+	got := rep.ReportProgress()
+
+	if !strings.Contains(got, "Processed: 1 files") {
+		t.Errorf("Expected progress to contain processed files count, got: %s", got)
+	}
+
+	if !strings.Contains(got, "Errors: 1") {
+		t.Errorf("Expected progress to contain error count, got: %s", got)
+	}
+}
+
+// TestReportProgressWithSkipped checks that a skipped file is reported in the
+// skipped count of the basic progress line.
+func TestReportProgressWithSkipped(t *testing.T) {
+	c := NewCollector()
+	rep := NewReporter(c, false, false)
+
+	// Add successful file
+	c.RecordFileProcessed(FileProcessingResult{
+		FilePath: "/test/success.go",
+		FileSize: 1024,
+		Success:  true,
+		Format:   "go",
+	})
+
+	// Add skipped file
+	c.RecordFileProcessed(FileProcessingResult{
+		FilePath:   "/test/binary.exe",
+		FileSize:   2048,
+		Success:    false,
+		Skipped:    true,
+		SkipReason: "binary file",
+	})
+
+	got := rep.ReportProgress()
+
+	if !strings.Contains(got, "Skipped: 1") {
+		t.Errorf("Expected progress to contain skipped count, got: %s", got)
+	}
+}
+
+// TestReportProgressVerbose records three files and two phase timings, then
+// checks that the verbose progress report contains its header, the format
+// breakdown with the go count, and the memory and concurrency lines.
+func TestReportProgressVerbose(t *testing.T) {
+	c := NewCollector()
+	rep := NewReporter(c, true, false)
+
+	// Three successful files, one per format.
+	files := []FileProcessingResult{
+		{FilePath: shared.TestPathTestFile1Go, FileSize: 1000, Success: true, Format: "go"},
+		{FilePath: shared.TestPathTestFile2JS, FileSize: 2000, Success: true, Format: "js"},
+		{FilePath: "/test/file3.py", FileSize: 1500, Success: true, Format: "py"},
+	}
+
+	for idx := range files {
+		c.RecordFileProcessed(files[idx])
+	}
+
+	c.RecordPhaseTime(shared.MetricsPhaseCollection, 50*time.Millisecond)
+	c.RecordPhaseTime(shared.MetricsPhaseProcessing, 100*time.Millisecond)
+
+	got := rep.ReportProgress()
+
+	// Each entry pairs a required substring with the failure message for its absence.
+	checks := []struct {
+		want string
+		msg  string
+	}{
+		{"=== Processing Statistics ===", "Expected verbose header not found"},
+		{"Format Breakdown:", "Expected format breakdown not found"},
+		{"go: 1 files", "Expected go format count not found"},
+		{"Memory - Current:", "Expected memory information not found"},
+		{"Concurrency - Current:", "Expected concurrency information not found"},
+	}
+
+	for _, chk := range checks {
+		if !strings.Contains(got, chk.want) {
+			t.Error(chk.msg)
+		}
+	}
+}
+
+// TestReportFinalBasic checks the header and file counts of the non-verbose final report.
+func TestReportFinalBasic(t *testing.T) {
+	c := NewCollector()
+	rep := NewReporter(c, false, false)
+
+	// Two successes and one failure.
+	files := []FileProcessingResult{
+		{FilePath: shared.TestPathTestFile1Go, FileSize: 1000, Success: true, Format: "go"},
+		{FilePath: shared.TestPathTestFile2JS, FileSize: 2000, Success: true, Format: "js"},
+		{
+			FilePath: shared.TestPathTestErrorPy,
+			FileSize: 500,
+			Success:  false,
+			Error:    errors.New(shared.TestErrSyntaxError),
+		},
+	}
+
+	for idx := range files {
+		c.RecordFileProcessed(files[idx])
+	}
+
+	c.Finish()
+	got := rep.ReportFinal()
+
+	checks := []struct {
+		want string
+		msg  string
+	}{
+		{"=== Processing Complete ===", "Expected completion header not found"},
+		{"Total Files: 3", "Expected total files count not found"},
+		{"Processed: 2", "Expected processed files count not found"},
+		{"Errors: 1", "Expected error count not found"},
+	}
+	for _, chk := range checks {
+		if !strings.Contains(got, chk.want) {
+			t.Error(chk.msg)
+		}
+	}
+}
+
+// TestReportFinalVerbose checks every section heading and the per-format and per-error counts.
+func TestReportFinalVerbose(t *testing.T) {
+	c := NewCollector()
+	rep := NewReporter(c, true, false)
+
+	// Three successes, one failure and one skipped file.
+	results := []FileProcessingResult{
+		{FilePath: shared.TestPathTestFile1Go, FileSize: 1000, Success: true, Format: "go"},
+		{FilePath: "/test/file2.go", FileSize: 2000, Success: true, Format: "go"},
+		{FilePath: "/test/file3.js", FileSize: 1500, Success: true, Format: "js"},
+		{
+			FilePath: shared.TestPathTestErrorPy,
+			FileSize: 500,
+			Success:  false,
+			Error:    errors.New(shared.TestErrSyntaxError),
+		},
+		{FilePath: "/test/skip.bin", FileSize: 3000, Success: false, Skipped: true, SkipReason: "binary"},
+	}
+
+	for idx := range results {
+		c.RecordFileProcessed(results[idx])
+	}
+
+	c.RecordPhaseTime(shared.MetricsPhaseCollection, 50*time.Millisecond)
+	c.RecordPhaseTime(shared.MetricsPhaseProcessing, 150*time.Millisecond)
+	c.RecordPhaseTime(shared.MetricsPhaseWriting, 25*time.Millisecond)
+	c.Finish()
+	got := rep.ReportFinal()
+
+	// Check comprehensive report sections
+	if !strings.Contains(got, "=== Comprehensive Processing Report ===") {
+		t.Error("Expected comprehensive header not found")
+	}
+
+	if !strings.Contains(got, "SUMMARY:") {
+		t.Error("Expected summary section not found")
+	}
+
+	if !strings.Contains(got, "FORMAT BREAKDOWN:") {
+		t.Error("Expected format breakdown section not found")
+	}
+
+	if !strings.Contains(got, "PHASE BREAKDOWN:") {
+		t.Error("Expected phase breakdown section not found")
+	}
+
+	if !strings.Contains(got, "ERROR BREAKDOWN:") {
+		t.Error("Expected error breakdown section not found")
+	}
+
+	if !strings.Contains(got, "RESOURCE USAGE:") {
+		t.Error("Expected resource usage section not found")
+	}
+
+	if !strings.Contains(got, "FILE SIZE STATISTICS:") {
+		t.Error("Expected file size statistics section not found")
+	}
+
+	if !strings.Contains(got, "RECOMMENDATIONS:") {
+		t.Error("Expected recommendations section not found")
+	}
+
+	// Check specific values
+	if !strings.Contains(got, "go: 2 files") {
+		t.Error("Expected go format count not found")
+	}
+
+	if !strings.Contains(got, "js: 1 files") {
+		t.Error("Expected js format count not found")
+	}
+
+	if !strings.Contains(got, "syntax error: 1 occurrences") {
+		t.Error("Expected error count not found")
+	}
+}
+
+func TestFormatBytes(t *testing.T) {
+	collector := NewCollector()
+	reporter := NewReporter(collector, false, false)
+
+	testCases := []struct {
+		bytes    int64
+		expected string
+	}{
+		{0, "0B"},
+		{512, "512B"},
+		{1024, "1.0KB"},
+		{1536, "1.5KB"},
+		{1024 * 1024, "1.0MB"},
+		{1024 * 1024 * 1024, "1.0GB"},
+		{5 * 1024 * 1024, "5.0MB"},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.expected, func(t *testing.T) {
+			result := reporter.formatBytes(tc.bytes)
+			if result != tc.expected {
+				t.Errorf("formatBytes(%d) = %s, want %s", tc.bytes, result, tc.expected)
+			}
+		})
+	}
+}
+
+// TestGetQuickStats checks the processed/total, rate and error parts of QuickStats.
+func TestGetQuickStats(t *testing.T) {
+	c := NewCollector()
+	rep := NewReporter(c, false, false)
+
+	// Two successes and one failure.
+	results := []FileProcessingResult{
+		{FilePath: shared.TestPathTestFile1Go, FileSize: 1000, Success: true, Format: "go"},
+		{FilePath: shared.TestPathTestFile2JS, FileSize: 2000, Success: true, Format: "js"},
+		{
+			FilePath: shared.TestPathTestErrorPy,
+			FileSize: 500,
+			Success:  false,
+			Error:    errors.New(shared.TestErrTestErrorMsg),
+		},
+	}
+
+	for idx := range results {
+		c.RecordFileProcessed(results[idx])
+	}
+
+	// Wait to ensure rate calculation
+	time.Sleep(10 * time.Millisecond)
+
+	got := rep.QuickStats()
+	if !strings.Contains(got, "2/3 files") {
+		t.Errorf("Expected processed/total files, got: %s", got)
+	}
+
+	if !strings.Contains(got, "/s)") {
+		t.Errorf("Expected rate information, got: %s", got)
+	}
+
+	if !strings.Contains(got, "1 errors") {
+		t.Errorf("Expected error count, got: %s", got)
+	}
+}
+
+// TestGetQuickStatsWithColors checks that the error count is wrapped in ANSI codes when colors are on.
+func TestGetQuickStatsWithColors(t *testing.T) {
+	c := NewCollector()
+	rep := NewReporter(c, false, true)
+
+	// Add error file
+	c.RecordFileProcessed(FileProcessingResult{
+		FilePath: shared.TestPathTestErrorGo,
+		FileSize: 512,
+		Success:  false,
+		Error:    errors.New(shared.TestErrTestErrorMsg),
+	})
+
+	got := rep.QuickStats()
+
+	// Should contain ANSI color codes for errors
+	if !strings.Contains(got, "\033[31m") {
+		t.Errorf("Expected color codes for errors, got: %s", got)
+	}
+
+	if !strings.Contains(got, "\033[0m") {
+		t.Errorf("Expected color reset code, got: %s", got)
+	}
+}
+
+// TestReporterEmptyData calls all three report entry points on a collector
+// that has recorded nothing and checks that each one still produces its
+// zero-count output.
+func TestReporterEmptyData(t *testing.T) {
+	c := NewCollector()
+	rep := NewReporter(c, false, false)
+
+	// Test with no data
+	if got := rep.ReportProgress(); !strings.Contains(got, "Processed: 0 files") {
+		t.Errorf("Expected empty progress report, got: %s", got)
+	}
+
+	if got := rep.ReportFinal(); !strings.Contains(got, "Total Files: 0") {
+		t.Errorf("Expected empty final report, got: %s", got)
+	}
+
+	if got := rep.QuickStats(); !strings.Contains(got, "0/0 files") {
+		t.Errorf("Expected empty stats, got: %s", got)
+	}
+}
+
+// setupBenchmarkReporter returns a Reporter whose collector holds fileCount results (per ten: one
+// failure, one skipped, eight successes) plus collection, processing and writing phase timings.
+func setupBenchmarkReporter(fileCount int, verbose, colors bool) *Reporter {
+	c := NewCollector()
+	formats := []string{"go", "js", "py", "ts", "rs", "java", "cpp", "rb"}
+
+	for n := range fileCount {
+		var res FileProcessingResult
+		switch n % 10 {
+		case 0:
+			res = FileProcessingResult{
+				FilePath: shared.TestPathTestErrorGo,
+				FileSize: 500,
+				Success:  false,
+				Error:    errors.New(shared.TestErrTestErrorMsg),
+			}
+		case 1:
+			res = FileProcessingResult{
+				FilePath:   "/test/binary.exe",
+				FileSize:   2048,
+				Success:    false,
+				Skipped:    true,
+				SkipReason: "binary file",
+			}
+		default:
+			res = FileProcessingResult{
+				FilePath: shared.TestPathTestFileGo,
+				FileSize: int64(1000 + n*100),
+				Success:  true,
+				Format:   formats[n%len(formats)],
+			}
+		}
+		c.RecordFileProcessed(res)
+	}
+
+	c.RecordPhaseTime(shared.MetricsPhaseCollection, 50*time.Millisecond)
+	c.RecordPhaseTime(shared.MetricsPhaseProcessing, 150*time.Millisecond)
+	c.RecordPhaseTime(shared.MetricsPhaseWriting, 25*time.Millisecond)
+
+	return NewReporter(c, verbose, colors)
+}
+
+// BenchmarkReporterQuickStats measures QuickStats for collectors of increasing size.
+func BenchmarkReporterQuickStats(b *testing.B) {
+	sizes := []struct {
+		name  string
+		files int
+	}{
+		{name: "10files", files: 10},
+		{name: "100files", files: 100},
+		{name: "1000files", files: 1000},
+	}
+
+	for _, size := range sizes {
+		b.Run(size.name, func(b *testing.B) {
+			rep := setupBenchmarkReporter(size.files, false, false)
+			b.ResetTimer()
+			for b.Loop() {
+				_ = rep.QuickStats()
+			}
+		})
+	}
+}
+
+// BenchmarkReporterReportProgress measures ReportProgress in basic and verbose modes.
+func BenchmarkReporterReportProgress(b *testing.B) {
+	variants := []struct {
+		name    string
+		files   int
+		verbose bool
+	}{
+		{name: "basic_10files", files: 10, verbose: false},
+		{name: "basic_100files", files: 100, verbose: false},
+		{name: "verbose_10files", files: 10, verbose: true},
+		{name: "verbose_100files", files: 100, verbose: true},
+	}
+
+	for _, v := range variants {
+		b.Run(v.name, func(b *testing.B) {
+			rep := setupBenchmarkReporter(v.files, v.verbose, false)
+			b.ResetTimer()
+			for b.Loop() {
+				_ = rep.ReportProgress()
+			}
+		})
+	}
+}
+
+// BenchmarkReporterReportFinal measures ReportFinal on finished collectors in both modes.
+func BenchmarkReporterReportFinal(b *testing.B) {
+	variants := []struct {
+		name    string
+		files   int
+		verbose bool
+	}{
+		{name: "basic_10files", files: 10, verbose: false},
+		{name: "basic_100files", files: 100, verbose: false},
+		{name: "basic_1000files", files: 1000, verbose: false},
+		{name: "verbose_10files", files: 10, verbose: true},
+		{name: "verbose_100files", files: 100, verbose: true},
+		{name: "verbose_1000files", files: 1000, verbose: true},
+	}
+
+	for _, v := range variants {
+		b.Run(v.name, func(b *testing.B) {
+			rep := setupBenchmarkReporter(v.files, v.verbose, false)
+			rep.collector.Finish()
+			b.ResetTimer()
+			for b.Loop() {
+				_ = rep.ReportFinal()
+			}
+		})
+	}
+}
+
+// BenchmarkFormatBytes measures formatBytes across five byte counts from 0 to 1 GiB per iteration.
+func BenchmarkFormatBytes(b *testing.B) {
+	rep := NewReporter(NewCollector(), false, false)
+
+	inputs := []int64{0, 512, 1024, 1024 * 1024, 1024 * 1024 * 1024}
+
+	for b.Loop() {
+		for idx := range inputs {
+			_ = rep.formatBytes(inputs[idx])
+		}
+	}
+}
--- a/metrics/types.go
+++ b/metrics/types.go
@@ -0,0 +1,134 @@
+// Package metrics provides comprehensive processing statistics and profiling capabilities.
+package metrics
+
+import (
+	"sync"
+	"time"
+)
+
+// ProcessingMetrics is the JSON-serializable set of processing statistics rendered by Reporter.
+type ProcessingMetrics struct {
+	// File processing metrics
+	TotalFiles     int64     `json:"total_files"`
+	ProcessedFiles int64     `json:"processed_files"`
+	SkippedFiles   int64     `json:"skipped_files"`
+	ErrorFiles     int64     `json:"error_files"`
+	LastUpdated    time.Time `json:"last_updated"`
+
+	// Size metrics
+	TotalSize       int64   `json:"total_size_bytes"`
+	ProcessedSize   int64   `json:"processed_size_bytes"`
+	AverageFileSize float64 `json:"average_file_size_bytes"`
+	LargestFile     int64   `json:"largest_file_bytes"`
+	SmallestFile    int64   `json:"smallest_file_bytes"`
+
+	// Performance metrics (EndTime uses omitzero: omitempty never omits a struct-typed field)
+	StartTime      time.Time     `json:"start_time"`
+	EndTime        time.Time     `json:"end_time,omitzero"`
+	ProcessingTime time.Duration `json:"processing_duration"`
+	FilesPerSecond float64       `json:"files_per_second"`
+	BytesPerSecond float64       `json:"bytes_per_second"`
+
+	// Memory and resource metrics
+	PeakMemoryMB    int64 `json:"peak_memory_mb"`
+	CurrentMemoryMB int64 `json:"current_memory_mb"`
+	GoroutineCount  int   `json:"goroutine_count"`
+
+	// Format specific metrics
+	FormatCounts map[string]int64 `json:"format_counts"`
+	ErrorCounts  map[string]int64 `json:"error_counts"`
+
+	// Concurrency metrics
+	MaxConcurrency     int   `json:"max_concurrency"`
+	CurrentConcurrency int32 `json:"current_concurrency"`
+
+	// Phase timings
+	PhaseTimings map[string]time.Duration `json:"phase_timings"`
+}
+
+// Collector collects and manages processing metrics. It holds a sync.RWMutex, so share it by pointer; do not copy it.
+type Collector struct {
+	metrics    ProcessingMetrics
+	mu         sync.RWMutex
+	startTime  time.Time
+	lastUpdate time.Time
+
+	// Counters described as atomic. NOTE(review): these are plain int64 fields; confirm every access uses sync/atomic.
+	totalFiles     int64
+	processedFiles int64
+	skippedFiles   int64
+	errorFiles     int64
+	totalSize      int64
+	processedSize  int64
+	largestFile    int64
+	smallestFile   int64 // Using max int64 as initial value to track minimum
+
+	// Concurrency tracking: current level and the highest level seen
+	concurrency     int32
+	peakConcurrency int32
+
+	// Per-format and per-error counts, keyed by name; described as mutex-protected (presumably by mu; verify)
+	formatCounts map[string]int64
+	errorCounts  map[string]int64
+
+	// Durations recorded per phase name
+	phaseTimings map[string]time.Duration
+}
+
+// FileProcessingResult describes the outcome of one file: success, failure with Error, or Skipped with SkipReason.
+type FileProcessingResult struct {
+	FilePath       string        `json:"file_path"`
+	FileSize       int64         `json:"file_size"`
+	Format         string        `json:"format"`
+	ProcessingTime time.Duration `json:"processing_time"`
+	Success        bool          `json:"success"`
+	Error          error         `json:"error,omitempty"`
+	Skipped        bool          `json:"skipped"`
+	SkipReason     string        `json:"skip_reason,omitempty"`
+}
+
+// ProfileReport is the report returned by Collector.GenerateReport: a metrics summary plus breakdowns and advice.
+type ProfileReport struct {
+	Summary          ProcessingMetrics        `json:"summary"`
+	TopLargestFiles  []FileInfo               `json:"top_largest_files"`
+	TopSlowestFiles  []FileInfo               `json:"top_slowest_files"`
+	FormatBreakdown  map[string]FormatMetrics `json:"format_breakdown"`
+	ErrorBreakdown   map[string]int64         `json:"error_breakdown"`
+	HourlyStats      []HourlyProcessingStats  `json:"hourly_stats,omitempty"`
+	PhaseBreakdown   map[string]PhaseMetrics  `json:"phase_breakdown"`
+	PerformanceIndex float64                  `json:"performance_index"`
+	Recommendations  []string                 `json:"recommendations"`
+}
+
+// FileInfo describes one processed file; ProfileReport uses it for its top-largest and top-slowest lists.
+type FileInfo struct {
+	Path           string        `json:"path"`
+	Size           int64         `json:"size"`
+	ProcessingTime time.Duration `json:"processing_time"`
+	Format         string        `json:"format"`
+}
+
+// FormatMetrics holds the count plus size and processing-time totals and averages for one file format.
+type FormatMetrics struct {
+	Count                 int64         `json:"count"`
+	TotalSize             int64         `json:"total_size"`
+	AverageSize           float64       `json:"average_size"`
+	TotalProcessingTime   time.Duration `json:"total_processing_time"`
+	AverageProcessingTime time.Duration `json:"average_processing_time"`
+}
+
+// HourlyProcessingStats holds the files, bytes and average rate recorded for one hour (ProfileReport.HourlyStats).
+type HourlyProcessingStats struct {
+	Hour           time.Time `json:"hour"`
+	FilesProcessed int64     `json:"files_processed"`
+	BytesProcessed int64     `json:"bytes_processed"`
+	AverageRate    float64   `json:"average_rate"`
+}
+
+// PhaseMetrics holds timing totals for one processing phase; the verbose report prints Percentage with a % sign.
+type PhaseMetrics struct {
+	TotalTime   time.Duration `json:"total_time"`
+	Count       int64         `json:"count"`
+	AverageTime time.Duration `json:"average_time"`
+	Percentage  float64       `json:"percentage_of_total"`
+}