feat: update go to 1.25, add permissions and envs (#49)

* chore(ci): update go to 1.25, add permissions and envs
* fix(ci): update pr-lint.yml
* chore: update go, fix linting
* fix: tests and linting
* fix(lint): lint fixes, renovate should now pass
* fix: updates, security upgrades
* chore: workflow updates, lint
* fix: more lint, checkmake, and other fixes
* fix: more lint, convert scripts to POSIX compliant
* fix: simplify codeql workflow
* tests: increase test coverage, fix found issues
* fix(lint): editorconfig checking, add to linters
* fix(lint): shellcheck, add to linters
* fix(lint): apply cr comment suggestions
* fix(ci): remove step-security/harden-runner
* fix(lint): remove duplication, apply cr fixes
* fix(ci): tests in CI/CD pipeline
* chore(lint): deduplication of strings
* fix(lint): apply cr comment suggestions
* fix(ci): actionlint
* fix(lint): apply cr comment suggestions
* chore: lint, add deps management
Committed by GitHub on 2025-10-10 12:14:42 +03:00
parent 958f5952a0
commit 3f65b813bd
100 changed files with 6997 additions and 1225 deletions

View File

@@ -3,6 +3,7 @@ package fileproc
import (
"context"
"math"
"runtime"
"sync"
"sync/atomic"
@@ -11,6 +12,7 @@ import (
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/gibidiutils"
)
// BackpressureManager manages memory usage and applies back-pressure when needed.
@@ -59,21 +61,22 @@ func (bp *BackpressureManager) CreateChannels() (chan string, chan WriteRequest)
}
// ShouldApplyBackpressure checks if back-pressure should be applied.
func (bp *BackpressureManager) ShouldApplyBackpressure(ctx context.Context) bool {
func (bp *BackpressureManager) ShouldApplyBackpressure(_ context.Context) bool {
if !bp.enabled {
return false
}
// Check if we should evaluate memory usage
filesProcessed := atomic.AddInt64(&bp.filesProcessed, 1)
if int(filesProcessed)%bp.memoryCheckInterval != 0 {
// Avoid division by zero: if the interval is <= 0, check memory on every file
if bp.memoryCheckInterval > 0 && int(filesProcessed)%bp.memoryCheckInterval != 0 {
return false
}
// Get current memory usage
var m runtime.MemStats
runtime.ReadMemStats(&m)
currentMemory := int64(m.Alloc)
currentMemory := gibidiutils.SafeUint64ToInt64WithDefault(m.Alloc, math.MaxInt64)
bp.mu.Lock()
defer bp.mu.Unlock()
@@ -133,7 +136,7 @@ func (bp *BackpressureManager) GetStats() BackpressureStats {
return BackpressureStats{
Enabled: bp.enabled,
FilesProcessed: atomic.LoadInt64(&bp.filesProcessed),
CurrentMemoryUsage: int64(m.Alloc),
CurrentMemoryUsage: gibidiutils.SafeUint64ToInt64WithDefault(m.Alloc, math.MaxInt64),
MaxMemoryUsage: bp.maxMemoryUsage,
MemoryWarningActive: bp.memoryWarningLogged,
LastMemoryCheck: bp.lastMemoryCheck,
@@ -160,8 +163,8 @@ func (bp *BackpressureManager) WaitForChannelSpace(ctx context.Context, fileCh c
return
}
// Check if file channel is getting full (>90% capacity)
if len(fileCh) > bp.maxPendingFiles*9/10 {
// Check if file channel is getting full (>=90% capacity)
if bp.maxPendingFiles > 0 && len(fileCh) >= bp.maxPendingFiles*9/10 {
logrus.Debugf("File channel is %d%% full, waiting for space", len(fileCh)*100/bp.maxPendingFiles)
// Wait a bit for the channel to drain
@@ -172,8 +175,8 @@ func (bp *BackpressureManager) WaitForChannelSpace(ctx context.Context, fileCh c
}
}
// Check if write channel is getting full (>90% capacity)
if len(writeCh) > bp.maxPendingWrites*9/10 {
// Check if write channel is getting full (>=90% capacity)
if bp.maxPendingWrites > 0 && len(writeCh) >= bp.maxPendingWrites*9/10 {
logrus.Debugf("Write channel is %d%% full, waiting for space", len(writeCh)*100/bp.maxPendingWrites)
// Wait a bit for the channel to drain
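
The gibidiutils.SafeUint64ToInt64WithDefault helper introduced above is not shown in this diff; judging from its name and the call sites (both pass math.MaxInt64 as the default), a minimal sketch could be:

func SafeUint64ToInt64WithDefault(v uint64, def int64) int64 {
	// uint64 values above math.MaxInt64 would overflow int64; use the caller's default.
	if v > math.MaxInt64 {
		return def
	}
	return int64(v)
}

With def set to math.MaxInt64 at both call sites, an implausibly large m.Alloc reading clamps to the maximum instead of wrapping negative.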

View File

@@ -0,0 +1,177 @@
package fileproc
import (
"context"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func TestBackpressureManagerShouldApplyBackpressure(t *testing.T) {
ctx := context.Background()
t.Run("returns false when disabled", func(t *testing.T) {
bm := NewBackpressureManager()
bm.enabled = false
shouldApply := bm.ShouldApplyBackpressure(ctx)
assert.False(t, shouldApply)
})
t.Run("checks memory at intervals", func(_ *testing.T) {
bm := NewBackpressureManager()
bm.enabled = true
bm.memoryCheckInterval = 10
// Should not check memory on most calls
for i := 1; i < 10; i++ {
shouldApply := bm.ShouldApplyBackpressure(ctx)
// Can't predict result, but shouldn't panic
_ = shouldApply
}
// Should check memory on 10th call
shouldApply := bm.ShouldApplyBackpressure(ctx)
// Result depends on actual memory usage
_ = shouldApply
})
t.Run("detects high memory usage", func(t *testing.T) {
bm := NewBackpressureManager()
bm.enabled = true
bm.memoryCheckInterval = 1
bm.maxMemoryUsage = 1 // Set very low limit to trigger
shouldApply := bm.ShouldApplyBackpressure(ctx)
// Should detect high memory usage
assert.True(t, shouldApply)
})
}
func TestBackpressureManagerApplyBackpressure(t *testing.T) {
ctx := context.Background()
t.Run("does nothing when disabled", func(t *testing.T) {
bm := NewBackpressureManager()
bm.enabled = false
// Use a channel to verify the function returns quickly
done := make(chan struct{})
go func() {
bm.ApplyBackpressure(ctx)
close(done)
}()
// Should complete quickly when disabled
select {
case <-done:
// Success - function returned
case <-time.After(50 * time.Millisecond):
t.Fatal("ApplyBackpressure did not return quickly when disabled")
}
})
t.Run("applies delay when enabled", func(t *testing.T) {
bm := NewBackpressureManager()
bm.enabled = true
// Use a channel to verify the function blocks for some time
done := make(chan struct{})
started := make(chan struct{})
go func() {
close(started)
bm.ApplyBackpressure(ctx)
close(done)
}()
// Wait for goroutine to start
<-started
// Should NOT complete immediately - verify it blocks for at least 5ms
select {
case <-done:
t.Fatal("ApplyBackpressure returned too quickly when enabled")
case <-time.After(5 * time.Millisecond):
// Good - it's blocking as expected
}
// Now wait for it to complete (should finish within reasonable time)
select {
case <-done:
// Success - function eventually returned
case <-time.After(500 * time.Millisecond):
t.Fatal("ApplyBackpressure did not complete within timeout")
}
})
t.Run("respects context cancellation", func(t *testing.T) {
bm := NewBackpressureManager()
bm.enabled = true
ctx, cancel := context.WithCancel(context.Background())
cancel() // Cancel immediately
start := time.Now()
bm.ApplyBackpressure(ctx)
duration := time.Since(start)
// Should return quickly when context is cancelled
assert.Less(t, duration, 5*time.Millisecond)
})
}
func TestBackpressureManagerLogBackpressureInfo(t *testing.T) {
bm := NewBackpressureManager()
bm.enabled = true // Ensure enabled so filesProcessed is incremented
// Apply some operations
ctx := context.Background()
bm.ShouldApplyBackpressure(ctx)
bm.ApplyBackpressure(ctx)
// This should not panic
bm.LogBackpressureInfo()
stats := bm.GetStats()
assert.Greater(t, stats.FilesProcessed, int64(0))
}
func TestBackpressureManagerMemoryLimiting(t *testing.T) {
t.Run("triggers on low memory limit", func(t *testing.T) {
bm := NewBackpressureManager()
bm.enabled = true
bm.memoryCheckInterval = 1 // Check every file
bm.maxMemoryUsage = 1 // Very low limit to guarantee trigger
ctx := context.Background()
// Should detect memory over limit
shouldApply := bm.ShouldApplyBackpressure(ctx)
assert.True(t, shouldApply)
stats := bm.GetStats()
assert.True(t, stats.MemoryWarningActive)
})
t.Run("resets warning when memory normalizes", func(t *testing.T) {
bm := NewBackpressureManager()
bm.enabled = true
bm.memoryCheckInterval = 1
// Simulate warning by first triggering high memory usage
bm.maxMemoryUsage = 1 // Very low to trigger warning
ctx := context.Background()
_ = bm.ShouldApplyBackpressure(ctx)
stats := bm.GetStats()
assert.True(t, stats.MemoryWarningActive)
// Now set high limit so we're under it
bm.maxMemoryUsage = 1024 * 1024 * 1024 * 10 // 10GB
shouldApply := bm.ShouldApplyBackpressure(ctx)
assert.False(t, shouldApply)
// Warning should be reset (via public API)
stats = bm.GetStats()
assert.False(t, stats.MemoryWarningActive)
})
}

View File

@@ -0,0 +1,262 @@
package fileproc
import (
"context"
"testing"
"time"
"github.com/spf13/viper"
"github.com/stretchr/testify/assert"
)
const (
// CI-safe timeout constants
fastOpTimeout = 100 * time.Millisecond // Operations that should complete quickly
slowOpMinTime = 10 * time.Millisecond // Minimum time for blocking operations
)
// cleanupViperConfig is a test helper that captures the current values of the
// given viper config keys and registers a t.Cleanup that restores them after the test.
func cleanupViperConfig(t *testing.T, keys ...string) {
t.Helper()
// Capture original values
origValues := make(map[string]interface{})
for _, key := range keys {
origValues[key] = viper.Get(key)
}
// Register cleanup to restore values
t.Cleanup(func() {
for key, val := range origValues {
if val != nil {
viper.Set(key, val)
}
}
})
}
func TestBackpressureManagerCreateChannels(t *testing.T) {
t.Run("creates buffered channels when enabled", func(t *testing.T) {
// Capture and restore viper config
cleanupViperConfig(t, testBackpressureEnabled, testBackpressureMaxFiles, testBackpressureMaxWrites)
viper.Set(testBackpressureEnabled, true)
viper.Set(testBackpressureMaxFiles, 10)
viper.Set(testBackpressureMaxWrites, 10)
bm := NewBackpressureManager()
fileCh, writeCh := bm.CreateChannels()
assert.NotNil(t, fileCh)
assert.NotNil(t, writeCh)
// Test that channels have buffer capacity
assert.Greater(t, cap(fileCh), 0)
assert.Greater(t, cap(writeCh), 0)
// Test sending and receiving
fileCh <- "test.go"
val := <-fileCh
assert.Equal(t, "test.go", val)
writeCh <- WriteRequest{Content: "test content"}
writeReq := <-writeCh
assert.Equal(t, "test content", writeReq.Content)
close(fileCh)
close(writeCh)
})
t.Run("creates unbuffered channels when disabled", func(t *testing.T) {
// Use viper to configure instead of direct field access
cleanupViperConfig(t, testBackpressureEnabled)
viper.Set(testBackpressureEnabled, false)
bm := NewBackpressureManager()
fileCh, writeCh := bm.CreateChannels()
assert.NotNil(t, fileCh)
assert.NotNil(t, writeCh)
// Unbuffered channels have capacity 0
assert.Equal(t, 0, cap(fileCh))
assert.Equal(t, 0, cap(writeCh))
close(fileCh)
close(writeCh)
})
}
func TestBackpressureManagerWaitForChannelSpace(t *testing.T) {
t.Run("does nothing when disabled", func(t *testing.T) {
// Use viper to configure instead of direct field access
cleanupViperConfig(t, testBackpressureEnabled)
viper.Set(testBackpressureEnabled, false)
bm := NewBackpressureManager()
fileCh := make(chan string, 1)
writeCh := make(chan WriteRequest, 1)
// Use context with timeout instead of measuring elapsed time
ctx, cancel := context.WithTimeout(context.Background(), fastOpTimeout)
defer cancel()
done := make(chan struct{})
go func() {
bm.WaitForChannelSpace(ctx, fileCh, writeCh)
close(done)
}()
// Should return immediately (before timeout)
select {
case <-done:
// Success - operation completed quickly
case <-ctx.Done():
t.Fatal("WaitForChannelSpace should return immediately when disabled")
}
close(fileCh)
close(writeCh)
})
t.Run("waits when file channel is nearly full", func(t *testing.T) {
// Use viper to configure instead of direct field access
cleanupViperConfig(t, testBackpressureEnabled, testBackpressureMaxFiles)
viper.Set(testBackpressureEnabled, true)
viper.Set(testBackpressureMaxFiles, 10)
bm := NewBackpressureManager()
// Create channel with exact capacity
fileCh := make(chan string, 10)
writeCh := make(chan WriteRequest, 10)
// Fill file channel to >=90% of capacity (minimum of 1 entry)
target := max(1, int(float64(cap(fileCh))*0.9))
for i := 0; i < target; i++ {
fileCh <- "file.txt"
}
// Test that it blocks by verifying it doesn't complete immediately
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
done := make(chan struct{})
start := time.Now()
go func() {
bm.WaitForChannelSpace(ctx, fileCh, writeCh)
close(done)
}()
// Verify it doesn't complete immediately (within first millisecond)
select {
case <-done:
t.Fatal("WaitForChannelSpace should block when channel is nearly full")
case <-time.After(1 * time.Millisecond):
// Good - it's blocking as expected
}
// Wait for it to complete
<-done
duration := time.Since(start)
// Just verify it took some measurable time (very lenient for CI)
assert.GreaterOrEqual(t, duration, 1*time.Millisecond)
// Clean up
for i := 0; i < target; i++ {
<-fileCh
}
close(fileCh)
close(writeCh)
})
t.Run("waits when write channel is nearly full", func(t *testing.T) {
// Use viper to configure instead of direct field access
cleanupViperConfig(t, testBackpressureEnabled, testBackpressureMaxWrites)
viper.Set(testBackpressureEnabled, true)
viper.Set(testBackpressureMaxWrites, 10)
bm := NewBackpressureManager()
fileCh := make(chan string, 10)
writeCh := make(chan WriteRequest, 10)
// Fill write channel to >=90% of capacity (minimum of 1 entry)
target := max(1, int(float64(cap(writeCh))*0.9))
for i := 0; i < target; i++ {
writeCh <- WriteRequest{}
}
// Test that it blocks by verifying it doesn't complete immediately
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
done := make(chan struct{})
start := time.Now()
go func() {
bm.WaitForChannelSpace(ctx, fileCh, writeCh)
close(done)
}()
// Verify it doesn't complete immediately (within first millisecond)
select {
case <-done:
t.Fatal("WaitForChannelSpace should block when channel is nearly full")
case <-time.After(1 * time.Millisecond):
// Good - it's blocking as expected
}
// Wait for it to complete
<-done
duration := time.Since(start)
// Just verify it took some measurable time (very lenient for CI)
assert.GreaterOrEqual(t, duration, 1*time.Millisecond)
// Clean up
for i := 0; i < target; i++ {
<-writeCh
}
close(fileCh)
close(writeCh)
})
t.Run("respects context cancellation", func(t *testing.T) {
// Use viper to configure instead of direct field access
cleanupViperConfig(t, testBackpressureEnabled, testBackpressureMaxFiles)
viper.Set(testBackpressureEnabled, true)
viper.Set(testBackpressureMaxFiles, 10)
bm := NewBackpressureManager()
fileCh := make(chan string, 10)
writeCh := make(chan WriteRequest, 10)
// Fill channel
for i := 0; i < 10; i++ {
fileCh <- "file.txt"
}
ctx, cancel := context.WithCancel(context.Background())
cancel() // Cancel immediately
// Use timeout to verify it returns quickly
done := make(chan struct{})
go func() {
bm.WaitForChannelSpace(ctx, fileCh, writeCh)
close(done)
}()
// Should return quickly when context is cancelled
select {
case <-done:
// Success - returned due to cancellation
case <-time.After(fastOpTimeout):
t.Fatal("WaitForChannelSpace should return immediately when context is cancelled")
}
// Clean up
for i := 0; i < 10; i++ {
<-fileCh
}
close(fileCh)
close(writeCh)
})
}

View File

@@ -0,0 +1,195 @@
package fileproc
import (
"context"
"sync"
"testing"
"time"
"github.com/spf13/viper"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestBackpressureManagerConcurrency(t *testing.T) {
// Configure via viper instead of direct field access
origEnabled := viper.Get(testBackpressureEnabled)
t.Cleanup(func() {
if origEnabled != nil {
viper.Set(testBackpressureEnabled, origEnabled)
}
})
viper.Set(testBackpressureEnabled, true)
bm := NewBackpressureManager()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var wg sync.WaitGroup
// Multiple goroutines checking backpressure
for i := 0; i < 10; i++ {
wg.Add(1)
go func() {
defer wg.Done()
bm.ShouldApplyBackpressure(ctx)
}()
}
// Multiple goroutines applying backpressure
for i := 0; i < 5; i++ {
wg.Add(1)
go func() {
defer wg.Done()
bm.ApplyBackpressure(ctx)
}()
}
// Multiple goroutines getting stats
for i := 0; i < 5; i++ {
wg.Add(1)
go func() {
defer wg.Done()
bm.GetStats()
}()
}
// Multiple goroutines creating channels
// Note: CreateChannels returns new channels each time, caller owns them
type channelResult struct {
fileCh chan string
writeCh chan WriteRequest
}
results := make(chan channelResult, 3)
for i := 0; i < 3; i++ {
wg.Add(1)
go func() {
defer wg.Done()
fileCh, writeCh := bm.CreateChannels()
results <- channelResult{fileCh, writeCh}
}()
}
wg.Wait()
close(results)
// Verify channels are created and have expected properties
for result := range results {
assert.NotNil(t, result.fileCh)
assert.NotNil(t, result.writeCh)
// Close channels to prevent resource leak (caller owns them)
close(result.fileCh)
close(result.writeCh)
}
// Verify stats are consistent
stats := bm.GetStats()
assert.GreaterOrEqual(t, stats.FilesProcessed, int64(10))
}
func TestBackpressureManagerIntegration(t *testing.T) {
// Configure via viper instead of direct field access
origEnabled := viper.Get(testBackpressureEnabled)
origMaxFiles := viper.Get(testBackpressureMaxFiles)
origMaxWrites := viper.Get(testBackpressureMaxWrites)
origCheckInterval := viper.Get(testBackpressureMemoryCheck)
origMaxMemory := viper.Get(testBackpressureMaxMemory)
t.Cleanup(func() {
if origEnabled != nil {
viper.Set(testBackpressureEnabled, origEnabled)
}
if origMaxFiles != nil {
viper.Set(testBackpressureMaxFiles, origMaxFiles)
}
if origMaxWrites != nil {
viper.Set(testBackpressureMaxWrites, origMaxWrites)
}
if origCheckInterval != nil {
viper.Set(testBackpressureMemoryCheck, origCheckInterval)
}
if origMaxMemory != nil {
viper.Set(testBackpressureMaxMemory, origMaxMemory)
}
})
viper.Set(testBackpressureEnabled, true)
viper.Set(testBackpressureMaxFiles, 10)
viper.Set(testBackpressureMaxWrites, 10)
viper.Set(testBackpressureMemoryCheck, 10)
viper.Set(testBackpressureMaxMemory, 100*1024*1024) // 100MB
bm := NewBackpressureManager()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Create channels - caller owns these channels and is responsible for closing them
fileCh, writeCh := bm.CreateChannels()
require.NotNil(t, fileCh)
require.NotNil(t, writeCh)
require.Greater(t, cap(fileCh), 0, "fileCh should be buffered")
require.Greater(t, cap(writeCh), 0, "writeCh should be buffered")
// Simulate file processing
var wg sync.WaitGroup
// Producer
wg.Add(1)
go func() {
defer wg.Done()
for i := 0; i < 100; i++ {
// Check for backpressure
if bm.ShouldApplyBackpressure(ctx) {
bm.ApplyBackpressure(ctx)
}
// Wait for channel space if needed
bm.WaitForChannelSpace(ctx, fileCh, writeCh)
select {
case fileCh <- "file.txt":
// File sent
case <-ctx.Done():
return
}
}
}()
// Consumer
wg.Add(1)
go func() {
defer wg.Done()
for i := 0; i < 100; i++ {
select {
case <-fileCh:
// Process file (do not manually increment filesProcessed)
case <-ctx.Done():
return
}
}
}()
// Wait for completion
done := make(chan struct{})
go func() {
wg.Wait()
close(done)
}()
select {
case <-done:
// Success
case <-time.After(5 * time.Second):
t.Fatal("Integration test timeout")
}
// Log final info
bm.LogBackpressureInfo()
// Check final stats
stats := bm.GetStats()
assert.GreaterOrEqual(t, stats.FilesProcessed, int64(100))
// Clean up - caller owns the channels, safe to close now that goroutines have finished
close(fileCh)
close(writeCh)
}

View File

@@ -0,0 +1,151 @@
package fileproc
import (
"context"
"testing"
"github.com/spf13/viper"
"github.com/stretchr/testify/assert"
)
// setupViperCleanup is a test helper that captures and restores viper configuration.
// It takes a testing.T and a list of config keys to save/restore.
func setupViperCleanup(t *testing.T, keys []string) {
t.Helper()
// Capture original values and track which keys existed
origValues := make(map[string]interface{})
keysExisted := make(map[string]bool)
for _, key := range keys {
val := viper.Get(key)
origValues[key] = val
keysExisted[key] = viper.IsSet(key)
}
// Register cleanup to restore values
t.Cleanup(func() {
for _, key := range keys {
if keysExisted[key] {
viper.Set(key, origValues[key])
} else {
// Key didn't exist originally, so remove it (viper has no public Unset, hence the Reset and rebuild)
allSettings := viper.AllSettings()
delete(allSettings, key)
viper.Reset()
for k, v := range allSettings {
viper.Set(k, v)
}
}
}
})
}
func TestNewBackpressureManager(t *testing.T) {
keys := []string{
testBackpressureEnabled,
testBackpressureMaxMemory,
testBackpressureMemoryCheck,
testBackpressureMaxFiles,
testBackpressureMaxWrites,
}
setupViperCleanup(t, keys)
viper.Set(testBackpressureEnabled, true)
viper.Set(testBackpressureMaxMemory, 100)
viper.Set(testBackpressureMemoryCheck, 10)
viper.Set(testBackpressureMaxFiles, 10)
viper.Set(testBackpressureMaxWrites, 10)
bm := NewBackpressureManager()
assert.NotNil(t, bm)
assert.True(t, bm.enabled)
assert.Greater(t, bm.maxMemoryUsage, int64(0))
assert.Greater(t, bm.memoryCheckInterval, 0)
assert.Greater(t, bm.maxPendingFiles, 0)
assert.Greater(t, bm.maxPendingWrites, 0)
assert.Equal(t, int64(0), bm.filesProcessed)
}
func TestBackpressureStatsStructure(t *testing.T) {
// Behavioral test that exercises BackpressureManager and validates stats
keys := []string{
testBackpressureEnabled,
testBackpressureMaxMemory,
testBackpressureMemoryCheck,
testBackpressureMaxFiles,
testBackpressureMaxWrites,
}
setupViperCleanup(t, keys)
// Configure backpressure with realistic settings
viper.Set(testBackpressureEnabled, true)
viper.Set(testBackpressureMaxMemory, 100*1024*1024) // 100MB
viper.Set(testBackpressureMemoryCheck, 1) // Check every file
viper.Set(testBackpressureMaxFiles, 1000)
viper.Set(testBackpressureMaxWrites, 500)
bm := NewBackpressureManager()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Simulate processing files
initialStats := bm.GetStats()
assert.True(t, initialStats.Enabled, "backpressure should be enabled")
assert.Equal(t, int64(0), initialStats.FilesProcessed, "initially no files processed")
// Capture initial timestamp to verify it gets updated
initialLastCheck := initialStats.LastMemoryCheck
// Process some files to trigger memory checks
for i := 0; i < 5; i++ {
bm.ShouldApplyBackpressure(ctx)
}
// Verify stats reflect the operations
stats := bm.GetStats()
assert.True(t, stats.Enabled, "enabled flag should be set")
assert.Equal(t, int64(5), stats.FilesProcessed, "should have processed 5 files")
assert.Greater(t, stats.CurrentMemoryUsage, int64(0), "memory usage should be tracked")
assert.Equal(t, int64(100*1024*1024), stats.MaxMemoryUsage, "max memory should match config")
assert.Equal(t, 1000, stats.MaxPendingFiles, "maxPendingFiles should match config")
assert.Equal(t, 500, stats.MaxPendingWrites, "maxPendingWrites should match config")
assert.True(t, stats.LastMemoryCheck.After(initialLastCheck) || stats.LastMemoryCheck.Equal(initialLastCheck),
"lastMemoryCheck should be updated or remain initialized")
}
func TestBackpressureManagerGetStats(t *testing.T) {
keys := []string{
testBackpressureEnabled,
testBackpressureMemoryCheck,
}
setupViperCleanup(t, keys)
// Ensure config enables backpressure and checks every call
viper.Set(testBackpressureEnabled, true)
viper.Set(testBackpressureMemoryCheck, 1)
bm := NewBackpressureManager()
// Capture initial timestamp to verify it gets updated
initialStats := bm.GetStats()
initialLastCheck := initialStats.LastMemoryCheck
// Process some files to update stats
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
for i := 0; i < 5; i++ {
bm.ShouldApplyBackpressure(ctx)
}
stats := bm.GetStats()
assert.True(t, stats.Enabled)
assert.Equal(t, int64(5), stats.FilesProcessed)
assert.Greater(t, stats.CurrentMemoryUsage, int64(0))
assert.Equal(t, bm.maxMemoryUsage, stats.MaxMemoryUsage)
assert.Equal(t, bm.maxPendingFiles, stats.MaxPendingFiles)
assert.Equal(t, bm.maxPendingWrites, stats.MaxPendingWrites)
// LastMemoryCheck should be updated after processing files (memoryCheckInterval=1)
assert.True(t, stats.LastMemoryCheck.After(initialLastCheck),
"lastMemoryCheck should be updated after memory checks")
}

View File

@@ -1,9 +1,162 @@
package fileproc
import "strings"
import (
"fmt"
"path/filepath"
"strings"
)
const (
// MaxRegistryEntries is the maximum number of entries allowed in registry config slices/maps.
MaxRegistryEntries = 1000
// MaxExtensionLength is the maximum length for a single extension string.
MaxExtensionLength = 100
)
// RegistryConfig holds configuration for file type registry.
// All paths must be relative without path traversal (no ".." or leading "/").
// Extensions in CustomLanguages keys must start with "." or be alphanumeric with underscore/hyphen.
type RegistryConfig struct {
// CustomImages: file extensions to treat as images (e.g., ".svg", ".webp").
// Must be relative paths without ".." or leading separators.
CustomImages []string
// CustomBinary: file extensions to treat as binary (e.g., ".bin", ".dat").
// Must be relative paths without ".." or leading separators.
CustomBinary []string
// CustomLanguages: maps file extensions to language names (e.g., {".tsx": "TypeScript"}).
// Keys must start with "." or be alphanumeric with underscore/hyphen.
CustomLanguages map[string]string
// DisabledImages: image extensions to disable from default registry.
DisabledImages []string
// DisabledBinary: binary extensions to disable from default registry.
DisabledBinary []string
// DisabledLanguages: language extensions to disable from default registry.
DisabledLanguages []string
}
// Validate checks the RegistryConfig for invalid entries and enforces limits.
func (c *RegistryConfig) Validate() error {
// Validate CustomImages
if err := validateExtensionSlice(c.CustomImages, "CustomImages"); err != nil {
return err
}
// Validate CustomBinary
if err := validateExtensionSlice(c.CustomBinary, "CustomBinary"); err != nil {
return err
}
// Validate CustomLanguages
if len(c.CustomLanguages) > MaxRegistryEntries {
return fmt.Errorf(
"CustomLanguages exceeds maximum entries (%d > %d)",
len(c.CustomLanguages),
MaxRegistryEntries,
)
}
for ext, lang := range c.CustomLanguages {
if err := validateExtension(ext, "CustomLanguages key"); err != nil {
return err
}
if len(lang) > MaxExtensionLength {
return fmt.Errorf(
"CustomLanguages value %q exceeds maximum length (%d > %d)",
lang,
len(lang),
MaxExtensionLength,
)
}
}
// Validate Disabled slices
if err := validateExtensionSlice(c.DisabledImages, "DisabledImages"); err != nil {
return err
}
if err := validateExtensionSlice(c.DisabledBinary, "DisabledBinary"); err != nil {
return err
}
return validateExtensionSlice(c.DisabledLanguages, "DisabledLanguages")
}
// validateExtensionSlice validates a slice of extensions for path safety and limits.
func validateExtensionSlice(slice []string, fieldName string) error {
if len(slice) > MaxRegistryEntries {
return fmt.Errorf("%s exceeds maximum entries (%d > %d)", fieldName, len(slice), MaxRegistryEntries)
}
for _, ext := range slice {
if err := validateExtension(ext, fieldName); err != nil {
return err
}
}
return nil
}
// validateExtension validates a single extension for path safety.
//
//revive:disable-next-line:cyclomatic
func validateExtension(ext, context string) error {
// Reject empty strings
if ext == "" {
return fmt.Errorf("%s entry cannot be empty", context)
}
if len(ext) > MaxExtensionLength {
return fmt.Errorf(
"%s entry %q exceeds maximum length (%d > %d)",
context, ext, len(ext), MaxExtensionLength,
)
}
// Reject absolute paths
if filepath.IsAbs(ext) {
return fmt.Errorf("%s entry %q is an absolute path (not allowed)", context, ext)
}
// Reject path traversal
if strings.Contains(ext, "..") {
return fmt.Errorf("%s entry %q contains path traversal (not allowed)", context, ext)
}
// For extensions, verify they start with "." or are alphanumeric
if strings.HasPrefix(ext, ".") {
// Reject extensions containing path separators
if strings.ContainsRune(ext, filepath.Separator) || strings.ContainsRune(ext, '/') ||
strings.ContainsRune(ext, '\\') {
return fmt.Errorf("%s entry %q contains path separators (not allowed)", context, ext)
}
// Valid extension format
return nil
}
// Check if purely alphanumeric (for bare names)
for _, r := range ext {
isValid := (r >= 'a' && r <= 'z') ||
(r >= 'A' && r <= 'Z') ||
(r >= '0' && r <= '9') ||
r == '_' || r == '-'
if !isValid {
return fmt.Errorf(
"%s entry %q contains invalid characters (must start with '.' or be alphanumeric/_/-)",
context,
ext,
)
}
}
return nil
}
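To make the accepted shapes concrete, a few hypothetical inputs and the outcomes the rules above imply:

validateExtension(".tsx", "CustomLanguages key") // nil: dot-prefixed, no path separators
validateExtension("Makefile", "CustomBinary")    // nil: bare alphanumeric name
validateExtension("../x", "CustomImages")        // error: contains ".."
validateExtension("/usr/bin", "CustomImages")    // error: absolute path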
// ApplyCustomExtensions applies custom extensions from configuration.
func (r *FileTypeRegistry) ApplyCustomExtensions(customImages, customBinary []string, customLanguages map[string]string) {
func (r *FileTypeRegistry) ApplyCustomExtensions(
customImages, customBinary []string,
customLanguages map[string]string,
) {
// Add custom image extensions
r.addExtensions(customImages, r.AddImageExtension)
@@ -29,12 +182,24 @@ func (r *FileTypeRegistry) addExtensions(extensions []string, adder func(string)
// ConfigureFromSettings applies configuration settings to the registry.
// This function is called from main.go after config is loaded to avoid circular imports.
func ConfigureFromSettings(
customImages, customBinary []string,
customLanguages map[string]string,
disabledImages, disabledBinary, disabledLanguages []string,
) {
// It validates the configuration before applying it.
func ConfigureFromSettings(config RegistryConfig) error {
// Validate configuration first
if err := config.Validate(); err != nil {
return err
}
registry := GetDefaultRegistry()
registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages)
// Only apply custom extensions if they are non-empty (len() for nil slices/maps is zero)
if len(config.CustomImages) > 0 || len(config.CustomBinary) > 0 || len(config.CustomLanguages) > 0 {
registry.ApplyCustomExtensions(config.CustomImages, config.CustomBinary, config.CustomLanguages)
}
// Only disable extensions if they are non-empty
if len(config.DisabledImages) > 0 || len(config.DisabledBinary) > 0 || len(config.DisabledLanguages) > 0 {
registry.DisableExtensions(config.DisabledImages, config.DisabledBinary, config.DisabledLanguages)
}
return nil
}
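
On the caller side, this might look roughly like the following in main.go (the config key names are hypothetical; only the RegistryConfig and ConfigureFromSettings shapes come from this diff):

cfg := fileproc.RegistryConfig{
	CustomImages:    viper.GetStringSlice("filetypes.customImages"),
	CustomLanguages: viper.GetStringMapString("filetypes.customLanguages"),
}
if err := fileproc.ConfigureFromSettings(cfg); err != nil {
	logrus.Fatalf("invalid file type registry configuration: %v", err)
}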

View File

@@ -14,10 +14,10 @@ func TestFileTypeRegistry_ThreadSafety(t *testing.T) {
var wg sync.WaitGroup
// Test concurrent read operations
t.Run("ConcurrentReads", func(t *testing.T) {
t.Run("ConcurrentReads", func(_ *testing.T) {
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func(id int) {
go func(_ int) {
defer wg.Done()
registry := GetDefaultRegistry()
@@ -102,4 +102,4 @@ func TestFileTypeRegistry_ThreadSafety(t *testing.T) {
}
wg.Wait()
})
}
}

View File

@@ -1,8 +1,9 @@
package fileproc
import (
"sync"
"testing"
"github.com/stretchr/testify/require"
)
// TestFileTypeRegistry_Configuration tests the configuration functionality.
@@ -142,7 +143,7 @@ func TestFileTypeRegistry_Configuration(t *testing.T) {
}
})
// Test case insensitive handling
// Test case-insensitive handling
t.Run("CaseInsensitiveHandling", func(t *testing.T) {
registry := &FileTypeRegistry{
imageExts: make(map[string]bool),
@@ -184,8 +185,9 @@ func TestFileTypeRegistry_Configuration(t *testing.T) {
// TestConfigureFromSettings tests the global configuration function.
func TestConfigureFromSettings(t *testing.T) {
// Reset registry to ensure clean state
registryOnce = sync.Once{}
registry = nil
ResetRegistryForTesting()
// Ensure cleanup runs even if test fails
t.Cleanup(ResetRegistryForTesting)
// Test configuration application
customImages := []string{".webp", ".avif"}
@@ -195,14 +197,15 @@ func TestConfigureFromSettings(t *testing.T) {
disabledBinary := []string{".exe"} // Disable default extension
disabledLanguages := []string{".rb"} // Disable default extension
ConfigureFromSettings(
customImages,
customBinary,
customLanguages,
disabledImages,
disabledBinary,
disabledLanguages,
)
err := ConfigureFromSettings(RegistryConfig{
CustomImages: customImages,
CustomBinary: customBinary,
CustomLanguages: customLanguages,
DisabledImages: disabledImages,
DisabledBinary: disabledBinary,
DisabledLanguages: disabledLanguages,
})
require.NoError(t, err)
// Test that custom extensions work
if !IsImage("test.webp") {
@@ -238,14 +241,15 @@ func TestConfigureFromSettings(t *testing.T) {
}
// Test multiple calls don't override previous configuration
ConfigureFromSettings(
[]string{".extra"},
[]string{},
map[string]string{},
[]string{},
[]string{},
[]string{},
)
err = ConfigureFromSettings(RegistryConfig{
CustomImages: []string{".extra"},
CustomBinary: []string{},
CustomLanguages: map[string]string{},
DisabledImages: []string{},
DisabledBinary: []string{},
DisabledLanguages: []string{},
})
require.NoError(t, err)
// Previous configuration should still work
if !IsImage("test.webp") {
@@ -255,4 +259,4 @@ func TestConfigureFromSettings(t *testing.T) {
if !IsImage("test.extra") {
t.Error("Expected new configuration to be applied")
}
}
}

View File

@@ -4,9 +4,21 @@ import (
"testing"
)
// newTestRegistry creates a fresh registry instance for testing to avoid global state pollution.
func newTestRegistry() *FileTypeRegistry {
return &FileTypeRegistry{
imageExts: getImageExtensions(),
binaryExts: getBinaryExtensions(),
languageMap: getLanguageMap(),
extCache: make(map[string]string, 1000),
resultCache: make(map[string]FileTypeResult, 500),
maxCacheSize: 500,
}
}
// TestFileTypeRegistry_LanguageDetection tests the language detection functionality.
func TestFileTypeRegistry_LanguageDetection(t *testing.T) {
registry := GetDefaultRegistry()
registry := newTestRegistry()
tests := []struct {
filename string
@@ -94,7 +106,7 @@ func TestFileTypeRegistry_LanguageDetection(t *testing.T) {
// TestFileTypeRegistry_ImageDetection tests the image detection functionality.
func TestFileTypeRegistry_ImageDetection(t *testing.T) {
registry := GetDefaultRegistry()
registry := newTestRegistry()
tests := []struct {
filename string
@@ -144,7 +156,7 @@ func TestFileTypeRegistry_ImageDetection(t *testing.T) {
// TestFileTypeRegistry_BinaryDetection tests the binary detection functionality.
func TestFileTypeRegistry_BinaryDetection(t *testing.T) {
registry := GetDefaultRegistry()
registry := newTestRegistry()
tests := []struct {
filename string
@@ -208,11 +220,11 @@ func TestFileTypeRegistry_BinaryDetection(t *testing.T) {
{"page.html", false},
// Edge cases
{"", false}, // Empty filename
{"binary", false}, // No extension
{".exe", true}, // Just extension
{"file.exe.txt", false}, // Multiple extensions
{"file.unknown", false}, // Unknown extension
{"", false}, // Empty filename
{"binary", false}, // No extension
{".exe", true}, // Just extension
{"file.exe.txt", false}, // Multiple extensions
{"file.unknown", false}, // Unknown extension
}
for _, tt := range tests {
@@ -223,4 +235,4 @@ func TestFileTypeRegistry_BinaryDetection(t *testing.T) {
}
})
}
}
}

View File

@@ -31,7 +31,7 @@ func TestFileTypeRegistry_EdgeCases(t *testing.T) {
}
for _, tc := range edgeCases {
t.Run(tc.name, func(t *testing.T) {
t.Run(tc.name, func(_ *testing.T) {
// These should not panic
_ = registry.IsImage(tc.filename)
_ = registry.IsBinary(tc.filename)
@@ -125,4 +125,4 @@ func BenchmarkFileTypeRegistry_ConcurrentAccess(b *testing.B) {
_ = GetLanguage(filename)
}
})
}
}

View File

@@ -21,7 +21,7 @@ func TestFileTypeRegistry_ModificationMethods(t *testing.T) {
t.Errorf("Expected .webp to be recognized as image after adding")
}
// Test case insensitive addition
// Test case-insensitive addition
registry.AddImageExtension(".AVIF")
if !registry.IsImage("test.avif") {
t.Errorf("Expected .avif to be recognized as image after adding .AVIF")
@@ -51,7 +51,7 @@ func TestFileTypeRegistry_ModificationMethods(t *testing.T) {
t.Errorf("Expected .custom to be recognized as binary after adding")
}
// Test case insensitive addition
// Test case-insensitive addition
registry.AddBinaryExtension(".SPECIAL")
if !registry.IsBinary("file.special") {
t.Errorf("Expected .special to be recognized as binary after adding .SPECIAL")
@@ -81,7 +81,7 @@ func TestFileTypeRegistry_ModificationMethods(t *testing.T) {
t.Errorf("Expected CustomLang, got %s", lang)
}
// Test case insensitive addition
// Test case-insensitive addition
registry.AddLanguageMapping(".ABC", "UpperLang")
if lang := registry.GetLanguage("file.abc"); lang != "UpperLang" {
t.Errorf("Expected UpperLang, got %s", lang)
@@ -134,4 +134,4 @@ func TestFileTypeRegistry_DefaultRegistryConsistency(t *testing.T) {
t.Errorf("Iteration %d: Expected .txt to not be recognized as binary", i)
}
}
}
}

View File

@@ -6,7 +6,7 @@ import (
"io"
"os"
"github.com/ivuorinen/gibidify/utils"
"github.com/ivuorinen/gibidify/gibidiutils"
)
// JSONWriter handles JSON format output with streaming support.
@@ -27,27 +27,42 @@ func NewJSONWriter(outFile *os.File) *JSONWriter {
func (w *JSONWriter) Start(prefix, suffix string) error {
// Start JSON structure
if _, err := w.outFile.WriteString(`{"prefix":"`); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON start")
return gibidiutils.WrapError(
err,
gibidiutils.ErrorTypeIO,
gibidiutils.CodeIOWrite,
"failed to write JSON start",
)
}
// Write escaped prefix
escapedPrefix := utils.EscapeForJSON(prefix)
if err := utils.WriteWithErrorWrap(w.outFile, escapedPrefix, "failed to write JSON prefix", ""); err != nil {
escapedPrefix := gibidiutils.EscapeForJSON(prefix)
if err := gibidiutils.WriteWithErrorWrap(w.outFile, escapedPrefix, "failed to write JSON prefix", ""); err != nil {
return err
}
if _, err := w.outFile.WriteString(`","suffix":"`); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON middle")
return gibidiutils.WrapError(
err,
gibidiutils.ErrorTypeIO,
gibidiutils.CodeIOWrite,
"failed to write JSON middle",
)
}
// Write escaped suffix
escapedSuffix := utils.EscapeForJSON(suffix)
if err := utils.WriteWithErrorWrap(w.outFile, escapedSuffix, "failed to write JSON suffix", ""); err != nil {
escapedSuffix := gibidiutils.EscapeForJSON(suffix)
if err := gibidiutils.WriteWithErrorWrap(w.outFile, escapedSuffix, "failed to write JSON suffix", ""); err != nil {
return err
}
if _, err := w.outFile.WriteString(`","files":[`); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON files start")
return gibidiutils.WrapError(
err,
gibidiutils.ErrorTypeIO,
gibidiutils.CodeIOWrite,
"failed to write JSON files start",
)
}
return nil
@@ -57,7 +72,12 @@ func (w *JSONWriter) Start(prefix, suffix string) error {
func (w *JSONWriter) WriteFile(req WriteRequest) error {
if !w.firstFile {
if _, err := w.outFile.WriteString(","); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON separator")
return gibidiutils.WrapError(
err,
gibidiutils.ErrorTypeIO,
gibidiutils.CodeIOWrite,
"failed to write JSON separator",
)
}
}
w.firstFile = false
@@ -72,21 +92,24 @@ func (w *JSONWriter) WriteFile(req WriteRequest) error {
func (w *JSONWriter) Close() error {
// Close JSON structure
if _, err := w.outFile.WriteString("]}"); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON end")
return gibidiutils.WrapError(err, gibidiutils.ErrorTypeIO, gibidiutils.CodeIOWrite, "failed to write JSON end")
}
return nil
}
// writeStreaming writes a large file as JSON in streaming chunks.
func (w *JSONWriter) writeStreaming(req WriteRequest) error {
defer utils.SafeCloseReader(req.Reader, req.Path)
defer gibidiutils.SafeCloseReader(req.Reader, req.Path)
language := detectLanguage(req.Path)
// Write file start
escapedPath := utils.EscapeForJSON(req.Path)
escapedPath := gibidiutils.EscapeForJSON(req.Path)
if _, err := fmt.Fprintf(w.outFile, `{"path":"%s","language":"%s","content":"`, escapedPath, language); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file start").WithFilePath(req.Path)
return gibidiutils.WrapError(
err, gibidiutils.ErrorTypeIO, gibidiutils.CodeIOWrite,
"failed to write JSON file start",
).WithFilePath(req.Path)
}
// Stream content with JSON escaping
@@ -96,7 +119,10 @@ func (w *JSONWriter) writeStreaming(req WriteRequest) error {
// Write file end
if _, err := w.outFile.WriteString(`"}`); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file end").WithFilePath(req.Path)
return gibidiutils.WrapError(
err, gibidiutils.ErrorTypeIO, gibidiutils.CodeIOWrite,
"failed to write JSON file end",
).WithFilePath(req.Path)
}
return nil
@@ -113,25 +139,29 @@ func (w *JSONWriter) writeInline(req WriteRequest) error {
encoded, err := json.Marshal(fileData)
if err != nil {
return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingEncode, "failed to marshal JSON").WithFilePath(req.Path)
return gibidiutils.WrapError(
err, gibidiutils.ErrorTypeProcessing, gibidiutils.CodeProcessingEncode,
"failed to marshal JSON",
).WithFilePath(req.Path)
}
if _, err := w.outFile.Write(encoded); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file").WithFilePath(req.Path)
return gibidiutils.WrapError(
err, gibidiutils.ErrorTypeIO, gibidiutils.CodeIOWrite,
"failed to write JSON file",
).WithFilePath(req.Path)
}
return nil
}
// streamJSONContent streams content with JSON escaping.
func (w *JSONWriter) streamJSONContent(reader io.Reader, path string) error {
return utils.StreamContent(reader, w.outFile, StreamChunkSize, path, func(chunk []byte) []byte {
escaped := utils.EscapeForJSON(string(chunk))
return gibidiutils.StreamContent(reader, w.outFile, StreamChunkSize, path, func(chunk []byte) []byte {
escaped := gibidiutils.EscapeForJSON(string(chunk))
return []byte(escaped)
})
}
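gibidiutils.StreamContent is outside this diff; from its two call sites (a JSON-escaping transform here, a nil transform in markdown.go below) and the chunked read loop it replaces there, its shape is roughly the following sketch (structured error wrapping elided):

func StreamContent(r io.Reader, w io.Writer, chunkSize int, path string, transform func([]byte) []byte) error {
	buf := make([]byte, chunkSize)
	for {
		n, err := r.Read(buf)
		if n > 0 {
			chunk := buf[:n]
			if transform != nil {
				chunk = transform(chunk) // a real version must also avoid splitting multi-byte runes across chunks
			}
			if _, werr := w.Write(chunk); werr != nil {
				return werr // presumably wrapped with ErrorTypeIO/CodeIOWrite and path
			}
		}
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err // presumably wrapped with ErrorTypeIO/CodeIORead and path
		}
	}
}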
// startJSONWriter handles JSON format output with streaming support.
func startJSONWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
defer close(done)
@@ -140,19 +170,19 @@ func startJSONWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<-
// Start writing
if err := writer.Start(prefix, suffix); err != nil {
utils.LogError("Failed to write JSON start", err)
gibidiutils.LogError("Failed to write JSON start", err)
return
}
// Process files
for req := range writeCh {
if err := writer.WriteFile(req); err != nil {
utils.LogError("Failed to write JSON file", err)
gibidiutils.LogError("Failed to write JSON file", err)
}
}
// Close writer
if err := writer.Close(); err != nil {
utils.LogError("Failed to write JSON end", err)
gibidiutils.LogError("Failed to write JSON end", err)
}
}

View File

@@ -4,11 +4,13 @@ import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/ivuorinen/gibidify/utils"
"github.com/ivuorinen/gibidify/gibidiutils"
)
// MarkdownWriter handles markdown format output with streaming support.
// MarkdownWriter handles Markdown format output with streaming support.
type MarkdownWriter struct {
outFile *os.File
}
@@ -19,16 +21,21 @@ func NewMarkdownWriter(outFile *os.File) *MarkdownWriter {
}
// Start writes the markdown header.
func (w *MarkdownWriter) Start(prefix, suffix string) error {
func (w *MarkdownWriter) Start(prefix, _ string) error {
if prefix != "" {
if _, err := fmt.Fprintf(w.outFile, "# %s\n\n", prefix); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write prefix")
return gibidiutils.WrapError(
err,
gibidiutils.ErrorTypeIO,
gibidiutils.CodeIOWrite,
"failed to write prefix",
)
}
}
return nil
}
// WriteFile writes a file entry in markdown format.
// WriteFile writes a file entry in Markdown format.
func (w *MarkdownWriter) WriteFile(req WriteRequest) error {
if req.IsStream {
return w.writeStreaming(req)
@@ -40,21 +47,99 @@ func (w *MarkdownWriter) WriteFile(req WriteRequest) error {
func (w *MarkdownWriter) Close(suffix string) error {
if suffix != "" {
if _, err := fmt.Fprintf(w.outFile, "\n# %s\n", suffix); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write suffix")
return gibidiutils.WrapError(
err,
gibidiutils.ErrorTypeIO,
gibidiutils.CodeIOWrite,
"failed to write suffix",
)
}
}
return nil
}
// validateMarkdownPath validates a file path for Markdown output.
func validateMarkdownPath(path string) error {
trimmed := strings.TrimSpace(path)
if trimmed == "" {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationRequired,
"file path cannot be empty",
"",
nil,
)
}
// Reject absolute paths
if filepath.IsAbs(trimmed) {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationPath,
"absolute paths are not allowed",
trimmed,
map[string]any{"path": trimmed},
)
}
// Clean and validate path components
cleaned := filepath.Clean(trimmed)
if filepath.IsAbs(cleaned) || strings.HasPrefix(cleaned, "/") {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationPath,
"path must be relative",
trimmed,
map[string]any{"path": trimmed, "cleaned": cleaned},
)
}
// Check for path traversal in components
components := strings.Split(filepath.ToSlash(cleaned), "/")
for _, component := range components {
if component == ".." {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationPath,
"path traversal not allowed",
trimmed,
map[string]any{"path": trimmed, "cleaned": cleaned},
)
}
}
return nil
}
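A few hypothetical paths and the outcomes these checks imply:

validateMarkdownPath("src/main.go")    // nil: clean relative path
validateMarkdownPath("")               // error: empty path
validateMarkdownPath("/etc/passwd")    // error: absolute path
validateMarkdownPath("a/../../secret") // error: ".." component survives filepath.Clean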
// writeStreaming writes a large file in streaming chunks.
func (w *MarkdownWriter) writeStreaming(req WriteRequest) error {
defer w.closeReader(req.Reader, req.Path)
// Validate path before use
if err := validateMarkdownPath(req.Path); err != nil {
return err
}
// Check for nil reader
if req.Reader == nil {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationRequired,
"nil reader in write request",
"",
nil,
).WithFilePath(req.Path)
}
defer gibidiutils.SafeCloseReader(req.Reader, req.Path)
language := detectLanguage(req.Path)
// Write file header
if _, err := fmt.Fprintf(w.outFile, "## File: `%s`\n```%s\n", req.Path, language); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file header").WithFilePath(req.Path)
safePath := gibidiutils.EscapeForMarkdown(req.Path)
if _, err := fmt.Fprintf(w.outFile, "## File: `%s`\n```%s\n", safePath, language); err != nil {
return gibidiutils.WrapError(
err, gibidiutils.ErrorTypeIO, gibidiutils.CodeIOWrite,
"failed to write file header",
).WithFilePath(req.Path)
}
// Stream file content in chunks
@@ -64,7 +149,10 @@ func (w *MarkdownWriter) writeStreaming(req WriteRequest) error {
// Write file footer
if _, err := w.outFile.WriteString("\n```\n\n"); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file footer").WithFilePath(req.Path)
return gibidiutils.WrapError(
err, gibidiutils.ErrorTypeIO, gibidiutils.CodeIOWrite,
"failed to write file footer",
).WithFilePath(req.Path)
}
return nil
@@ -72,68 +160,55 @@ func (w *MarkdownWriter) writeStreaming(req WriteRequest) error {
// writeInline writes a small file directly from content.
func (w *MarkdownWriter) writeInline(req WriteRequest) error {
// Validate path before use
if err := validateMarkdownPath(req.Path); err != nil {
return err
}
language := detectLanguage(req.Path)
formatted := fmt.Sprintf("## File: `%s`\n```%s\n%s\n```\n\n", req.Path, language, req.Content)
safePath := gibidiutils.EscapeForMarkdown(req.Path)
formatted := fmt.Sprintf("## File: `%s`\n```%s\n%s\n```\n\n", safePath, language, req.Content)
if _, err := w.outFile.WriteString(formatted); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write inline content").WithFilePath(req.Path)
return gibidiutils.WrapError(
err, gibidiutils.ErrorTypeIO, gibidiutils.CodeIOWrite,
"failed to write inline content",
).WithFilePath(req.Path)
}
return nil
}
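gibidiutils.EscapeForMarkdown is likewise outside this diff. Since the path is interpolated into a heading and an inline code span, a purely hypothetical sketch of its intent:

func EscapeForMarkdown(s string) string {
	// Hypothetical: replace backticks so a path cannot terminate the `...` span,
	// and flatten newlines that would break the "## File:" heading line.
	s = strings.ReplaceAll(s, "`", "'")
	return strings.ReplaceAll(s, "\n", " ")
}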
// streamContent streams file content in chunks.
func (w *MarkdownWriter) streamContent(reader io.Reader, path string) error {
buf := make([]byte, StreamChunkSize)
for {
n, err := reader.Read(buf)
if n > 0 {
if _, writeErr := w.outFile.Write(buf[:n]); writeErr != nil {
return utils.WrapError(writeErr, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write chunk").WithFilePath(path)
}
}
if err == io.EOF {
break
}
if err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to read chunk").WithFilePath(path)
}
}
return nil
return gibidiutils.StreamContent(reader, w.outFile, StreamChunkSize, path, nil)
}
// closeReader safely closes a reader if it implements io.Closer.
func (w *MarkdownWriter) closeReader(reader io.Reader, path string) {
if closer, ok := reader.(io.Closer); ok {
if err := closer.Close(); err != nil {
utils.LogError(
"Failed to close file reader",
utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
)
}
}
}
// startMarkdownWriter handles markdown format output with streaming support.
func startMarkdownWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
// startMarkdownWriter handles Markdown format output with streaming support.
func startMarkdownWriter(
outFile *os.File,
writeCh <-chan WriteRequest,
done chan<- struct{},
prefix, suffix string,
) {
defer close(done)
writer := NewMarkdownWriter(outFile)
// Start writing
if err := writer.Start(prefix, suffix); err != nil {
utils.LogError("Failed to write markdown prefix", err)
gibidiutils.LogError("Failed to write markdown prefix", err)
return
}
// Process files
for req := range writeCh {
if err := writer.WriteFile(req); err != nil {
utils.LogError("Failed to write markdown file", err)
gibidiutils.LogError("Failed to write markdown file", err)
}
}
// Close writer
if err := writer.Close(suffix); err != nil {
utils.LogError("Failed to write markdown suffix", err)
gibidiutils.LogError("Failed to write markdown suffix", err)
}
}

View File

@@ -3,6 +3,7 @@ package fileproc
import (
"context"
"errors"
"fmt"
"io"
"os"
@@ -13,7 +14,7 @@ import (
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/utils"
"github.com/ivuorinen/gibidify/gibidiutils"
)
const (
@@ -33,6 +34,26 @@ type WriteRequest struct {
Reader io.Reader
}
// multiReaderCloser wraps an io.Reader with a Close method that closes underlying closers.
type multiReaderCloser struct {
reader io.Reader
closers []io.Closer
}
func (m *multiReaderCloser) Read(p []byte) (n int, err error) {
return m.reader.Read(p)
}
func (m *multiReaderCloser) Close() error {
var firstErr error
for _, c := range m.closers {
if err := c.Close(); err != nil && firstErr == nil {
firstErr = err
}
}
return firstErr
}
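A hypothetical use of multiReaderCloser, matching the header-plus-content streaming that createStreamReaderWithContext performs below (newHeaderReader and its arguments are illustrative, not part of the diff):

func newHeaderReader(filePath, header string) (io.Reader, error) {
	f, err := os.Open(filePath) // #nosec G304 - illustration only
	if err != nil {
		return nil, err
	}
	// Closing the returned reader closes the underlying file exactly once.
	return &multiReaderCloser{
		reader:  io.MultiReader(strings.NewReader(header), f),
		closers: []io.Closer{f},
	}, nil
}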
// FileProcessor handles file processing operations.
type FileProcessor struct {
rootPath string
@@ -58,6 +79,34 @@ func NewFileProcessorWithMonitor(rootPath string, monitor *ResourceMonitor) *Fil
}
}
// checkContextCancellation checks if context is cancelled and logs an error if so.
// Returns true if context is cancelled, false otherwise.
func (p *FileProcessor) checkContextCancellation(ctx context.Context, filePath, stage string) bool {
select {
case <-ctx.Done():
// Format stage with leading space if provided
stageMsg := stage
if stage != "" {
stageMsg = " " + stage
}
gibidiutils.LogErrorf(
gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeResourceLimitTimeout,
fmt.Sprintf("file processing cancelled%s", stageMsg),
filePath,
nil,
),
"File processing cancelled%s: %s",
stageMsg,
filePath,
)
return true
default:
return false
}
}
// ProcessFile reads the file at filePath and sends a formatted output to outCh.
// It automatically chooses between loading the entire file or streaming based on file size.
func ProcessFile(filePath string, outCh chan<- WriteRequest, rootPath string) {
@@ -67,7 +116,13 @@ func ProcessFile(filePath string, outCh chan<- WriteRequest, rootPath string) {
}
// ProcessFileWithMonitor processes a file using a shared resource monitor.
func ProcessFileWithMonitor(ctx context.Context, filePath string, outCh chan<- WriteRequest, rootPath string, monitor *ResourceMonitor) {
func ProcessFileWithMonitor(
ctx context.Context,
filePath string,
outCh chan<- WriteRequest,
rootPath string,
monitor *ResourceMonitor,
) {
processor := NewFileProcessorWithMonitor(rootPath, monitor)
processor.ProcessWithContext(ctx, filePath, outCh)
}
@@ -86,10 +141,17 @@ func (p *FileProcessor) ProcessWithContext(ctx context.Context, filePath string,
// Wait for rate limiting
if err := p.resourceMonitor.WaitForRateLimit(fileCtx); err != nil {
if err == context.DeadlineExceeded {
utils.LogErrorf(
utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing timeout during rate limiting", filePath, nil),
"File processing timeout during rate limiting: %s", filePath,
if errors.Is(err, context.DeadlineExceeded) {
gibidiutils.LogErrorf(
gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeResourceLimitTimeout,
"file processing timeout during rate limiting",
filePath,
nil,
),
"File processing timeout during rate limiting: %s",
filePath,
)
}
return
@@ -103,10 +165,17 @@ func (p *FileProcessor) ProcessWithContext(ctx context.Context, filePath string,
// Acquire read slot for concurrent processing
if err := p.resourceMonitor.AcquireReadSlot(fileCtx); err != nil {
if err == context.DeadlineExceeded {
utils.LogErrorf(
utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing timeout waiting for read slot", filePath, nil),
"File processing timeout waiting for read slot: %s", filePath,
if errors.Is(err, context.DeadlineExceeded) {
gibidiutils.LogErrorf(
gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeResourceLimitTimeout,
"file processing timeout waiting for read slot",
filePath,
nil,
),
"File processing timeout waiting for read slot: %s",
filePath,
)
}
return
@@ -115,7 +184,7 @@ func (p *FileProcessor) ProcessWithContext(ctx context.Context, filePath string,
// Check hard memory limits before processing
if err := p.resourceMonitor.CheckHardMemoryLimit(); err != nil {
utils.LogErrorf(err, "Hard memory limit check failed for file: %s", filePath)
gibidiutils.LogErrorf(err, "Hard memory limit check failed for file: %s", filePath)
return
}
@@ -138,7 +207,6 @@ func (p *FileProcessor) ProcessWithContext(ctx context.Context, filePath string,
}
}
// validateFileWithLimits checks if the file can be processed with resource limits.
func (p *FileProcessor) validateFileWithLimits(ctx context.Context, filePath string) (os.FileInfo, error) {
// Check context cancellation
@@ -150,24 +218,27 @@ func (p *FileProcessor) validateFileWithLimits(ctx context.Context, filePath str
fileInfo, err := os.Stat(filePath)
if err != nil {
structErr := utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to stat file").WithFilePath(filePath)
utils.LogErrorf(structErr, "Failed to stat file %s", filePath)
return nil, err
structErr := gibidiutils.WrapError(
err, gibidiutils.ErrorTypeFileSystem, gibidiutils.CodeFSAccess,
"failed to stat file",
).WithFilePath(filePath)
gibidiutils.LogErrorf(structErr, "Failed to stat file %s", filePath)
return nil, structErr
}
// Check traditional size limit
if fileInfo.Size() > p.sizeLimit {
context := map[string]interface{}{
filesizeContext := map[string]interface{}{
"file_size": fileInfo.Size(),
"size_limit": p.sizeLimit,
}
utils.LogErrorf(
utils.NewStructuredError(
utils.ErrorTypeValidation,
utils.CodeValidationSize,
gibidiutils.LogErrorf(
gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationSize,
fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", fileInfo.Size(), p.sizeLimit),
filePath,
context,
filesizeContext,
),
"Skipping large file %s", filePath,
)
@@ -176,7 +247,7 @@ func (p *FileProcessor) validateFileWithLimits(ctx context.Context, filePath str
// Check resource limits
if err := p.resourceMonitor.ValidateFileProcessing(filePath, fileInfo.Size()); err != nil {
utils.LogErrorf(err, "Resource limit validation failed for file: %s", filePath)
gibidiutils.LogErrorf(err, "Resource limit validation failed for file: %s", filePath)
return nil, err
}
@@ -192,66 +263,54 @@ func (p *FileProcessor) getRelativePath(filePath string) string {
return relPath
}
// processInMemoryWithContext loads the entire file into memory with context awareness.
func (p *FileProcessor) processInMemoryWithContext(ctx context.Context, filePath, relPath string, outCh chan<- WriteRequest) {
func (p *FileProcessor) processInMemoryWithContext(
ctx context.Context,
filePath, relPath string,
outCh chan<- WriteRequest,
) {
// Check context before reading
select {
case <-ctx.Done():
utils.LogErrorf(
utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing cancelled", filePath, nil),
"File processing cancelled: %s", filePath,
)
if p.checkContextCancellation(ctx, filePath, "") {
return
default:
}
content, err := os.ReadFile(filePath) // #nosec G304 - filePath is validated by walker
// #nosec G304 - filePath is validated by walker
content, err := os.ReadFile(filePath)
if err != nil {
structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to read file").WithFilePath(filePath)
utils.LogErrorf(structErr, "Failed to read file %s", filePath)
structErr := gibidiutils.WrapError(
err, gibidiutils.ErrorTypeProcessing, gibidiutils.CodeProcessingFileRead,
"failed to read file",
).WithFilePath(filePath)
gibidiutils.LogErrorf(structErr, "Failed to read file %s", filePath)
return
}
// Check context again after reading
select {
case <-ctx.Done():
utils.LogErrorf(
utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing cancelled after read", filePath, nil),
"File processing cancelled after read: %s", filePath,
)
if p.checkContextCancellation(ctx, filePath, "after read") {
return
default:
}
// Try to send the result, but respect context cancellation
select {
case <-ctx.Done():
utils.LogErrorf(
utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing cancelled before output", filePath, nil),
"File processing cancelled before output: %s", filePath,
)
// Check context before sending output
if p.checkContextCancellation(ctx, filePath, "before output") {
return
case outCh <- WriteRequest{
}
outCh <- WriteRequest{
Path: relPath,
Content: p.formatContent(relPath, string(content)),
IsStream: false,
}:
}
}
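// checkContextCancellation consolidates the three inline select blocks this
// refactor removes; its body is outside this hunk. A plausible sketch, assuming
// the stage argument ("", "after read", ...) is folded into the logged message:
//
//	func (p *FileProcessor) checkContextCancellation(ctx context.Context, filePath, stage string) bool {
//		select {
//		case <-ctx.Done():
//			msg := "file processing cancelled"
//			if stage != "" {
//				msg += " " + stage
//			}
//			gibidiutils.LogErrorf(
//				gibidiutils.NewStructuredError(
//					gibidiutils.ErrorTypeValidation,
//					gibidiutils.CodeResourceLimitTimeout,
//					msg, filePath, nil,
//				),
//				"%s: %s", msg, filePath,
//			)
//			return true
//		default:
//			return false
//		}
//	}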
// processStreamingWithContext creates a streaming reader for large files with context awareness.
func (p *FileProcessor) processStreamingWithContext(ctx context.Context, filePath, relPath string, outCh chan<- WriteRequest) {
func (p *FileProcessor) processStreamingWithContext(
ctx context.Context,
filePath, relPath string,
outCh chan<- WriteRequest,
) {
// Check context before creating reader
select {
case <-ctx.Done():
utils.LogErrorf(
utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "streaming processing cancelled", filePath, nil),
"Streaming processing cancelled: %s", filePath,
)
if p.checkContextCancellation(ctx, filePath, "before streaming") {
return
default:
}
reader := p.createStreamReaderWithContext(ctx, filePath, relPath)
@@ -259,43 +318,47 @@ func (p *FileProcessor) processStreamingWithContext(ctx context.Context, filePat
return // Error already logged
}
// Try to send the result, but respect context cancellation
select {
case <-ctx.Done():
utils.LogErrorf(
utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "streaming processing cancelled before output", filePath, nil),
"Streaming processing cancelled before output: %s", filePath,
)
// Check context before sending output
if p.checkContextCancellation(ctx, filePath, "before streaming output") {
// Close the reader to prevent file descriptor leak
if closer, ok := reader.(io.Closer); ok {
_ = closer.Close()
}
return
case outCh <- WriteRequest{
}
outCh <- WriteRequest{
Path: relPath,
Content: "", // Empty since content is in Reader
IsStream: true,
Reader: reader,
}:
}
}
// createStreamReaderWithContext creates a reader that combines header and file content with context awareness.
func (p *FileProcessor) createStreamReaderWithContext(ctx context.Context, filePath, relPath string) io.Reader {
// Check context before opening file
select {
case <-ctx.Done():
if p.checkContextCancellation(ctx, filePath, "before opening file") {
return nil
default:
}
file, err := os.Open(filePath) // #nosec G304 - filePath is validated by walker
// #nosec G304 - filePath is validated by walker
file, err := os.Open(filePath)
if err != nil {
structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to open file for streaming").WithFilePath(filePath)
utils.LogErrorf(structErr, "Failed to open file for streaming %s", filePath)
structErr := gibidiutils.WrapError(
err, gibidiutils.ErrorTypeProcessing, gibidiutils.CodeProcessingFileRead,
"failed to open file for streaming",
).WithFilePath(filePath)
gibidiutils.LogErrorf(structErr, "Failed to open file for streaming %s", filePath)
return nil
}
// Note: file will be closed by the writer
header := p.formatHeader(relPath)
return io.MultiReader(header, file)
// Wrap in multiReaderCloser to ensure file is closed even on cancellation
return &multiReaderCloser{
reader: io.MultiReader(header, file),
closers: []io.Closer{file},
}
}
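// multiReaderCloser is constructed above but defined elsewhere in the package;
// a minimal sketch consistent with how it is used here:
//
//	type multiReaderCloser struct {
//		reader  io.Reader
//		closers []io.Closer
//	}
//
//	func (m *multiReaderCloser) Read(p []byte) (int, error) { return m.reader.Read(p) }
//
//	func (m *multiReaderCloser) Close() error {
//		var firstErr error
//		for _, c := range m.closers {
//			if err := c.Close(); err != nil && firstErr == nil {
//				firstErr = err // keep the first failure, close the rest anyway
//			}
//		}
//		return firstErr
//	}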
// formatContent formats the file content with header.

View File

@@ -51,9 +51,11 @@ func (rm *ResourceMonitor) CreateFileProcessingContext(parent context.Context) (
}
// CreateOverallProcessingContext creates a context with overall processing timeout.
func (rm *ResourceMonitor) CreateOverallProcessingContext(parent context.Context) (context.Context, context.CancelFunc) {
func (rm *ResourceMonitor) CreateOverallProcessingContext(
parent context.Context,
) (context.Context, context.CancelFunc) {
if !rm.enabled || rm.overallTimeout <= 0 {
return parent, func() {}
}
return context.WithTimeout(parent, rm.overallTimeout)
}
}
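// A usage sketch for the two context helpers (variable names illustrative,
// not from this diff):
//
//	ctx, cancel := rm.CreateOverallProcessingContext(context.Background())
//	defer cancel() // safe even on the disabled path, which returns a no-op func
//	fileCtx, fileCancel := rm.CreateFileProcessingContext(ctx)
//	defer fileCancel() // bounds a single file's processing time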

View File

@@ -35,7 +35,7 @@ func TestResourceMonitor_ConcurrentReadsLimit(t *testing.T) {
t.Errorf("Expected no error for second read slot, got %v", err)
}
// Third read slot should timeout (context deadline exceeded)
// Third read slot should time out (context deadline exceeded)
err = rm.AcquireReadSlot(ctx)
if err == nil {
t.Error("Expected timeout error for third read slot, got nil")
@@ -43,11 +43,11 @@ func TestResourceMonitor_ConcurrentReadsLimit(t *testing.T) {
// Release one slot and try again
rm.ReleaseReadSlot()
// Create new context for the next attempt
ctx2, cancel2 := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel2()
err = rm.AcquireReadSlot(ctx2)
if err != nil {
t.Errorf("Expected no error after releasing a slot, got %v", err)
@@ -92,4 +92,4 @@ func TestResourceMonitor_TimeoutContexts(t *testing.T) {
} else if time.Until(deadline) > 2*time.Second+100*time.Millisecond {
t.Error("Overall processing timeout appears to be too long")
}
}
}

View File

@@ -78,4 +78,4 @@ func TestResourceMonitor_Integration(t *testing.T) {
// Test resource limit logging
rm.LogResourceInfo()
}
}

View File

@@ -6,6 +6,8 @@ import (
"time"
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/gibidiutils"
)
// RecordFileProcessed records that a file has been successfully processed.
@@ -55,7 +57,7 @@ func (rm *ResourceMonitor) GetMetrics() ResourceMetrics {
ProcessingDuration: duration,
AverageFileSize: avgFileSize,
ProcessingRate: processingRate,
MemoryUsageMB: int64(m.Alloc) / 1024 / 1024,
MemoryUsageMB: gibidiutils.SafeUint64ToInt64WithDefault(m.Alloc, 0) / 1024 / 1024,
MaxMemoryUsageMB: int64(rm.hardMemoryLimitMB),
ViolationsDetected: violations,
DegradationActive: rm.degradationActive,
@@ -67,8 +69,13 @@ func (rm *ResourceMonitor) GetMetrics() ResourceMetrics {
// LogResourceInfo logs current resource limit configuration.
func (rm *ResourceMonitor) LogResourceInfo() {
if rm.enabled {
logrus.Infof("Resource limits enabled: maxFiles=%d, maxTotalSize=%dMB, fileTimeout=%ds, overallTimeout=%ds",
rm.maxFiles, rm.maxTotalSize/1024/1024, int(rm.fileProcessingTimeout.Seconds()), int(rm.overallTimeout.Seconds()))
logrus.Infof(
"Resource limits enabled: maxFiles=%d, maxTotalSize=%dMB, fileTimeout=%ds, overallTimeout=%ds",
rm.maxFiles,
rm.maxTotalSize/1024/1024,
int(rm.fileProcessingTimeout.Seconds()),
int(rm.overallTimeout.Seconds()),
)
logrus.Infof("Resource limits: maxConcurrentReads=%d, rateLimitFPS=%d, hardMemoryMB=%d",
rm.maxConcurrentReads, rm.rateLimitFilesPerSec, rm.hardMemoryLimitMB)
logrus.Infof("Resource features: gracefulDegradation=%v, monitoring=%v",
@@ -76,4 +83,4 @@ func (rm *ResourceMonitor) LogResourceInfo() {
} else {
logrus.Info("Resource limits disabled")
}
}
}
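// SafeUint64ToInt64WithDefault lives in gibidiutils and is not shown in this
// diff; the hazard it guards is that m.Alloc is a uint64, and values above
// math.MaxInt64 would wrap negative under a plain int64 cast. A sketch under
// that assumed contract:
//
//	func SafeUint64ToInt64WithDefault(v uint64, def int64) int64 {
//		if v > math.MaxInt64 {
//			return def // out of int64 range: fall back to the default
//		}
//		return int64(v)
//	}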

View File

@@ -46,4 +46,4 @@ func TestResourceMonitor_Metrics(t *testing.T) {
if !metrics.LastUpdated.After(time.Now().Add(-time.Second)) {
t.Error("Expected recent LastUpdated timestamp")
}
}
}

View File

@@ -33,4 +33,4 @@ func (rm *ResourceMonitor) rateLimiterRefill() {
// Channel is full, skip
}
}
}
}
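// Only the tail of rateLimiterRefill is visible above; the usual shape of a
// ticker-driven token refill, assuming rateLimiter is a *time.Ticker (its Stop
// call appears in Close below) and rateLimitTokens is a hypothetical buffered
// token channel:
//
//	func (rm *ResourceMonitor) rateLimiterRefill() {
//		for range rm.rateLimiter.C { // one refill attempt per tick
//			select {
//			case rm.rateLimitTokens <- struct{}{}: // add a token
//			default:
//				// Channel is full, skip
//			}
//		}
//	}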

View File

@@ -37,4 +37,4 @@ func TestResourceMonitor_RateLimiting(t *testing.T) {
if duration < 200*time.Millisecond {
t.Logf("Rate limiting may not be working as expected, took only %v", duration)
}
}
}

View File

@@ -19,4 +19,4 @@ func (rm *ResourceMonitor) Close() {
if rm.rateLimiter != nil {
rm.rateLimiter.Stop()
}
}
}

View File

@@ -100,9 +100,9 @@ func NewResourceMonitor() *ResourceMonitor {
}
rateLimitFull:
// Start rate limiter refill goroutine
// Start rate limiter refill goroutine
go rm.rateLimiterRefill()
}
return rm
}
}

View File

@@ -34,7 +34,7 @@ func TestResourceMonitor_NewResourceMonitor(t *testing.T) {
}
if rm.fileProcessingTimeout != time.Duration(config.DefaultFileProcessingTimeoutSec)*time.Second {
t.Errorf("Expected fileProcessingTimeout to be %v, got %v",
t.Errorf("Expected fileProcessingTimeout to be %v, got %v",
time.Duration(config.DefaultFileProcessingTimeoutSec)*time.Second, rm.fileProcessingTimeout)
}
@@ -71,4 +71,4 @@ func TestResourceMonitor_DisabledResourceLimits(t *testing.T) {
if err != nil {
t.Errorf("Expected no error when rate limiting disabled, got %v", err)
}
}
}

View File

@@ -7,7 +7,7 @@ import (
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/utils"
"github.com/ivuorinen/gibidify/gibidiutils"
)
// ValidateFileProcessing checks if a file can be processed based on resource limits.
@@ -21,9 +21,9 @@ func (rm *ResourceMonitor) ValidateFileProcessing(filePath string, fileSize int6
// Check if emergency stop is active
if rm.emergencyStopRequested {
return utils.NewStructuredError(
utils.ErrorTypeValidation,
utils.CodeResourceLimitMemory,
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeResourceLimitMemory,
"processing stopped due to emergency memory condition",
filePath,
map[string]interface{}{
@@ -35,9 +35,9 @@ func (rm *ResourceMonitor) ValidateFileProcessing(filePath string, fileSize int6
// Check file count limit
currentFiles := atomic.LoadInt64(&rm.filesProcessed)
if int(currentFiles) >= rm.maxFiles {
return utils.NewStructuredError(
utils.ErrorTypeValidation,
utils.CodeResourceLimitFiles,
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeResourceLimitFiles,
"maximum file count limit exceeded",
filePath,
map[string]interface{}{
@@ -50,9 +50,9 @@ func (rm *ResourceMonitor) ValidateFileProcessing(filePath string, fileSize int6
// Check total size limit
currentTotalSize := atomic.LoadInt64(&rm.totalSizeProcessed)
if currentTotalSize+fileSize > rm.maxTotalSize {
return utils.NewStructuredError(
utils.ErrorTypeValidation,
utils.CodeResourceLimitTotalSize,
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeResourceLimitTotalSize,
"maximum total size limit would be exceeded",
filePath,
map[string]interface{}{
@@ -65,9 +65,9 @@ func (rm *ResourceMonitor) ValidateFileProcessing(filePath string, fileSize int6
// Check overall timeout
if time.Since(rm.startTime) > rm.overallTimeout {
return utils.NewStructuredError(
utils.ErrorTypeValidation,
utils.CodeResourceLimitTimeout,
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeResourceLimitTimeout,
"overall processing timeout exceeded",
filePath,
map[string]interface{}{
@@ -88,7 +88,7 @@ func (rm *ResourceMonitor) CheckHardMemoryLimit() error {
var m runtime.MemStats
runtime.ReadMemStats(&m)
currentMemory := int64(m.Alloc)
currentMemory := gibidiutils.SafeUint64ToInt64WithDefault(m.Alloc, 0)
if currentMemory > rm.hardMemoryLimitBytes {
rm.mu.Lock()
@@ -108,14 +108,14 @@ func (rm *ResourceMonitor) CheckHardMemoryLimit() error {
// Check again after GC
runtime.ReadMemStats(&m)
currentMemory = int64(m.Alloc)
currentMemory = gibidiutils.SafeUint64ToInt64WithDefault(m.Alloc, 0)
if currentMemory > rm.hardMemoryLimitBytes {
// Still over limit, activate emergency stop
rm.emergencyStopRequested = true
return utils.NewStructuredError(
utils.ErrorTypeValidation,
utils.CodeResourceLimitMemory,
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeResourceLimitMemory,
"hard memory limit exceeded, emergency stop activated",
"",
map[string]interface{}{
@@ -124,16 +124,15 @@ func (rm *ResourceMonitor) CheckHardMemoryLimit() error {
"emergency_stop": true,
},
)
} else {
// Memory freed by GC, continue with degradation
rm.degradationActive = true
logrus.Info("Memory freed by garbage collection, continuing with degradation mode")
}
// Memory freed by GC, continue with degradation
rm.degradationActive = true
logrus.Info("Memory freed by garbage collection, continuing with degradation mode")
} else {
// No graceful degradation, hard stop
return utils.NewStructuredError(
utils.ErrorTypeValidation,
utils.CodeResourceLimitMemory,
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeResourceLimitMemory,
"hard memory limit exceeded",
"",
map[string]interface{}{
@@ -145,4 +144,4 @@ func (rm *ResourceMonitor) CheckHardMemoryLimit() error {
}
return nil
}
}
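// Condensed, the flow above is: read Alloc; if over the hard limit and graceful
// degradation is enabled, force a collection and re-check before choosing
// between degradation and emergency stop. A summary sketch, with field names
// as implied by the surrounding hunks:
//
//	runtime.GC()             // attempt to reclaim memory first
//	runtime.ReadMemStats(&m) // re-read after collection
//	currentMemory = gibidiutils.SafeUint64ToInt64WithDefault(m.Alloc, 0)
//	if currentMemory > rm.hardMemoryLimitBytes {
//		rm.emergencyStopRequested = true // still over: refuse further files
//	} else {
//		rm.degradationActive = true // freed enough: continue degraded
//	}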

View File

@@ -1,12 +1,13 @@
package fileproc
import (
"errors"
"testing"
"github.com/spf13/viper"
"github.com/ivuorinen/gibidify/gibidiutils"
"github.com/ivuorinen/gibidify/testutil"
"github.com/ivuorinen/gibidify/utils"
)
func TestResourceMonitor_FileCountLimit(t *testing.T) {
@@ -40,11 +41,12 @@ func TestResourceMonitor_FileCountLimit(t *testing.T) {
}
// Verify it's the correct error type
structErr, ok := err.(*utils.StructuredError)
var structErr *gibidiutils.StructuredError
ok := errors.As(err, &structErr)
if !ok {
t.Errorf("Expected StructuredError, got %T", err)
} else if structErr.Code != utils.CodeResourceLimitFiles {
t.Errorf("Expected error code %s, got %s", utils.CodeResourceLimitFiles, structErr.Code)
} else if structErr.Code != gibidiutils.CodeResourceLimitFiles {
t.Errorf("Expected error code %s, got %s", gibidiutils.CodeResourceLimitFiles, structErr.Code)
}
}
@@ -79,10 +81,11 @@ func TestResourceMonitor_TotalSizeLimit(t *testing.T) {
}
// Verify it's the correct error type
structErr, ok := err.(*utils.StructuredError)
var structErr *gibidiutils.StructuredError
ok := errors.As(err, &structErr)
if !ok {
t.Errorf("Expected StructuredError, got %T", err)
} else if structErr.Code != utils.CodeResourceLimitTotalSize {
t.Errorf("Expected error code %s, got %s", utils.CodeResourceLimitTotalSize, structErr.Code)
} else if structErr.Code != gibidiutils.CodeResourceLimitTotalSize {
t.Errorf("Expected error code %s, got %s", gibidiutils.CodeResourceLimitTotalSize, structErr.Code)
}
}
}
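// The move from a direct type assertion to errors.As above is not cosmetic:
// errors.As walks the wrap chain, so the check keeps passing even if the
// monitor ever returns its StructuredError wrapped (illustrative example):
//
//	var structErr *gibidiutils.StructuredError
//	wrapped := fmt.Errorf("processing failed: %w", err)
//	if errors.As(wrapped, &structErr) {
//		_ = structErr.Code // still reachable through the wrapper
//	}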

View File

@@ -0,0 +1,12 @@
package fileproc
// Test constants to avoid duplication in test files.
// These constants are used across multiple test files in the fileproc package.
const (
// Backpressure configuration keys
testBackpressureEnabled = "backpressure.enabled"
testBackpressureMaxMemory = "backpressure.maxMemoryUsage"
testBackpressureMemoryCheck = "backpressure.memoryCheckInterval"
testBackpressureMaxFiles = "backpressure.maxPendingFiles"
testBackpressureMaxWrites = "backpressure.maxPendingWrites"
)
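// A sketch of how these keys are typically exercised in the package's tests
// (values illustrative):
//
//	viper.Set(testBackpressureEnabled, true)
//	viper.Set(testBackpressureMaxMemory, 1024*1024) // 1 MB ceiling
//	viper.Set(testBackpressureMemoryCheck, 10)      // check every 10 files
//	defer viper.Reset()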

View File

@@ -5,7 +5,7 @@ import (
"os"
"path/filepath"
"github.com/ivuorinen/gibidify/utils"
"github.com/ivuorinen/gibidify/gibidiutils"
)
// Walker defines an interface for scanning directories.
@@ -30,9 +30,12 @@ func NewProdWalker() *ProdWalker {
// Walk scans the given root directory recursively and returns a slice of file paths
// that are not ignored based on .gitignore/.ignore files, the configuration, or the default binary/image filter.
func (w *ProdWalker) Walk(root string) ([]string, error) {
absRoot, err := utils.GetAbsolutePath(root)
absRoot, err := gibidiutils.GetAbsolutePath(root)
if err != nil {
return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSPathResolution, "failed to resolve root path").WithFilePath(root)
return nil, gibidiutils.WrapError(
err, gibidiutils.ErrorTypeFileSystem, gibidiutils.CodeFSPathResolution,
"failed to resolve root path",
).WithFilePath(root)
}
return w.walkDir(absRoot, []ignoreRule{})
}
@@ -47,7 +50,10 @@ func (w *ProdWalker) walkDir(currentDir string, parentRules []ignoreRule) ([]str
entries, err := os.ReadDir(currentDir)
if err != nil {
return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to read directory").WithFilePath(currentDir)
return nil, gibidiutils.WrapError(
err, gibidiutils.ErrorTypeFileSystem, gibidiutils.CodeFSAccess,
"failed to read directory",
).WithFilePath(currentDir)
}
rules := loadIgnoreRules(currentDir, parentRules)
@@ -63,7 +69,10 @@ func (w *ProdWalker) walkDir(currentDir string, parentRules []ignoreRule) ([]str
if entry.IsDir() {
subFiles, err := w.walkDir(fullPath, rules)
if err != nil {
return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingTraversal, "failed to traverse subdirectory").WithFilePath(fullPath)
return nil, gibidiutils.WrapError(
err, gibidiutils.ErrorTypeProcessing, gibidiutils.CodeProcessingTraversal,
"failed to traverse subdirectory",
).WithFilePath(fullPath)
}
results = append(results, subFiles...)
} else {

View File

@@ -61,6 +61,8 @@ func TestProdWalkerBinaryCheck(t *testing.T) {
// Reset FileTypeRegistry to ensure clean state
fileproc.ResetRegistryForTesting()
// Ensure cleanup runs even if test fails
t.Cleanup(fileproc.ResetRegistryForTesting)
// Run walker
w := fileproc.NewProdWalker()

View File

@@ -5,30 +5,100 @@ import (
"fmt"
"os"
"github.com/ivuorinen/gibidify/utils"
"github.com/ivuorinen/gibidify/gibidiutils"
)
// StartWriter writes the output in the specified format with memory optimization.
func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format, prefix, suffix string) {
switch format {
case "markdown":
startMarkdownWriter(outFile, writeCh, done, prefix, suffix)
case "json":
startJSONWriter(outFile, writeCh, done, prefix, suffix)
case "yaml":
startYAMLWriter(outFile, writeCh, done, prefix, suffix)
// WriterConfig holds configuration for the writer.
type WriterConfig struct {
Format string
Prefix string
Suffix string
}
// Validate checks if the WriterConfig is valid.
func (c WriterConfig) Validate() error {
if c.Format == "" {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationFormat,
"format cannot be empty",
"",
nil,
)
}
switch c.Format {
case "markdown", "json", "yaml":
return nil
default:
context := map[string]interface{}{
"format": format,
context := map[string]any{
"format": c.Format,
}
err := utils.NewStructuredError(
utils.ErrorTypeValidation,
utils.CodeValidationFormat,
fmt.Sprintf("unsupported format: %s", format),
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationFormat,
fmt.Sprintf("unsupported format: %s", c.Format),
"",
context,
)
utils.LogError("Failed to encode output", err)
}
}
// StartWriter writes the output in the specified format with memory optimization.
func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, config WriterConfig) {
// Validate config
if err := config.Validate(); err != nil {
gibidiutils.LogError("Invalid writer configuration", err)
close(done)
return
}
// Validate outFile is not nil
if outFile == nil {
err := gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeIO,
gibidiutils.CodeIOFileWrite,
"output file is nil",
"",
nil,
)
gibidiutils.LogError("Failed to write output", err)
close(done)
return
}
// Validate outFile is accessible
if _, err := outFile.Stat(); err != nil {
structErr := gibidiutils.WrapError(
err,
gibidiutils.ErrorTypeIO,
gibidiutils.CodeIOFileWrite,
"failed to stat output file",
)
gibidiutils.LogError("Failed to validate output file", structErr)
close(done)
return
}
switch config.Format {
case "markdown":
startMarkdownWriter(outFile, writeCh, done, config.Prefix, config.Suffix)
case "json":
startJSONWriter(outFile, writeCh, done, config.Prefix, config.Suffix)
case "yaml":
startYAMLWriter(outFile, writeCh, done, config.Prefix, config.Suffix)
default:
context := map[string]interface{}{
"format": config.Format,
}
err := gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationFormat,
fmt.Sprintf("unsupported format: %s", config.Format),
"",
context,
)
gibidiutils.LogError("Failed to encode output", err)
close(done)
}
}

View File

@@ -68,7 +68,11 @@ func runWriterTest(t *testing.T, format string) []byte {
wg.Add(1)
go func() {
defer wg.Done()
fileproc.StartWriter(outFile, writeCh, doneCh, format, "PREFIX", "SUFFIX")
fileproc.StartWriter(outFile, writeCh, doneCh, fileproc.WriterConfig{
Format: format,
Prefix: "PREFIX",
Suffix: "SUFFIX",
})
}()
// Wait until writer signals completion

View File

@@ -5,9 +5,10 @@ import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/ivuorinen/gibidify/utils"
"github.com/ivuorinen/gibidify/gibidiutils"
)
// YAMLWriter handles YAML format output with streaming support.
@@ -20,11 +21,151 @@ func NewYAMLWriter(outFile *os.File) *YAMLWriter {
return &YAMLWriter{outFile: outFile}
}
const (
maxPathLength = 4096 // Maximum total path length
maxFilenameLength = 255 // Maximum individual filename component length
)
// validatePathComponents validates individual path components for security issues.
func validatePathComponents(trimmed, cleaned string, components []string) error {
for i, component := range components {
// Reject path components that are exactly ".." (path traversal)
if component == ".." {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationPath,
"path traversal not allowed",
trimmed,
map[string]any{
"path": trimmed,
"cleaned": cleaned,
"invalid_component": component,
"component_index": i,
},
)
}
// Reject empty components (e.g., from "foo//bar")
if component == "" && i > 0 && i < len(components)-1 {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationPath,
"path contains empty component",
trimmed,
map[string]any{
"path": trimmed,
"cleaned": cleaned,
"component_index": i,
},
)
}
// Enforce maximum filename length for each component
if len(component) > maxFilenameLength {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationPath,
"path component exceeds maximum length",
trimmed,
map[string]any{
"component": component,
"component_length": len(component),
"max_length": maxFilenameLength,
"component_index": i,
},
)
}
}
return nil
}
// validatePath validates a file path for safe output; it inspects but never modifies the path.
// It rejects absolute paths, path traversal attempts, empty paths, and overly long paths or components.
func validatePath(path string) error {
// Reject empty paths
trimmed := strings.TrimSpace(path)
if trimmed == "" {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationRequired,
"file path cannot be empty",
"",
nil,
)
}
// Enforce maximum path length to prevent resource abuse
if len(trimmed) > maxPathLength {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationPath,
"path exceeds maximum length",
trimmed,
map[string]any{
"path_length": len(trimmed),
"max_length": maxPathLength,
},
)
}
// Reject absolute paths
if filepath.IsAbs(trimmed) {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationPath,
"absolute paths are not allowed",
trimmed,
map[string]any{"path": trimmed},
)
}
// Validate original trimmed path components before cleaning
origComponents := strings.Split(filepath.ToSlash(trimmed), "/")
for _, comp := range origComponents {
if comp == "" || comp == "." || comp == ".." {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationPath,
"invalid or traversal path component in original path",
trimmed,
map[string]any{"path": trimmed, "component": comp},
)
}
}
// Clean the path to normalize it
cleaned := filepath.Clean(trimmed)
// After cleaning, ensure it's still relative and doesn't start with /
if filepath.IsAbs(cleaned) || strings.HasPrefix(cleaned, "/") {
return gibidiutils.NewStructuredError(
gibidiutils.ErrorTypeValidation,
gibidiutils.CodeValidationPath,
"path must be relative",
trimmed,
map[string]any{"path": trimmed, "cleaned": cleaned},
)
}
// Split into components and validate each one
// Use ToSlash to normalize for cross-platform validation
components := strings.Split(filepath.ToSlash(cleaned), "/")
return validatePathComponents(trimmed, cleaned, components)
}
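// Illustrative inputs and outcomes for validatePath (examples chosen here,
// not lifted from the test suite):
//
//	_ = validatePath("src/main.go")   // nil: clean relative path
//	_ = validatePath("")              // error: empty path
//	_ = validatePath("/etc/passwd")   // error: absolute path
//	_ = validatePath("../secret.txt") // error: traversal component
//	_ = validatePath("a//b.txt")      // error: empty component
//	_ = validatePath(strings.Repeat("x", maxPathLength+1)) // error: too long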
// Start writes the YAML header.
func (w *YAMLWriter) Start(prefix, suffix string) error {
// Write YAML header
if _, err := fmt.Fprintf(w.outFile, "prefix: %s\nsuffix: %s\nfiles:\n", yamlQuoteString(prefix), yamlQuoteString(suffix)); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML header")
if _, err := fmt.Fprintf(
w.outFile, "prefix: %s\nsuffix: %s\nfiles:\n",
gibidiutils.EscapeForYAML(prefix), gibidiutils.EscapeForYAML(suffix),
); err != nil {
return gibidiutils.WrapError(
err,
gibidiutils.ErrorTypeIO,
gibidiutils.CodeIOWrite,
"failed to write YAML header",
)
}
return nil
}
@@ -44,13 +185,32 @@ func (w *YAMLWriter) Close() error {
// writeStreaming writes a large file as YAML in streaming chunks.
func (w *YAMLWriter) writeStreaming(req WriteRequest) error {
defer w.closeReader(req.Reader, req.Path)
// Validate path before using it
if err := validatePath(req.Path); err != nil {
return err
}
// Check for nil reader
if req.Reader == nil {
return gibidiutils.WrapError(
nil, gibidiutils.ErrorTypeValidation, gibidiutils.CodeValidationRequired,
"nil reader in write request",
).WithFilePath(req.Path)
}
defer gibidiutils.SafeCloseReader(req.Reader, req.Path)
language := detectLanguage(req.Path)
// Write YAML file entry start
if _, err := fmt.Fprintf(w.outFile, " - path: %s\n language: %s\n content: |\n", yamlQuoteString(req.Path), language); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML file start").WithFilePath(req.Path)
if _, err := fmt.Fprintf(
w.outFile, " - path: %s\n language: %s\n content: |\n",
gibidiutils.EscapeForYAML(req.Path), language,
); err != nil {
return gibidiutils.WrapError(
err, gibidiutils.ErrorTypeIO, gibidiutils.CodeIOWrite,
"failed to write YAML file start",
).WithFilePath(req.Path)
}
// Stream content with YAML indentation
@@ -59,6 +219,11 @@ func (w *YAMLWriter) writeStreaming(req WriteRequest) error {
// writeInline writes a small file directly as YAML.
func (w *YAMLWriter) writeInline(req WriteRequest) error {
// Validate path before using it
if err := validatePath(req.Path); err != nil {
return err
}
language := detectLanguage(req.Path)
fileData := FileData{
Path: req.Path,
@@ -67,15 +232,24 @@ func (w *YAMLWriter) writeInline(req WriteRequest) error {
}
// Write YAML entry
if _, err := fmt.Fprintf(w.outFile, " - path: %s\n language: %s\n content: |\n", yamlQuoteString(fileData.Path), fileData.Language); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML entry start").WithFilePath(req.Path)
if _, err := fmt.Fprintf(
w.outFile, " - path: %s\n language: %s\n content: |\n",
gibidiutils.EscapeForYAML(fileData.Path), fileData.Language,
); err != nil {
return gibidiutils.WrapError(
err, gibidiutils.ErrorTypeIO, gibidiutils.CodeIOWrite,
"failed to write YAML entry start",
).WithFilePath(req.Path)
}
// Write indented content
lines := strings.Split(fileData.Content, "\n")
for _, line := range lines {
if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML content line").WithFilePath(req.Path)
return gibidiutils.WrapError(
err, gibidiutils.ErrorTypeIO, gibidiutils.CodeIOWrite,
"failed to write YAML content line",
).WithFilePath(req.Path)
}
}
@@ -85,43 +259,29 @@ func (w *YAMLWriter) writeInline(req WriteRequest) error {
// streamYAMLContent streams content with YAML indentation.
func (w *YAMLWriter) streamYAMLContent(reader io.Reader, path string) error {
scanner := bufio.NewScanner(reader)
// Increase buffer size to handle long lines (up to 10MB per line)
buf := make([]byte, 0, 64*1024)
scanner.Buffer(buf, 10*1024*1024)
for scanner.Scan() {
line := scanner.Text()
if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML line").WithFilePath(path)
return gibidiutils.WrapError(
err, gibidiutils.ErrorTypeIO, gibidiutils.CodeIOWrite,
"failed to write YAML line",
).WithFilePath(path)
}
}
if err := scanner.Err(); err != nil {
return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to scan YAML content").WithFilePath(path)
return gibidiutils.WrapError(
err, gibidiutils.ErrorTypeIO, gibidiutils.CodeIOFileRead,
"failed to scan YAML content",
).WithFilePath(path)
}
return nil
}
// closeReader safely closes a reader if it implements io.Closer.
func (w *YAMLWriter) closeReader(reader io.Reader, path string) {
if closer, ok := reader.(io.Closer); ok {
if err := closer.Close(); err != nil {
utils.LogError(
"Failed to close file reader",
utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
)
}
}
}
// yamlQuoteString quotes a string for YAML output if needed.
func yamlQuoteString(s string) string {
if s == "" {
return `""`
}
// Simple YAML quoting - use double quotes if string contains special characters
if strings.ContainsAny(s, "\n\r\t:\"'\\") {
return fmt.Sprintf(`"%s"`, strings.ReplaceAll(s, `"`, `\"`))
}
return s
}
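// EscapeForYAML replaces the removed yamlQuoteString and lives in gibidiutils;
// its body is not part of this diff. A hypothetical sketch covering at least
// what the old helper did (the real implementation may escape more):
//
//	func EscapeForYAML(s string) string {
//		if s == "" {
//			return `""`
//		}
//		if strings.ContainsAny(s, "\n\r\t:\"'\\#") {
//			return strconv.Quote(s) // double-quote with escapes
//		}
//		return s
//	}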
// startYAMLWriter handles YAML format output with streaming support.
func startYAMLWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
defer close(done)
@@ -130,19 +290,19 @@ func startYAMLWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<-
// Start writing
if err := writer.Start(prefix, suffix); err != nil {
utils.LogError("Failed to write YAML header", err)
gibidiutils.LogError("Failed to write YAML header", err)
return
}
// Process files
for req := range writeCh {
if err := writer.WriteFile(req); err != nil {
utils.LogError("Failed to write YAML file", err)
gibidiutils.LogError("Failed to write YAML file", err)
}
}
// Close writer
if err := writer.Close(); err != nil {
utils.LogError("Failed to write YAML end", err)
gibidiutils.LogError("Failed to write YAML end", err)
}
}