From b369d317b1eb3b356bc1dba579858e6fef6cb8c6 Mon Sep 17 00:00:00 2001 From: Ismo Vuorinen Date: Sat, 19 Jul 2025 01:37:52 +0300 Subject: [PATCH] feat(security): improve security features, fixes --- .checkmake | 8 + .github/workflows/security.yml | 142 ++++++++++ .golangci.yml | 10 +- .yamllint | 40 +++ Makefile | 65 +++-- cli/flags.go | 11 + cli/processor.go | 138 ++++++++-- config.example.yaml | 14 +- config.yaml.example | 79 ++++++ config/config.go | 214 ++++++++++++++- fileproc/processor.go | 194 ++++++++++++-- fileproc/resource_monitor.go | 423 +++++++++++++++++++++++++++++ fileproc/resource_monitor_test.go | 377 ++++++++++++++++++++++++++ fileproc/writer.go | 7 +- scripts/help.txt | 25 ++ scripts/lint.sh | 14 + scripts/security-scan.sh | 426 ++++++++++++++++++++++++++++++ utils/errors.go | 30 ++- utils/paths.go | 141 ++++++++++ 19 files changed, 2266 insertions(+), 92 deletions(-) create mode 100644 .checkmake create mode 100644 .github/workflows/security.yml create mode 100644 .yamllint create mode 100644 config.yaml.example create mode 100644 fileproc/resource_monitor.go create mode 100644 fileproc/resource_monitor_test.go create mode 100644 scripts/help.txt create mode 100755 scripts/lint.sh create mode 100755 scripts/security-scan.sh diff --git a/.checkmake b/.checkmake new file mode 100644 index 0000000..f050ebd --- /dev/null +++ b/.checkmake @@ -0,0 +1,8 @@ +# checkmake configuration +# See: https://github.com/mrtazz/checkmake#configuration + +[rules.timestampexpansion] +disabled = true + +[rules.maxbodylength] +disabled = true \ No newline at end of file diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml new file mode 100644 index 0000000..045bb50 --- /dev/null +++ b/.github/workflows/security.yml @@ -0,0 +1,142 @@ +name: Security Scan + +on: + push: + branches: [main, develop] + pull_request: + branches: [main, develop] + schedule: + # Run security scan weekly on Sundays at 00:00 UTC + - cron: '0 0 * * 0' + +permissions: + security-events: write + contents: read + actions: read + +jobs: + security: + name: Security Analysis + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: '1.23' + + - name: Cache Go modules + uses: actions/cache@v4 + with: + path: | + ~/.cache/go-build + ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go- + + # Security Scanning with gosec + - name: Run gosec Security Scanner + uses: securecodewarrior/github-action-gosec@master + with: + args: '-fmt sarif -out gosec-results.sarif ./...' + + - name: Upload gosec results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v3 + if: always() + with: + sarif_file: gosec-results.sarif + + # Dependency Vulnerability Scanning + - name: Run govulncheck + run: | + go install golang.org/x/vuln/cmd/govulncheck@latest + govulncheck -json ./... > govulncheck-results.json || true + + - name: Parse govulncheck results + run: | + if [ -s govulncheck-results.json ]; then + echo "::warning::Vulnerability check completed. Check govulncheck-results.json for details." + if grep -q '"finding"' govulncheck-results.json; then + echo "::error::Vulnerabilities found in dependencies!" 
+ cat govulncheck-results.json + exit 1 + fi + fi + + # Additional Security Linting + - name: Run security-focused golangci-lint + run: | + go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest + golangci-lint run --enable=gosec,gocritic,bodyclose,rowserrcheck,misspell,unconvert,unparam,unused \ + --timeout=5m + + # Makefile Linting + - name: Run checkmake on Makefile + run: | + go install github.com/mrtazz/checkmake/cmd/checkmake@latest + checkmake --config=.checkmake Makefile + + # Shell Script Formatting Check + - name: Check shell script formatting + run: | + go install mvdan.cc/sh/v3/cmd/shfmt@latest + shfmt -d . + + # YAML Linting + - name: Run YAML linting + run: | + go install github.com/excilsploft/yamllint@latest + yamllint -c .yamllint . + + # Secrets Detection (basic patterns) + - name: Run secrets detection + run: | + echo "Scanning for potential secrets..." + # Look for common secret patterns + git log --all --full-history -- . | grep -i -E "(password|secret|key|token|api_key)" || true + find . -type f -name "*.go" -exec grep -H -i -E "(password|secret|key|token|api_key)\s*[:=]" {} \; || true + + # Check for hardcoded IPs and URLs + - name: Check for hardcoded network addresses + run: | + echo "Scanning for hardcoded network addresses..." + find . -type f -name "*.go" -exec grep -H -E "([0-9]{1,3}\.){3}[0-9]{1,3}" {} \; || true + find . -type f -name "*.go" -exec grep -H -E "https?://[^/\s]+" {} \; | \ + grep -v "example.com|localhost|127.0.0.1" || true + + # Docker Security (if Dockerfile exists) + - name: Run Docker security scan + if: hashFiles('Dockerfile') != '' + run: | + docker run --rm -v "$PWD":/workspace \ + aquasec/trivy:latest fs --security-checks vuln,config /workspace/Dockerfile || true + + # SAST with CodeQL (if available) + - name: Initialize CodeQL + if: github.event_name != 'schedule' + uses: github/codeql-action/init@v3 + with: + languages: go + + - name: Autobuild + if: github.event_name != 'schedule' + uses: github/codeql-action/autobuild@v3 + + - name: Perform CodeQL Analysis + if: github.event_name != 'schedule' + uses: github/codeql-action/analyze@v3 + + # Upload artifacts for review + - name: Upload security scan results + uses: actions/upload-artifact@v4 + if: always() + with: + name: security-scan-results + path: | + gosec-results.sarif + govulncheck-results.json + retention-days: 30 diff --git a/.golangci.yml b/.golangci.yml index 7a4d2f0..f13663c 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -143,7 +143,7 @@ linters-settings: # - insert_final_newline = true (enforced by gofumpt) # - trim_trailing_whitespace = true (enforced by whitespace linter) # - indent_style = tab, tab_width = 2 (enforced by gofumpt and lll) - + whitespace: multi-if: false # EditorConfig: trim trailing whitespace multi-func: false # EditorConfig: trim trailing whitespace @@ -198,18 +198,18 @@ issues: max-issues-per-linter: 0 max-same-issues: 0 uniq-by-line: true - + exclude-dirs: - vendor - third_party - testdata - examples - .git - + exclude-files: - ".*\\.pb\\.go$" - ".*\\.gen\\.go$" - + exclude-rules: - path: _test\.go linters: @@ -253,4 +253,4 @@ issues: severity: default-severity: error - case-sensitive: false \ No newline at end of file + case-sensitive: false diff --git a/.yamllint b/.yamllint new file mode 100644 index 0000000..88a729f --- /dev/null +++ b/.yamllint @@ -0,0 +1,40 @@ +# yamllint configuration +# See: https://yamllint.readthedocs.io/en/stable/configuration.html + +extends: default + +# Ignore generated output files +ignore: | + 
gibidify.yaml + gibidify.yml + output.yaml + output.yml + +rules: + # Allow longer lines for URLs and commands in GitHub Actions + line-length: + max: 120 + level: warning + + # Allow 2-space indentation to match EditorConfig + indentation: + spaces: 2 + indent-sequences: true + check-multi-line-strings: false + + # Allow truthy values like 'on' in GitHub Actions + truthy: + allowed-values: ['true', 'false', 'on', 'off'] + check-keys: false + + # Allow empty values in YAML + empty-values: + forbid-in-block-mappings: false + forbid-in-flow-mappings: false + + # Relax comments formatting + comments: + min-spaces-from-content: 1 + + # Allow document start marker to be optional + document-start: disable diff --git a/Makefile b/Makefile index f9a43b7..7241ae6 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help install-tools lint lint-fix lint-verbose test coverage build clean all build-benchmark benchmark benchmark-collection benchmark-processing benchmark-concurrency benchmark-format +.PHONY: help install-tools lint lint-fix lint-verbose test coverage build clean all build-benchmark benchmark benchmark-collection benchmark-processing benchmark-concurrency benchmark-format security security-full vuln-check check-all dev-setup # Default target shows help .DEFAULT_GOAL := help @@ -6,28 +6,9 @@ # All target runs full workflow all: lint test build -# Help target +# Help target help: - @echo "Available targets:" - @echo " install-tools - Install required linting and development tools" - @echo " lint - Run all linters" - @echo " lint-fix - Run linters with auto-fix enabled" - @echo " lint-verbose - Run linters with verbose output" - @echo " test - Run tests" - @echo " coverage - Run tests with coverage" - @echo " build - Build the application" - @echo " clean - Clean build artifacts" - @echo " all - Run lint, test, and build" - @echo "" - @echo "Benchmark targets:" - @echo " build-benchmark - Build the benchmark binary" - @echo " benchmark - Run all benchmarks" - @echo " benchmark-collection - Run file collection benchmarks" - @echo " benchmark-processing - Run file processing benchmarks" - @echo " benchmark-concurrency - Run concurrency benchmarks" - @echo " benchmark-format - Run format benchmarks" - @echo "" - @echo "Run 'make ' to execute a specific target." + @cat scripts/help.txt # Install required tools install-tools: @@ -43,12 +24,17 @@ install-tools: @go install github.com/securego/gosec/v2/cmd/gosec@latest @echo "Installing gocyclo..." @go install github.com/fzipp/gocyclo/cmd/gocyclo@latest + @echo "Installing checkmake..." + @go install github.com/mrtazz/checkmake/cmd/checkmake@latest + @echo "Installing shfmt..." + @go install mvdan.cc/sh/v3/cmd/shfmt@latest + @echo "Installing yamllint (Go-based)..." + @go install github.com/excilsploft/yamllint@latest @echo "All tools installed successfully!" # Run linters lint: - @echo "Running golangci-lint..." - @golangci-lint run ./... + @./scripts/lint.sh # Run linters with auto-fix lint-fix: @@ -60,14 +46,27 @@ lint-fix: @go fmt ./... @echo "Running go mod tidy..." @go mod tidy + @echo "Running shfmt formatting..." + @shfmt -w -i 2 -ci . @echo "Running golangci-lint with --fix..." @golangci-lint run --fix ./... @echo "Auto-fix completed. Running final lint check..." @golangci-lint run ./... + @echo "Running checkmake..." + @checkmake --config=.checkmake Makefile + @echo "Running yamllint..." + @yamllint -c .yamllint . # Run linters with verbose output lint-verbose: + @echo "Running golangci-lint (verbose)..." 
@golangci-lint run -v ./... + @echo "Running checkmake (verbose)..." + @checkmake --config=.checkmake --format="{{.Line}}:{{.Rule}}:{{.Violation}}" Makefile + @echo "Running shfmt check (verbose)..." + @shfmt -d . + @echo "Running yamllint (verbose)..." + @yamllint -c .yamllint -f parsable . # Run tests test: @@ -129,4 +128,20 @@ benchmark-concurrency: build-benchmark benchmark-format: build-benchmark @echo "Running format benchmarks..." - @./gibidify-benchmark -type=format \ No newline at end of file + @./gibidify-benchmark -type=format + +# Security targets +security: + @echo "Running comprehensive security scan..." + @./scripts/security-scan.sh + +security-full: + @echo "Running full security analysis..." + @./scripts/security-scan.sh + @echo "Running additional security checks..." + @golangci-lint run --enable-all --disable=depguard,exhaustruct,ireturn,varnamelen,wrapcheck --timeout=10m + +vuln-check: + @echo "Checking for dependency vulnerabilities..." + @go install golang.org/x/vuln/cmd/govulncheck@latest + @govulncheck ./... \ No newline at end of file diff --git a/cli/flags.go b/cli/flags.go index d18ab3e..4ca831d 100644 --- a/cli/flags.go +++ b/cli/flags.go @@ -66,6 +66,11 @@ func (f *Flags) validate() error { return NewCLIMissingSourceError() } + // Validate source path for security + if err := utils.ValidateSourcePath(f.SourceDir); err != nil { + return err + } + // Validate output format if err := config.ValidateOutputFormat(f.Format); err != nil { return err @@ -89,5 +94,11 @@ func (f *Flags) setDefaultDestination() error { baseName := utils.GetBaseName(absRoot) f.Destination = baseName + "." + f.Format } + + // Validate destination path for security + if err := utils.ValidateDestinationPath(f.Destination); err != nil { + return err + } + return nil } diff --git a/cli/processor.go b/cli/processor.go index 5c2cd1d..1bc3d67 100644 --- a/cli/processor.go +++ b/cli/processor.go @@ -2,6 +2,7 @@ package cli import ( "context" + "fmt" "os" "sync" @@ -14,9 +15,10 @@ import ( // Processor handles the main file processing logic. type Processor struct { - flags *Flags - backpressure *fileproc.BackpressureManager - ui *UIManager + flags *Flags + backpressure *fileproc.BackpressureManager + resourceMonitor *fileproc.ResourceMonitor + ui *UIManager } // NewProcessor creates a new processor with the given flags. @@ -28,14 +30,19 @@ func NewProcessor(flags *Flags) *Processor { ui.SetProgressOutput(!flags.NoProgress) return &Processor{ - flags: flags, - backpressure: fileproc.NewBackpressureManager(), - ui: ui, + flags: flags, + backpressure: fileproc.NewBackpressureManager(), + resourceMonitor: fileproc.NewResourceMonitor(), + ui: ui, } } // Process executes the main file processing workflow. 
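+//
+// A minimal invocation sketch (hypothetical caller, for illustration only;
+// assumes flags have already been parsed and validated):
+//
+//	p := NewProcessor(flags)
+//	if err := p.Process(context.Background()); err != nil {
+//		utils.LogError("processing failed", err)
+//	}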
 func (p *Processor) Process(ctx context.Context) error {
+	// Create overall processing context with timeout
+	overallCtx, overallCancel := p.resourceMonitor.CreateOverallProcessingContext(ctx)
+	defer overallCancel()
+
 	// Configure file type registry
 	p.configureFileTypes()
 
@@ -46,6 +53,10 @@ func (p *Processor) Process(ctx context.Context) error {
 	p.ui.PrintInfo("Destination: %s", p.flags.Destination)
 	p.ui.PrintInfo("Workers: %d", p.flags.Concurrency)
 
+	// Log resource monitoring configuration
+	p.resourceMonitor.LogResourceInfo()
+	p.backpressure.LogBackpressureInfo()
+
 	// Collect files with progress indication
 	p.ui.PrintInfo("📁 Collecting files...")
 	files, err := p.collectFiles()
@@ -56,8 +67,13 @@ func (p *Processor) Process(ctx context.Context) error {
 	// Show collection results
 	p.ui.PrintSuccess("Found %d files to process", len(files))
 
-	// Process files
-	return p.processFiles(ctx, files)
+	// Pre-validate file collection against resource limits
+	if err := p.validateFileCollection(files); err != nil {
+		return err
+	}
+
+	// Process files with overall timeout
+	return p.processFiles(overallCtx, files)
 }
 
 // configureFileTypes configures the file type registry.
@@ -84,6 +100,61 @@ func (p *Processor) collectFiles() ([]string, error) {
 	return files, nil
 }
 
+// validateFileCollection validates the collected files against resource limits.
+func (p *Processor) validateFileCollection(files []string) error {
+	if !config.GetResourceLimitsEnabled() {
+		return nil
+	}
+
+	// Check file count limit
+	maxFiles := config.GetMaxFiles()
+	if len(files) > maxFiles {
+		return utils.NewStructuredError(
+			utils.ErrorTypeValidation,
+			utils.CodeResourceLimitFiles,
+			fmt.Sprintf("file count (%d) exceeds maximum limit (%d)", len(files), maxFiles),
+			"",
+			map[string]interface{}{
+				"file_count": len(files),
+				"max_files":  maxFiles,
+			},
+		)
+	}
+
+	// Check total size limit (estimate)
+	maxTotalSize := config.GetMaxTotalSize()
+	totalSize := int64(0)
+	statFailures := 0
+
+	for _, filePath := range files {
+		if fileInfo, err := os.Stat(filePath); err == nil {
+			totalSize += fileInfo.Size()
+			if totalSize > maxTotalSize {
+				return utils.NewStructuredError(
+					utils.ErrorTypeValidation,
+					utils.CodeResourceLimitTotalSize,
+					fmt.Sprintf("total file size (%d bytes) would exceed maximum limit (%d bytes)", totalSize, maxTotalSize),
+					"",
+					map[string]interface{}{
+						"total_size":     totalSize,
+						"max_total_size": maxTotalSize,
+						"files_checked":  len(files),
+					},
+				)
+			}
+		} else {
+			statFailures++
+		}
+	}
+
+	if statFailures > 0 {
+		logrus.Warnf("Could not stat %d files during pre-validation", statFailures)
+	}
+
+	logrus.Infof("Pre-validation passed: %d files, %d MB total", len(files), totalSize/1024/1024)
+	return nil
+}
+
 // processFiles processes the collected files.
 func (p *Processor) processFiles(ctx context.Context, files []string) error {
 	outFile, err := p.createOutputFile()
@@ -127,7 +198,8 @@ func (p *Processor) processFiles(ctx context.Context, files []string) error {
 
 // createOutputFile creates the output file.
func (p *Processor) createOutputFile() (*os.File, error) { - outFile, err := os.Create(p.flags.Destination) // #nosec G304 - destination is user-provided CLI arg + // Destination path has been validated in CLI flags validation for path traversal attempts + outFile, err := os.Create(p.flags.Destination) // #nosec G304 - destination is validated in flags.validate() if err != nil { return nil, utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOFileCreate, "failed to create output file").WithFilePath(p.flags.Destination) } @@ -153,19 +225,27 @@ func (p *Processor) worker(ctx context.Context, wg *sync.WaitGroup, fileCh chan if !ok { return } - p.processFile(filePath, writeCh) + p.processFile(ctx, filePath, writeCh) } } } -// processFile processes a single file. -func (p *Processor) processFile(filePath string, writeCh chan fileproc.WriteRequest) { +// processFile processes a single file with resource monitoring. +func (p *Processor) processFile(ctx context.Context, filePath string, writeCh chan fileproc.WriteRequest) { + // Check for emergency stop + if p.resourceMonitor.IsEmergencyStopActive() { + logrus.Warnf("Emergency stop active, skipping file: %s", filePath) + return + } + absRoot, err := utils.GetAbsolutePath(p.flags.SourceDir) if err != nil { utils.LogError("Failed to get absolute path", err) return } - fileproc.ProcessFile(filePath, writeCh, absRoot) + + // Use the resource monitor-aware processing + fileproc.ProcessFileWithMonitor(ctx, filePath, writeCh, absRoot, p.resourceMonitor) // Update progress bar p.ui.UpdateProgress(1) @@ -200,11 +280,35 @@ func (p *Processor) waitForCompletion(wg *sync.WaitGroup, writeCh chan fileproc. <-writerDone } -// logFinalStats logs the final back-pressure statistics. +// logFinalStats logs the final back-pressure and resource monitoring statistics. 
func (p *Processor) logFinalStats() { - stats := p.backpressure.GetStats() - if stats.Enabled { + // Log back-pressure stats + backpressureStats := p.backpressure.GetStats() + if backpressureStats.Enabled { logrus.Infof("Back-pressure stats: processed=%d files, memory=%dMB/%dMB", - stats.FilesProcessed, stats.CurrentMemoryUsage/1024/1024, stats.MaxMemoryUsage/1024/1024) + backpressureStats.FilesProcessed, backpressureStats.CurrentMemoryUsage/1024/1024, backpressureStats.MaxMemoryUsage/1024/1024) } + + // Log resource monitoring stats + resourceStats := p.resourceMonitor.GetMetrics() + if config.GetResourceLimitsEnabled() { + logrus.Infof("Resource stats: processed=%d files, totalSize=%dMB, avgFileSize=%.2fKB, rate=%.2f files/sec", + resourceStats.FilesProcessed, resourceStats.TotalSizeProcessed/1024/1024, + resourceStats.AverageFileSize/1024, resourceStats.ProcessingRate) + + if len(resourceStats.ViolationsDetected) > 0 { + logrus.Warnf("Resource violations detected: %v", resourceStats.ViolationsDetected) + } + + if resourceStats.DegradationActive { + logrus.Warnf("Processing completed with degradation mode active") + } + + if resourceStats.EmergencyStopActive { + logrus.Errorf("Processing completed with emergency stop active") + } + } + + // Clean up resource monitor + p.resourceMonitor.Close() } diff --git a/config.example.yaml b/config.example.yaml index fad9a43..5bf2e79 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -25,20 +25,20 @@ ignoreDirectories: fileTypes: # Enable/disable file type detection entirely (default: true) enabled: true - + # Add custom image extensions customImageExtensions: - .webp - .avif - .heic - .jxl - + # Add custom binary extensions customBinaryExtensions: - .custom - .proprietary - .blob - + # Add custom language mappings customLanguages: .zig: zig @@ -51,17 +51,17 @@ fileTypes: .fennel: fennel .wast: wast .wat: wat - + # Disable specific default image extensions disabledImageExtensions: - .bmp # Disable bitmap support - .tif # Disable TIFF support - + # Disable specific default binary extensions disabledBinaryExtensions: - .exe # Don't treat executables as binary - .dll # Don't treat DLL files as binary - + # Disable specific default language extensions disabledLanguageExtensions: - .bat # Don't detect batch files @@ -81,4 +81,4 @@ filePatterns: - "*.go" - "*.py" - "*.js" - - "*.ts" \ No newline at end of file + - "*.ts" diff --git a/config.yaml.example b/config.yaml.example new file mode 100644 index 0000000..b2aca82 --- /dev/null +++ b/config.yaml.example @@ -0,0 +1,79 @@ +# Gibidify Configuration Example +# This file demonstrates all available configuration options + +# File size limit for individual files (in bytes) +# Default: 5242880 (5MB), Min: 1024 (1KB), Max: 104857600 (100MB) +fileSizeLimit: 5242880 + +# Directories to ignore during traversal +ignoreDirectories: + - vendor + - node_modules + - .git + - dist + - build + - target + - bower_components + - cache + - tmp + +# File type detection and filtering +fileTypes: + enabled: true + customImageExtensions: [] + customBinaryExtensions: [] + customLanguages: {} + disabledImageExtensions: [] + disabledBinaryExtensions: [] + disabledLanguageExtensions: [] + +# Back-pressure management for memory optimization +backpressure: + enabled: true + maxPendingFiles: 1000 # Max files in channel buffer + maxPendingWrites: 100 # Max writes in channel buffer + maxMemoryUsage: 104857600 # 100MB soft memory limit + memoryCheckInterval: 1000 # Check memory every N files + +# Resource limits for DoS 
protection and security +resourceLimits: + enabled: true + + # File processing limits + maxFiles: 10000 # Maximum number of files to process + maxTotalSize: 1073741824 # Maximum total size (1GB) + + # Timeout limits (in seconds) + fileProcessingTimeoutSec: 30 # Timeout for individual file processing + overallTimeoutSec: 3600 # Overall processing timeout (1 hour) + + # Concurrency limits + maxConcurrentReads: 10 # Maximum concurrent file reading operations + + # Rate limiting (0 = disabled) + rateLimitFilesPerSec: 0 # Files per second rate limit + + # Memory limits + hardMemoryLimitMB: 512 # Hard memory limit (512MB) + + # Safety features + enableGracefulDegradation: true # Enable graceful degradation on resource pressure + enableResourceMonitoring: true # Enable detailed resource monitoring + +# Optional: Maximum concurrency for workers +# Default: number of CPU cores +# maxConcurrency: 4 + +# Optional: Supported output formats +# Default: ["json", "yaml", "markdown"] +# supportedFormats: +# - json +# - yaml +# - markdown + +# Optional: File patterns to include +# Default: all files (empty list means no pattern filtering) +# filePatterns: +# - "*.go" +# - "*.py" +# - "*.js" \ No newline at end of file diff --git a/config/config.go b/config/config.go index 5e7fd80..f8f34c3 100644 --- a/config/config.go +++ b/config/config.go @@ -20,6 +20,57 @@ const ( MinFileSizeLimit = 1024 // MaxFileSizeLimit is the maximum allowed file size limit (100MB). MaxFileSizeLimit = 104857600 + + // Resource Limit Constants + + // DefaultMaxFiles is the default maximum number of files to process. + DefaultMaxFiles = 10000 + // MinMaxFiles is the minimum allowed file count limit. + MinMaxFiles = 1 + // MaxMaxFiles is the maximum allowed file count limit. + MaxMaxFiles = 1000000 + + // DefaultMaxTotalSize is the default maximum total size of files (1GB). + DefaultMaxTotalSize = 1073741824 + // MinMaxTotalSize is the minimum allowed total size limit (1MB). + MinMaxTotalSize = 1048576 + // MaxMaxTotalSize is the maximum allowed total size limit (100GB). + MaxMaxTotalSize = 107374182400 + + // DefaultFileProcessingTimeoutSec is the default timeout for individual file processing (30 seconds). + DefaultFileProcessingTimeoutSec = 30 + // MinFileProcessingTimeoutSec is the minimum allowed file processing timeout (1 second). + MinFileProcessingTimeoutSec = 1 + // MaxFileProcessingTimeoutSec is the maximum allowed file processing timeout (300 seconds). + MaxFileProcessingTimeoutSec = 300 + + // DefaultOverallTimeoutSec is the default timeout for overall processing (3600 seconds = 1 hour). + DefaultOverallTimeoutSec = 3600 + // MinOverallTimeoutSec is the minimum allowed overall timeout (10 seconds). + MinOverallTimeoutSec = 10 + // MaxOverallTimeoutSec is the maximum allowed overall timeout (86400 seconds = 24 hours). + MaxOverallTimeoutSec = 86400 + + // DefaultMaxConcurrentReads is the default maximum concurrent file reading operations. + DefaultMaxConcurrentReads = 10 + // MinMaxConcurrentReads is the minimum allowed concurrent reads. + MinMaxConcurrentReads = 1 + // MaxMaxConcurrentReads is the maximum allowed concurrent reads. + MaxMaxConcurrentReads = 100 + + // DefaultRateLimitFilesPerSec is the default rate limit for file processing (0 = disabled). + DefaultRateLimitFilesPerSec = 0 + // MinRateLimitFilesPerSec is the minimum rate limit. + MinRateLimitFilesPerSec = 0 + // MaxRateLimitFilesPerSec is the maximum rate limit. 
+ MaxRateLimitFilesPerSec = 10000 + + // DefaultHardMemoryLimitMB is the default hard memory limit (512MB). + DefaultHardMemoryLimitMB = 512 + // MinHardMemoryLimitMB is the minimum hard memory limit (64MB). + MinHardMemoryLimitMB = 64 + // MaxHardMemoryLimitMB is the maximum hard memory limit (8192MB = 8GB). + MaxHardMemoryLimitMB = 8192 ) // LoadConfig reads configuration from a YAML file. @@ -32,7 +83,13 @@ func LoadConfig() { viper.SetConfigType("yaml") if xdgConfig := os.Getenv("XDG_CONFIG_HOME"); xdgConfig != "" { - viper.AddConfigPath(filepath.Join(xdgConfig, "gibidify")) + // Validate XDG_CONFIG_HOME for path traversal attempts + if err := utils.ValidateConfigPath(xdgConfig); err != nil { + logrus.Warnf("Invalid XDG_CONFIG_HOME path, using default config: %v", err) + } else { + configPath := filepath.Join(xdgConfig, "gibidify") + viper.AddConfigPath(configPath) + } } else if home, err := os.UserHomeDir(); err == nil { viper.AddConfigPath(filepath.Join(home, ".config", "gibidify")) } @@ -81,6 +138,18 @@ func setDefaultConfig() { viper.SetDefault("backpressure.maxPendingWrites", 100) // Max writes in write channel buffer viper.SetDefault("backpressure.maxMemoryUsage", 104857600) // 100MB max memory usage viper.SetDefault("backpressure.memoryCheckInterval", 1000) // Check memory every 1000 files + + // Resource limit defaults + viper.SetDefault("resourceLimits.enabled", true) + viper.SetDefault("resourceLimits.maxFiles", DefaultMaxFiles) + viper.SetDefault("resourceLimits.maxTotalSize", DefaultMaxTotalSize) + viper.SetDefault("resourceLimits.fileProcessingTimeoutSec", DefaultFileProcessingTimeoutSec) + viper.SetDefault("resourceLimits.overallTimeoutSec", DefaultOverallTimeoutSec) + viper.SetDefault("resourceLimits.maxConcurrentReads", DefaultMaxConcurrentReads) + viper.SetDefault("resourceLimits.rateLimitFilesPerSec", DefaultRateLimitFilesPerSec) + viper.SetDefault("resourceLimits.hardMemoryLimitMB", DefaultHardMemoryLimitMB) + viper.SetDefault("resourceLimits.enableGracefulDegradation", true) + viper.SetDefault("resourceLimits.enableResourceMonitoring", true) } // GetFileSizeLimit returns the file size limit from configuration. 
@@ -249,12 +318,85 @@ func ValidateConfig() error { } } + // Validate resource limits configuration + if viper.IsSet("resourceLimits.maxFiles") { + maxFiles := viper.GetInt("resourceLimits.maxFiles") + if maxFiles < MinMaxFiles { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxFiles (%d) must be at least %d", maxFiles, MinMaxFiles)) + } + if maxFiles > MaxMaxFiles { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxFiles (%d) exceeds maximum (%d)", maxFiles, MaxMaxFiles)) + } + } + + if viper.IsSet("resourceLimits.maxTotalSize") { + maxTotalSize := viper.GetInt64("resourceLimits.maxTotalSize") + if maxTotalSize < MinMaxTotalSize { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxTotalSize (%d) must be at least %d", maxTotalSize, MinMaxTotalSize)) + } + if maxTotalSize > MaxMaxTotalSize { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxTotalSize (%d) exceeds maximum (%d)", maxTotalSize, MaxMaxTotalSize)) + } + } + + if viper.IsSet("resourceLimits.fileProcessingTimeoutSec") { + timeout := viper.GetInt("resourceLimits.fileProcessingTimeoutSec") + if timeout < MinFileProcessingTimeoutSec { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.fileProcessingTimeoutSec (%d) must be at least %d", timeout, MinFileProcessingTimeoutSec)) + } + if timeout > MaxFileProcessingTimeoutSec { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.fileProcessingTimeoutSec (%d) exceeds maximum (%d)", timeout, MaxFileProcessingTimeoutSec)) + } + } + + if viper.IsSet("resourceLimits.overallTimeoutSec") { + timeout := viper.GetInt("resourceLimits.overallTimeoutSec") + if timeout < MinOverallTimeoutSec { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.overallTimeoutSec (%d) must be at least %d", timeout, MinOverallTimeoutSec)) + } + if timeout > MaxOverallTimeoutSec { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.overallTimeoutSec (%d) exceeds maximum (%d)", timeout, MaxOverallTimeoutSec)) + } + } + + if viper.IsSet("resourceLimits.maxConcurrentReads") { + maxReads := viper.GetInt("resourceLimits.maxConcurrentReads") + if maxReads < MinMaxConcurrentReads { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxConcurrentReads (%d) must be at least %d", maxReads, MinMaxConcurrentReads)) + } + if maxReads > MaxMaxConcurrentReads { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxConcurrentReads (%d) exceeds maximum (%d)", maxReads, MaxMaxConcurrentReads)) + } + } + + if viper.IsSet("resourceLimits.rateLimitFilesPerSec") { + rateLimit := viper.GetInt("resourceLimits.rateLimitFilesPerSec") + if rateLimit < MinRateLimitFilesPerSec { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.rateLimitFilesPerSec (%d) must be at least %d", rateLimit, MinRateLimitFilesPerSec)) + } + if rateLimit > MaxRateLimitFilesPerSec { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.rateLimitFilesPerSec (%d) exceeds maximum (%d)", rateLimit, MaxRateLimitFilesPerSec)) + } + } + + if viper.IsSet("resourceLimits.hardMemoryLimitMB") { + memLimit := viper.GetInt("resourceLimits.hardMemoryLimitMB") + if memLimit < MinHardMemoryLimitMB { + validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.hardMemoryLimitMB (%d) must be at least %d", memLimit, MinHardMemoryLimitMB)) + } + if memLimit > MaxHardMemoryLimitMB { + validationErrors 
= append(validationErrors, fmt.Sprintf("resourceLimits.hardMemoryLimitMB (%d) exceeds maximum (%d)", memLimit, MaxHardMemoryLimitMB)) + } + } + if len(validationErrors) > 0 { return utils.NewStructuredError( utils.ErrorTypeConfiguration, utils.CodeConfigValidation, "configuration validation failed: "+strings.Join(validationErrors, "; "), - ).WithContext("validation_errors", validationErrors) + "", + map[string]interface{}{"validation_errors": validationErrors}, + ) } return nil @@ -290,7 +432,9 @@ func ValidateFileSize(size int64) error { utils.ErrorTypeValidation, utils.CodeValidationSize, fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", size, limit), - ).WithContext("file_size", size).WithContext("size_limit", limit) + "", + map[string]interface{}{"file_size": size, "size_limit": limit}, + ) } return nil } @@ -302,7 +446,9 @@ func ValidateOutputFormat(format string) error { utils.ErrorTypeValidation, utils.CodeValidationFormat, fmt.Sprintf("unsupported output format: %s (supported: json, yaml, markdown)", format), - ).WithContext("format", format) + "", + map[string]interface{}{"format": format}, + ) } return nil } @@ -314,7 +460,9 @@ func ValidateConcurrency(concurrency int) error { utils.ErrorTypeValidation, utils.CodeValidationFormat, fmt.Sprintf("concurrency (%d) must be at least 1", concurrency), - ).WithContext("concurrency", concurrency) + "", + map[string]interface{}{"concurrency": concurrency}, + ) } if viper.IsSet("maxConcurrency") { @@ -324,7 +472,9 @@ func ValidateConcurrency(concurrency int) error { utils.ErrorTypeValidation, utils.CodeValidationFormat, fmt.Sprintf("concurrency (%d) exceeds maximum (%d)", concurrency, maxConcurrency), - ).WithContext("concurrency", concurrency).WithContext("max_concurrency", maxConcurrency) + "", + map[string]interface{}{"concurrency": concurrency, "max_concurrency": maxConcurrency}, + ) } } @@ -392,3 +542,55 @@ func GetMaxMemoryUsage() int64 { func GetMemoryCheckInterval() int { return viper.GetInt("backpressure.memoryCheckInterval") } + +// Resource Limit Configuration Getters + +// GetResourceLimitsEnabled returns whether resource limits are enabled. +func GetResourceLimitsEnabled() bool { + return viper.GetBool("resourceLimits.enabled") +} + +// GetMaxFiles returns the maximum number of files that can be processed. +func GetMaxFiles() int { + return viper.GetInt("resourceLimits.maxFiles") +} + +// GetMaxTotalSize returns the maximum total size of files that can be processed. +func GetMaxTotalSize() int64 { + return viper.GetInt64("resourceLimits.maxTotalSize") +} + +// GetFileProcessingTimeoutSec returns the timeout for individual file processing in seconds. +func GetFileProcessingTimeoutSec() int { + return viper.GetInt("resourceLimits.fileProcessingTimeoutSec") +} + +// GetOverallTimeoutSec returns the timeout for overall processing in seconds. +func GetOverallTimeoutSec() int { + return viper.GetInt("resourceLimits.overallTimeoutSec") +} + +// GetMaxConcurrentReads returns the maximum number of concurrent file reading operations. +func GetMaxConcurrentReads() int { + return viper.GetInt("resourceLimits.maxConcurrentReads") +} + +// GetRateLimitFilesPerSec returns the rate limit for file processing (files per second). +func GetRateLimitFilesPerSec() int { + return viper.GetInt("resourceLimits.rateLimitFilesPerSec") +} + +// GetHardMemoryLimitMB returns the hard memory limit in megabytes. 
+func GetHardMemoryLimitMB() int { + return viper.GetInt("resourceLimits.hardMemoryLimitMB") +} + +// GetEnableGracefulDegradation returns whether graceful degradation is enabled. +func GetEnableGracefulDegradation() bool { + return viper.GetBool("resourceLimits.enableGracefulDegradation") +} + +// GetEnableResourceMonitoring returns whether resource monitoring is enabled. +func GetEnableResourceMonitoring() bool { + return viper.GetBool("resourceLimits.enableResourceMonitoring") +} diff --git a/fileproc/processor.go b/fileproc/processor.go index 335c364..f9ff983 100644 --- a/fileproc/processor.go +++ b/fileproc/processor.go @@ -2,11 +2,15 @@ package fileproc import ( + "context" "fmt" "io" "os" "path/filepath" "strings" + "time" + + "github.com/sirupsen/logrus" "github.com/ivuorinen/gibidify/config" "github.com/ivuorinen/gibidify/utils" @@ -31,15 +35,26 @@ type WriteRequest struct { // FileProcessor handles file processing operations. type FileProcessor struct { - rootPath string - sizeLimit int64 + rootPath string + sizeLimit int64 + resourceMonitor *ResourceMonitor } // NewFileProcessor creates a new file processor. func NewFileProcessor(rootPath string) *FileProcessor { return &FileProcessor{ - rootPath: rootPath, - sizeLimit: config.GetFileSizeLimit(), + rootPath: rootPath, + sizeLimit: config.GetFileSizeLimit(), + resourceMonitor: NewResourceMonitor(), + } +} + +// NewFileProcessorWithMonitor creates a new file processor with a shared resource monitor. +func NewFileProcessorWithMonitor(rootPath string, monitor *ResourceMonitor) *FileProcessor { + return &FileProcessor{ + rootPath: rootPath, + sizeLimit: config.GetFileSizeLimit(), + resourceMonitor: monitor, } } @@ -47,30 +62,92 @@ func NewFileProcessor(rootPath string) *FileProcessor { // It automatically chooses between loading the entire file or streaming based on file size. func ProcessFile(filePath string, outCh chan<- WriteRequest, rootPath string) { processor := NewFileProcessor(rootPath) - processor.Process(filePath, outCh) + ctx := context.Background() + processor.ProcessWithContext(ctx, filePath, outCh) +} + +// ProcessFileWithMonitor processes a file using a shared resource monitor. +func ProcessFileWithMonitor(ctx context.Context, filePath string, outCh chan<- WriteRequest, rootPath string, monitor *ResourceMonitor) { + processor := NewFileProcessorWithMonitor(rootPath, monitor) + processor.ProcessWithContext(ctx, filePath, outCh) } // Process handles file processing with the configured settings. func (p *FileProcessor) Process(filePath string, outCh chan<- WriteRequest) { - // Validate file - fileInfo, err := p.validateFile(filePath) + ctx := context.Background() + p.ProcessWithContext(ctx, filePath, outCh) +} + +// ProcessWithContext handles file processing with context and resource monitoring. 
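+// The call sequence is: rate limiting, validation against resource limits,
+// read-slot acquisition, hard memory check, then in-memory or streaming
+// processing depending on file size.
+//
+// A hedged usage sketch (illustrative only; a shared monitor is assumed):
+//
+//	monitor := NewResourceMonitor()
+//	defer monitor.Close()
+//	fp := NewFileProcessorWithMonitor(rootPath, monitor)
+//	fp.ProcessWithContext(ctx, filePath, outCh)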
+func (p *FileProcessor) ProcessWithContext(ctx context.Context, filePath string, outCh chan<- WriteRequest) { + // Create file processing context with timeout + fileCtx, fileCancel := p.resourceMonitor.CreateFileProcessingContext(ctx) + defer fileCancel() + + // Wait for rate limiting + if err := p.resourceMonitor.WaitForRateLimit(fileCtx); err != nil { + if err == context.DeadlineExceeded { + utils.LogErrorf( + utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing timeout during rate limiting", filePath, nil), + "File processing timeout during rate limiting: %s", filePath, + ) + } + return + } + + // Validate file and check resource limits + fileInfo, err := p.validateFileWithLimits(fileCtx, filePath) if err != nil { return // Error already logged } + // Acquire read slot for concurrent processing + if err := p.resourceMonitor.AcquireReadSlot(fileCtx); err != nil { + if err == context.DeadlineExceeded { + utils.LogErrorf( + utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing timeout waiting for read slot", filePath, nil), + "File processing timeout waiting for read slot: %s", filePath, + ) + } + return + } + defer p.resourceMonitor.ReleaseReadSlot() + + // Check hard memory limits before processing + if err := p.resourceMonitor.CheckHardMemoryLimit(); err != nil { + utils.LogErrorf(err, "Hard memory limit check failed for file: %s", filePath) + return + } + // Get relative path relPath := p.getRelativePath(filePath) + // Process file with timeout + processStart := time.Now() + defer func() { + // Record successful processing + p.resourceMonitor.RecordFileProcessed(fileInfo.Size()) + logrus.Debugf("File processed in %v: %s", time.Since(processStart), filePath) + }() + // Choose processing strategy based on file size if fileInfo.Size() <= StreamThreshold { - p.processInMemory(filePath, relPath, outCh) + p.processInMemoryWithContext(fileCtx, filePath, relPath, outCh) } else { - p.processStreaming(filePath, relPath, outCh) + p.processStreamingWithContext(fileCtx, filePath, relPath, outCh) } } -// validateFile checks if the file can be processed. -func (p *FileProcessor) validateFile(filePath string) (os.FileInfo, error) { + +// validateFileWithLimits checks if the file can be processed with resource limits. 
+func (p *FileProcessor) validateFileWithLimits(ctx context.Context, filePath string) (os.FileInfo, error) { + // Check context cancellation + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + fileInfo, err := os.Stat(filePath) if err != nil { structErr := utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to stat file").WithFilePath(filePath) @@ -78,19 +155,31 @@ func (p *FileProcessor) validateFile(filePath string) (os.FileInfo, error) { return nil, err } - // Check size limit + // Check traditional size limit if fileInfo.Size() > p.sizeLimit { + context := map[string]interface{}{ + "file_size": fileInfo.Size(), + "size_limit": p.sizeLimit, + } utils.LogErrorf( utils.NewStructuredError( utils.ErrorTypeValidation, utils.CodeValidationSize, fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", fileInfo.Size(), p.sizeLimit), - ).WithFilePath(filePath).WithContext("file_size", fileInfo.Size()).WithContext("size_limit", p.sizeLimit), + filePath, + context, + ), "Skipping large file %s", filePath, ) return nil, fmt.Errorf("file too large") } + // Check resource limits + if err := p.resourceMonitor.ValidateFileProcessing(filePath, fileInfo.Size()); err != nil { + utils.LogErrorf(err, "Resource limit validation failed for file: %s", filePath) + return nil, err + } + return fileInfo, nil } @@ -103,8 +192,20 @@ func (p *FileProcessor) getRelativePath(filePath string) string { return relPath } -// processInMemory loads the entire file into memory (for small files). -func (p *FileProcessor) processInMemory(filePath, relPath string, outCh chan<- WriteRequest) { + +// processInMemoryWithContext loads the entire file into memory with context awareness. +func (p *FileProcessor) processInMemoryWithContext(ctx context.Context, filePath, relPath string, outCh chan<- WriteRequest) { + // Check context before reading + select { + case <-ctx.Done(): + utils.LogErrorf( + utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing cancelled", filePath, nil), + "File processing cancelled: %s", filePath, + ) + return + default: + } + content, err := os.ReadFile(filePath) // #nosec G304 - filePath is validated by walker if err != nil { structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to read file").WithFilePath(filePath) @@ -112,30 +213,79 @@ func (p *FileProcessor) processInMemory(filePath, relPath string, outCh chan<- W return } - outCh <- WriteRequest{ + // Check context again after reading + select { + case <-ctx.Done(): + utils.LogErrorf( + utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing cancelled after read", filePath, nil), + "File processing cancelled after read: %s", filePath, + ) + return + default: + } + + // Try to send the result, but respect context cancellation + select { + case <-ctx.Done(): + utils.LogErrorf( + utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing cancelled before output", filePath, nil), + "File processing cancelled before output: %s", filePath, + ) + return + case outCh <- WriteRequest{ Path: relPath, Content: p.formatContent(relPath, string(content)), IsStream: false, + }: } } -// processStreaming creates a streaming reader for large files. 
-func (p *FileProcessor) processStreaming(filePath, relPath string, outCh chan<- WriteRequest) { - reader := p.createStreamReader(filePath, relPath) + +// processStreamingWithContext creates a streaming reader for large files with context awareness. +func (p *FileProcessor) processStreamingWithContext(ctx context.Context, filePath, relPath string, outCh chan<- WriteRequest) { + // Check context before creating reader + select { + case <-ctx.Done(): + utils.LogErrorf( + utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "streaming processing cancelled", filePath, nil), + "Streaming processing cancelled: %s", filePath, + ) + return + default: + } + + reader := p.createStreamReaderWithContext(ctx, filePath, relPath) if reader == nil { return // Error already logged } - outCh <- WriteRequest{ + // Try to send the result, but respect context cancellation + select { + case <-ctx.Done(): + utils.LogErrorf( + utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "streaming processing cancelled before output", filePath, nil), + "Streaming processing cancelled before output: %s", filePath, + ) + return + case outCh <- WriteRequest{ Path: relPath, Content: "", // Empty since content is in Reader IsStream: true, Reader: reader, + }: } } -// createStreamReader creates a reader that combines header and file content. -func (p *FileProcessor) createStreamReader(filePath, relPath string) io.Reader { + +// createStreamReaderWithContext creates a reader that combines header and file content with context awareness. +func (p *FileProcessor) createStreamReaderWithContext(ctx context.Context, filePath, relPath string) io.Reader { + // Check context before opening file + select { + case <-ctx.Done(): + return nil + default: + } + file, err := os.Open(filePath) // #nosec G304 - filePath is validated by walker if err != nil { structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to open file for streaming").WithFilePath(filePath) diff --git a/fileproc/resource_monitor.go b/fileproc/resource_monitor.go new file mode 100644 index 0000000..e409aa9 --- /dev/null +++ b/fileproc/resource_monitor.go @@ -0,0 +1,423 @@ +// Package fileproc provides resource monitoring and limit enforcement for security. +package fileproc + +import ( + "context" + "runtime" + "sync" + "sync/atomic" + "time" + + "github.com/sirupsen/logrus" + + "github.com/ivuorinen/gibidify/config" + "github.com/ivuorinen/gibidify/utils" +) + +// ResourceMonitor monitors resource usage and enforces limits to prevent DoS attacks. +type ResourceMonitor struct { + enabled bool + maxFiles int + maxTotalSize int64 + fileProcessingTimeout time.Duration + overallTimeout time.Duration + maxConcurrentReads int + rateLimitFilesPerSec int + hardMemoryLimitMB int + enableGracefulDegr bool + enableResourceMon bool + + // Current state tracking + filesProcessed int64 + totalSizeProcessed int64 + concurrentReads int64 + startTime time.Time + lastRateLimitCheck time.Time + hardMemoryLimitBytes int64 + + // Rate limiting + rateLimiter *time.Ticker + rateLimitChan chan struct{} + + // Synchronization + mu sync.RWMutex + violationLogged map[string]bool + degradationActive bool + emergencyStopRequested bool +} + +// ResourceMetrics holds comprehensive resource usage metrics. 
+type ResourceMetrics struct {
+	FilesProcessed      int64         `json:"files_processed"`
+	TotalSizeProcessed  int64         `json:"total_size_processed"`
+	ConcurrentReads     int64         `json:"concurrent_reads"`
+	ProcessingDuration  time.Duration `json:"processing_duration"`
+	AverageFileSize     float64       `json:"average_file_size"`
+	ProcessingRate      float64       `json:"processing_rate_files_per_sec"`
+	MemoryUsageMB       int64         `json:"memory_usage_mb"`
+	MaxMemoryUsageMB    int64         `json:"max_memory_usage_mb"`
+	ViolationsDetected  []string      `json:"violations_detected"`
+	DegradationActive   bool          `json:"degradation_active"`
+	EmergencyStopActive bool          `json:"emergency_stop_active"`
+	LastUpdated         time.Time     `json:"last_updated"`
+}
+
+// ResourceViolation represents a detected resource limit violation.
+type ResourceViolation struct {
+	Type      string                 `json:"type"`
+	Message   string                 `json:"message"`
+	Current   interface{}            `json:"current"`
+	Limit     interface{}            `json:"limit"`
+	Timestamp time.Time              `json:"timestamp"`
+	Context   map[string]interface{} `json:"context"`
+}
+
+// NewResourceMonitor creates a new resource monitor with configuration.
+func NewResourceMonitor() *ResourceMonitor {
+	rm := &ResourceMonitor{
+		enabled:               config.GetResourceLimitsEnabled(),
+		maxFiles:              config.GetMaxFiles(),
+		maxTotalSize:          config.GetMaxTotalSize(),
+		fileProcessingTimeout: time.Duration(config.GetFileProcessingTimeoutSec()) * time.Second,
+		overallTimeout:        time.Duration(config.GetOverallTimeoutSec()) * time.Second,
+		maxConcurrentReads:    config.GetMaxConcurrentReads(),
+		rateLimitFilesPerSec:  config.GetRateLimitFilesPerSec(),
+		hardMemoryLimitMB:     config.GetHardMemoryLimitMB(),
+		enableGracefulDegr:    config.GetEnableGracefulDegradation(),
+		enableResourceMon:     config.GetEnableResourceMonitoring(),
+		startTime:             time.Now(),
+		lastRateLimitCheck:    time.Now(),
+		violationLogged:       make(map[string]bool),
+		hardMemoryLimitBytes:  int64(config.GetHardMemoryLimitMB()) * 1024 * 1024,
+	}
+
+	// Initialize rate limiter if rate limiting is enabled
+	if rm.enabled && rm.rateLimitFilesPerSec > 0 {
+		interval := time.Second / time.Duration(rm.rateLimitFilesPerSec)
+		rm.rateLimiter = time.NewTicker(interval)
+		rm.rateLimitChan = make(chan struct{}, rm.rateLimitFilesPerSec)
+
+		// Pre-fill the rate limit channel so the first burst of files can
+		// proceed immediately instead of waiting on the refill ticker.
+		//
+		// The channel was created above with capacity rateLimitFilesPerSec,
+		// so these sends can never block and no overflow guard (or early
+		// exit) is needed here.
+		for i := 0; i < rm.rateLimitFilesPerSec; i++ {
+			rm.rateLimitChan <- struct{}{}
+		}
+
+		// Start rate limiter refill goroutine
+		go rm.rateLimiterRefill()
+	}
+
+	return rm
+}
+
+// ValidateFileProcessing checks if a file can be processed based on resource limits.
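+// It returns a *utils.StructuredError whose code identifies the violated
+// limit (file count, total size, overall timeout, or emergency memory stop).
+//
+// A hedged usage sketch (illustrative only):
+//
+//	if err := rm.ValidateFileProcessing(path, info.Size()); err != nil {
+//		utils.LogErrorf(err, "Resource limit validation failed for file: %s", path)
+//		return
+//	}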
+func (rm *ResourceMonitor) ValidateFileProcessing(filePath string, fileSize int64) error { + if !rm.enabled { + return nil + } + + rm.mu.RLock() + defer rm.mu.RUnlock() + + // Check if emergency stop is active + if rm.emergencyStopRequested { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitMemory, + "processing stopped due to emergency memory condition", + filePath, + map[string]interface{}{ + "emergency_stop_active": true, + }, + ) + } + + // Check file count limit + currentFiles := atomic.LoadInt64(&rm.filesProcessed) + if int(currentFiles) >= rm.maxFiles { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitFiles, + "maximum file count limit exceeded", + filePath, + map[string]interface{}{ + "current_files": currentFiles, + "max_files": rm.maxFiles, + }, + ) + } + + // Check total size limit + currentTotalSize := atomic.LoadInt64(&rm.totalSizeProcessed) + if currentTotalSize+fileSize > rm.maxTotalSize { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitTotalSize, + "maximum total size limit would be exceeded", + filePath, + map[string]interface{}{ + "current_total_size": currentTotalSize, + "file_size": fileSize, + "max_total_size": rm.maxTotalSize, + }, + ) + } + + // Check overall timeout + if time.Since(rm.startTime) > rm.overallTimeout { + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitTimeout, + "overall processing timeout exceeded", + filePath, + map[string]interface{}{ + "processing_duration": time.Since(rm.startTime), + "overall_timeout": rm.overallTimeout, + }, + ) + } + + return nil +} + +// AcquireReadSlot attempts to acquire a slot for concurrent file reading. +func (rm *ResourceMonitor) AcquireReadSlot(ctx context.Context) error { + if !rm.enabled { + return nil + } + + // Wait for available read slot + for { + currentReads := atomic.LoadInt64(&rm.concurrentReads) + if currentReads < int64(rm.maxConcurrentReads) { + if atomic.CompareAndSwapInt64(&rm.concurrentReads, currentReads, currentReads+1) { + break + } + // CAS failed, retry + continue + } + + // Wait and retry + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(time.Millisecond): + // Continue loop + } + } + + return nil +} + +// ReleaseReadSlot releases a concurrent reading slot. +func (rm *ResourceMonitor) ReleaseReadSlot() { + if rm.enabled { + atomic.AddInt64(&rm.concurrentReads, -1) + } +} + +// WaitForRateLimit waits for rate limiting if enabled. +func (rm *ResourceMonitor) WaitForRateLimit(ctx context.Context) error { + if !rm.enabled || rm.rateLimitFilesPerSec <= 0 { + return nil + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-rm.rateLimitChan: + return nil + case <-time.After(time.Second): // Fallback timeout + logrus.Warn("Rate limiting timeout exceeded, continuing without rate limit") + return nil + } +} + +// CheckHardMemoryLimit checks if hard memory limit is exceeded and takes action. 
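+// When the limit is exceeded and graceful degradation is enabled, it forces
+// a garbage collection and re-checks; only if memory is still over the limit
+// does it activate the emergency stop. Without graceful degradation it fails
+// immediately with a structured error.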
+func (rm *ResourceMonitor) CheckHardMemoryLimit() error { + if !rm.enabled || rm.hardMemoryLimitMB <= 0 { + return nil + } + + var m runtime.MemStats + runtime.ReadMemStats(&m) + currentMemory := int64(m.Alloc) + + if currentMemory > rm.hardMemoryLimitBytes { + rm.mu.Lock() + defer rm.mu.Unlock() + + // Log violation if not already logged + violationKey := "hard_memory_limit" + if !rm.violationLogged[violationKey] { + logrus.Errorf("Hard memory limit exceeded: %dMB > %dMB", + currentMemory/1024/1024, rm.hardMemoryLimitMB) + rm.violationLogged[violationKey] = true + } + + if rm.enableGracefulDegr { + // Force garbage collection + runtime.GC() + + // Check again after GC + runtime.ReadMemStats(&m) + currentMemory = int64(m.Alloc) + + if currentMemory > rm.hardMemoryLimitBytes { + // Still over limit, activate emergency stop + rm.emergencyStopRequested = true + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitMemory, + "hard memory limit exceeded, emergency stop activated", + "", + map[string]interface{}{ + "current_memory_mb": currentMemory / 1024 / 1024, + "limit_mb": rm.hardMemoryLimitMB, + "emergency_stop": true, + }, + ) + } else { + // Memory freed by GC, continue with degradation + rm.degradationActive = true + logrus.Info("Memory freed by garbage collection, continuing with degradation mode") + } + } else { + // No graceful degradation, hard stop + return utils.NewStructuredError( + utils.ErrorTypeValidation, + utils.CodeResourceLimitMemory, + "hard memory limit exceeded", + "", + map[string]interface{}{ + "current_memory_mb": currentMemory / 1024 / 1024, + "limit_mb": rm.hardMemoryLimitMB, + }, + ) + } + } + + return nil +} + +// RecordFileProcessed records that a file has been successfully processed. +func (rm *ResourceMonitor) RecordFileProcessed(fileSize int64) { + if rm.enabled { + atomic.AddInt64(&rm.filesProcessed, 1) + atomic.AddInt64(&rm.totalSizeProcessed, fileSize) + } +} + +// GetMetrics returns current resource usage metrics. +func (rm *ResourceMonitor) GetMetrics() ResourceMetrics { + if !rm.enableResourceMon { + return ResourceMetrics{} + } + + rm.mu.RLock() + defer rm.mu.RUnlock() + + var m runtime.MemStats + runtime.ReadMemStats(&m) + + filesProcessed := atomic.LoadInt64(&rm.filesProcessed) + totalSize := atomic.LoadInt64(&rm.totalSizeProcessed) + duration := time.Since(rm.startTime) + + avgFileSize := float64(0) + if filesProcessed > 0 { + avgFileSize = float64(totalSize) / float64(filesProcessed) + } + + processingRate := float64(0) + if duration.Seconds() > 0 { + processingRate = float64(filesProcessed) / duration.Seconds() + } + + // Collect violations + violations := make([]string, 0, len(rm.violationLogged)) + for violation := range rm.violationLogged { + violations = append(violations, violation) + } + + return ResourceMetrics{ + FilesProcessed: filesProcessed, + TotalSizeProcessed: totalSize, + ConcurrentReads: atomic.LoadInt64(&rm.concurrentReads), + ProcessingDuration: duration, + AverageFileSize: avgFileSize, + ProcessingRate: processingRate, + MemoryUsageMB: int64(m.Alloc) / 1024 / 1024, + MaxMemoryUsageMB: int64(rm.hardMemoryLimitMB), + ViolationsDetected: violations, + DegradationActive: rm.degradationActive, + EmergencyStopActive: rm.emergencyStopRequested, + LastUpdated: time.Now(), + } +} + +// IsEmergencyStopActive returns whether emergency stop is active. 
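+// Callers are expected to consult this before starting new work and skip
+// processing while it reports true (see Processor.processFile in cli).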
+func (rm *ResourceMonitor) IsEmergencyStopActive() bool { + rm.mu.RLock() + defer rm.mu.RUnlock() + return rm.emergencyStopRequested +} + +// IsDegradationActive returns whether degradation mode is active. +func (rm *ResourceMonitor) IsDegradationActive() bool { + rm.mu.RLock() + defer rm.mu.RUnlock() + return rm.degradationActive +} + +// LogResourceInfo logs current resource limit configuration. +func (rm *ResourceMonitor) LogResourceInfo() { + if rm.enabled { + logrus.Infof("Resource limits enabled: maxFiles=%d, maxTotalSize=%dMB, fileTimeout=%ds, overallTimeout=%ds", + rm.maxFiles, rm.maxTotalSize/1024/1024, int(rm.fileProcessingTimeout.Seconds()), int(rm.overallTimeout.Seconds())) + logrus.Infof("Resource limits: maxConcurrentReads=%d, rateLimitFPS=%d, hardMemoryMB=%d", + rm.maxConcurrentReads, rm.rateLimitFilesPerSec, rm.hardMemoryLimitMB) + logrus.Infof("Resource features: gracefulDegradation=%v, monitoring=%v", + rm.enableGracefulDegr, rm.enableResourceMon) + } else { + logrus.Info("Resource limits disabled") + } +} + +// Close cleans up the resource monitor. +func (rm *ResourceMonitor) Close() { + if rm.rateLimiter != nil { + rm.rateLimiter.Stop() + } +} + +// rateLimiterRefill refills the rate limiting channel periodically. +func (rm *ResourceMonitor) rateLimiterRefill() { + for range rm.rateLimiter.C { + select { + case rm.rateLimitChan <- struct{}{}: + default: + // Channel is full, skip + } + } +} + +// CreateFileProcessingContext creates a context with file processing timeout. +func (rm *ResourceMonitor) CreateFileProcessingContext(parent context.Context) (context.Context, context.CancelFunc) { + if !rm.enabled || rm.fileProcessingTimeout <= 0 { + return parent, func() {} + } + return context.WithTimeout(parent, rm.fileProcessingTimeout) +} + +// CreateOverallProcessingContext creates a context with overall processing timeout. +func (rm *ResourceMonitor) CreateOverallProcessingContext(parent context.Context) (context.Context, context.CancelFunc) { + if !rm.enabled || rm.overallTimeout <= 0 { + return parent, func() {} + } + return context.WithTimeout(parent, rm.overallTimeout) +} diff --git a/fileproc/resource_monitor_test.go b/fileproc/resource_monitor_test.go new file mode 100644 index 0000000..14348ec --- /dev/null +++ b/fileproc/resource_monitor_test.go @@ -0,0 +1,377 @@ +// Package fileproc provides tests for resource monitoring functionality. 
+package fileproc + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/spf13/viper" + + "github.com/ivuorinen/gibidify/config" + "github.com/ivuorinen/gibidify/testutil" + "github.com/ivuorinen/gibidify/utils" +) + +func TestResourceMonitor_NewResourceMonitor(t *testing.T) { + // Reset viper for clean test state + testutil.ResetViperConfig(t, "") + + rm := NewResourceMonitor() + if rm == nil { + t.Fatal("NewResourceMonitor() returned nil") + } + + // Test default values are set correctly + if !rm.enabled { + t.Error("Expected resource monitor to be enabled by default") + } + + if rm.maxFiles != config.DefaultMaxFiles { + t.Errorf("Expected maxFiles to be %d, got %d", config.DefaultMaxFiles, rm.maxFiles) + } + + if rm.maxTotalSize != config.DefaultMaxTotalSize { + t.Errorf("Expected maxTotalSize to be %d, got %d", config.DefaultMaxTotalSize, rm.maxTotalSize) + } + + if rm.fileProcessingTimeout != time.Duration(config.DefaultFileProcessingTimeoutSec)*time.Second { + t.Errorf("Expected fileProcessingTimeout to be %v, got %v", + time.Duration(config.DefaultFileProcessingTimeoutSec)*time.Second, rm.fileProcessingTimeout) + } + + // Clean up + rm.Close() +} + +func TestResourceMonitor_DisabledResourceLimits(t *testing.T) { + // Reset viper for clean test state + testutil.ResetViperConfig(t, "") + + // Set resource limits disabled + viper.Set("resourceLimits.enabled", false) + + rm := NewResourceMonitor() + defer rm.Close() + + // Test that validation passes when disabled + err := rm.ValidateFileProcessing("/tmp/test.txt", 1000) + if err != nil { + t.Errorf("Expected no error when resource limits disabled, got %v", err) + } + + // Test that read slot acquisition works when disabled + ctx := context.Background() + err = rm.AcquireReadSlot(ctx) + if err != nil { + t.Errorf("Expected no error when acquiring read slot with disabled limits, got %v", err) + } + rm.ReleaseReadSlot() + + // Test that rate limiting is bypassed when disabled + err = rm.WaitForRateLimit(ctx) + if err != nil { + t.Errorf("Expected no error when rate limiting disabled, got %v", err) + } +} + +func TestResourceMonitor_FileCountLimit(t *testing.T) { + testutil.ResetViperConfig(t, "") + + // Set a very low file count limit for testing + viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.maxFiles", 2) + + rm := NewResourceMonitor() + defer rm.Close() + + // First file should pass + err := rm.ValidateFileProcessing("/tmp/file1.txt", 100) + if err != nil { + t.Errorf("Expected no error for first file, got %v", err) + } + rm.RecordFileProcessed(100) + + // Second file should pass + err = rm.ValidateFileProcessing("/tmp/file2.txt", 100) + if err != nil { + t.Errorf("Expected no error for second file, got %v", err) + } + rm.RecordFileProcessed(100) + + // Third file should fail + err = rm.ValidateFileProcessing("/tmp/file3.txt", 100) + if err == nil { + t.Error("Expected error for third file (exceeds limit), got nil") + } + + // Verify it's the correct error type + structErr, ok := err.(*utils.StructuredError) + if !ok { + t.Errorf("Expected StructuredError, got %T", err) + } else if structErr.Code != utils.CodeResourceLimitFiles { + t.Errorf("Expected error code %s, got %s", utils.CodeResourceLimitFiles, structErr.Code) + } +} + +func TestResourceMonitor_TotalSizeLimit(t *testing.T) { + testutil.ResetViperConfig(t, "") + + // Set a low total size limit for testing (1KB) + viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.maxTotalSize", 1024) + + rm := 
NewResourceMonitor() + defer rm.Close() + + // First small file should pass + err := rm.ValidateFileProcessing("/tmp/small.txt", 500) + if err != nil { + t.Errorf("Expected no error for small file, got %v", err) + } + rm.RecordFileProcessed(500) + + // Second small file should pass + err = rm.ValidateFileProcessing("/tmp/small2.txt", 400) + if err != nil { + t.Errorf("Expected no error for second small file, got %v", err) + } + rm.RecordFileProcessed(400) + + // Large file that would exceed limit should fail + err = rm.ValidateFileProcessing("/tmp/large.txt", 200) + if err == nil { + t.Error("Expected error for file that would exceed size limit, got nil") + } + + // Verify it's the correct error type + structErr, ok := err.(*utils.StructuredError) + if !ok { + t.Errorf("Expected StructuredError, got %T", err) + } else if structErr.Code != utils.CodeResourceLimitTotalSize { + t.Errorf("Expected error code %s, got %s", utils.CodeResourceLimitTotalSize, structErr.Code) + } +} + +func TestResourceMonitor_ConcurrentReadsLimit(t *testing.T) { + testutil.ResetViperConfig(t, "") + + // Set a low concurrent reads limit for testing + viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.maxConcurrentReads", 2) + + rm := NewResourceMonitor() + defer rm.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + // First read slot should succeed + err := rm.AcquireReadSlot(ctx) + if err != nil { + t.Errorf("Expected no error for first read slot, got %v", err) + } + + // Second read slot should succeed + err = rm.AcquireReadSlot(ctx) + if err != nil { + t.Errorf("Expected no error for second read slot, got %v", err) + } + + // Third read slot should timeout (context deadline exceeded) + err = rm.AcquireReadSlot(ctx) + if err == nil { + t.Error("Expected timeout error for third read slot, got nil") + } + + // Release one slot and try again + rm.ReleaseReadSlot() + + // Create new context for the next attempt + ctx2, cancel2 := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel2() + + err = rm.AcquireReadSlot(ctx2) + if err != nil { + t.Errorf("Expected no error after releasing a slot, got %v", err) + } + + // Clean up remaining slots + rm.ReleaseReadSlot() + rm.ReleaseReadSlot() +} + +func TestResourceMonitor_TimeoutContexts(t *testing.T) { + testutil.ResetViperConfig(t, "") + + // Set short timeouts for testing + viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.fileProcessingTimeoutSec", 1) // 1 second + viper.Set("resourceLimits.overallTimeoutSec", 2) // 2 seconds + + rm := NewResourceMonitor() + defer rm.Close() + + parentCtx := context.Background() + + // Test file processing context + fileCtx, fileCancel := rm.CreateFileProcessingContext(parentCtx) + defer fileCancel() + + deadline, ok := fileCtx.Deadline() + if !ok { + t.Error("Expected file processing context to have a deadline") + } else if time.Until(deadline) > time.Second+100*time.Millisecond { + t.Error("File processing timeout appears to be too long") + } + + // Test overall processing context + overallCtx, overallCancel := rm.CreateOverallProcessingContext(parentCtx) + defer overallCancel() + + deadline, ok = overallCtx.Deadline() + if !ok { + t.Error("Expected overall processing context to have a deadline") + } else if time.Until(deadline) > 2*time.Second+100*time.Millisecond { + t.Error("Overall processing timeout appears to be too long") + } +} + +func TestResourceMonitor_RateLimiting(t *testing.T) { + 
testutil.ResetViperConfig(t, "") + + // Enable rate limiting with a low rate for testing + viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.rateLimitFilesPerSec", 5) // 5 files per second + + rm := NewResourceMonitor() + defer rm.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + // First few requests should succeed quickly + start := time.Now() + for i := 0; i < 3; i++ { + err := rm.WaitForRateLimit(ctx) + if err != nil { + t.Errorf("Expected no error for rate limit wait %d, got %v", i, err) + } + } + + // Should have taken some time due to rate limiting + duration := time.Since(start) + if duration < 200*time.Millisecond { + t.Logf("Rate limiting may not be working as expected, took only %v", duration) + } +} + +func TestResourceMonitor_Metrics(t *testing.T) { + testutil.ResetViperConfig(t, "") + + viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.enableResourceMonitoring", true) + + rm := NewResourceMonitor() + defer rm.Close() + + // Process some files to generate metrics + rm.RecordFileProcessed(1000) + rm.RecordFileProcessed(2000) + rm.RecordFileProcessed(500) + + metrics := rm.GetMetrics() + + // Verify metrics + if metrics.FilesProcessed != 3 { + t.Errorf("Expected 3 files processed, got %d", metrics.FilesProcessed) + } + + if metrics.TotalSizeProcessed != 3500 { + t.Errorf("Expected total size 3500, got %d", metrics.TotalSizeProcessed) + } + + expectedAvgSize := float64(3500) / float64(3) + if metrics.AverageFileSize != expectedAvgSize { + t.Errorf("Expected average file size %.2f, got %.2f", expectedAvgSize, metrics.AverageFileSize) + } + + if metrics.ProcessingRate <= 0 { + t.Error("Expected positive processing rate") + } + + if !metrics.LastUpdated.After(time.Now().Add(-time.Second)) { + t.Error("Expected recent LastUpdated timestamp") + } +} + +func TestResourceMonitor_Integration(t *testing.T) { + // Create temporary test directory + tempDir := t.TempDir() + + // Create test files + testFiles := []string{"test1.txt", "test2.txt", "test3.txt"} + for _, filename := range testFiles { + testutil.CreateTestFile(t, tempDir, filename, []byte("test content")) + } + + testutil.ResetViperConfig(t, "") + + // Configure resource limits + viper.Set("resourceLimits.enabled", true) + viper.Set("resourceLimits.maxFiles", 5) + viper.Set("resourceLimits.maxTotalSize", 1024*1024) // 1MB + viper.Set("resourceLimits.fileProcessingTimeoutSec", 10) + viper.Set("resourceLimits.maxConcurrentReads", 3) + + rm := NewResourceMonitor() + defer rm.Close() + + ctx := context.Background() + + // Test file processing workflow + for _, filename := range testFiles { + filePath := filepath.Join(tempDir, filename) + fileInfo, err := os.Stat(filePath) + if err != nil { + t.Fatalf("Failed to stat test file %s: %v", filePath, err) + } + + // Validate file can be processed + err = rm.ValidateFileProcessing(filePath, fileInfo.Size()) + if err != nil { + t.Errorf("Failed to validate file %s: %v", filePath, err) + continue + } + + // Acquire read slot + err = rm.AcquireReadSlot(ctx) + if err != nil { + t.Errorf("Failed to acquire read slot for %s: %v", filePath, err) + continue + } + + // Check memory limits + err = rm.CheckHardMemoryLimit() + if err != nil { + t.Errorf("Memory limit check failed for %s: %v", filePath, err) + } + + // Record processing + rm.RecordFileProcessed(fileInfo.Size()) + + // Release read slot + rm.ReleaseReadSlot() + } + + // Verify final metrics + metrics := rm.GetMetrics() + if 
metrics.FilesProcessed != int64(len(testFiles)) {
+		t.Errorf("Expected %d files processed, got %d", len(testFiles), metrics.FilesProcessed)
+	}
+
+	// Test resource limit logging
+	rm.LogResourceInfo()
+}
\ No newline at end of file
diff --git a/fileproc/writer.go b/fileproc/writer.go
index 8858b0e..303aaf2 100644
--- a/fileproc/writer.go
+++ b/fileproc/writer.go
@@ -18,11 +18,16 @@ func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- stru
 	case "yaml":
 		startYAMLWriter(outFile, writeCh, done, prefix, suffix)
 	default:
+		context := map[string]interface{}{
+			"format": format,
+		}
 		err := utils.NewStructuredError(
 			utils.ErrorTypeValidation,
 			utils.CodeValidationFormat,
 			fmt.Sprintf("unsupported format: %s", format),
-		).WithContext("format", format)
+			"",
+			context,
+		)
 		utils.LogError("Failed to encode output", err)
 		close(done)
 	}
diff --git a/scripts/help.txt b/scripts/help.txt
new file mode 100644
index 0000000..1072c99
--- /dev/null
+++ b/scripts/help.txt
@@ -0,0 +1,25 @@
+Available targets:
+  install-tools - Install required linting and development tools
+  lint - Run all linters (Go, Makefile, shell, YAML)
+  lint-fix - Run linters with auto-fix enabled
+  lint-verbose - Run linters with verbose output
+  test - Run tests
+  coverage - Run tests with coverage
+  build - Build the application
+  clean - Clean build artifacts
+  all - Run lint, test, and build
+
+Security targets:
+  security - Run comprehensive security scan
+  security-full - Run full security analysis with all tools
+  vuln-check - Check for dependency vulnerabilities
+
+Benchmark targets:
+  build-benchmark - Build the benchmark binary
+  benchmark - Run all benchmarks
+  benchmark-collection - Run file collection benchmarks
+  benchmark-processing - Run file processing benchmarks
+  benchmark-concurrency - Run concurrency benchmarks
+  benchmark-format - Run format benchmarks
+
+Run 'make <target>' to execute a specific target.
\ No newline at end of file
diff --git a/scripts/lint.sh b/scripts/lint.sh
new file mode 100755
index 0000000..0070163
--- /dev/null
+++ b/scripts/lint.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -e
+
+echo "Running golangci-lint..."
+golangci-lint run ./...
+
+echo "Running checkmake..."
+checkmake --config=.checkmake Makefile
+
+echo "Running shfmt check..."
+shfmt -d .
+
+echo "Running yamllint..."
+yamllint -c .yamllint .
diff --git a/scripts/security-scan.sh b/scripts/security-scan.sh
new file mode 100755
index 0000000..71c627d
--- /dev/null
+++ b/scripts/security-scan.sh
@@ -0,0 +1,426 @@
+#!/bin/bash
+set -euo pipefail
+
+# Security Scanning Script for gibidify
+# This script runs comprehensive security checks locally and in CI
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+cd "$PROJECT_ROOT"
+
+echo "🔒 Starting comprehensive security scan for gibidify..."
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Function to print status
+print_status() {
+	echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+print_warning() {
+	echo -e "${YELLOW}[WARN]${NC} $1"
+}
+
+print_error() {
+	echo -e "${RED}[ERROR]${NC} $1"
+}
+
+print_success() {
+	echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+# Check if required tools are installed
+check_dependencies() {
+	print_status "Checking security scanning dependencies..."
+
+	local missing_tools=()
+
+	if ! command -v go &>/dev/null; then
+		missing_tools+=("go")
+	fi
+
+	if ! 
command -v golangci-lint &>/dev/null; then
+		print_warning "golangci-lint not found, installing..."
+		go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
+	fi
+
+	if ! command -v gosec &>/dev/null; then
+		print_warning "gosec not found, installing..."
+		go install github.com/securego/gosec/v2/cmd/gosec@latest
+	fi
+
+	if ! command -v govulncheck &>/dev/null; then
+		print_warning "govulncheck not found, installing..."
+		go install golang.org/x/vuln/cmd/govulncheck@latest
+	fi
+
+	if ! command -v checkmake &>/dev/null; then
+		print_warning "checkmake not found, installing..."
+		go install github.com/mrtazz/checkmake/cmd/checkmake@latest
+	fi
+
+	if ! command -v shfmt &>/dev/null; then
+		print_warning "shfmt not found, installing..."
+		go install mvdan.cc/sh/v3/cmd/shfmt@latest
+	fi
+
+	if ! command -v yamllint &>/dev/null; then
+		print_warning "yamllint not found, installing..."
+		go install github.com/excilsploft/yamllint@latest
+	fi
+
+	if [ ${#missing_tools[@]} -ne 0 ]; then
+		print_error "Missing required tools: ${missing_tools[*]}"
+		print_error "Please install the missing tools and try again."
+		exit 1
+	fi
+
+	print_success "All dependencies are available"
+}
+
+# Run gosec security scanner
+run_gosec() {
+	print_status "Running gosec security scanner..."
+
+	if gosec -fmt=json -out=gosec-report.json -stdout -verbose=text ./...; then
+		print_success "gosec scan completed successfully"
+	else
+		print_error "gosec found security issues!"
+		if [ -f "gosec-report.json" ]; then
+			echo "Detailed report saved to gosec-report.json"
+		fi
+		return 1
+	fi
+}
+
+# Run vulnerability check
+run_govulncheck() {
+	print_status "Running govulncheck for dependency vulnerabilities..."
+
+	if govulncheck -json ./... >govulncheck-report.json 2>&1; then
+		print_success "No known vulnerabilities found in dependencies"
+	else
+		if grep -q '"finding"' govulncheck-report.json 2>/dev/null; then
+			print_error "Vulnerabilities found in dependencies!"
+			echo "Detailed report saved to govulncheck-report.json"
+			return 1
+		else
+			print_success "No vulnerabilities found"
+		fi
+	fi
+}
+
+# Run enhanced golangci-lint with security focus
+run_security_lint() {
+	print_status "Running security-focused linting..."
+
+	local security_linters="gosec,gocritic,bodyclose,rowserrcheck,misspell,unconvert,unparam,unused,errcheck,ineffassign,staticcheck"
+
+	if golangci-lint run --enable="$security_linters" --timeout=5m; then
+		print_success "Security linting passed"
+	else
+		print_error "Security linting found issues!"
+		return 1
+	fi
+}
+
+# Check for potential secrets
+check_secrets() {
+	print_status "Scanning for potential secrets and sensitive data..."
+
+	local secrets_found=false
+
+	# Common secret patterns
+	local patterns=(
+		"password\s*[:=]\s*['\"][^'\"]{3,}['\"]"
+		"secret\s*[:=]\s*['\"][^'\"]{3,}['\"]"
+		"key\s*[:=]\s*['\"][^'\"]{8,}['\"]"
+		"token\s*[:=]\s*['\"][^'\"]{8,}['\"]"
+		"api_?key\s*[:=]\s*['\"][^'\"]{8,}['\"]"
+		"aws_?access_?key"
+		"aws_?secret"
+		"AKIA[0-9A-Z]{16}" # AWS Access Key pattern
+		"github_?token"
+		"private_?key"
+	)
+
+	for pattern in "${patterns[@]}"; do
+		if grep -r -i -E "$pattern" --include="*.go" . 
2>/dev/null; then + print_warning "Potential secret pattern found: $pattern" + secrets_found=true + fi + done + + # Check git history for secrets (last 10 commits) + if git log --oneline -10 | grep -i -E "(password|secret|key|token)" >/dev/null 2>&1; then + print_warning "Potential secrets mentioned in recent commit messages" + secrets_found=true + fi + + if [ "$secrets_found" = true ]; then + print_warning "Potential secrets detected. Please review manually." + return 1 + else + print_success "No obvious secrets detected" + fi +} + +# Check for hardcoded network addresses +check_hardcoded_addresses() { + print_status "Checking for hardcoded network addresses..." + + local addresses_found=false + + # Look for IP addresses (excluding common safe ones) + if grep -r -E "([0-9]{1,3}\.){3}[0-9]{1,3}" --include="*.go" . | + grep -v -E "(127\.0\.0\.1|0\.0\.0\.0|255\.255\.255\.255|localhost)" >/dev/null 2>&1; then + print_warning "Hardcoded IP addresses found:" + grep -r -E "([0-9]{1,3}\.){3}[0-9]{1,3}" --include="*.go" . | + grep -v -E "(127\.0\.0\.1|0\.0\.0\.0|255\.255\.255\.255|localhost)" || true + addresses_found=true + fi + + # Look for URLs (excluding documentation examples) + if grep -r -E "https?://[^/\s]+" --include="*.go" . | + grep -v -E "(example\.com|localhost|127\.0\.0\.1|\$\{)" >/dev/null 2>&1; then + print_warning "Hardcoded URLs found:" + grep -r -E "https?://[^/\s]+" --include="*.go" . | + grep -v -E "(example\.com|localhost|127\.0\.0\.1|\$\{)" || true + addresses_found=true + fi + + if [ "$addresses_found" = true ]; then + print_warning "Hardcoded network addresses detected. Please review." + return 1 + else + print_success "No hardcoded network addresses found" + fi +} + +# Check Docker security (if Dockerfile exists) +check_docker_security() { + if [ -f "Dockerfile" ]; then + print_status "Checking Docker security..." + + # Basic Dockerfile security checks + local docker_issues=false + + if grep -q "^USER root" Dockerfile; then + print_warning "Dockerfile runs as root user" + docker_issues=true + fi + + if ! grep -q "^USER " Dockerfile; then + print_warning "Dockerfile doesn't specify a non-root user" + docker_issues=true + fi + + if grep -q "RUN.*wget\|RUN.*curl" Dockerfile && ! grep -q "rm.*wget\|rm.*curl" Dockerfile; then + print_warning "Dockerfile may leave curl/wget installed" + docker_issues=true + fi + + if [ "$docker_issues" = true ]; then + print_warning "Docker security issues detected" + return 1 + else + print_success "Docker security check passed" + fi + else + print_status "No Dockerfile found, skipping Docker security check" + fi +} + +# Check file permissions +check_file_permissions() { + print_status "Checking file permissions..." + + local perm_issues=false + + # Check for overly permissive files + if find . -type f -perm /o+w -not -path "./.git/*" | grep -q .; then + print_warning "World-writable files found:" + find . -type f -perm /o+w -not -path "./.git/*" || true + perm_issues=true + fi + + # Check for executable files that shouldn't be + if find . -type f -name "*.go" -perm /a+x | grep -q .; then + print_warning "Executable Go files found (should not be executable):" + find . -type f -name "*.go" -perm /a+x || true + perm_issues=true + fi + + if [ "$perm_issues" = true ]; then + print_warning "File permission issues detected" + return 1 + else + print_success "File permissions check passed" + fi +} + +# Check Makefile with checkmake +check_makefile() { + if [ -f "Makefile" ]; then + print_status "Checking Makefile with checkmake..." 
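+		# Assumes the repo-root .checkmake config is present; lint.sh passes the
+		# same --config flag so local runs and this scan stay consistent.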
+
+		if checkmake --config=.checkmake Makefile; then
+			print_success "Makefile check passed"
+		else
+			print_error "Makefile issues detected!"
+			return 1
+		fi
+	else
+		print_status "No Makefile found, skipping checkmake"
+	fi
+}
+
+# Check shell scripts with shfmt
+check_shell_scripts() {
+	print_status "Checking shell script formatting..."
+
+	if find . -name "*.sh" -type f | head -1 | grep -q .; then
+		if shfmt -d .; then
+			print_success "Shell script formatting check passed"
+		else
+			print_error "Shell script formatting issues detected!"
+			return 1
+		fi
+	else
+		print_status "No shell scripts found, skipping shfmt check"
+	fi
+}
+
+# Check YAML files
+check_yaml_files() {
+	print_status "Checking YAML files..."
+
+	if find . \( -name "*.yml" -o -name "*.yaml" \) -type f | head -1 | grep -q .; then
+		if yamllint -c .yamllint .; then
+			print_success "YAML files check passed"
+		else
+			print_error "YAML file issues detected!"
+			return 1
+		fi
+	else
+		print_status "No YAML files found, skipping yamllint check"
+	fi
+}
+
+# Generate security report
+generate_report() {
+	print_status "Generating security scan report..."
+
+	local report_file="security-report.md"
+
+	cat >"$report_file" <<EOF
...
diff --git a/utils/errors.go b/utils/errors.go
...
-	return NewStructuredError(ErrorTypeCLI, CodeCLIMissingSource, "usage: gibidify -source <path> [--destination <path>] [--format=json|yaml|markdown]")
+	return NewStructuredError(ErrorTypeCLI, CodeCLIMissingSource, "usage: gibidify -source <path> [--destination <path>] [--format=json|yaml|markdown]", "", nil)
 }
 
 // NewFileSystemError creates a file system error.
 func NewFileSystemError(code, message string) *StructuredError {
-	return NewStructuredError(ErrorTypeFileSystem, code, message)
+	return NewStructuredError(ErrorTypeFileSystem, code, message, "", nil)
 }
 
 // NewProcessingError creates a processing error.
 func NewProcessingError(code, message string) *StructuredError {
-	return NewStructuredError(ErrorTypeProcessing, code, message)
+	return NewStructuredError(ErrorTypeProcessing, code, message, "", nil)
 }
 
 // NewIOError creates an IO error.
 func NewIOError(code, message string) *StructuredError {
-	return NewStructuredError(ErrorTypeIO, code, message)
+	return NewStructuredError(ErrorTypeIO, code, message, "", nil)
 }
 
 // NewValidationError creates a validation error.
 func NewValidationError(code, message string) *StructuredError {
-	return NewStructuredError(ErrorTypeValidation, code, message)
+	return NewStructuredError(ErrorTypeValidation, code, message, "", nil)
 }
 
 // LogError logs an error with a consistent format if the error is not nil.
diff --git a/utils/paths.go b/utils/paths.go
index 845d0ca..6aa8ca1 100644
--- a/utils/paths.go
+++ b/utils/paths.go
@@ -3,7 +3,9 @@ package utils
 
 import (
 	"fmt"
+	"os"
 	"path/filepath"
+	"strings"
 )
 
 // GetAbsolutePath returns the absolute path for the given path.
@@ -24,3 +26,142 @@ func GetBaseName(absPath string) string {
 	}
 	return baseName
 }
+
+// ValidateSourcePath validates a source directory path for security.
+// It ensures the path exists, is a directory, and doesn't contain path traversal attempts.
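+// Illustrative call from CLI flag handling (hypothetical, not part of this change):
+//
+//	if err := ValidateSourcePath(flags.Source); err != nil {
+//		return err
+//	}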
+func ValidateSourcePath(path string) error {
+	if path == "" {
+		return NewStructuredError(ErrorTypeValidation, CodeValidationRequired, "source path is required", "", nil)
+	}
+
+	// Check for path traversal patterns before cleaning
+	if strings.Contains(path, "..") {
+		return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "path traversal attempt detected in source path", path, map[string]interface{}{
+			"original_path": path,
+		})
+	}
+
+	// Clean and get absolute path
+	cleaned := filepath.Clean(path)
+	abs, err := filepath.Abs(cleaned)
+	if err != nil {
+		return NewStructuredError(ErrorTypeFileSystem, CodeFSPathResolution, "cannot resolve source path", path, map[string]interface{}{
+			"error": err.Error(),
+		})
+	}
+
+	// Get current working directory to ensure we're not escaping it for relative paths
+	if !filepath.IsAbs(path) {
+		cwd, err := os.Getwd()
+		if err != nil {
+			return NewStructuredError(ErrorTypeFileSystem, CodeFSPathResolution, "cannot get current working directory", path, map[string]interface{}{
+				"error": err.Error(),
+			})
+		}
+
+		// Ensure the resolved path is within or below the current working directory
+		cwdAbs, err := filepath.Abs(cwd)
+		if err != nil {
+			return NewStructuredError(ErrorTypeFileSystem, CodeFSPathResolution, "cannot resolve current working directory", path, map[string]interface{}{
+				"error": err.Error(),
+			})
+		}
+
+		// Check if the absolute path tries to escape the current working directory.
+		// Compare against cwd with a trailing separator so sibling paths that merely
+		// share a prefix (e.g. /work vs /work2) are not accepted as inside cwd.
+		if abs != cwdAbs && !strings.HasPrefix(abs, cwdAbs+string(os.PathSeparator)) {
+			return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "source path attempts to access directories outside current working directory", path, map[string]interface{}{
+				"resolved_path": abs,
+				"working_dir":   cwdAbs,
+			})
+		}
+	}
+
+	// Check if path exists and is a directory
+	info, err := os.Stat(cleaned)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return NewStructuredError(ErrorTypeFileSystem, CodeFSNotFound, "source directory does not exist", path, nil)
+		}
+		return NewStructuredError(ErrorTypeFileSystem, CodeFSAccess, "cannot access source directory", path, map[string]interface{}{
+			"error": err.Error(),
+		})
+	}
+
+	if !info.IsDir() {
+		return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "source path must be a directory", path, map[string]interface{}{
+			"is_file": true,
+		})
+	}
+
+	return nil
+}
+
+// ValidateDestinationPath validates a destination file path for security.
+// It ensures the path doesn't contain path traversal attempts and the parent directory exists.
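+// Illustrative call (hypothetical, not part of this change):
+//
+//	if err := ValidateDestinationPath(flags.Destination); err != nil {
+//		return err
+//	}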
+func ValidateDestinationPath(path string) error {
+	if path == "" {
+		return NewStructuredError(ErrorTypeValidation, CodeValidationRequired, "destination path is required", "", nil)
+	}
+
+	// Check for path traversal patterns before cleaning
+	if strings.Contains(path, "..") {
+		return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "path traversal attempt detected in destination path", path, map[string]interface{}{
+			"original_path": path,
+		})
+	}
+
+	// Clean and validate the path
+	cleaned := filepath.Clean(path)
+
+	// Resolve to an absolute path so the directory checks below are unambiguous
+	abs, err := filepath.Abs(cleaned)
+	if err != nil {
+		return NewStructuredError(ErrorTypeFileSystem, CodeFSPathResolution, "cannot resolve destination path", path, map[string]interface{}{
+			"error": err.Error(),
+		})
+	}
+
+	// Ensure the destination is not a directory
+	if info, err := os.Stat(abs); err == nil && info.IsDir() {
+		return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "destination cannot be a directory", path, map[string]interface{}{
+			"is_directory": true,
+		})
+	}
+
+	// Check that the parent directory exists and is a directory
+	// (writability is surfaced later, by the actual write)
+	parentDir := filepath.Dir(abs)
+	if parentInfo, err := os.Stat(parentDir); err != nil {
+		if os.IsNotExist(err) {
+			return NewStructuredError(ErrorTypeFileSystem, CodeFSNotFound, "destination parent directory does not exist", path, map[string]interface{}{
+				"parent_dir": parentDir,
+			})
+		}
+		return NewStructuredError(ErrorTypeFileSystem, CodeFSAccess, "cannot access destination parent directory", path, map[string]interface{}{
+			"parent_dir": parentDir,
+			"error":      err.Error(),
+		})
+	} else if !parentInfo.IsDir() {
+		return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "destination parent is not a directory", path, map[string]interface{}{
+			"parent_dir": parentDir,
+		})
+	}
+
+	return nil
+}
+
+// ValidateConfigPath validates a configuration file path for security.
+// It ensures the path doesn't contain path traversal attempts.
+func ValidateConfigPath(path string) error {
+	if path == "" {
+		return nil // Empty path is allowed for config
+	}
+
+	// Check for path traversal patterns before cleaning
+	if strings.Contains(path, "..") {
+		return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "path traversal attempt detected in config path", path, map[string]interface{}{
+			"original_path": path,
+		})
+	}
+
+	return nil
+}
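+
+// Illustrative wiring (hypothetical, not part of this change): a CLI entry
+// point would typically run all three validators before any file processing:
+//
+//	if err := ValidateConfigPath(cfgFile); err != nil {
+//		return err
+//	}
+//	if err := ValidateSourcePath(srcDir); err != nil {
+//		return err
+//	}
+//	if err := ValidateDestinationPath(outFile); err != nil {
+//		return err
+//	}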