// Package fileproc provides functions for processing files. package fileproc import ( "context" "fmt" "io" "os" "path/filepath" "strings" "time" "github.com/sirupsen/logrus" "github.com/ivuorinen/gibidify/config" "github.com/ivuorinen/gibidify/utils" ) const ( // StreamChunkSize is the size of chunks when streaming large files (64KB). StreamChunkSize = 65536 // StreamThreshold is the file size above which we use streaming (1MB). StreamThreshold = 1048576 // MaxMemoryBuffer is the maximum memory to use for buffering content (10MB). MaxMemoryBuffer = 10485760 ) // WriteRequest represents the content to be written. type WriteRequest struct { Path string Content string IsStream bool Reader io.Reader } // FileProcessor handles file processing operations. type FileProcessor struct { rootPath string sizeLimit int64 resourceMonitor *ResourceMonitor } // NewFileProcessor creates a new file processor. func NewFileProcessor(rootPath string) *FileProcessor { return &FileProcessor{ rootPath: rootPath, sizeLimit: config.GetFileSizeLimit(), resourceMonitor: NewResourceMonitor(), } } // NewFileProcessorWithMonitor creates a new file processor with a shared resource monitor. func NewFileProcessorWithMonitor(rootPath string, monitor *ResourceMonitor) *FileProcessor { return &FileProcessor{ rootPath: rootPath, sizeLimit: config.GetFileSizeLimit(), resourceMonitor: monitor, } } // ProcessFile reads the file at filePath and sends a formatted output to outCh. // It automatically chooses between loading the entire file or streaming based on file size. func ProcessFile(filePath string, outCh chan<- WriteRequest, rootPath string) { processor := NewFileProcessor(rootPath) ctx := context.Background() processor.ProcessWithContext(ctx, filePath, outCh) } // ProcessFileWithMonitor processes a file using a shared resource monitor. 
func ProcessFileWithMonitor(ctx context.Context, filePath string, outCh chan<- WriteRequest, rootPath string, monitor *ResourceMonitor) { processor := NewFileProcessorWithMonitor(rootPath, monitor) processor.ProcessWithContext(ctx, filePath, outCh) } // Process handles file processing with the configured settings. func (p *FileProcessor) Process(filePath string, outCh chan<- WriteRequest) { ctx := context.Background() p.ProcessWithContext(ctx, filePath, outCh) } // ProcessWithContext handles file processing with context and resource monitoring. func (p *FileProcessor) ProcessWithContext(ctx context.Context, filePath string, outCh chan<- WriteRequest) { // Create file processing context with timeout fileCtx, fileCancel := p.resourceMonitor.CreateFileProcessingContext(ctx) defer fileCancel() // Wait for rate limiting if err := p.resourceMonitor.WaitForRateLimit(fileCtx); err != nil { if err == context.DeadlineExceeded { utils.LogErrorf( utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing timeout during rate limiting", filePath, nil), "File processing timeout during rate limiting: %s", filePath, ) } return } // Validate file and check resource limits fileInfo, err := p.validateFileWithLimits(fileCtx, filePath) if err != nil { return // Error already logged } // Acquire read slot for concurrent processing if err := p.resourceMonitor.AcquireReadSlot(fileCtx); err != nil { if err == context.DeadlineExceeded { utils.LogErrorf( utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing timeout waiting for read slot", filePath, nil), "File processing timeout waiting for read slot: %s", filePath, ) } return } defer p.resourceMonitor.ReleaseReadSlot() // Check hard memory limits before processing if err := p.resourceMonitor.CheckHardMemoryLimit(); err != nil { utils.LogErrorf(err, "Hard memory limit check failed for file: %s", filePath) return } // Get relative path relPath := 
p.getRelativePath(filePath) // Process file with timeout processStart := time.Now() defer func() { // Record successful processing p.resourceMonitor.RecordFileProcessed(fileInfo.Size()) logrus.Debugf("File processed in %v: %s", time.Since(processStart), filePath) }() // Choose processing strategy based on file size if fileInfo.Size() <= StreamThreshold { p.processInMemoryWithContext(fileCtx, filePath, relPath, outCh) } else { p.processStreamingWithContext(fileCtx, filePath, relPath, outCh) } } // validateFileWithLimits checks if the file can be processed with resource limits. func (p *FileProcessor) validateFileWithLimits(ctx context.Context, filePath string) (os.FileInfo, error) { // Check context cancellation select { case <-ctx.Done(): return nil, ctx.Err() default: } fileInfo, err := os.Stat(filePath) if err != nil { structErr := utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to stat file").WithFilePath(filePath) utils.LogErrorf(structErr, "Failed to stat file %s", filePath) return nil, err } // Check traditional size limit if fileInfo.Size() > p.sizeLimit { context := map[string]interface{}{ "file_size": fileInfo.Size(), "size_limit": p.sizeLimit, } utils.LogErrorf( utils.NewStructuredError( utils.ErrorTypeValidation, utils.CodeValidationSize, fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", fileInfo.Size(), p.sizeLimit), filePath, context, ), "Skipping large file %s", filePath, ) return nil, fmt.Errorf("file too large") } // Check resource limits if err := p.resourceMonitor.ValidateFileProcessing(filePath, fileInfo.Size()); err != nil { utils.LogErrorf(err, "Resource limit validation failed for file: %s", filePath) return nil, err } return fileInfo, nil } // getRelativePath computes the path relative to rootPath. 
func (p *FileProcessor) getRelativePath(filePath string) string { relPath, err := filepath.Rel(p.rootPath, filePath) if err != nil { return filePath // Fallback } return relPath } // processInMemoryWithContext loads the entire file into memory with context awareness. func (p *FileProcessor) processInMemoryWithContext(ctx context.Context, filePath, relPath string, outCh chan<- WriteRequest) { // Check context before reading select { case <-ctx.Done(): utils.LogErrorf( utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing cancelled", filePath, nil), "File processing cancelled: %s", filePath, ) return default: } content, err := os.ReadFile(filePath) // #nosec G304 - filePath is validated by walker if err != nil { structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to read file").WithFilePath(filePath) utils.LogErrorf(structErr, "Failed to read file %s", filePath) return } // Check context again after reading select { case <-ctx.Done(): utils.LogErrorf( utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing cancelled after read", filePath, nil), "File processing cancelled after read: %s", filePath, ) return default: } // Try to send the result, but respect context cancellation select { case <-ctx.Done(): utils.LogErrorf( utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing cancelled before output", filePath, nil), "File processing cancelled before output: %s", filePath, ) return case outCh <- WriteRequest{ Path: relPath, Content: p.formatContent(relPath, string(content)), IsStream: false, }: } } // processStreamingWithContext creates a streaming reader for large files with context awareness. 
func (p *FileProcessor) processStreamingWithContext(ctx context.Context, filePath, relPath string, outCh chan<- WriteRequest) { // Check context before creating reader select { case <-ctx.Done(): utils.LogErrorf( utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "streaming processing cancelled", filePath, nil), "Streaming processing cancelled: %s", filePath, ) return default: } reader := p.createStreamReaderWithContext(ctx, filePath, relPath) if reader == nil { return // Error already logged } // Try to send the result, but respect context cancellation select { case <-ctx.Done(): utils.LogErrorf( utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "streaming processing cancelled before output", filePath, nil), "Streaming processing cancelled before output: %s", filePath, ) return case outCh <- WriteRequest{ Path: relPath, Content: "", // Empty since content is in Reader IsStream: true, Reader: reader, }: } } // createStreamReaderWithContext creates a reader that combines header and file content with context awareness. func (p *FileProcessor) createStreamReaderWithContext(ctx context.Context, filePath, relPath string) io.Reader { // Check context before opening file select { case <-ctx.Done(): return nil default: } file, err := os.Open(filePath) // #nosec G304 - filePath is validated by walker if err != nil { structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to open file for streaming").WithFilePath(filePath) utils.LogErrorf(structErr, "Failed to open file for streaming %s", filePath) return nil } // Note: file will be closed by the writer header := p.formatHeader(relPath) return io.MultiReader(header, file) } // formatContent formats the file content with header. func (p *FileProcessor) formatContent(relPath, content string) string { return fmt.Sprintf("\n---\n%s\n%s\n", relPath, content) } // formatHeader creates a reader for the file header. 
func (p *FileProcessor) formatHeader(relPath string) io.Reader {
	// The header is a blank line, a "---" separator line, and the relative
	// path on its own line.
	header := fmt.Sprintf("\n---\n%s\n", relPath)
	return strings.NewReader(header)
}