32 Commits

Author SHA1 Message Date
db63505fa7 fix(ci): update security.yml 2025-08-13 10:16:24 +03:00
renovate[bot]
87d0a78d38 fix(github-action): update ivuorinen/actions (25.8.4 → 25.8.11) (#44)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-08-13 04:12:56 +00:00
renovate[bot]
dd84267f37 feat(github-action): update ivuorinen/actions (25.7.28 → 25.8.4) (#40)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-08-05 02:36:26 +00:00
eef3ab3761 chore: tweaks, simplification, tests 2025-07-30 19:01:59 +03:00
b369d317b1 feat(security): improve security features, fixes 2025-07-29 13:55:25 +03:00
e35126856d feat: many features, check TODO.md 2025-07-29 13:55:25 +03:00
renovate[bot]
3556b06bb9 fix(github-action): update ivuorinen/actions (25.7.21 → 25.7.28) (#37)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-07-29 04:11:05 +00:00
renovate[bot]
7c738b75de feat(github-action): update docker/setup-buildx-action (v3.10.0 → v3.11.1) (#33)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-07-23 16:23:42 +03:00
renovate[bot]
460f90c03f feat(github-action): update actions/setup-go (v5.4.0 → v5.5.0) (#32)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-07-23 16:06:58 +03:00
renovate[bot]
4c0f17e53d chore(deps): pin actions/checkout action to (#36)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-07-23 15:03:45 +03:00
1e4869b79c fix(ci): remove "arm" from build targets in build-test-publish 2025-07-23 13:30:32 +03:00
166e69fc63 fix(ci): add checkout to pr-lint with creds 2025-07-22 12:50:48 +03:00
renovate[bot]
89d8fc3f51 feat(github-action): update step-security/harden-runner (v2.11.0 → v2.13.0) (#34)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-07-22 09:21:01 +03:00
renovate[bot]
3619a59b3c fix(github-action): update ivuorinen/actions (25.7.14 → 25.7.21) (#35)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-07-22 05:27:20 +00:00
renovate[bot]
ac7d7e3790 feat(github-action): update actions/download-artifact (v4.2.1 → v4.3.0) (#31)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-07-15 15:53:53 +00:00
renovate[bot]
b13b9da7dd fix(github-action): update ivuorinen/actions (25.7.7 → 25.7.14) (#30)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-07-15 04:58:31 +00:00
1d2b68f059 chore(lint): replace docker-based precommit (#29) 2025-07-14 16:48:43 +03:00
c91bfa0ccf feat(ci): simplify workflows, fix renovate.json (#20)
* feat(ci): simplify workflows, fix renovate.json
* fix(ci): replace sarif parsing
* fix(ci): lint fixes, json to sarif
* chore(ci): remove sarif stuff for now
2025-07-14 01:57:48 +03:00
9a2bbda223 test: fix linter package names (#23)
* test: fix linter package names

* chore: pr-lint.yml
2025-07-14 01:48:39 +03:00
renovate[bot]
70fede7635 fix(github-action): update actions/upload-artifact (v4.6.0 → v4.6.2) (#28)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-07-13 19:50:09 +00:00
renovate[bot]
376dd21a8b feat(github-action): update ivuorinen/actions (25.3.19 → 25.7.7) (#27)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-07-13 12:52:06 +00:00
72592fb559 fix: renovate.json 2025-07-13 14:18:36 +03:00
b017814c6d test: check cleanup errors (#22) 2025-07-13 14:08:23 +03:00
ef2296d45e docs: add AGENTS usage guidance (#21) 2025-07-11 18:33:56 +03:00
d752b6d271 fix(lint): linting problems 2025-03-23 22:26:02 +02:00
4b8d66c778 feat(tests): more tests and ci action (#14)
* feat(tests): more tests and ci action
* fix(ci): coverage and pr-lint
* fix(ci): renovate rules, permissions, linting, actions
* fix(lint): editorconfig fixes
* fix(lint): kics.config
* fix(lint): formatting, permissions, pre-commit config
* chore(ci): set workflow to use go 1.23, go mod tidy
* chore(ci): fixes and stuff
* chore(ci): disable GO_GOLANGCI_LINT
* chore(ci): pinning, permissions
2025-03-23 19:41:39 +02:00
2aa2a94a38 chore(deps): update go version 2025-03-17 16:11:59 +02:00
renovate[bot]
48fa5ca422 fix(deps): update module github.com/spf13/viper to v1.20.0 (#18)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-17 12:19:25 +02:00
renovate[bot]
0b31398443 feat(github-action): update go (1.23.7 → 1.24.1) (#16)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-14 13:30:32 +02:00
renovate[bot]
d807e6d659 chore(deps): update module golang.org/x/net to v0.36.0 [security] (#17)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-03-14 10:23:42 +02:00
8a638f0f43 fix(ci): add permissions to ci.yml 2025-03-03 10:53:51 +02:00
renovate[bot]
87855dcbf9 chore(deps): pin dependencies (#15)
Signed-off-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-02-09 17:49:25 +02:00
99 changed files with 10055 additions and 699 deletions

8
.checkmake Normal file
View File

@@ -0,0 +1,8 @@
# checkmake configuration
# See: https://github.com/mrtazz/checkmake#configuration
[rules.timestampexpansion]
disabled = true
[rules.maxbodylength]
disabled = true

View File

@@ -8,5 +8,19 @@ indent_size = 2
indent_style = tab
tab_width = 2
[*.yml]
indent_style = space
[*.md]
trim_trailing_whitespace = false
[*.{yml,yaml,json}]
indent_style = space
max_line_length = 250
[LICENSE]
max_line_length = 80
indent_size = 0
indent_style = space
trim_trailing_whitespace = true

View File

@@ -1,6 +1,4 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": [
"github>ivuorinen/renovate-config"
]
"extends": ["github>ivuorinen/renovate-config"]
}

155
.github/workflows/build-test-publish.yml vendored Normal file
View File

@@ -0,0 +1,155 @@
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
name: Build, Test, Coverage, and Publish
on:
push:
branches: [main]
pull_request:
branches: [main]
release:
types: [created]
permissions:
contents: read
jobs:
test:
name: Run Tests with Coverage and SARIF
runs-on: ubuntu-latest
permissions:
contents: write
checks: write
pull-requests: write
security-events: write
statuses: write
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0
with:
egress-policy: audit
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Go
uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
with:
go-version-file: "./go.mod"
cache: true
- name: Install dependencies
run: go mod tidy
- name: Run tests
run: go test -json ./... > test-results.json
- name: Generate coverage report
run: go test -coverprofile=coverage.out ./...
- name: Check coverage
id: coverage
run: |
coverage="$(go tool cover -func=coverage.out | grep total | awk '{print substr($3, 1, length($3)-1)}')"
echo "total_coverage=$coverage" >> "$GITHUB_ENV"
echo "Coverage: $coverage%"
- name: Upload test results
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: test-results
path: test-results.json
- name: Cleanup
run: rm coverage.out
- name: Fail if coverage is below threshold
run: |
if (( $(echo "$total_coverage < 50" | bc -l) )); then
echo "Coverage ($total_coverage%) is below the threshold (50%)"
exit 1
fi
build:
name: Build Binaries
needs: test
runs-on: ubuntu-latest
permissions:
contents: write
packages: write
strategy:
matrix:
goos: [linux, darwin, windows]
goarch: [amd64, arm64]
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Go
uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
with:
go-version-file: "./go.mod"
- name: Run go mod tidy
run: go mod tidy
- name: Build binary for ${{ matrix.goos }}-${{ matrix.goarch }}
run: |
mkdir -p dist
GOOS=${{ matrix.goos }} GOARCH=${{ matrix.goarch }} go build \
-ldflags "-X main.Version=${{ github.ref_name }}" \
-o dist/gibidify-${{ matrix.goos }}-${{ matrix.goarch }}${{ matrix.goos == 'windows' && '.exe' || '' }} \
.
- name: Generate SHA256 checksum
run: |
cd dist
for f in gibidify-*; do
sha256sum "$f" > "$f.sha256"
done
- name: Upload artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: gibidify-${{ matrix.goos }}-${{ matrix.goarch }}
path: dist/*
docker:
name: Build and Publish Docker Image
if: github.event_name == 'release'
needs: build
runs-on: ubuntu-latest
permissions:
contents: write
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Download Linux binaries
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: gibidify-linux-amd64
path: .
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
- name: Log in to GitHub Container Registry
run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
- name: Build and push multi-arch Docker image
run: |
chmod +x gibidify-linux-amd64
mv gibidify-linux-amd64 gibidify
docker buildx build --platform linux/amd64,linux/arm64,linux/arm/v7 \
--tag ghcr.io/${{ github.repository }}/gibidify:${{ github.ref_name }} \
--tag ghcr.io/${{ github.repository }}/gibidify:latest \
--push \
--squash .

View File

@@ -1,63 +0,0 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
name: Build and Publish
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
release:
types: [ created ]
jobs:
build:
name: Build Binaries
runs-on: ubuntu-latest
strategy:
matrix:
goos: [ "linux", "darwin" ]
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.23'
- name: Build binary for ${{ matrix.goos }}
shell: bash
run: |
GOOS=${{ matrix.goos }} GOARCH=amd64 go build \
-ldflags "-X main.Version=dev-$(date -u +%Y%m%d%H%M)" \
-o gibidify-${{ matrix.goos }} \
.
- name: Upload artifact for ${{ matrix.goos }}
uses: actions/upload-artifact@v4
with:
name: gibidify-${{ matrix.goos }}
path: gibidify-${{ matrix.goos }}
docker:
name: Build and Publish Docker Image
needs: build
runs-on: ubuntu-latest
if: github.event_name == 'release'
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Download Linux binary artifact
uses: actions/download-artifact@v4
with:
name: gibidify-linux
path: .
- name: Build Docker image
shell: bash
run: |
cp ./gibidify-linux ./gibidify
chmod +x ./gibidify
docker build -t ghcr.io/${{ github.repository }}/gibidify:${{ github.ref_name }} .

27
.github/workflows/pr-lint.yml vendored Normal file
View File

@@ -0,0 +1,27 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
name: PR Lint
# yamllint disable-line rule:truthy
on:
push:
branches: [master, main]
pull_request:
branches: [master, main]
permissions: read-all
jobs:
Linter:
name: PR Lint
runs-on: ubuntu-latest
permissions:
contents: write
issues: write
pull-requests: write
statuses: write
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
with:
token: ${{ secrets.GITHUB_TOKEN }}
- uses: ivuorinen/actions/pr-lint@fa0232d3c4ba16d087b606296435354a69c01756 # 25.8.11

143
.github/workflows/security.yml vendored Normal file
View File

@@ -0,0 +1,143 @@
name: Security Scan
on:
push:
branches: [main, develop]
pull_request:
branches: [main, develop]
schedule:
# Run security scan weekly on Sundays at 00:00 UTC
- cron: '0 0 * * 0'
permissions: read-all
jobs:
security:
name: Security Analysis
runs-on: ubuntu-latest
permissions:
security-events: write
contents: read
actions: read
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
- name: Cache Go modules
uses: actions/cache@v4
with:
path: |
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-go-
# Security Scanning with gosec
- name: Run gosec Security Scanner
uses: securego/gosec@v2
with:
args: '-fmt sarif -out gosec-results.sarif ./...'
- name: Upload gosec results to GitHub Security tab
uses: github/codeql-action/upload-sarif@v3
if: always()
with:
sarif_file: gosec-results.sarif
# Dependency Vulnerability Scanning
- name: Run govulncheck
run: |
go install golang.org/x/vuln/cmd/govulncheck@latest
govulncheck -json ./... > govulncheck-results.json || true
- name: Parse govulncheck results
run: |
if [ -s govulncheck-results.json ]; then
echo "::warning::Vulnerability check completed. Check govulncheck-results.json for details."
if grep -q '"finding"' govulncheck-results.json; then
echo "::error::Vulnerabilities found in dependencies!"
cat govulncheck-results.json
exit 1
fi
fi
# Additional Security Linting
- name: Run security-focused golangci-lint
uses: golangci/golangci-lint-action@v6.0.1
with:
args: "--enable=gosec,gocritic,bodyclose,rowserrcheck,misspell,unconvert,unparam,unused --timeout=5m"
# Makefile Linting
- name: Run checkmake on Makefile
uses: Uno-Takashi/checkmake-action@v2
with:
config: .checkmake
# Shell Script Formatting Check
- name: Check shell script formatting
run: |
go install mvdan.cc/sh/v3/cmd/shfmt@latest
shfmt -d .
# YAML Linting
- name: Run YAML linting
run: |
go install github.com/excilsploft/yamllint@latest
yamllint -c .yamllint .
# Secrets Detection (basic patterns)
- name: Run secrets detection
run: |
echo "Scanning for potential secrets..."
# Look for common secret patterns
git log --all --full-history -- . | grep -i -E "(password|secret|key|token|api_key)" || true
find . -type f -name "*.go" -exec grep -H -i -E "(password|secret|key|token|api_key)\s*[:=]" {} \; || true
# Check for hardcoded IPs and URLs
- name: Check for hardcoded network addresses
run: |
echo "Scanning for hardcoded network addresses..."
find . -type f -name "*.go" -exec grep -H -E "([0-9]{1,3}\.){3}[0-9]{1,3}" {} \; || true
find . -type f -name "*.go" -exec grep -H -E "https?://[^/\s]+" {} \; | \
grep -v "example.com|localhost|127.0.0.1" || true
# Docker Security (if Dockerfile exists)
- name: Run Docker security scan
if: hashFiles('Dockerfile') != ''
run: |
docker run --rm -v "$PWD":/workspace \
aquasec/trivy:latest fs --security-checks vuln,config /workspace/Dockerfile || true
# SAST with CodeQL (if available)
- name: Initialize CodeQL
if: github.event_name != 'schedule'
uses: github/codeql-action/init@v3
with:
languages: go
- name: Autobuild
if: github.event_name != 'schedule'
uses: github/codeql-action/autobuild@v3
- name: Perform CodeQL Analysis
if: github.event_name != 'schedule'
uses: github/codeql-action/analyze@v3
# Upload artifacts for review
- name: Upload security scan results
uses: actions/upload-artifact@v4
if: always()
with:
name: security-scan-results
path: |
gosec-results.sarif
govulncheck-results.json
retention-days: 30

25
.github/workflows/sync-labels.yml vendored Normal file
View File

@@ -0,0 +1,25 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
name: Sync labels
permissions: read-all
# yamllint disable-line rule:truthy
on:
push:
paths:
- .github/workflows/sync-labels.yml
- .github/labels.yml
schedule:
- cron: "34 5 * * *"
workflow_call:
workflow_dispatch:
jobs:
SyncLabels:
permissions:
contents: read
issues: write
runs-on: ubuntu-latest
steps:
- uses: ivuorinen/actions/sync-labels@fa0232d3c4ba16d087b606296435354a69c01756 # 25.8.11

5
.gitignore vendored
View File

@@ -7,3 +7,8 @@ gibidify.yaml
output.json
output.txt
output.yaml
coverage.out
megalinter-reports/*
coverage.*
*.out
gibidify-benchmark

1
.go-version Normal file
View File

@@ -0,0 +1 @@
1.23.0

256
.golangci.yml Normal file
View File

@@ -0,0 +1,256 @@
run:
timeout: 5m
tests: true
go: "1.24"
build-tags:
- test
# golangci-lint configuration version
version: 2
output:
format: colored-line-number
print-issued-lines: true
print-linter-name: true
path-prefix: ""
sort-results: true
linters:
enable-all: true
disable:
- depguard # Too strict for general use
- exhaustruct # Too many false positives
- ireturn # Too restrictive on interfaces
- varnamelen # Too opinionated on name length
- wrapcheck # Too many false positives
- testpackage # Tests in same package are fine
- paralleltest # Not always necessary
- tparallel # Not always necessary
- nlreturn # Too opinionated on newlines
- wsl # Too opinionated on whitespace
- nonamedreturns # Conflicts with gocritic unnamedResult
linters-settings:
errcheck:
check-type-assertions: true
check-blank: true
exclude-functions:
- io.Copy
- fmt.Print
- fmt.Printf
- fmt.Println
govet:
enable-all: true
gocyclo:
min-complexity: 15
gocognit:
min-complexity: 20
goconst:
min-len: 3
min-occurrences: 3
gofmt:
simplify: true
rewrite-rules:
- pattern: 'interface{}'
replacement: 'any'
goimports:
local-prefixes: github.com/ivuorinen/gibidify
golint:
min-confidence: 0.8
lll:
line-length: 120
tab-width: 2 # EditorConfig: tab_width = 2
misspell:
locale: US
nakedret:
max-func-lines: 30
prealloc:
simple: true
range-loops: true
for-loops: true
revive:
enable-all-rules: true
rules:
- name: package-comments
disabled: true
- name: file-header
disabled: true
- name: max-public-structs
disabled: true
- name: line-length-limit
arguments: [120]
- name: function-length
arguments: [50, 100]
- name: cognitive-complexity
arguments: [20]
- name: cyclomatic
arguments: [15]
- name: add-constant
arguments:
- maxLitCount: "3"
allowStrs: "\"error\",\"\""
allowInts: "0,1,2"
- name: argument-limit
arguments: [6]
- name: banned-characters
disabled: true
- name: function-result-limit
arguments: [3]
gosec:
excludes:
- G104 # Handled by errcheck
severity: medium
confidence: medium
exclude-generated: true
config:
G301: "0750"
G302: "0640"
G306: "0640"
dupl:
threshold: 150
gocritic:
enabled-tags:
- diagnostic
- experimental
- opinionated
- performance
- style
disabled-checks:
- whyNoLint
- paramTypeCombine
gofumpt:
extra-rules: true
# EditorConfig compliance settings
# These settings enforce .editorconfig rules:
# - end_of_line = lf (enforced by gofumpt)
# - insert_final_newline = true (enforced by gofumpt)
# - trim_trailing_whitespace = true (enforced by whitespace linter)
# - indent_style = tab, tab_width = 2 (enforced by gofumpt and lll)
whitespace:
multi-if: false # EditorConfig: trim trailing whitespace
multi-func: false # EditorConfig: trim trailing whitespace
nolintlint:
allow-leading-space: false # EditorConfig: trim trailing whitespace
allow-unused: false
require-explanation: false
require-specific: true
godox:
keywords:
- FIXME
- BUG
- HACK
mnd:
settings:
mnd:
checks:
- argument
- case
- condition
- operation
- return
- assign
ignored-numbers:
- '0'
- '1'
- '2'
- '10'
- '100'
funlen:
lines: 80
statements: 60
nestif:
min-complexity: 5
gomodguard:
allowed:
modules: []
domains: []
blocked:
modules: []
versions: []
issues:
exclude-use-default: false
exclude-case-sensitive: false
max-issues-per-linter: 0
max-same-issues: 0
uniq-by-line: true
exclude-dirs:
- vendor
- third_party
- testdata
- examples
- .git
exclude-files:
- ".*\\.pb\\.go$"
- ".*\\.gen\\.go$"
exclude-rules:
- path: _test\.go
linters:
- dupl
- gosec
- goconst
- funlen
- gocognit
- gocyclo
- errcheck
- lll
- nestif
- path: main\.go
linters:
- gochecknoglobals
- gochecknoinits
- path: fileproc/filetypes\.go
linters:
- gochecknoglobals # Allow globals for singleton registry pattern
- text: "Using the variable on range scope"
linters:
- scopelint
- text: "should have comment or be unexported"
linters:
- golint
- revive
- text: "don't use ALL_CAPS in Go names"
linters:
- golint
- stylecheck
exclude:
- "Error return value of .* is not checked"
- "exported (type|method|function) .* should have comment"
- "ST1000: at least one file in a package should have a package comment"
severity:
default-severity: error
case-sensitive: false

20
.mega-linter.yml Normal file
View File

@@ -0,0 +1,20 @@
---
# Configuration file for MegaLinter
# See all available variables at
# https://megalinter.io/configuration/ and in linters documentation
APPLY_FIXES: all
SHOW_ELAPSED_TIME: false # Show elapsed time at the end of MegaLinter run
PARALLEL: true
VALIDATE_ALL_CODEBASE: true
FILEIO_REPORTER: false # Generate file.io report
GITHUB_STATUS_REPORTER: true # Generate GitHub status report
IGNORE_GENERATED_FILES: true # Ignore generated files
JAVASCRIPT_DEFAULT_STYLE: prettier # Default style for JavaScript
PRINT_ALPACA: false # Print Alpaca logo in console
SARIF_REPORTER: true # Generate SARIF report
SHOW_SKIPPED_LINTERS: false # Show skipped linters in MegaLinter log
DISABLE_LINTERS:
- REPOSITORY_DEVSKIM
- REPOSITORY_TRIVY

15
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,15 @@
repos:
- repo: https://github.com/golangci/golangci-lint
rev: v1.57.2
hooks:
- id: golangci-lint
args: ["--timeout=5m"]
- repo: https://github.com/tekwizely/pre-commit-golang
rev: v1.0.0-rc.1
hooks:
- id: go-build-mod
alias: build
- id: go-mod-tidy
alias: tidy
- id: go-fmt
alias: fmt

3
.vscode/extensions.json vendored Normal file
View File

@@ -0,0 +1,3 @@
{
"recommendations": ["esbenp.prettier-vscode", "AquaSecurityOfficial.trivy-vulnerability-scanner", "Bridgecrew.checkov", "exiasr.hadolint", "ms-vscode.Go", "streetsidesoftware.code-spell-checker"]
}

40
.yamllint Normal file
View File

@@ -0,0 +1,40 @@
# yamllint configuration
# See: https://yamllint.readthedocs.io/en/stable/configuration.html
extends: default
# Ignore generated output files
ignore: |
gibidify.yaml
gibidify.yml
output.yaml
output.yml
rules:
# Allow longer lines for URLs and commands in GitHub Actions
line-length:
max: 120
level: warning
# Allow 2-space indentation to match EditorConfig
indentation:
spaces: 2
indent-sequences: true
check-multi-line-strings: false
# Allow truthy values like 'on' in GitHub Actions
truthy:
allowed-values: ['true', 'false', 'on', 'off']
check-keys: false
# Allow empty values in YAML
empty-values:
forbid-in-block-mappings: false
forbid-in-flow-mappings: false
# Relax comments formatting
comments:
min-spaces-from-content: 1
# Allow document start marker to be optional
document-start: disable

47
CLAUDE.md Normal file
View File

@@ -0,0 +1,47 @@
# CLAUDE.md
Go CLI aggregating code files into LLM-optimized output. Supports markdown/JSON/YAML with concurrent processing.
## Architecture (42 files, 8.2K lines)
**Core**: `main.go` (37), `cli/` (4), `fileproc/` (27), `config/` (3), `utils/` (4), `testutil/` (2)
**Modules**: Collection, processing, writers, registry (~63ns cache), resource limits
**Patterns**: Producer-consumer, thread-safe registry, streaming, modular (50-200 lines)
## Commands
```bash
make lint-fix && make lint && make test
./gibidify -source <dir> -format markdown --verbose
```
## Config
`~/.config/gibidify/config.yaml`
Size limit 5MB, ignore dirs, custom types, 100MB memory limit
## Quality
**CRITICAL**: `make lint-fix && make lint` (0 issues), 120 chars, EditorConfig, 30+ linters
## Testing
**Coverage**: 84%+ (utils 90.9%, fileproc 83.8%), race detection, benchmarks
## Standards
EditorConfig (LF, tabs), semantic commits, testing required
## Status
**Health: 10/10** - Production-ready, 84%+ coverage, modular, memory-optimized
**Done**: Errors, benchmarks, config, optimization, modularization, CLI (progress/colors), security (path validation, resource limits, scanning)
**Next**: Documentation, output customization
## Workflow
1. `make lint-fix` first 2. >80% coverage 3. Follow patterns 4. Update docs

View File

@@ -1,5 +1,11 @@
# Use a minimal base image
FROM alpine:latest
FROM alpine:3.21.2
# Add user
RUN useradd -ms /bin/bash gibidify
# Use the new user
USER gibidify
# Copy the gibidify binary into the container
COPY gibidify /usr/local/bin/gibidify

19
LICENSE
View File

@@ -1,7 +1,20 @@
MIT License Copyright (c) 2025 Ismo Vuorinen
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software.
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

147
Makefile Normal file
View File

@@ -0,0 +1,147 @@
.PHONY: help install-tools lint lint-fix lint-verbose test coverage build clean all build-benchmark benchmark benchmark-collection benchmark-processing benchmark-concurrency benchmark-format security security-full vuln-check check-all dev-setup
# Default target shows help
.DEFAULT_GOAL := help
# All target runs full workflow
all: lint test build
# Help target
help:
@cat scripts/help.txt
# Install required tools
install-tools:
@echo "Installing golangci-lint..."
@go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
@echo "Installing gofumpt..."
@go install mvdan.cc/gofumpt@latest
@echo "Installing goimports..."
@go install golang.org/x/tools/cmd/goimports@latest
@echo "Installing staticcheck..."
@go install honnef.co/go/tools/cmd/staticcheck@latest
@echo "Installing gosec..."
@go install github.com/securego/gosec/v2/cmd/gosec@latest
@echo "Installing gocyclo..."
@go install github.com/fzipp/gocyclo/cmd/gocyclo@latest
@echo "Installing checkmake..."
@go install github.com/mrtazz/checkmake/cmd/checkmake@latest
@echo "Installing shfmt..."
@go install mvdan.cc/sh/v3/cmd/shfmt@latest
@echo "Installing yamllint (Go-based)..."
@go install github.com/excilsploft/yamllint@latest
@echo "All tools installed successfully!"
# Run linters
lint:
@./scripts/lint.sh
# Run linters with auto-fix
lint-fix:
@echo "Running gofumpt..."
@gofumpt -l -w .
@echo "Running goimports..."
@goimports -w -local github.com/ivuorinen/gibidify .
@echo "Running go fmt..."
@go fmt ./...
@echo "Running go mod tidy..."
@go mod tidy
@echo "Running shfmt formatting..."
@shfmt -w -i 2 -ci .
@echo "Running golangci-lint with --fix..."
@golangci-lint run --fix ./...
@echo "Auto-fix completed. Running final lint check..."
@golangci-lint run ./...
@echo "Running checkmake..."
@checkmake --config=.checkmake Makefile
@echo "Running yamllint..."
@yamllint -c .yamllint .
# Run linters with verbose output
lint-verbose:
@echo "Running golangci-lint (verbose)..."
@golangci-lint run -v ./...
@echo "Running checkmake (verbose)..."
@checkmake --config=.checkmake --format="{{.Line}}:{{.Rule}}:{{.Violation}}" Makefile
@echo "Running shfmt check (verbose)..."
@shfmt -d .
@echo "Running yamllint (verbose)..."
@yamllint -c .yamllint -f parsable .
# Run tests
test:
@echo "Running tests..."
@go test -race -v ./...
# Run tests with coverage
coverage:
@echo "Running tests with coverage..."
@go test -race -coverprofile=coverage.out -covermode=atomic ./...
@go tool cover -html=coverage.out -o coverage.html
@echo "Coverage report generated: coverage.html"
# Build the application
build:
@echo "Building gibidify..."
@go build -ldflags="-s -w" -o gibidify .
@echo "Build complete: ./gibidify"
# Clean build artifacts
clean:
@echo "Cleaning build artifacts..."
@rm -f gibidify gibidify-benchmark
@rm -f coverage.out coverage.html
@echo "Clean complete"
# CI-specific targets
.PHONY: ci-lint ci-test
ci-lint:
@golangci-lint run --out-format=github-actions ./...
ci-test:
@go test -race -coverprofile=coverage.out -json ./... > test-results.json
# Build benchmark binary
build-benchmark:
@echo "Building gibidify-benchmark..."
@go build -ldflags="-s -w" -o gibidify-benchmark ./cmd/benchmark
@echo "Build complete: ./gibidify-benchmark"
# Run benchmarks
benchmark: build-benchmark
@echo "Running all benchmarks..."
@./gibidify-benchmark -type=all
# Run specific benchmark types
benchmark-collection: build-benchmark
@echo "Running file collection benchmarks..."
@./gibidify-benchmark -type=collection
benchmark-processing: build-benchmark
@echo "Running file processing benchmarks..."
@./gibidify-benchmark -type=processing
benchmark-concurrency: build-benchmark
@echo "Running concurrency benchmarks..."
@./gibidify-benchmark -type=concurrency
benchmark-format: build-benchmark
@echo "Running format benchmarks..."
@./gibidify-benchmark -type=format
# Security targets
security:
@echo "Running comprehensive security scan..."
@./scripts/security-scan.sh
security-full:
@echo "Running full security analysis..."
@./scripts/security-scan.sh
@echo "Running additional security checks..."
@golangci-lint run --enable-all --disable=depguard,exhaustruct,ireturn,varnamelen,wrapcheck --timeout=10m
vuln-check:
@echo "Checking for dependency vulnerabilities..."
@go install golang.org/x/vuln/cmd/govulncheck@latest
@govulncheck ./...

View File

@@ -7,11 +7,16 @@ file sections with separators, and a suffix.
## Features
- Recursive scanning of a source directory.
- File filtering based on size, glob patterns, and .gitignore rules.
- Modular, concurrent file processing with progress bar feedback.
- Configurable logging and configuration via Viper.
- Cross-platform build with Docker packaging support.
- **Recursive directory scanning** with smart file filtering
- **Configurable file type detection** - add/remove extensions and languages
- **Multiple output formats** - markdown, JSON, YAML
- **Memory-optimized processing** - streaming for large files, intelligent back-pressure
- **Concurrent processing** with configurable worker pools
- **Comprehensive configuration** via YAML with validation
- **Production-ready** with structured error handling and benchmarking
- **Modular architecture** - clean, focused codebase with ~63ns registry lookups
- **Enhanced CLI experience** - progress bars, colored output, helpful error messages
- **Cross-platform** with Docker support
## Installation
@@ -26,9 +31,29 @@ go build -o gibidify .
## Usage
```bash
./gibidify -source <source_directory> -destination <output_file> [--prefix="..."] [--suffix="..."]
./gibidify \
-source <source_directory> \
-destination <output_file> \
-format markdown|json|yaml \
-concurrency <num_workers> \
--prefix="..." \
--suffix="..." \
--no-colors \
--no-progress \
--verbose
```
Flags:
- `-source`: directory to scan.
- `-destination`: output file path (optional; defaults to `<source>.<format>`).
- `-format`: output format (`markdown`, `json`, or `yaml`).
- `-concurrency`: number of concurrent workers.
- `--prefix` / `--suffix`: optional text blocks.
- `--no-colors`: disable colored terminal output.
- `--no-progress`: disable progress bars.
- `--verbose`: enable verbose output and detailed logging.
## Docker
A Docker image can be built using the provided Dockerfile:
@@ -41,13 +66,13 @@ Run the Docker container:
```bash
docker run --rm \
-v $(pwd):/workspace \
-v $HOME/.config/gibidify:/config \
ghcr.io/ivuorinen/gibidify:<tag> \
-source /workspace/your_source_directory \
-destination /workspace/output.txt \
--prefix="Your prefix text" \
--suffix="Your suffix text"
-v $(pwd):/workspace \
-v $HOME/.config/gibidify:/config \
ghcr.io/ivuorinen/gibidify:<tag> \
-source /workspace/your_source_directory \
-destination /workspace/output.txt \
--prefix="Your prefix text" \
--suffix="Your suffix text"
```
## Configuration
@@ -69,11 +94,39 @@ ignoreDirectories:
- dist
- build
- target
- bower_components
- cache
- tmp
# FileType customization
fileTypes:
enabled: true
# Add custom file extensions
customImageExtensions:
- .webp
- .avif
customBinaryExtensions:
- .custom
customLanguages:
.zig: zig
.odin: odin
.v: vlang
# Disable default extensions
disabledImageExtensions:
- .bmp
disabledBinaryExtensions:
- .exe
disabledLanguageExtensions:
- .bat
# Memory optimization (back-pressure management)
backpressure:
enabled: true
maxPendingFiles: 1000 # Max files in file channel buffer
maxPendingWrites: 100 # Max writes in write channel buffer
maxMemoryUsage: 104857600 # 100MB max memory usage
memoryCheckInterval: 1000 # Check memory every 1000 files
```
See `config.example.yaml` for a comprehensive configuration example.
## License
This project is licensed under [the MIT License](LICENSE).

46
TODO.md Normal file
View File

@@ -0,0 +1,46 @@
# TODO: gibidify
Prioritized improvements by impact/effort.
## ✅ Completed
**Core**: Testing (84%+), config validation, structured errors, benchmarking ✅
**Architecture**: Modularization (50-200 lines), CLI (progress/colors), security (path validation, resource limits, scanning) ✅
## 🚀 Current Priorities
### Metrics & Profiling
- [ ] Processing stats, timing
### Output Customization
- [ ] Templates, markdown config, metadata
### Documentation
- [ ] API docs, user guides
## 🌟 Future
**Plugins**: Custom handlers, formats
**Git**: Commit filtering, blame
**Rich output**: HTML, PDF, web UI
**Monitoring**: Prometheus, structured logging
## Guidelines
**Before**: `make lint-fix && make lint`, >80% coverage
**Priorities**: Security → UX → Extensions
## Status (2025-07-19)
**Health: 10/10** - Production-ready, 42 files (8.2K lines), 84%+ coverage
**Done**: Testing, config, errors, performance, modularization, CLI, security
**Next**: Documentation → Output customization
### Token Usage
- TODO.md: 171 words (~228 tokens) - 35% reduction ✅
- CLAUDE.md: 160 words (~213 tokens) - 25% reduction ✅
- Total: 331 words (~441 tokens) - 30% reduction ✅
*Optimized from 474 → 331 words while preserving critical information*

405
benchmark/benchmark.go Normal file
View File

@@ -0,0 +1,405 @@
// Package benchmark provides benchmarking infrastructure for gibidify.
package benchmark
import (
"context"
"fmt"
"os"
"path/filepath"
"runtime"
"sync"
"time"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/utils"
)
// BenchmarkResult represents the results of a benchmark run.
type BenchmarkResult struct {
	Name           string        // benchmark identifier, e.g. "FileCollection"
	Duration       time.Duration // wall-clock time of the measured section
	FilesProcessed int           // number of files seen during the run
	BytesProcessed int64         // sum of processed file sizes, in bytes
	FilesPerSecond float64       // FilesProcessed divided by Duration
	BytesPerSecond float64       // BytesProcessed divided by Duration
	MemoryUsage    MemoryStats   // heap/GC deltas measured across the run
	CPUUsage       CPUStats      // goroutine snapshot taken after the run
}

// MemoryStats represents memory usage statistics.
type MemoryStats struct {
	AllocMB      float64 // change in runtime.MemStats.Alloc, in MiB
	SysMB        float64 // change in runtime.MemStats.Sys, in MiB
	NumGC        uint32  // GC cycles completed during the run
	PauseTotalNs uint64  // GC pause time accumulated during the run, in ns
}

// CPUStats represents CPU usage statistics.
type CPUStats struct {
	UserTime   time.Duration // not populated by the benchmarks in this file
	SystemTime time.Duration // not populated by the benchmarks in this file
	Goroutines int           // goroutine count at measurement time
}

// BenchmarkSuite represents a collection of benchmarks.
type BenchmarkSuite struct {
	Name    string            // suite identifier
	Results []BenchmarkResult // one entry per benchmark run
}
// FileCollectionBenchmark benchmarks file collection operations.
//
// If sourceDir is empty, a temporary tree of numFiles synthetic files is
// created and removed afterwards; otherwise numFiles is ignored and the
// given directory is scanned as-is. Returns timing, throughput, and
// memory/goroutine statistics for a single fileproc.CollectFiles call.
func FileCollectionBenchmark(sourceDir string, numFiles int) (*BenchmarkResult, error) {
	// Load configuration to ensure proper file filtering.
	config.LoadConfig()

	// Create a temporary directory with test files if no source is provided.
	if sourceDir == "" {
		tempDir, cleanupFunc, err := createBenchmarkFiles(numFiles)
		if err != nil {
			return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create benchmark files")
		}
		defer cleanupFunc()
		sourceDir = tempDir
	}

	// Snapshot memory stats before the measured section.
	var memBefore runtime.MemStats
	runtime.ReadMemStats(&memBefore)

	startTime := time.Now()

	// Run the file collection benchmark.
	files, err := fileproc.CollectFiles(sourceDir)
	if err != nil {
		return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "benchmark file collection failed")
	}

	duration := time.Since(startTime)

	// Snapshot memory stats after the measured section.
	var memAfter runtime.MemStats
	runtime.ReadMemStats(&memAfter)

	// Calculate total bytes processed; files that vanish are skipped.
	var totalBytes int64
	for _, file := range files {
		if info, err := os.Stat(file); err == nil {
			totalBytes += info.Size()
		}
	}

	// BUGFIX: MemStats.Alloc and Sys are gauges that can shrink across a GC
	// cycle; subtracting them as uint64 underflows to a huge bogus value.
	// Compute the deltas as signed values instead (they may be negative).
	allocMB := float64(int64(memAfter.Alloc)-int64(memBefore.Alloc)) / 1024 / 1024
	sysMB := float64(int64(memAfter.Sys)-int64(memBefore.Sys)) / 1024 / 1024

	result := &BenchmarkResult{
		Name:           "FileCollection",
		Duration:       duration,
		FilesProcessed: len(files),
		BytesProcessed: totalBytes,
		FilesPerSecond: float64(len(files)) / duration.Seconds(),
		BytesPerSecond: float64(totalBytes) / duration.Seconds(),
		MemoryUsage: MemoryStats{
			AllocMB:      allocMB,
			SysMB:        sysMB,
			NumGC:        memAfter.NumGC - memBefore.NumGC,
			PauseTotalNs: memAfter.PauseTotalNs - memBefore.PauseTotalNs,
		},
		CPUUsage: CPUStats{
			Goroutines: runtime.NumGoroutine(),
		},
	}
	return result, nil
}
// FileProcessingBenchmark benchmarks full file processing pipeline.
//
// If sourceDir is empty, a temporary tree of 100 synthetic files is created
// and removed afterwards. Output goes to a throwaway temp file in the given
// format, processed by `concurrency` workers.
func FileProcessingBenchmark(sourceDir string, format string, concurrency int) (*BenchmarkResult, error) {
	// Load configuration to ensure proper file filtering.
	config.LoadConfig()

	if sourceDir == "" {
		// Create temporary directory with test files.
		tempDir, cleanupFunc, err := createBenchmarkFiles(100)
		if err != nil {
			return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create benchmark files")
		}
		defer cleanupFunc()
		sourceDir = tempDir
	}

	// Create a temporary output file; it is closed and removed on return.
	outputFile, err := os.CreateTemp("", "benchmark_output_*."+format)
	if err != nil {
		return nil, utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOFileCreate, "failed to create benchmark output file")
	}
	defer func() {
		if err := outputFile.Close(); err != nil {
			// Log error but don't fail the benchmark.
			fmt.Printf("Warning: failed to close benchmark output file: %v\n", err)
		}
		if err := os.Remove(outputFile.Name()); err != nil {
			// Log error but don't fail the benchmark.
			fmt.Printf("Warning: failed to remove benchmark output file: %v\n", err)
		}
	}()

	// Snapshot memory stats before the measured section.
	var memBefore runtime.MemStats
	runtime.ReadMemStats(&memBefore)

	startTime := time.Now()

	// Run the full processing pipeline: collect, then process concurrently.
	files, err := fileproc.CollectFiles(sourceDir)
	if err != nil {
		return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "benchmark file collection failed")
	}
	err = runProcessingPipeline(context.Background(), files, outputFile, format, concurrency, sourceDir)
	if err != nil {
		return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "benchmark processing pipeline failed")
	}

	duration := time.Since(startTime)

	// Snapshot memory stats after the measured section.
	var memAfter runtime.MemStats
	runtime.ReadMemStats(&memAfter)

	// Calculate total bytes processed; files that vanish are skipped.
	var totalBytes int64
	for _, file := range files {
		if info, err := os.Stat(file); err == nil {
			totalBytes += info.Size()
		}
	}

	// BUGFIX: MemStats.Alloc and Sys can shrink across a GC cycle; uint64
	// subtraction would underflow to an absurd value, so use signed deltas.
	allocMB := float64(int64(memAfter.Alloc)-int64(memBefore.Alloc)) / 1024 / 1024
	sysMB := float64(int64(memAfter.Sys)-int64(memBefore.Sys)) / 1024 / 1024

	result := &BenchmarkResult{
		Name:           fmt.Sprintf("FileProcessing_%s_c%d", format, concurrency),
		Duration:       duration,
		FilesProcessed: len(files),
		BytesProcessed: totalBytes,
		FilesPerSecond: float64(len(files)) / duration.Seconds(),
		BytesPerSecond: float64(totalBytes) / duration.Seconds(),
		MemoryUsage: MemoryStats{
			AllocMB:      allocMB,
			SysMB:        sysMB,
			NumGC:        memAfter.NumGC - memBefore.NumGC,
			PauseTotalNs: memAfter.PauseTotalNs - memBefore.PauseTotalNs,
		},
		CPUUsage: CPUStats{
			Goroutines: runtime.NumGoroutine(),
		},
	}
	return result, nil
}
// ConcurrencyBenchmark runs the full processing benchmark once per
// concurrency level and collects all results into a single suite.
func ConcurrencyBenchmark(sourceDir string, format string, concurrencyLevels []int) (*BenchmarkSuite, error) {
	results := make([]BenchmarkResult, 0, len(concurrencyLevels))
	for _, workers := range concurrencyLevels {
		run, err := FileProcessingBenchmark(sourceDir, format, workers)
		if err != nil {
			return nil, utils.WrapErrorf(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "concurrency benchmark failed for level %d", workers)
		}
		results = append(results, *run)
	}
	return &BenchmarkSuite{Name: "ConcurrencyBenchmark", Results: results}, nil
}
// FormatBenchmark runs the full processing benchmark once per output
// format, each at runtime.NumCPU() workers, and collects the results.
func FormatBenchmark(sourceDir string, formats []string) (*BenchmarkSuite, error) {
	results := make([]BenchmarkResult, 0, len(formats))
	for _, outputFormat := range formats {
		run, err := FileProcessingBenchmark(sourceDir, outputFormat, runtime.NumCPU())
		if err != nil {
			return nil, utils.WrapErrorf(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "format benchmark failed for format %s", outputFormat)
		}
		results = append(results, *run)
	}
	return &BenchmarkSuite{Name: "FormatBenchmark", Results: results}, nil
}
// createBenchmarkFiles populates a fresh temp directory with numFiles small
// source files cycling through several languages. It returns the directory
// path, a cleanup function that removes it, and any creation error.
func createBenchmarkFiles(numFiles int) (string, func(), error) {
	tempDir, err := os.MkdirTemp("", "gibidify_benchmark_*")
	if err != nil {
		return "", nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create temp directory")
	}
	cleanup := func() {
		if rmErr := os.RemoveAll(tempDir); rmErr != nil {
			// Best-effort cleanup: warn instead of failing the benchmark.
			fmt.Printf("Warning: failed to remove benchmark temp directory: %v\n", rmErr)
		}
	}

	// Sample content per extension; files cycle through these in order.
	samples := []struct {
		ext     string
		content string
	}{
		{".go", "package main\n\nfunc main() {\n\tprintln(\"Hello, World!\")\n}"},
		{".js", "console.log('Hello, World!');"},
		{".py", "print('Hello, World!')"},
		{".java", "public class Hello {\n\tpublic static void main(String[] args) {\n\t\tSystem.out.println(\"Hello, World!\");\n\t}\n}"},
		{".cpp", "#include <iostream>\n\nint main() {\n\tstd::cout << \"Hello, World!\" << std::endl;\n\treturn 0;\n}"},
		{".rs", "fn main() {\n\tprintln!(\"Hello, World!\");\n}"},
		{".rb", "puts 'Hello, World!'"},
		{".php", "<?php\necho 'Hello, World!';\n?>"},
		{".sh", "#!/bin/bash\necho 'Hello, World!'"},
		{".md", "# Hello, World!\n\nThis is a markdown file."},
	}

	for i := 0; i < numFiles; i++ {
		sample := samples[i%len(samples)]
		name := fmt.Sprintf("file_%d%s", i, sample.ext)

		// Every tenth file lands in its own subdirectory to exercise recursion.
		dir := tempDir
		if i%10 == 0 {
			dir = filepath.Join(tempDir, fmt.Sprintf("subdir_%d", i/10))
			if mkErr := os.MkdirAll(dir, 0o755); mkErr != nil {
				cleanup()
				return "", nil, utils.WrapError(mkErr, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to create subdirectory")
			}
		}
		fullPath := filepath.Join(dir, name)

		// Repeat the sample ten times so files have some substance.
		var content string
		for j := 0; j < 10; j++ {
			content += fmt.Sprintf("// Line %d\n%s\n", j, sample.content)
		}
		if wErr := os.WriteFile(fullPath, []byte(content), 0o644); wErr != nil {
			cleanup()
			return "", nil, utils.WrapError(wErr, utils.ErrorTypeIO, utils.CodeIOFileWrite, "failed to write benchmark file")
		}
	}
	return tempDir, cleanup, nil
}
// runProcessingPipeline runs the processing pipeline similar to main.go.
//
// Fan-out/fan-in shape: file paths are fed to `concurrency` worker
// goroutines over fileCh; workers emit write requests to a single writer
// goroutine over writeCh. Shutdown order is load-bearing: fileCh is closed
// first, the workers are awaited, and only then is writeCh closed, so no
// worker can send on a closed channel. On context cancellation mid-send the
// same shutdown sequence is drained before returning ctx.Err().
func runProcessingPipeline(ctx context.Context, files []string, outputFile *os.File, format string, concurrency int, sourceDir string) error {
	fileCh := make(chan string, concurrency)
	writeCh := make(chan fileproc.WriteRequest, concurrency)
	writerDone := make(chan struct{})

	// Start writer; it signals writerDone once writeCh is drained and closed.
	go fileproc.StartWriter(outputFile, writeCh, writerDone, format, "", "")

	// Get absolute path once (shared by every worker).
	absRoot, err := utils.GetAbsolutePath(sourceDir)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSPathResolution, "failed to get absolute path for source directory")
	}

	// Start workers with proper synchronization.
	var workersDone sync.WaitGroup
	for i := 0; i < concurrency; i++ {
		workersDone.Add(1)
		go func() {
			defer workersDone.Done()
			for filePath := range fileCh {
				fileproc.ProcessFile(filePath, writeCh, absRoot)
			}
		}()
	}

	// Send files to workers, bailing out early if the context is cancelled.
	for _, file := range files {
		select {
		case <-ctx.Done():
			close(fileCh)
			workersDone.Wait() // Wait for workers to finish
			close(writeCh)
			<-writerDone
			return ctx.Err()
		case fileCh <- file:
		}
	}

	// Close file channel and wait for workers to finish.
	close(fileCh)
	workersDone.Wait()
	// Now it's safe to close the write channel.
	close(writeCh)
	<-writerDone
	return nil
}
// PrintBenchmarkResult writes one benchmark result to stdout in a fixed,
// human-readable layout.
func PrintBenchmarkResult(result *BenchmarkResult) {
	mem := result.MemoryUsage
	megabytes := float64(result.BytesProcessed) / 1024 / 1024

	fmt.Printf("=== %s ===\n", result.Name)
	fmt.Printf("Duration: %v\n", result.Duration)
	fmt.Printf("Files Processed: %d\n", result.FilesProcessed)
	fmt.Printf("Bytes Processed: %d (%.2f MB)\n", result.BytesProcessed, megabytes)
	fmt.Printf("Files/sec: %.2f\n", result.FilesPerSecond)
	fmt.Printf("Bytes/sec: %.2f MB/sec\n", result.BytesPerSecond/1024/1024)
	fmt.Printf("Memory Usage: +%.2f MB (Sys: +%.2f MB)\n", mem.AllocMB, mem.SysMB)
	fmt.Printf("GC Runs: %d (Pause: %v)\n", mem.NumGC, time.Duration(mem.PauseTotalNs))
	fmt.Printf("Goroutines: %d\n", result.CPUUsage.Goroutines)
	fmt.Println()
}
// PrintBenchmarkSuite prints the suite header followed by every contained
// result, in order.
func PrintBenchmarkSuite(suite *BenchmarkSuite) {
	fmt.Printf("=== %s ===\n", suite.Name)
	// Index instead of range-value to avoid copying each result struct.
	for i := range suite.Results {
		PrintBenchmarkResult(&suite.Results[i])
	}
}
// RunAllBenchmarks runs the comprehensive benchmark suite: file collection,
// then all output formats, then a sweep of concurrency levels. Results are
// printed as each stage completes; the first failure aborts the run.
func RunAllBenchmarks(sourceDir string) error {
	fmt.Println("Running gibidify benchmark suite...")

	// Load configuration once up front.
	config.LoadConfig()

	// Stage 1: file collection.
	fmt.Println("Running file collection benchmark...")
	collectionResult, err := FileCollectionBenchmark(sourceDir, 1000)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "file collection benchmark failed")
	}
	PrintBenchmarkResult(collectionResult)

	// Stage 2: every output format.
	fmt.Println("Running format benchmarks...")
	formatSuite, err := FormatBenchmark(sourceDir, []string{"json", "yaml", "markdown"})
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "format benchmark failed")
	}
	PrintBenchmarkSuite(formatSuite)

	// Stage 3: concurrency sweep (fixed levels plus the host CPU count).
	fmt.Println("Running concurrency benchmarks...")
	levels := []int{1, 2, 4, 8, runtime.NumCPU()}
	concurrencySuite, err := ConcurrencyBenchmark(sourceDir, "json", levels)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "concurrency benchmark failed")
	}
	PrintBenchmarkSuite(concurrencySuite)

	return nil
}

165
benchmark/benchmark_test.go Normal file
View File

@@ -0,0 +1,165 @@
package benchmark
import (
"runtime"
"testing"
)
// TestFileCollectionBenchmark exercises the collection benchmark against a
// generated temp tree and sanity-checks the reported metrics.
func TestFileCollectionBenchmark(t *testing.T) {
	res, err := FileCollectionBenchmark("", 10)
	if err != nil {
		t.Fatalf("FileCollectionBenchmark failed: %v", err)
	}
	if res.Name != "FileCollection" {
		t.Errorf("Expected name 'FileCollection', got %s", res.Name)
	}

	// Log metrics for debugging flaky environments.
	t.Logf("Files processed: %d", res.FilesProcessed)
	t.Logf("Duration: %v", res.Duration)
	t.Logf("Bytes processed: %d", res.BytesProcessed)

	if res.FilesProcessed <= 0 {
		t.Errorf("Expected files processed > 0, got %d", res.FilesProcessed)
	}
	if res.Duration <= 0 {
		t.Errorf("Expected duration > 0, got %v", res.Duration)
	}
}
// TestFileProcessingBenchmark runs the full pipeline benchmark with two
// workers and JSON output, then sanity-checks the metrics.
func TestFileProcessingBenchmark(t *testing.T) {
	res, err := FileProcessingBenchmark("", "json", 2)
	if err != nil {
		t.Fatalf("FileProcessingBenchmark failed: %v", err)
	}
	if res.FilesProcessed <= 0 {
		t.Errorf("Expected files processed > 0, got %d", res.FilesProcessed)
	}
	if res.Duration <= 0 {
		t.Errorf("Expected duration > 0, got %v", res.Duration)
	}
}
// TestConcurrencyBenchmark checks that the suite has one result per
// requested concurrency level and that each run processed files.
func TestConcurrencyBenchmark(t *testing.T) {
	levels := []int{1, 2}
	suite, err := ConcurrencyBenchmark("", "json", levels)
	if err != nil {
		t.Fatalf("ConcurrencyBenchmark failed: %v", err)
	}
	if suite.Name != "ConcurrencyBenchmark" {
		t.Errorf("Expected name 'ConcurrencyBenchmark', got %s", suite.Name)
	}
	if len(suite.Results) != len(levels) {
		t.Errorf("Expected %d results, got %d", len(levels), len(suite.Results))
	}
	for i, res := range suite.Results {
		if res.FilesProcessed <= 0 {
			t.Errorf("Result %d: Expected files processed > 0, got %d", i, res.FilesProcessed)
		}
	}
}
// TestFormatBenchmark checks that the suite has one result per requested
// output format and that each run processed files.
func TestFormatBenchmark(t *testing.T) {
	wantFormats := []string{"json", "yaml"}
	suite, err := FormatBenchmark("", wantFormats)
	if err != nil {
		t.Fatalf("FormatBenchmark failed: %v", err)
	}
	if suite.Name != "FormatBenchmark" {
		t.Errorf("Expected name 'FormatBenchmark', got %s", suite.Name)
	}
	if len(suite.Results) != len(wantFormats) {
		t.Errorf("Expected %d results, got %d", len(wantFormats), len(suite.Results))
	}
	for i, res := range suite.Results {
		if res.FilesProcessed <= 0 {
			t.Errorf("Result %d: Expected files processed > 0, got %d", i, res.FilesProcessed)
		}
	}
}
// TestCreateBenchmarkFiles verifies fixture creation succeeds and yields a
// usable temp directory; file contents are covered by the benchmark tests.
func TestCreateBenchmarkFiles(t *testing.T) {
	tempDir, cleanup, err := createBenchmarkFiles(5)
	if err != nil {
		t.Fatalf("createBenchmarkFiles failed: %v", err)
	}
	defer cleanup()

	if tempDir == "" {
		t.Error("Expected non-empty temp directory")
	}
	// File creation itself is exercised indirectly by the benchmark tests.
}
// BenchmarkFileCollection measures the file collection path end-to-end,
// including fixture creation, per iteration.
func BenchmarkFileCollection(b *testing.B) {
	for i := 0; i < b.N; i++ {
		res, err := FileCollectionBenchmark("", 50)
		if err != nil {
			b.Fatalf("FileCollectionBenchmark failed: %v", err)
		}
		if res.FilesProcessed <= 0 {
			b.Errorf("Expected files processed > 0, got %d", res.FilesProcessed)
		}
	}
}
// BenchmarkFileProcessing measures the full processing pipeline at the
// host's CPU count, per iteration.
func BenchmarkFileProcessing(b *testing.B) {
	for i := 0; i < b.N; i++ {
		res, err := FileProcessingBenchmark("", "json", runtime.NumCPU())
		if err != nil {
			b.Fatalf("FileProcessingBenchmark failed: %v", err)
		}
		if res.FilesProcessed <= 0 {
			b.Errorf("Expected files processed > 0, got %d", res.FilesProcessed)
		}
	}
}
// BenchmarkConcurrency measures a small concurrency sweep per iteration.
func BenchmarkConcurrency(b *testing.B) {
	levels := []int{1, 2, 4}
	for i := 0; i < b.N; i++ {
		suite, err := ConcurrencyBenchmark("", "json", levels)
		if err != nil {
			b.Fatalf("ConcurrencyBenchmark failed: %v", err)
		}
		if len(suite.Results) != len(levels) {
			b.Errorf("Expected %d results, got %d", len(levels), len(suite.Results))
		}
	}
}
// BenchmarkFormats measures all three output formats per iteration.
func BenchmarkFormats(b *testing.B) {
	wantFormats := []string{"json", "yaml", "markdown"}
	for i := 0; i < b.N; i++ {
		suite, err := FormatBenchmark("", wantFormats)
		if err != nil {
			b.Fatalf("FormatBenchmark failed: %v", err)
		}
		if len(suite.Results) != len(wantFormats) {
			b.Errorf("Expected %d results, got %d", len(wantFormats), len(suite.Results))
		}
	}
}

285
cli/errors.go Normal file
View File

@@ -0,0 +1,285 @@
package cli
import (
"errors"
"os"
"path/filepath"
"strings"
"github.com/ivuorinen/gibidify/utils"
)
// ErrorFormatter handles CLI-friendly error formatting with suggestions.
// All output is routed through the UIManager so color and verbosity
// settings apply uniformly.
type ErrorFormatter struct {
	ui *UIManager // destination for all formatted output
}

// NewErrorFormatter creates a new error formatter that writes through ui.
func NewErrorFormatter(ui *UIManager) *ErrorFormatter {
	return &ErrorFormatter{ui: ui}
}
// FormatError formats an error with context and suggestions.
//
// Structured errors (utils.StructuredError) get a detailed rendering with
// type/code, file path, context, and targeted suggestions; any other error
// falls back to a generic message with pattern-based hints. A nil error is
// a no-op.
func (ef *ErrorFormatter) FormatError(err error) {
	if err == nil {
		return
	}
	// BUGFIX: use errors.As instead of a direct type assertion so that
	// structured errors wrapped with fmt.Errorf("...: %w", err) are still
	// recognized and rendered with full detail.
	var structErr *utils.StructuredError
	if errors.As(err, &structErr) {
		ef.formatStructuredError(structErr)
		return
	}
	// Handle all other error types.
	ef.formatGenericError(err)
}
// formatStructuredError renders a structured error in full: main message,
// type/code line, offending file path, attached context key/values, and
// finally a set of actionable suggestions.
func (ef *ErrorFormatter) formatStructuredError(e *utils.StructuredError) {
	ef.ui.PrintError("Error: %s", e.Message)

	if e.Type != utils.ErrorTypeUnknown || e.Code != "" {
		ef.ui.PrintInfo("Type: %s, Code: %s", e.Type.String(), e.Code)
	}
	if e.FilePath != "" {
		ef.ui.PrintInfo("File: %s", e.FilePath)
	}
	if len(e.Context) > 0 {
		ef.ui.PrintInfo("Context:")
		// Map iteration order is nondeterministic; entries print in any order.
		for key, value := range e.Context {
			ef.ui.printf(" %s: %v\n", key, value)
		}
	}

	ef.provideSuggestions(e)
}
// formatGenericError renders a non-structured error followed by
// pattern-matched suggestions.
func (ef *ErrorFormatter) formatGenericError(e error) {
	ef.ui.PrintError("Error: %s", e.Error())
	ef.provideGenericSuggestions(e)
}
// provideSuggestions dispatches to the suggestion helper matching the
// structured error's category; unknown categories get the default hints.
func (ef *ErrorFormatter) provideSuggestions(e *utils.StructuredError) {
	var handle func(*utils.StructuredError)
	switch e.Type {
	case utils.ErrorTypeFileSystem:
		handle = ef.provideFileSystemSuggestions
	case utils.ErrorTypeValidation:
		handle = ef.provideValidationSuggestions
	case utils.ErrorTypeProcessing:
		handle = ef.provideProcessingSuggestions
	case utils.ErrorTypeIO:
		handle = ef.provideIOSuggestions
	default:
		ef.provideDefaultSuggestions()
		return
	}
	handle(e)
}
// provideFileSystemSuggestions prints hints for file-system errors, keyed
// on the specific error code.
func (ef *ErrorFormatter) provideFileSystemSuggestions(e *utils.StructuredError) {
	path := e.FilePath
	ef.ui.PrintWarning("Suggestions:")
	switch e.Code {
	case utils.CodeFSAccess:
		ef.suggestFileAccess(path)
	case utils.CodeFSPathResolution:
		ef.suggestPathResolution(path)
	case utils.CodeFSNotFound:
		ef.suggestFileNotFound(path)
	default:
		ef.suggestFileSystemGeneral(path)
	}
}
// provideValidationSuggestions prints hints for validation errors, keyed
// on the specific error code.
func (ef *ErrorFormatter) provideValidationSuggestions(e *utils.StructuredError) {
	ef.ui.PrintWarning("Suggestions:")
	var tips []string
	switch e.Code {
	case utils.CodeValidationFormat:
		tips = []string{
			" • Use a supported format: markdown, json, yaml\n",
			" • Example: -format markdown\n",
		}
	case utils.CodeValidationSize:
		tips = []string{
			" • Increase file size limit in config.yaml\n",
			" • Use smaller files or exclude large files\n",
		}
	default:
		tips = []string{
			" • Check your command line arguments\n",
			" • Run with --help for usage information\n",
		}
	}
	for _, tip := range tips {
		ef.ui.printf("%s", tip)
	}
}
// provideProcessingSuggestions prints hints for processing errors, keyed
// on the specific error code.
func (ef *ErrorFormatter) provideProcessingSuggestions(e *utils.StructuredError) {
	ef.ui.PrintWarning("Suggestions:")
	var tips []string
	switch e.Code {
	case utils.CodeProcessingCollection:
		tips = []string{
			" • Check if the source directory exists and is readable\n",
			" • Verify directory permissions\n",
		}
	case utils.CodeProcessingFileRead:
		tips = []string{
			" • Check file permissions\n",
			" • Verify the file is not corrupted\n",
		}
	default:
		tips = []string{
			" • Try reducing concurrency: -concurrency 1\n",
			" • Check available system resources\n",
		}
	}
	for _, tip := range tips {
		ef.ui.printf("%s", tip)
	}
}
// provideIOSuggestions prints hints for I/O errors, keyed on the specific
// error code.
func (ef *ErrorFormatter) provideIOSuggestions(e *utils.StructuredError) {
	ef.ui.PrintWarning("Suggestions:")
	var tips []string
	switch e.Code {
	case utils.CodeIOFileCreate:
		tips = []string{
			" • Check if the destination directory exists\n",
			" • Verify write permissions for the output file\n",
			" • Ensure sufficient disk space\n",
		}
	case utils.CodeIOWrite:
		tips = []string{
			" • Check available disk space\n",
			" • Verify write permissions\n",
		}
	default:
		tips = []string{
			" • Check file/directory permissions\n",
			" • Verify available disk space\n",
		}
	}
	for _, tip := range tips {
		ef.ui.printf("%s", tip)
	}
}
// Helper methods for specific suggestions

// suggestFileAccess hints at existence/permission problems; when the path
// actually exists, its mode is shown to aid permission debugging.
func (ef *ErrorFormatter) suggestFileAccess(filePath string) {
	ef.ui.printf(" • Check if the path exists: %s\n", filePath)
	ef.ui.printf(" • Verify read permissions\n")
	if filePath == "" {
		return
	}
	stat, err := os.Stat(filePath)
	if err != nil {
		return
	}
	ef.ui.printf(" • Path exists but may not be accessible\n")
	ef.ui.printf(" • Mode: %s\n", stat.Mode())
}
// suggestPathResolution recommends absolute paths and, when possible,
// prints the resolved absolute form of the failing path.
func (ef *ErrorFormatter) suggestPathResolution(filePath string) {
	ef.ui.printf(" • Use an absolute path instead of relative\n")
	if filePath == "" {
		return
	}
	abs, err := filepath.Abs(filePath)
	if err != nil {
		return
	}
	ef.ui.printf(" • Try: %s\n", abs)
}
// suggestFileNotFound hints at a missing path and, when the parent
// directory is readable, lists up to three similarly-named entries.
func (ef *ErrorFormatter) suggestFileNotFound(filePath string) {
	ef.ui.printf(" • Check if the file/directory exists: %s\n", filePath)
	if filePath == "" {
		return
	}
	dir := filepath.Dir(filePath)
	entries, err := os.ReadDir(dir)
	if err != nil {
		return
	}
	ef.ui.printf(" • Similar files in %s:\n", dir)
	base := filepath.Base(filePath)
	shown := 0
	for _, entry := range entries {
		if shown >= 3 {
			break
		}
		if !strings.Contains(entry.Name(), base) {
			continue
		}
		ef.ui.printf(" - %s\n", entry.Name())
		shown++
	}
}
// suggestFileSystemGeneral prints generic file-system hints, echoing the
// offending path when one is known.
func (ef *ErrorFormatter) suggestFileSystemGeneral(filePath string) {
	ef.ui.printf(" • Check file/directory permissions\n")
	ef.ui.printf(" • Verify the path is correct\n")
	if filePath == "" {
		return
	}
	ef.ui.printf(" • Path: %s\n", filePath)
}
// provideDefaultSuggestions prints the generic fallback hints used when no
// category-specific advice applies.
func (ef *ErrorFormatter) provideDefaultSuggestions() {
	for _, tip := range []string{
		" • Check your command line arguments\n",
		" • Run with --help for usage information\n",
		" • Try with -concurrency 1 to reduce resource usage\n",
	} {
		ef.ui.printf("%s", tip)
	}
}
// provideGenericSuggestions pattern-matches the error message of a
// non-structured error and prints the closest-fitting hints.
func (ef *ErrorFormatter) provideGenericSuggestions(e error) {
	msg := e.Error()
	has := func(sub string) bool { return strings.Contains(msg, sub) }

	ef.ui.PrintWarning("Suggestions:")
	switch {
	case has("permission denied"):
		ef.ui.printf(" • Check file/directory permissions\n")
		ef.ui.printf(" • Try running with appropriate privileges\n")
	case has("no such file or directory"):
		ef.ui.printf(" • Verify the file/directory path is correct\n")
		ef.ui.printf(" • Check if the file exists\n")
	case has("flag") && has("redefined"):
		ef.ui.printf(" • This is likely a test environment issue\n")
		ef.ui.printf(" • Try running the command directly instead of in tests\n")
	default:
		ef.provideDefaultSuggestions()
	}
}
// CLI-specific error types

// CLIMissingSourceError signals that the mandatory -source flag was not
// supplied on the command line.
type CLIMissingSourceError struct{}

// Error implements the error interface with a fixed user-facing message.
func (e CLIMissingSourceError) Error() string {
	return "source directory is required"
}

// NewCLIMissingSourceError returns a fresh *CLIMissingSourceError as an
// error value, suitable for errors.As matching in IsUserError.
func NewCLIMissingSourceError() error {
	return &CLIMissingSourceError{}
}
// IsUserError checks if an error is a user input error that should be
// handled gracefully (friendly message, no stack trace).
//
// An error counts as user-facing when it is (or wraps) a
// CLIMissingSourceError, when it is (or wraps) a validation-typed
// structured error, or when its message matches a known user-error pattern.
func IsUserError(err error) bool {
	if err == nil {
		return false
	}

	// Check for specific user error types.
	var cliErr *CLIMissingSourceError
	if errors.As(err, &cliErr) {
		return true
	}

	// BUGFIX: use errors.As instead of a direct type assertion so that
	// structured errors wrapped with %w are still classified correctly.
	var structErr *utils.StructuredError
	if errors.As(err, &structErr) {
		return structErr.Type == utils.ErrorTypeValidation ||
			structErr.Code == utils.CodeValidationFormat ||
			structErr.Code == utils.CodeValidationSize
	}

	// Fall back to case-insensitive message pattern matching.
	errMsg := strings.ToLower(err.Error())
	userErrorPatterns := []string{
		"flag",
		"usage",
		"invalid argument",
		"file not found",
		"permission denied",
	}
	for _, pattern := range userErrorPatterns {
		if strings.Contains(errMsg, pattern) {
			return true
		}
	}
	return false
}

104
cli/flags.go Normal file
View File

@@ -0,0 +1,104 @@
package cli
import (
"flag"
"runtime"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/utils"
)
// Flags holds CLI flags values.
type Flags struct {
	SourceDir   string // -source: directory to scan recursively (required)
	Destination string // -destination: output file; defaults to "<source base>.<format>"
	Prefix      string // -prefix: text prepended to the output file
	Suffix      string // -suffix: text appended to the output file
	Concurrency int    // -concurrency: worker count; defaults to runtime.NumCPU()
	Format      string // -format: one of json, markdown, yaml
	NoColors    bool   // -no-colors: disable colored output
	NoProgress  bool   // -no-progress: disable progress bars
	Verbose     bool   // -verbose: enable verbose output
}

// Package-level parse state: ParseFlags caches its first successful result
// here so repeated calls return the same *Flags without re-registering
// flags on flag.CommandLine.
// NOTE(review): not goroutine-safe; assumes single-threaded startup.
var (
	flagsParsed bool
	globalFlags *Flags
)
// ParseFlags parses and validates CLI flags.
//
// Flags are registered on the global flag.CommandLine and parsed exactly
// once per process; subsequent calls return the cached result.
//
// NOTE(review): if validation fails, flagsParsed stays false, so a second
// ParseFlags call would re-register the same flag names on the global
// FlagSet and panic with "flag redefined" — confirm callers never retry
// after an error (a dedicated flag.FlagSet would remove the hazard).
func ParseFlags() (*Flags, error) {
	// Fast path: flags were already parsed successfully.
	if flagsParsed {
		return globalFlags, nil
	}

	flags := &Flags{}
	flag.StringVar(&flags.SourceDir, "source", "", "Source directory to scan recursively")
	flag.StringVar(&flags.Destination, "destination", "", "Output file to write aggregated code")
	flag.StringVar(&flags.Prefix, "prefix", "", "Text to add at the beginning of the output file")
	flag.StringVar(&flags.Suffix, "suffix", "", "Text to add at the end of the output file")
	flag.StringVar(&flags.Format, "format", "markdown", "Output format (json, markdown, yaml)")
	flag.IntVar(&flags.Concurrency, "concurrency", runtime.NumCPU(),
		"Number of concurrent workers (default: number of CPU cores)")
	flag.BoolVar(&flags.NoColors, "no-colors", false, "Disable colored output")
	flag.BoolVar(&flags.NoProgress, "no-progress", false, "Disable progress bars")
	flag.BoolVar(&flags.Verbose, "verbose", false, "Enable verbose output")
	flag.Parse()

	// Reject bad input before touching the filesystem for defaults.
	if err := flags.validate(); err != nil {
		return nil, err
	}
	// Derive the destination from the source when it was not supplied.
	if err := flags.setDefaultDestination(); err != nil {
		return nil, err
	}

	// Cache only after full success so errors are not sticky.
	flagsParsed = true
	globalFlags = flags
	return flags, nil
}
// validate checks the parsed flag values and returns the first problem
// found: missing source, unsafe source path, unknown output format, or an
// out-of-range concurrency value.
func (f *Flags) validate() error {
	if f.SourceDir == "" {
		return NewCLIMissingSourceError()
	}
	// Each check returns a structured, user-facing error on failure;
	// they run in the same order as the original sequential checks.
	checks := []func() error{
		func() error { return utils.ValidateSourcePath(f.SourceDir) },
		func() error { return config.ValidateOutputFormat(f.Format) },
		func() error { return config.ValidateConcurrency(f.Concurrency) },
	}
	for _, check := range checks {
		if err := check(); err != nil {
			return err
		}
	}
	return nil
}
// setDefaultDestination fills in Destination ("<source base>.<format>")
// when the user did not supply one, then security-validates whichever path
// ends up being used.
func (f *Flags) setDefaultDestination() error {
	if f.Destination == "" {
		absRoot, err := utils.GetAbsolutePath(f.SourceDir)
		if err != nil {
			return err
		}
		f.Destination = utils.GetBaseName(absRoot) + "." + f.Format
	}
	return utils.ValidateDestinationPath(f.Destination)
}

View File

@@ -0,0 +1,77 @@
package cli
import (
"fmt"
"os"
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/utils"
)
// collectFiles walks the configured source directory and returns every file
// eligible for processing, logging the count on success.
func (p *Processor) collectFiles() ([]string, error) {
	collected, collectErr := fileproc.CollectFiles(p.flags.SourceDir)
	if collectErr != nil {
		return nil, utils.WrapError(collectErr, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "error collecting files")
	}
	logrus.Infof("Found %d files to process", len(collected))
	return collected, nil
}
// validateFileCollection validates the collected files against resource limits.
//
// When resource limits are enabled it performs two pre-flight checks before
// any file content is read: the file-count limit and an estimated total-size
// limit (summed from os.Stat). Files that cannot be stat'ed are skipped and
// counted separately, so the size estimate is a lower bound. Returns a
// structured validation error on the first violated limit, nil otherwise.
func (p *Processor) validateFileCollection(files []string) error {
	if !config.GetResourceLimitsEnabled() {
		return nil
	}
	// Check file count limit.
	maxFiles := config.GetMaxFiles()
	if len(files) > maxFiles {
		return utils.NewStructuredError(
			utils.ErrorTypeValidation,
			utils.CodeResourceLimitFiles,
			fmt.Sprintf("file count (%d) exceeds maximum limit (%d)", len(files), maxFiles),
			"",
			map[string]interface{}{
				"file_count": len(files),
				"max_files":  maxFiles,
			},
		)
	}
	// Check total size limit (estimate based on stat; actual processed sizes
	// may differ).
	maxTotalSize := config.GetMaxTotalSize()
	totalSize := int64(0)
	// statFailures counts files whose size could not be determined. The old
	// name "oversizedFiles" was misleading — it never tracked large files.
	statFailures := 0
	for _, filePath := range files {
		fileInfo, err := os.Stat(filePath)
		if err != nil {
			statFailures++
			continue
		}
		totalSize += fileInfo.Size()
		if totalSize > maxTotalSize {
			return utils.NewStructuredError(
				utils.ErrorTypeValidation,
				utils.CodeResourceLimitTotalSize,
				fmt.Sprintf("total file size (%d bytes) would exceed maximum limit (%d bytes)", totalSize, maxTotalSize),
				"",
				map[string]interface{}{
					"total_size":     totalSize,
					"max_total_size": maxTotalSize,
					"files_checked":  len(files),
				},
			)
		}
	}
	if statFailures > 0 {
		logrus.Warnf("Could not stat %d files during pre-validation", statFailures)
	}
	logrus.Infof("Pre-validation passed: %d files, %d MB total", len(files), totalSize/1024/1024)
	return nil
}

100
cli/processor_processing.go Normal file
View File

@@ -0,0 +1,100 @@
package cli
import (
"context"
"os"
"sync"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/utils"
)
// Process executes the main file processing workflow.
//
// Phase order is significant: the overall timeout context is created first so
// that collection, validation, and processing are all bounded by it; files
// are collected and pre-validated against resource limits before any worker
// or writer goroutine is started.
func (p *Processor) Process(ctx context.Context) error {
	// Create overall processing context with timeout.
	overallCtx, overallCancel := p.resourceMonitor.CreateOverallProcessingContext(ctx)
	defer overallCancel()
	// Configure file type registry (custom/disabled extensions from config).
	p.configureFileTypes()
	// Print startup info with colors.
	p.ui.PrintHeader("🚀 Starting gibidify")
	p.ui.PrintInfo("Format: %s", p.flags.Format)
	p.ui.PrintInfo("Source: %s", p.flags.SourceDir)
	p.ui.PrintInfo("Destination: %s", p.flags.Destination)
	p.ui.PrintInfo("Workers: %d", p.flags.Concurrency)
	// Log resource monitoring configuration.
	p.resourceMonitor.LogResourceInfo()
	p.backpressure.LogBackpressureInfo()
	// Collect files with progress indication.
	p.ui.PrintInfo("📁 Collecting files...")
	files, err := p.collectFiles()
	if err != nil {
		return err
	}
	// Show collection results.
	p.ui.PrintSuccess("Found %d files to process", len(files))
	// Pre-validate file collection against resource limits before any work.
	if err := p.validateFileCollection(files); err != nil {
		return err
	}
	// Process files with overall timeout.
	return p.processFiles(overallCtx, files)
}
// processFiles processes the collected files.
//
// It wires up the writer goroutine, a pool of workers, and a progress bar,
// then streams the file list into the pipeline. On every exit path —
// including a send error — it waits for the workers and the writer to finish
// before the deferred close of the output file runs, so the writer never
// writes to an already-closed file and no goroutine is leaked.
func (p *Processor) processFiles(ctx context.Context, files []string) error {
	outFile, err := p.createOutputFile()
	if err != nil {
		return err
	}
	defer func() {
		utils.LogError("Error closing output file", outFile.Close())
	}()
	// Initialize back-pressure and channels.
	p.ui.PrintInfo("⚙️ Initializing processing...")
	p.backpressure.LogBackpressureInfo()
	fileCh, writeCh := p.backpressure.CreateChannels()
	writerDone := make(chan struct{})
	// Start writer.
	go fileproc.StartWriter(outFile, writeCh, writerDone, p.flags.Format, p.flags.Prefix, p.flags.Suffix)
	// Start workers.
	var wg sync.WaitGroup
	p.startWorkers(ctx, &wg, fileCh, writeCh)
	// Start progress bar.
	p.ui.StartProgress(len(files), "📝 Processing files")
	// Send files to workers.
	if sendErr := p.sendFiles(ctx, files, fileCh); sendErr != nil {
		// sendFiles has closed fileCh (via its defer). Wait for the workers
		// to drain and shut the writer down before returning; returning
		// immediately would leak the writer goroutine (writeCh is never
		// closed) and let the deferred outFile.Close race in-flight writes.
		p.waitForCompletion(&wg, writeCh, writerDone)
		p.ui.FinishProgress()
		return sendErr
	}
	// Wait for completion.
	p.waitForCompletion(&wg, writeCh, writerDone)
	p.ui.FinishProgress()
	p.logFinalStats()
	p.ui.PrintSuccess("Processing completed. Output saved to %s", p.flags.Destination)
	return nil
}
// createOutputFile creates (or truncates) the aggregation output file at the
// configured destination.
func (p *Processor) createOutputFile() (*os.File, error) {
	// Destination path has been validated during CLI flag validation for
	// path traversal attempts.
	out, createErr := os.Create(p.flags.Destination) // #nosec G304 - destination is validated in flags.validate()
	if createErr == nil {
		return out, nil
	}
	wrapped := utils.WrapError(createErr, utils.ErrorTypeIO, utils.CodeIOFileCreate, "failed to create output file")
	return nil, wrapped.WithFilePath(p.flags.Destination)
}

40
cli/processor_stats.go Normal file
View File

@@ -0,0 +1,40 @@
package cli
import (
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/config"
)
// logFinalStats logs the final back-pressure and resource monitoring
// statistics, then releases the resource monitor.
func (p *Processor) logFinalStats() {
	// Back-pressure stats are only meaningful when the feature is enabled.
	bpStats := p.backpressure.GetStats()
	if bpStats.Enabled {
		logrus.Infof("Back-pressure stats: processed=%d files, memory=%dMB/%dMB",
			bpStats.FilesProcessed, bpStats.CurrentMemoryUsage/1024/1024, bpStats.MaxMemoryUsage/1024/1024)
	}
	// Resource monitoring stats, plus any violations or degraded modes that
	// were active when processing finished.
	resStats := p.resourceMonitor.GetMetrics()
	if config.GetResourceLimitsEnabled() {
		logrus.Infof("Resource stats: processed=%d files, totalSize=%dMB, avgFileSize=%.2fKB, rate=%.2f files/sec",
			resStats.FilesProcessed, resStats.TotalSizeProcessed/1024/1024,
			resStats.AverageFileSize/1024, resStats.ProcessingRate)
		if len(resStats.ViolationsDetected) > 0 {
			logrus.Warnf("Resource violations detected: %v", resStats.ViolationsDetected)
		}
		if resStats.DegradationActive {
			logrus.Warnf("Processing completed with degradation mode active")
		}
		if resStats.EmergencyStopActive {
			logrus.Errorf("Processing completed with emergency stop active")
		}
	}
	// Clean up resource monitor.
	p.resourceMonitor.Close()
}

44
cli/processor_types.go Normal file
View File

@@ -0,0 +1,44 @@
package cli
import (
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/fileproc"
)
// Processor handles the main file processing logic.
type Processor struct {
	flags           *Flags                        // parsed CLI flags (read-only after construction)
	backpressure    *fileproc.BackpressureManager // throttles producers when the pipeline backs up
	resourceMonitor *fileproc.ResourceMonitor     // enforces timeouts/limits; must be Closed when done
	ui              *UIManager                    // colored output and progress bars (stderr)
}
// NewProcessor creates a new processor with the given flags, wiring up a UI
// manager whose color and progress settings honor the -no-colors and
// -no-progress flags.
func NewProcessor(flags *Flags) *Processor {
	ui := NewUIManager()
	ui.SetColorOutput(!flags.NoColors)
	ui.SetProgressOutput(!flags.NoProgress)
	proc := &Processor{
		flags:           flags,
		backpressure:    fileproc.NewBackpressureManager(),
		resourceMonitor: fileproc.NewResourceMonitor(),
		ui:              ui,
	}
	return proc
}
// configureFileTypes configures the file type registry from the loaded
// configuration. No-op when file type detection is disabled.
func (p *Processor) configureFileTypes() {
	if !config.GetFileTypesEnabled() {
		return
	}
	fileproc.ConfigureFromSettings(
		config.GetCustomImageExtensions(),
		config.GetCustomBinaryExtensions(),
		config.GetCustomLanguages(),
		config.GetDisabledImageExtensions(),
		config.GetDisabledBinaryExtensions(),
		config.GetDisabledLanguageExtensions(),
	)
}

85
cli/processor_workers.go Normal file
View File

@@ -0,0 +1,85 @@
package cli
import (
"context"
"sync"
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/utils"
)
// startWorkers launches Concurrency worker goroutines, registering each one
// with the caller's WaitGroup before it starts.
func (p *Processor) startWorkers(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) {
	for i := 0; i < p.flags.Concurrency; i++ {
		wg.Add(1)
		go p.worker(ctx, wg, fileCh, writeCh)
	}
}
// worker consumes file paths from fileCh until the channel is closed or the
// context is cancelled, processing each path and emitting results on writeCh.
func (p *Processor) worker(ctx context.Context, wg *sync.WaitGroup, fileCh chan string, writeCh chan fileproc.WriteRequest) {
	defer wg.Done()
	for {
		select {
		case <-ctx.Done():
			// Cancellation or timeout: stop without draining remaining work.
			return
		case path, open := <-fileCh:
			if !open {
				// Channel closed by the sender: all work has been handed out.
				return
			}
			p.processFile(ctx, path, writeCh)
		}
	}
}
// processFile processes a single file with resource monitoring.
//
// The progress bar is advanced exactly once per file via defer, so files that
// are skipped (emergency stop) or fail path resolution still count toward
// completion and the bar can reach its total.
func (p *Processor) processFile(ctx context.Context, filePath string, writeCh chan fileproc.WriteRequest) {
	// Always advance the bar, even on the early returns below; otherwise
	// skipped files would leave the progress bar stuck short of 100%.
	defer p.ui.UpdateProgress(1)
	// Check for emergency stop.
	if p.resourceMonitor.IsEmergencyStopActive() {
		logrus.Warnf("Emergency stop active, skipping file: %s", filePath)
		return
	}
	absRoot, err := utils.GetAbsolutePath(p.flags.SourceDir)
	if err != nil {
		utils.LogError("Failed to get absolute path", err)
		return
	}
	// Use the resource monitor-aware processing.
	fileproc.ProcessFileWithMonitor(ctx, filePath, writeCh, absRoot, p.resourceMonitor)
}
// sendFiles feeds the file list into fileCh, honoring back-pressure and
// context cancellation. fileCh is always closed on return so workers see
// end-of-input even when sending aborts early.
func (p *Processor) sendFiles(ctx context.Context, files []string, fileCh chan string) error {
	defer close(fileCh)
	for _, path := range files {
		// Throttle when the back-pressure manager reports pressure.
		if p.backpressure.ShouldApplyBackpressure(ctx) {
			p.backpressure.ApplyBackpressure(ctx)
		}
		// Block until there is room in the channel buffer if needed.
		p.backpressure.WaitForChannelSpace(ctx, fileCh, nil)
		select {
		case fileCh <- path:
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return nil
}
// waitForCompletion waits for all workers to complete.
//
// The sequence is significant: first wait until every worker has stopped
// sending, then close writeCh (safe only once no sender remains), then block
// until the writer goroutine signals completion via writerDone.
func (p *Processor) waitForCompletion(wg *sync.WaitGroup, writeCh chan fileproc.WriteRequest, writerDone chan struct{}) {
	wg.Wait()
	close(writeCh)
	<-writerDone
}

173
cli/ui.go Normal file
View File

@@ -0,0 +1,173 @@
package cli
import (
"fmt"
"io"
"os"
"time"
"github.com/fatih/color"
"github.com/schollz/progressbar/v3"
)
// UIManager handles CLI user interface elements.
//
// Colors and progress bars are written to the configured writer (stderr by
// default) so machine-readable results on stdout stay clean.
type UIManager struct {
	enableColors   bool                     // colored output toggle (also mirrored into color.NoColor)
	enableProgress bool                     // progress bar toggle; checked at StartProgress time
	progressBar    *progressbar.ProgressBar // nil when no bar is active
	output         io.Writer                // destination for UI output (stderr by default)
}
// NewUIManager creates a new UI manager with terminal-derived defaults:
// colors on when the terminal supports them, progress bars on when running
// interactively. All UI output goes to stderr so stdout stays clean.
func NewUIManager() *UIManager {
	ui := &UIManager{output: os.Stderr} // Progress and colors go to stderr
	ui.enableColors = isColorTerminal()
	ui.enableProgress = isInteractiveTerminal()
	return ui
}
// SetColorOutput enables or disables colored output.
//
// NOTE: this also flips the package-global color.NoColor, so it affects every
// consumer of the fatih/color package in this process, not just this
// UIManager instance.
func (ui *UIManager) SetColorOutput(enabled bool) {
	ui.enableColors = enabled
	color.NoColor = !enabled
}

// SetProgressOutput enables or disables progress bars.
// Takes effect on the next StartProgress call; a bar that is already running
// is not affected.
func (ui *UIManager) SetProgressOutput(enabled bool) {
	ui.enableProgress = enabled
}
// StartProgress initializes a progress bar for file processing.
// No-op when progress output is disabled or total is not positive.
func (ui *UIManager) StartProgress(total int, description string) {
	if total <= 0 || !ui.enableProgress {
		return
	}
	// Green block theme inside plain brackets.
	theme := progressbar.Theme{
		Saucer:        color.GreenString("█"),
		SaucerHead:    color.GreenString("█"),
		SaucerPadding: " ",
		BarStart:      "[",
		BarEnd:        "]",
	}
	ui.progressBar = progressbar.NewOptions(total,
		progressbar.OptionSetWriter(ui.output),
		progressbar.OptionSetDescription(description),
		progressbar.OptionSetTheme(theme),
		progressbar.OptionShowCount(),
		progressbar.OptionShowIts(),
		progressbar.OptionSetWidth(40),
		// Throttle redraws to avoid flooding the terminal.
		progressbar.OptionThrottle(100*time.Millisecond),
		// Terminate the bar line once complete.
		progressbar.OptionOnCompletion(func() {
			_, _ = fmt.Fprint(ui.output, "\n")
		}),
		progressbar.OptionSetRenderBlankState(true),
	)
}
// UpdateProgress increments the progress bar. Safe to call when no bar is
// active (before StartProgress, or when progress output is disabled): it
// simply does nothing.
func (ui *UIManager) UpdateProgress(increment int) {
	if ui.progressBar == nil {
		return
	}
	_ = ui.progressBar.Add(increment)
}
// FinishProgress completes the progress bar and clears it so a subsequent
// StartProgress begins fresh. No-op when no bar is active.
func (ui *UIManager) FinishProgress() {
	if ui.progressBar == nil {
		return
	}
	_ = ui.progressBar.Finish()
	ui.progressBar = nil
}
// PrintSuccess prints a success message in green (plain text when colors are
// disabled), prefixed with a check mark.
func (ui *UIManager) PrintSuccess(format string, args ...interface{}) {
	if !ui.enableColors {
		ui.printf("✓ "+format+"\n", args...)
		return
	}
	color.Green("✓ "+format, args...)
}
// PrintError prints an error message in red (plain text when colors are
// disabled), prefixed with a cross mark.
func (ui *UIManager) PrintError(format string, args ...interface{}) {
	if !ui.enableColors {
		ui.printf("✗ "+format+"\n", args...)
		return
	}
	color.Red("✗ "+format, args...)
}
// PrintWarning prints a warning message in yellow (plain text when colors are
// disabled), prefixed with a warning sign.
func (ui *UIManager) PrintWarning(format string, args ...interface{}) {
	if !ui.enableColors {
		ui.printf("⚠ "+format+"\n", args...)
		return
	}
	color.Yellow("⚠ "+format, args...)
}
// PrintInfo prints an info message in blue (plain text when colors are
// disabled), indented to line up with the marked message kinds.
func (ui *UIManager) PrintInfo(format string, args ...interface{}) {
	if !ui.enableColors {
		ui.printf("  "+format+"\n", args...)
		return
	}
	color.Blue("  "+format, args...)
}
// PrintHeader prints a header message in bold (plain text when colors are
// disabled).
func (ui *UIManager) PrintHeader(format string, args ...interface{}) {
	if !ui.enableColors {
		ui.printf(format+"\n", args...)
		return
	}
	_, _ = color.New(color.Bold).Fprintf(ui.output, format+"\n", args...)
}
// isColorTerminal checks if the terminal supports colors.
//
// Precedence: a dumb/absent TERM always disables colors; NO_COLOR
// (https://no-color.org/) disables colors and takes priority over CI
// detection — previously it was checked after the CI branch, so setting
// NO_COLOR in GitHub Actions was silently ignored. FORCE_COLOR then forces
// colors on, CI heuristics apply next, and finally we fall back to whether
// stderr is an interactive terminal.
func isColorTerminal() bool {
	// Check common environment variables.
	term := os.Getenv("TERM")
	if term == "" || term == "dumb" {
		return false
	}
	// NO_COLOR (https://no-color.org/) takes precedence over CI detection.
	if os.Getenv("NO_COLOR") != "" {
		return false
	}
	// Check for CI environments that typically don't support colors.
	if os.Getenv("CI") != "" {
		// GitHub Actions supports colors.
		if os.Getenv("GITHUB_ACTIONS") == "true" {
			return true
		}
		// Most other CI systems don't.
		return false
	}
	// Check if FORCE_COLOR is set.
	if os.Getenv("FORCE_COLOR") != "" {
		return true
	}
	// Default to true for interactive terminals.
	return isInteractiveTerminal()
}
// isInteractiveTerminal checks if we're running in an interactive terminal by
// testing whether stderr (where progress and colors are written) is a
// character device. Returns false when stderr cannot be stat'ed.
func isInteractiveTerminal() bool {
	info, err := os.Stderr.Stat()
	return err == nil && info.Mode()&os.ModeCharDevice != 0
}
// printf is a helper that ignores printf errors (for UI output).
// Errors are deliberately discarded: there is no reasonable recovery for a
// failed terminal write in UI code.
func (ui *UIManager) printf(format string, args ...interface{}) {
	_, _ = fmt.Fprintf(ui.output, format, args...)
}

145
cmd/benchmark/main.go Normal file
View File

@@ -0,0 +1,145 @@
// Package main provides a CLI for running gibidify benchmarks.
package main
import (
	"flag"
	"fmt"
	"os"
	"runtime"
	"strconv"
	"strings"

	"github.com/ivuorinen/gibidify/benchmark"
	"github.com/ivuorinen/gibidify/utils"
)
// Command-line flags selecting which benchmarks run and how they are sized.
var (
	sourceDir       = flag.String("source", "", "Source directory to benchmark (uses temp files if empty)")
	benchmarkType   = flag.String("type", "all", "Benchmark type: all, collection, processing, concurrency, format")
	format          = flag.String("format", "json", "Output format for processing benchmarks")
	concurrency     = flag.Int("concurrency", runtime.NumCPU(), "Concurrency level for processing benchmarks")
	concurrencyList = flag.String("concurrency-list", "1,2,4,8", "Comma-separated list of concurrency levels")
	formatList      = flag.String("format-list", "json,yaml,markdown", "Comma-separated list of formats")
	numFiles        = flag.Int("files", 100, "Number of files to create for benchmarks")
)
// main parses CLI flags, runs the selected benchmarks, and exits non-zero on
// failure.
func main() {
	flag.Parse()
	err := runBenchmarks()
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "Benchmark failed: %v\n", err)
	os.Exit(1)
}
// runBenchmarks prints a run summary and dispatches to the benchmark selected
// by the -type flag, returning a validation error for unknown types.
func runBenchmarks() error {
	fmt.Printf("Running gibidify benchmarks...\n")
	fmt.Printf("Source: %s\n", getSourceDescription())
	fmt.Printf("Type: %s\n", *benchmarkType)
	fmt.Printf("CPU cores: %d\n", runtime.NumCPU())
	fmt.Println()
	// Dispatch table instead of a switch; "all" needs a closure to bind the
	// source directory argument.
	runners := map[string]func() error{
		"all":         func() error { return benchmark.RunAllBenchmarks(*sourceDir) },
		"collection":  runCollectionBenchmark,
		"processing":  runProcessingBenchmark,
		"concurrency": runConcurrencyBenchmark,
		"format":      runFormatBenchmark,
	}
	runner, known := runners[*benchmarkType]
	if !known {
		return utils.NewValidationError(utils.CodeValidationFormat, "invalid benchmark type: "+*benchmarkType)
	}
	return runner()
}
// runCollectionBenchmark runs and prints the file-collection benchmark.
func runCollectionBenchmark() error {
	fmt.Println("Running file collection benchmark...")
	res, benchErr := benchmark.FileCollectionBenchmark(*sourceDir, *numFiles)
	if benchErr != nil {
		return utils.WrapError(benchErr, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "file collection benchmark failed")
	}
	benchmark.PrintBenchmarkResult(res)
	return nil
}
// runProcessingBenchmark runs and prints the file-processing benchmark for
// the configured format and concurrency.
func runProcessingBenchmark() error {
	fmt.Printf("Running file processing benchmark (format: %s, concurrency: %d)...\n", *format, *concurrency)
	res, benchErr := benchmark.FileProcessingBenchmark(*sourceDir, *format, *concurrency)
	if benchErr != nil {
		return utils.WrapError(benchErr, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "file processing benchmark failed")
	}
	benchmark.PrintBenchmarkResult(res)
	return nil
}
// runConcurrencyBenchmark runs the processing benchmark across every
// concurrency level given via -concurrency-list and prints the suite.
func runConcurrencyBenchmark() error {
	levels, parseErr := parseConcurrencyList(*concurrencyList)
	if parseErr != nil {
		return utils.WrapError(parseErr, utils.ErrorTypeValidation, utils.CodeValidationFormat, "invalid concurrency list")
	}
	fmt.Printf("Running concurrency benchmark (format: %s, levels: %v)...\n", *format, levels)
	suite, benchErr := benchmark.ConcurrencyBenchmark(*sourceDir, *format, levels)
	if benchErr != nil {
		return utils.WrapError(benchErr, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "concurrency benchmark failed")
	}
	benchmark.PrintBenchmarkSuite(suite)
	return nil
}
// runFormatBenchmark runs the benchmark for every format in -format-list and
// prints the suite.
func runFormatBenchmark() error {
	selected := parseFormatList(*formatList)
	fmt.Printf("Running format benchmark (formats: %v)...\n", selected)
	suite, benchErr := benchmark.FormatBenchmark(*sourceDir, selected)
	if benchErr != nil {
		return utils.WrapError(benchErr, utils.ErrorTypeProcessing, utils.CodeProcessingCollection, "format benchmark failed")
	}
	benchmark.PrintBenchmarkSuite(suite)
	return nil
}
// getSourceDescription describes the benchmark input: the configured source
// directory, or the temp-file setup when none was given.
func getSourceDescription() string {
	if *sourceDir != "" {
		return *sourceDir
	}
	return fmt.Sprintf("temporary files (%d files)", *numFiles)
}
func parseConcurrencyList(list string) ([]int, error) {
parts := strings.Split(list, ",")
levels := make([]int, 0, len(parts))
for _, part := range parts {
part = strings.TrimSpace(part)
var level int
if _, err := fmt.Sscanf(part, "%d", &level); err != nil {
return nil, utils.WrapErrorf(err, utils.ErrorTypeValidation, utils.CodeValidationFormat, "invalid concurrency level: %s", part)
}
if level <= 0 {
return nil, utils.NewValidationError(utils.CodeValidationFormat, "concurrency level must be positive: "+part)
}
levels = append(levels, level)
}
if len(levels) == 0 {
return nil, utils.NewValidationError(utils.CodeValidationFormat, "no valid concurrency levels found")
}
return levels, nil
}
// parseFormatList splits a comma-separated format list into individual
// format names, trimming whitespace and dropping empty entries.
func parseFormatList(list string) []string {
	parts := strings.Split(list, ",")
	formats := make([]string, 0, len(parts))
	for _, raw := range parts {
		if trimmed := strings.TrimSpace(raw); trimmed != "" {
			formats = append(formats, trimmed)
		}
	}
	return formats
}

84
config.example.yaml Normal file
View File

@@ -0,0 +1,84 @@
# gibidify configuration example
# Place this file in one of these locations:
# - $XDG_CONFIG_HOME/gibidify/config.yaml
# - $HOME/.config/gibidify/config.yaml
# - Current directory (if no gibidify.yaml output file exists)
# File size limit in bytes (default: 5MB)
fileSizeLimit: 5242880
# Directories to ignore during scanning
ignoreDirectories:
- vendor
- node_modules
- .git
- dist
- build
- target
- bower_components
- cache
- tmp
- .next
- .nuxt
# FileType registry configuration
fileTypes:
# Enable/disable file type detection entirely (default: true)
enabled: true
# Add custom image extensions
customImageExtensions:
- .webp
- .avif
- .heic
- .jxl
# Add custom binary extensions
customBinaryExtensions:
- .custom
- .proprietary
- .blob
# Add custom language mappings
customLanguages:
.zig: zig
.odin: odin
.v: vlang
.grain: grain
.gleam: gleam
.roc: roc
.janet: janet
.fennel: fennel
.wast: wast
.wat: wat
# Disable specific default image extensions
disabledImageExtensions:
- .bmp # Disable bitmap support
- .tif # Disable TIFF support
# Disable specific default binary extensions
disabledBinaryExtensions:
- .exe # Don't treat executables as binary
- .dll # Don't treat DLL files as binary
# Disable specific default language extensions
disabledLanguageExtensions:
- .bat # Don't detect batch files
- .cmd # Don't detect command files
# Maximum concurrency (optional)
maxConcurrency: 16
# Supported output formats (optional validation)
supportedFormats:
- json
- yaml
- markdown
# File patterns for filtering (optional)
filePatterns:
- "*.go"
- "*.py"
- "*.js"
- "*.ts"

79
config.yaml.example Normal file
View File

@@ -0,0 +1,79 @@
# Gibidify Configuration Example
# This file demonstrates all available configuration options
# File size limit for individual files (in bytes)
# Default: 5242880 (5MB), Min: 1024 (1KB), Max: 104857600 (100MB)
fileSizeLimit: 5242880
# Directories to ignore during traversal
ignoreDirectories:
- vendor
- node_modules
- .git
- dist
- build
- target
- bower_components
- cache
- tmp
# File type detection and filtering
fileTypes:
enabled: true
customImageExtensions: []
customBinaryExtensions: []
customLanguages: {}
disabledImageExtensions: []
disabledBinaryExtensions: []
disabledLanguageExtensions: []
# Back-pressure management for memory optimization
backpressure:
enabled: true
maxPendingFiles: 1000 # Max files in channel buffer
maxPendingWrites: 100 # Max writes in channel buffer
maxMemoryUsage: 104857600 # 100MB soft memory limit
memoryCheckInterval: 1000 # Check memory every N files
# Resource limits for DoS protection and security
resourceLimits:
enabled: true
# File processing limits
maxFiles: 10000 # Maximum number of files to process
maxTotalSize: 1073741824 # Maximum total size (1GB)
# Timeout limits (in seconds)
fileProcessingTimeoutSec: 30 # Timeout for individual file processing
overallTimeoutSec: 3600 # Overall processing timeout (1 hour)
# Concurrency limits
maxConcurrentReads: 10 # Maximum concurrent file reading operations
# Rate limiting (0 = disabled)
rateLimitFilesPerSec: 0 # Files per second rate limit
# Memory limits
hardMemoryLimitMB: 512 # Hard memory limit (512MB)
# Safety features
enableGracefulDegradation: true # Enable graceful degradation on resource pressure
enableResourceMonitoring: true # Enable detailed resource monitoring
# Optional: Maximum concurrency for workers
# Default: number of CPU cores
# maxConcurrency: 4
# Optional: Supported output formats
# Default: ["json", "yaml", "markdown"]
# supportedFormats:
# - json
# - yaml
# - markdown
# Optional: File patterns to include
# Default: all files (empty list means no pattern filtering)
# filePatterns:
# - "*.go"
# - "*.py"
# - "*.js"

View File

@@ -1,53 +1,5 @@
// Package config handles application configuration using Viper.
// This file contains the main configuration orchestration logic.
package config
import (
"os"
"path/filepath"
"github.com/sirupsen/logrus"
"github.com/spf13/viper"
)
// LoadConfig reads configuration from a YAML file.
// It looks for config in the following order:
// 1. $XDG_CONFIG_HOME/gibidify/config.yaml
// 2. $HOME/.config/gibidify/config.yaml
// 3. The current directory as fallback.
//
// A missing config file is not an error: defaults are applied instead.
func LoadConfig() {
	viper.SetConfigName("config")
	viper.SetConfigType("yaml")
	// Explicit XDG location takes priority; otherwise fall back to
	// $HOME/.config when the home directory can be resolved.
	if xdgConfig := os.Getenv("XDG_CONFIG_HOME"); xdgConfig != "" {
		viper.AddConfigPath(filepath.Join(xdgConfig, "gibidify"))
	} else if home, err := os.UserHomeDir(); err == nil {
		viper.AddConfigPath(filepath.Join(home, ".config", "gibidify"))
	}
	// The current working directory is always searched last.
	viper.AddConfigPath(".")
	if err := viper.ReadInConfig(); err != nil {
		// Not fatal: fall back to the built-in defaults.
		logrus.Infof("Config file not found, using default values: %v", err)
		setDefaultConfig()
	} else {
		logrus.Infof("Using config file: %s", viper.ConfigFileUsed())
	}
}
// setDefaultConfig sets default configuration values.
// Invoked only when no config file could be read.
func setDefaultConfig() {
	viper.SetDefault("fileSizeLimit", 5242880) // 5 MB
	// Default ignored directories.
	viper.SetDefault("ignoreDirectories", []string{
		"vendor", "node_modules", ".git", "dist", "build", "target", "bower_components", "cache", "tmp",
	})
}
// GetFileSizeLimit returns the file size limit from configuration, in bytes.
func GetFileSizeLimit() int64 {
	return viper.GetInt64("fileSizeLimit")
}

// GetIgnoredDirectories returns the list of directory names to skip while
// scanning.
func GetIgnoredDirectories() []string {
	return viper.GetStringSlice("ignoreDirectories")
}
// This file is now a minimal orchestration layer that delegates to the modular components.

View File

@@ -0,0 +1,174 @@
package config
import (
"testing"
"github.com/spf13/viper"
)
// TestFileTypeRegistryConfig tests the FileTypeRegistry configuration
// functionality: defaults, getter round-trips, and validation. The previous
// version compared the disabled-extension slices by length only, so wrong
// contents with the right length would pass; element values are now checked
// everywhere via the shared expectStringSlice helper.
func TestFileTypeRegistryConfig(t *testing.T) {
	// Test default values.
	t.Run("DefaultValues", func(t *testing.T) {
		viper.Reset()
		setDefaultConfig()
		if !GetFileTypesEnabled() {
			t.Error("Expected file types to be enabled by default")
		}
		expectStringSlice(t, "custom image extension", GetCustomImageExtensions(), nil)
		expectStringSlice(t, "custom binary extension", GetCustomBinaryExtensions(), nil)
		if len(GetCustomLanguages()) != 0 {
			t.Error("Expected custom languages to be empty by default")
		}
		expectStringSlice(t, "disabled image extension", GetDisabledImageExtensions(), nil)
		expectStringSlice(t, "disabled binary extension", GetDisabledBinaryExtensions(), nil)
		expectStringSlice(t, "disabled language extension", GetDisabledLanguageExtensions(), nil)
	})
	// Test configuration setting and getting.
	t.Run("ConfigurationSetGet", func(t *testing.T) {
		viper.Reset()
		// Set test values.
		viper.Set("fileTypes.enabled", false)
		viper.Set("fileTypes.customImageExtensions", []string{".webp", ".avif"})
		viper.Set("fileTypes.customBinaryExtensions", []string{".custom", ".mybin"})
		viper.Set("fileTypes.customLanguages", map[string]string{
			".zig": "zig",
			".v":   "vlang",
		})
		viper.Set("fileTypes.disabledImageExtensions", []string{".gif", ".bmp"})
		viper.Set("fileTypes.disabledBinaryExtensions", []string{".exe", ".dll"})
		viper.Set("fileTypes.disabledLanguageExtensions", []string{".rb", ".pl"})
		// Test getter functions.
		if GetFileTypesEnabled() {
			t.Error("Expected file types to be disabled")
		}
		expectStringSlice(t, "custom image extension", GetCustomImageExtensions(), []string{".webp", ".avif"})
		expectStringSlice(t, "custom binary extension", GetCustomBinaryExtensions(), []string{".custom", ".mybin"})
		customLangs := GetCustomLanguages()
		expectedLangs := map[string]string{
			".zig": "zig",
			".v":   "vlang",
		}
		if len(customLangs) != len(expectedLangs) {
			t.Errorf("Expected %d custom languages, got %d", len(expectedLangs), len(customLangs))
		}
		for ext, lang := range expectedLangs {
			if customLangs[ext] != lang {
				t.Errorf("Expected custom language %s -> %s, got %s", ext, lang, customLangs[ext])
			}
		}
		// These previously compared lengths only; contents are now verified.
		expectStringSlice(t, "disabled image extension", GetDisabledImageExtensions(), []string{".gif", ".bmp"})
		expectStringSlice(t, "disabled binary extension", GetDisabledBinaryExtensions(), []string{".exe", ".dll"})
		expectStringSlice(t, "disabled language extension", GetDisabledLanguageExtensions(), []string{".rb", ".pl"})
	})
	// Test validation.
	t.Run("ValidationSuccess", func(t *testing.T) {
		viper.Reset()
		setDefaultConfig()
		// Set valid configuration.
		viper.Set("fileTypes.customImageExtensions", []string{".webp", ".avif"})
		viper.Set("fileTypes.customBinaryExtensions", []string{".custom"})
		viper.Set("fileTypes.customLanguages", map[string]string{
			".zig": "zig",
			".v":   "vlang",
		})
		if err := ValidateConfig(); err != nil {
			t.Errorf("Expected validation to pass with valid config, got error: %v", err)
		}
	})
	t.Run("ValidationFailure", func(t *testing.T) {
		// Test invalid custom image extensions.
		viper.Reset()
		setDefaultConfig()
		viper.Set("fileTypes.customImageExtensions", []string{"", "webp"}) // Empty and missing dot
		if ValidateConfig() == nil {
			t.Error("Expected validation to fail with invalid custom image extensions")
		}
		// Test invalid custom binary extensions.
		viper.Reset()
		setDefaultConfig()
		viper.Set("fileTypes.customBinaryExtensions", []string{"custom"}) // Missing dot
		if ValidateConfig() == nil {
			t.Error("Expected validation to fail with invalid custom binary extensions")
		}
		// Test invalid custom languages.
		viper.Reset()
		setDefaultConfig()
		viper.Set("fileTypes.customLanguages", map[string]string{
			"zig": "zig", // Missing dot in extension
			".v":  "",    // Empty language
		})
		if ValidateConfig() == nil {
			t.Error("Expected validation to fail with invalid custom languages")
		}
	})
}

// expectStringSlice fails the test when got differs from want in length or
// element order. A nil want means "expect empty".
func expectStringSlice(t *testing.T, label string, got, want []string) {
	t.Helper()
	if len(got) != len(want) {
		t.Errorf("Expected %d %s(s), got %d (%v)", len(want), label, len(got), got)
		return
	}
	for i := range want {
		if got[i] != want[i] {
			t.Errorf("Expected %s %s, got %s", label, want[i], got[i])
		}
	}
}

61
config/constants.go Normal file
View File

@@ -0,0 +1,61 @@
package config
const (
	// File Size Limit Constants

	// DefaultFileSizeLimit is the default maximum file size (5MB).
	DefaultFileSizeLimit = 5242880
	// MinFileSizeLimit is the minimum allowed file size limit (1KB).
	MinFileSizeLimit = 1024
	// MaxFileSizeLimit is the maximum allowed file size limit (100MB).
	MaxFileSizeLimit = 104857600

	// Resource Limit Constants

	// DefaultMaxFiles is the default maximum number of files to process.
	DefaultMaxFiles = 10000
	// MinMaxFiles is the minimum allowed file count limit.
	MinMaxFiles = 1
	// MaxMaxFiles is the maximum allowed file count limit.
	MaxMaxFiles = 1000000
	// DefaultMaxTotalSize is the default maximum total size of files (1GB).
	DefaultMaxTotalSize = 1073741824
	// MinMaxTotalSize is the minimum allowed total size limit (1MB).
	MinMaxTotalSize = 1048576
	// MaxMaxTotalSize is the maximum allowed total size limit (100GB).
	MaxMaxTotalSize = 107374182400

	// Timeout Limit Constants

	// DefaultFileProcessingTimeoutSec is the default timeout for individual file processing (30 seconds).
	DefaultFileProcessingTimeoutSec = 30
	// MinFileProcessingTimeoutSec is the minimum allowed file processing timeout (1 second).
	MinFileProcessingTimeoutSec = 1
	// MaxFileProcessingTimeoutSec is the maximum allowed file processing timeout (300 seconds).
	MaxFileProcessingTimeoutSec = 300
	// DefaultOverallTimeoutSec is the default timeout for overall processing (3600 seconds = 1 hour).
	DefaultOverallTimeoutSec = 3600
	// MinOverallTimeoutSec is the minimum allowed overall timeout (10 seconds).
	MinOverallTimeoutSec = 10
	// MaxOverallTimeoutSec is the maximum allowed overall timeout (86400 seconds = 24 hours).
	MaxOverallTimeoutSec = 86400

	// Concurrency Limit Constants

	// DefaultMaxConcurrentReads is the default maximum concurrent file reading operations.
	DefaultMaxConcurrentReads = 10
	// MinMaxConcurrentReads is the minimum allowed concurrent reads.
	MinMaxConcurrentReads = 1
	// MaxMaxConcurrentReads is the maximum allowed concurrent reads.
	MaxMaxConcurrentReads = 100

	// Rate Limit Constants

	// DefaultRateLimitFilesPerSec is the default rate limit for file processing (0 = disabled).
	DefaultRateLimitFilesPerSec = 0
	// MinRateLimitFilesPerSec is the minimum rate limit.
	MinRateLimitFilesPerSec = 0
	// MaxRateLimitFilesPerSec is the maximum rate limit.
	MaxRateLimitFilesPerSec = 10000

	// Memory Limit Constants

	// DefaultHardMemoryLimitMB is the default hard memory limit (512MB).
	DefaultHardMemoryLimitMB = 512
	// MinHardMemoryLimitMB is the minimum hard memory limit (64MB).
	MinHardMemoryLimitMB = 64
	// MaxHardMemoryLimitMB is the maximum hard memory limit (8192MB = 8GB).
	MaxHardMemoryLimitMB = 8192
)

157
config/getters.go Normal file
View File

@@ -0,0 +1,157 @@
package config
import (
"strings"
"github.com/spf13/viper"
)
// GetFileSizeLimit returns the file size limit (in bytes) from configuration.
func GetFileSizeLimit() int64 {
	limit := viper.GetInt64("fileSizeLimit")
	return limit
}

// GetIgnoredDirectories returns the list of directory names to ignore.
func GetIgnoredDirectories() []string {
	dirs := viper.GetStringSlice("ignoreDirectories")
	return dirs
}

// GetMaxConcurrency returns the configured maximum concurrency level.
func GetMaxConcurrency() int {
	concurrency := viper.GetInt("maxConcurrency")
	return concurrency
}

// GetSupportedFormats returns the configured list of supported output formats.
func GetSupportedFormats() []string {
	formats := viper.GetStringSlice("supportedFormats")
	return formats
}

// GetFilePatterns returns the configured list of file patterns.
func GetFilePatterns() []string {
	patterns := viper.GetStringSlice("filePatterns")
	return patterns
}
// IsValidFormat reports whether format names a supported output format
// (json, yaml or markdown). The comparison is case-insensitive and
// ignores surrounding whitespace.
func IsValidFormat(format string) bool {
	switch strings.ToLower(strings.TrimSpace(format)) {
	case "json", "yaml", "markdown":
		return true
	default:
		return false
	}
}
// GetFileTypesEnabled returns whether the file-type registry customization is enabled.
func GetFileTypesEnabled() bool {
	enabled := viper.GetBool("fileTypes.enabled")
	return enabled
}

// GetCustomImageExtensions returns user-supplied image extensions.
func GetCustomImageExtensions() []string {
	exts := viper.GetStringSlice("fileTypes.customImageExtensions")
	return exts
}

// GetCustomBinaryExtensions returns user-supplied binary extensions.
func GetCustomBinaryExtensions() []string {
	exts := viper.GetStringSlice("fileTypes.customBinaryExtensions")
	return exts
}

// GetCustomLanguages returns user-supplied extension-to-language mappings.
func GetCustomLanguages() map[string]string {
	langs := viper.GetStringMapString("fileTypes.customLanguages")
	return langs
}

// GetDisabledImageExtensions returns image extensions the user disabled.
func GetDisabledImageExtensions() []string {
	exts := viper.GetStringSlice("fileTypes.disabledImageExtensions")
	return exts
}

// GetDisabledBinaryExtensions returns binary extensions the user disabled.
func GetDisabledBinaryExtensions() []string {
	exts := viper.GetStringSlice("fileTypes.disabledBinaryExtensions")
	return exts
}

// GetDisabledLanguageExtensions returns language extensions the user disabled.
func GetDisabledLanguageExtensions() []string {
	exts := viper.GetStringSlice("fileTypes.disabledLanguageExtensions")
	return exts
}
// Backpressure getters

// GetBackpressureEnabled returns whether back-pressure handling is enabled.
func GetBackpressureEnabled() bool {
	enabled := viper.GetBool("backpressure.enabled")
	return enabled
}

// GetMaxPendingFiles returns the maximum number of pending files.
func GetMaxPendingFiles() int {
	pending := viper.GetInt("backpressure.maxPendingFiles")
	return pending
}

// GetMaxPendingWrites returns the maximum number of pending writes.
func GetMaxPendingWrites() int {
	pending := viper.GetInt("backpressure.maxPendingWrites")
	return pending
}

// GetMaxMemoryUsage returns the maximum memory usage in bytes.
func GetMaxMemoryUsage() int64 {
	usage := viper.GetInt64("backpressure.maxMemoryUsage")
	return usage
}

// GetMemoryCheckInterval returns how many files pass between memory checks.
func GetMemoryCheckInterval() int {
	interval := viper.GetInt("backpressure.memoryCheckInterval")
	return interval
}
// Resource limits getters

// GetResourceLimitsEnabled returns whether resource limits are enabled.
func GetResourceLimitsEnabled() bool {
	enabled := viper.GetBool("resourceLimits.enabled")
	return enabled
}

// GetMaxFiles returns the maximum number of files to process.
func GetMaxFiles() int {
	maxFiles := viper.GetInt("resourceLimits.maxFiles")
	return maxFiles
}

// GetMaxTotalSize returns the maximum total size in bytes.
func GetMaxTotalSize() int64 {
	maxSize := viper.GetInt64("resourceLimits.maxTotalSize")
	return maxSize
}

// GetFileProcessingTimeoutSec returns the per-file processing timeout in seconds.
func GetFileProcessingTimeoutSec() int {
	timeout := viper.GetInt("resourceLimits.fileProcessingTimeoutSec")
	return timeout
}

// GetOverallTimeoutSec returns the overall processing timeout in seconds.
func GetOverallTimeoutSec() int {
	timeout := viper.GetInt("resourceLimits.overallTimeoutSec")
	return timeout
}

// GetMaxConcurrentReads returns the maximum number of concurrent reads.
func GetMaxConcurrentReads() int {
	reads := viper.GetInt("resourceLimits.maxConcurrentReads")
	return reads
}

// GetRateLimitFilesPerSec returns the file-processing rate limit per second.
func GetRateLimitFilesPerSec() int {
	rate := viper.GetInt("resourceLimits.rateLimitFilesPerSec")
	return rate
}

// GetHardMemoryLimitMB returns the hard memory limit in megabytes.
func GetHardMemoryLimitMB() int {
	limit := viper.GetInt("resourceLimits.hardMemoryLimitMB")
	return limit
}

// GetEnableGracefulDegradation returns whether graceful degradation is enabled.
func GetEnableGracefulDegradation() bool {
	enabled := viper.GetBool("resourceLimits.enableGracefulDegradation")
	return enabled
}

// GetEnableResourceMonitoring returns whether resource monitoring is enabled.
func GetEnableResourceMonitoring() bool {
	enabled := viper.GetBool("resourceLimits.enableResourceMonitoring")
	return enabled
}

90
config/loader.go Normal file
View File

@@ -0,0 +1,90 @@
package config
import (
"os"
"path/filepath"
"github.com/sirupsen/logrus"
"github.com/spf13/viper"
"github.com/ivuorinen/gibidify/utils"
)
// LoadConfig reads configuration from a YAML file.
// It looks for config in the following order:
// 1. $XDG_CONFIG_HOME/gibidify/config.yaml
// 2. $HOME/.config/gibidify/config.yaml
// 3. The current directory as fallback.
//
// Fix over the previous version: when XDG_CONFIG_HOME is set but fails
// path validation, the $HOME/.config fallback (step 2 above) is now
// still registered instead of being silently skipped.
func LoadConfig() {
	viper.SetConfigName("config")
	viper.SetConfigType("yaml")

	xdgAdded := false
	if xdgConfig := os.Getenv("XDG_CONFIG_HOME"); xdgConfig != "" {
		// Validate XDG_CONFIG_HOME for path traversal attempts.
		if err := utils.ValidateConfigPath(xdgConfig); err != nil {
			logrus.Warnf("Invalid XDG_CONFIG_HOME path, using default config: %v", err)
		} else {
			viper.AddConfigPath(filepath.Join(xdgConfig, "gibidify"))
			xdgAdded = true
		}
	}
	if !xdgAdded {
		if home, err := os.UserHomeDir(); err == nil {
			viper.AddConfigPath(filepath.Join(home, ".config", "gibidify"))
		}
	}

	// Only add current directory if no config file named gibidify.yaml exists
	// to avoid conflicts with the project's output file.
	if _, err := os.Stat("gibidify.yaml"); os.IsNotExist(err) {
		viper.AddConfigPath(".")
	}

	if err := viper.ReadInConfig(); err != nil {
		logrus.Infof("Config file not found, using default values: %v", err)
		setDefaultConfig()
		return
	}

	logrus.Infof("Using config file: %s", viper.ConfigFileUsed())
	// Validate configuration after loading; on failure fall back to defaults.
	if err := ValidateConfig(); err != nil {
		logrus.Warnf("Configuration validation failed: %v", err)
		logrus.Info("Falling back to default configuration")
		// Reset viper and set defaults when validation fails.
		viper.Reset()
		setDefaultConfig()
	}
}
// setDefaultConfig sets default configuration values.
func setDefaultConfig() {
viper.SetDefault("fileSizeLimit", DefaultFileSizeLimit)
// Default ignored directories.
viper.SetDefault("ignoreDirectories", []string{
"vendor", "node_modules", ".git", "dist", "build", "target", "bower_components", "cache", "tmp",
})
// FileTypeRegistry defaults
viper.SetDefault("fileTypes.enabled", true)
viper.SetDefault("fileTypes.customImageExtensions", []string{})
viper.SetDefault("fileTypes.customBinaryExtensions", []string{})
viper.SetDefault("fileTypes.customLanguages", map[string]string{})
viper.SetDefault("fileTypes.disabledImageExtensions", []string{})
viper.SetDefault("fileTypes.disabledBinaryExtensions", []string{})
viper.SetDefault("fileTypes.disabledLanguageExtensions", []string{})
// Back-pressure and memory management defaults
viper.SetDefault("backpressure.enabled", true)
viper.SetDefault("backpressure.maxPendingFiles", 1000) // Max files in file channel buffer
viper.SetDefault("backpressure.maxPendingWrites", 100) // Max writes in write channel buffer
viper.SetDefault("backpressure.maxMemoryUsage", 104857600) // 100MB max memory usage
viper.SetDefault("backpressure.memoryCheckInterval", 1000) // Check memory every 1000 files
// Resource limit defaults
viper.SetDefault("resourceLimits.enabled", true)
viper.SetDefault("resourceLimits.maxFiles", DefaultMaxFiles)
viper.SetDefault("resourceLimits.maxTotalSize", DefaultMaxTotalSize)
viper.SetDefault("resourceLimits.fileProcessingTimeoutSec", DefaultFileProcessingTimeoutSec)
viper.SetDefault("resourceLimits.overallTimeoutSec", DefaultOverallTimeoutSec)
viper.SetDefault("resourceLimits.maxConcurrentReads", DefaultMaxConcurrentReads)
viper.SetDefault("resourceLimits.rateLimitFilesPerSec", DefaultRateLimitFilesPerSec)
viper.SetDefault("resourceLimits.hardMemoryLimitMB", DefaultHardMemoryLimitMB)
viper.SetDefault("resourceLimits.enableGracefulDegradation", true)
viper.SetDefault("resourceLimits.enableResourceMonitoring", true)
}

120
config/loader_test.go Normal file
View File

@@ -0,0 +1,120 @@
package config_test
import (
	"os"
	"path/filepath"
	"testing"

	"github.com/spf13/viper"

	"github.com/ivuorinen/gibidify/config"
	"github.com/ivuorinen/gibidify/testutil"
)
const (
// defaultFileSizeLimit is the expected default file size limit in bytes
// (5 MiB); it should match config.DefaultFileSizeLimit — TODO confirm.
defaultFileSizeLimit = 5242880
// testFileSizeLimit is the fileSizeLimit value written by the config
// fixtures in this file.
testFileSizeLimit = 123456
)
// TestDefaultConfig verifies that if no config file is found,
// the default configuration values are correctly set.
func TestDefaultConfig(t *testing.T) {
// Create a temporary directory to ensure no config file is present.
tmpDir := t.TempDir()
// Point Viper to the temp directory with no config file.
originalConfigPaths := viper.ConfigFileUsed()
testutil.ResetViperConfig(t, tmpDir)
// Check defaults
defaultSizeLimit := config.GetFileSizeLimit()
if defaultSizeLimit != defaultFileSizeLimit {
t.Errorf("Expected default file size limit of 5242880, got %d", defaultSizeLimit)
}
ignoredDirs := config.GetIgnoredDirectories()
if len(ignoredDirs) == 0 {
t.Errorf("Expected some default ignored directories, got none")
}
// Restore Viper state
viper.SetConfigFile(originalConfigPaths)
}
// TestLoadConfigFile verifies that when a valid config file is present,
// viper loads the specified values correctly.
func TestLoadConfigFile(t *testing.T) {
	tmpDir := t.TempDir()

	// Write a minimal configuration fixture into the temp directory.
	fixture := []byte(`---
fileSizeLimit: 123456
ignoreDirectories:
- "testdir1"
- "testdir2"
`)
	testutil.CreateTestFile(t, tmpDir, "config.yaml", fixture)

	// Start from a clean viper state pointed at the fixture directory,
	// then force it to read our config file.
	viper.Reset()
	viper.AddConfigPath(tmpDir)
	testutil.MustSucceed(t, viper.ReadInConfig(), "reading config file")

	// Validate the loaded data.
	if got := viper.GetInt64("fileSizeLimit"); got != testFileSizeLimit {
		t.Errorf("Expected fileSizeLimit=123456, got %d", got)
	}
	dirs := viper.GetStringSlice("ignoreDirectories")
	if len(dirs) != 2 || dirs[0] != "testdir1" || dirs[1] != "testdir2" {
		t.Errorf("Expected [\"testdir1\", \"testdir2\"], got %v", dirs)
	}
}
// TestLoadConfigWithValidation tests that invalid config files fall back to defaults.
//
// Fix over the previous version: the config path is now built with
// filepath.Join instead of string concatenation with "/", so the test
// is correct on every platform.
func TestLoadConfigWithValidation(t *testing.T) {
	// fileSizeLimit below the allowed minimum and an empty ignore-directory
	// entry: both should fail validation and trigger the default fallback.
	configContent := `
fileSizeLimit: 100
ignoreDirectories:
- node_modules
- ""
- .git
`
	tempDir := t.TempDir()
	configFile := filepath.Join(tempDir, "config.yaml")
	if err := os.WriteFile(configFile, []byte(configContent), 0o644); err != nil {
		t.Fatalf("Failed to write config file: %v", err)
	}

	// Reset viper and set config path.
	viper.Reset()
	viper.AddConfigPath(tempDir)

	// This should load the config but validation should fail and fall back to defaults.
	config.LoadConfig()

	// Should have fallen back to defaults due to validation failure.
	if config.GetFileSizeLimit() != int64(config.DefaultFileSizeLimit) {
		t.Errorf("Expected default file size limit after validation failure, got %d", config.GetFileSizeLimit())
	}
	if containsString(config.GetIgnoredDirectories(), "") {
		t.Errorf("Expected ignored directories not to contain empty string after validation failure, got %v", config.GetIgnoredDirectories())
	}
}
// Helper functions

// containsString reports whether item occurs in slice.
func containsString(slice []string, item string) bool {
	found := false
	for i := range slice {
		if slice[i] == item {
			found = true
			break
		}
	}
	return found
}

307
config/validation.go Normal file
View File

@@ -0,0 +1,307 @@
package config
import (
"fmt"
"strings"
"github.com/spf13/viper"
"github.com/ivuorinen/gibidify/utils"
)
// ValidateConfig validates the loaded configuration.
func ValidateConfig() error {
var validationErrors []string
// Validate file size limit
fileSizeLimit := viper.GetInt64("fileSizeLimit")
if fileSizeLimit < MinFileSizeLimit {
validationErrors = append(validationErrors, fmt.Sprintf("fileSizeLimit (%d) is below minimum (%d)", fileSizeLimit, MinFileSizeLimit))
}
if fileSizeLimit > MaxFileSizeLimit {
validationErrors = append(validationErrors, fmt.Sprintf("fileSizeLimit (%d) exceeds maximum (%d)", fileSizeLimit, MaxFileSizeLimit))
}
// Validate ignore directories
ignoreDirectories := viper.GetStringSlice("ignoreDirectories")
for i, dir := range ignoreDirectories {
dir = strings.TrimSpace(dir)
if dir == "" {
validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] is empty", i))
continue
}
if strings.Contains(dir, "/") {
validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] (%s) contains path separator - only directory names are allowed", i, dir))
}
if strings.HasPrefix(dir, ".") && dir != ".git" && dir != ".vscode" && dir != ".idea" {
validationErrors = append(validationErrors, fmt.Sprintf("ignoreDirectories[%d] (%s) starts with dot - this may cause unexpected behavior", i, dir))
}
}
// Validate supported output formats if configured
if viper.IsSet("supportedFormats") {
supportedFormats := viper.GetStringSlice("supportedFormats")
validFormats := map[string]bool{"json": true, "yaml": true, "markdown": true}
for i, format := range supportedFormats {
format = strings.ToLower(strings.TrimSpace(format))
if !validFormats[format] {
validationErrors = append(validationErrors, fmt.Sprintf("supportedFormats[%d] (%s) is not a valid format (json, yaml, markdown)", i, format))
}
}
}
// Validate concurrency settings if configured
if viper.IsSet("maxConcurrency") {
maxConcurrency := viper.GetInt("maxConcurrency")
if maxConcurrency < 1 {
validationErrors = append(validationErrors, fmt.Sprintf("maxConcurrency (%d) must be at least 1", maxConcurrency))
}
if maxConcurrency > 100 {
validationErrors = append(validationErrors, fmt.Sprintf("maxConcurrency (%d) is unreasonably high (max 100)", maxConcurrency))
}
}
// Validate file patterns if configured
if viper.IsSet("filePatterns") {
filePatterns := viper.GetStringSlice("filePatterns")
for i, pattern := range filePatterns {
pattern = strings.TrimSpace(pattern)
if pattern == "" {
validationErrors = append(validationErrors, fmt.Sprintf("filePatterns[%d] is empty", i))
continue
}
// Basic validation - patterns should contain at least one alphanumeric character
if !strings.ContainsAny(pattern, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") {
validationErrors = append(validationErrors, fmt.Sprintf("filePatterns[%d] (%s) appears to be invalid", i, pattern))
}
}
}
// Validate FileTypeRegistry configuration
if viper.IsSet("fileTypes.customImageExtensions") {
customImages := viper.GetStringSlice("fileTypes.customImageExtensions")
for i, ext := range customImages {
ext = strings.TrimSpace(ext)
if ext == "" {
validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customImageExtensions[%d] is empty", i))
continue
}
if !strings.HasPrefix(ext, ".") {
validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customImageExtensions[%d] (%s) must start with a dot", i, ext))
}
}
}
if viper.IsSet("fileTypes.customBinaryExtensions") {
customBinary := viper.GetStringSlice("fileTypes.customBinaryExtensions")
for i, ext := range customBinary {
ext = strings.TrimSpace(ext)
if ext == "" {
validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customBinaryExtensions[%d] is empty", i))
continue
}
if !strings.HasPrefix(ext, ".") {
validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customBinaryExtensions[%d] (%s) must start with a dot", i, ext))
}
}
}
if viper.IsSet("fileTypes.customLanguages") {
customLangs := viper.GetStringMapString("fileTypes.customLanguages")
for ext, lang := range customLangs {
ext = strings.TrimSpace(ext)
lang = strings.TrimSpace(lang)
if ext == "" {
validationErrors = append(validationErrors, "fileTypes.customLanguages contains empty extension key")
continue
}
if !strings.HasPrefix(ext, ".") {
validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customLanguages extension (%s) must start with a dot", ext))
}
if lang == "" {
validationErrors = append(validationErrors, fmt.Sprintf("fileTypes.customLanguages[%s] has empty language value", ext))
}
}
}
// Validate back-pressure configuration
if viper.IsSet("backpressure.maxPendingFiles") {
maxPendingFiles := viper.GetInt("backpressure.maxPendingFiles")
if maxPendingFiles < 1 {
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingFiles (%d) must be at least 1", maxPendingFiles))
}
if maxPendingFiles > 100000 {
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingFiles (%d) is unreasonably high (max 100000)", maxPendingFiles))
}
}
if viper.IsSet("backpressure.maxPendingWrites") {
maxPendingWrites := viper.GetInt("backpressure.maxPendingWrites")
if maxPendingWrites < 1 {
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingWrites (%d) must be at least 1", maxPendingWrites))
}
if maxPendingWrites > 10000 {
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxPendingWrites (%d) is unreasonably high (max 10000)", maxPendingWrites))
}
}
if viper.IsSet("backpressure.maxMemoryUsage") {
maxMemoryUsage := viper.GetInt64("backpressure.maxMemoryUsage")
if maxMemoryUsage < 1048576 { // 1MB minimum
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxMemoryUsage (%d) must be at least 1MB (1048576 bytes)", maxMemoryUsage))
}
if maxMemoryUsage > 10737418240 { // 10GB maximum
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.maxMemoryUsage (%d) is unreasonably high (max 10GB)", maxMemoryUsage))
}
}
if viper.IsSet("backpressure.memoryCheckInterval") {
interval := viper.GetInt("backpressure.memoryCheckInterval")
if interval < 1 {
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.memoryCheckInterval (%d) must be at least 1", interval))
}
if interval > 100000 {
validationErrors = append(validationErrors, fmt.Sprintf("backpressure.memoryCheckInterval (%d) is unreasonably high (max 100000)", interval))
}
}
// Validate resource limits configuration
if viper.IsSet("resourceLimits.maxFiles") {
maxFiles := viper.GetInt("resourceLimits.maxFiles")
if maxFiles < MinMaxFiles {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxFiles (%d) must be at least %d", maxFiles, MinMaxFiles))
}
if maxFiles > MaxMaxFiles {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxFiles (%d) exceeds maximum (%d)", maxFiles, MaxMaxFiles))
}
}
if viper.IsSet("resourceLimits.maxTotalSize") {
maxTotalSize := viper.GetInt64("resourceLimits.maxTotalSize")
if maxTotalSize < MinMaxTotalSize {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxTotalSize (%d) must be at least %d", maxTotalSize, MinMaxTotalSize))
}
if maxTotalSize > MaxMaxTotalSize {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxTotalSize (%d) exceeds maximum (%d)", maxTotalSize, MaxMaxTotalSize))
}
}
if viper.IsSet("resourceLimits.fileProcessingTimeoutSec") {
timeout := viper.GetInt("resourceLimits.fileProcessingTimeoutSec")
if timeout < MinFileProcessingTimeoutSec {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.fileProcessingTimeoutSec (%d) must be at least %d", timeout, MinFileProcessingTimeoutSec))
}
if timeout > MaxFileProcessingTimeoutSec {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.fileProcessingTimeoutSec (%d) exceeds maximum (%d)", timeout, MaxFileProcessingTimeoutSec))
}
}
if viper.IsSet("resourceLimits.overallTimeoutSec") {
timeout := viper.GetInt("resourceLimits.overallTimeoutSec")
if timeout < MinOverallTimeoutSec {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.overallTimeoutSec (%d) must be at least %d", timeout, MinOverallTimeoutSec))
}
if timeout > MaxOverallTimeoutSec {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.overallTimeoutSec (%d) exceeds maximum (%d)", timeout, MaxOverallTimeoutSec))
}
}
if viper.IsSet("resourceLimits.maxConcurrentReads") {
maxReads := viper.GetInt("resourceLimits.maxConcurrentReads")
if maxReads < MinMaxConcurrentReads {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxConcurrentReads (%d) must be at least %d", maxReads, MinMaxConcurrentReads))
}
if maxReads > MaxMaxConcurrentReads {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.maxConcurrentReads (%d) exceeds maximum (%d)", maxReads, MaxMaxConcurrentReads))
}
}
if viper.IsSet("resourceLimits.rateLimitFilesPerSec") {
rateLimit := viper.GetInt("resourceLimits.rateLimitFilesPerSec")
if rateLimit < MinRateLimitFilesPerSec {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.rateLimitFilesPerSec (%d) must be at least %d", rateLimit, MinRateLimitFilesPerSec))
}
if rateLimit > MaxRateLimitFilesPerSec {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.rateLimitFilesPerSec (%d) exceeds maximum (%d)", rateLimit, MaxRateLimitFilesPerSec))
}
}
if viper.IsSet("resourceLimits.hardMemoryLimitMB") {
memLimit := viper.GetInt("resourceLimits.hardMemoryLimitMB")
if memLimit < MinHardMemoryLimitMB {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.hardMemoryLimitMB (%d) must be at least %d", memLimit, MinHardMemoryLimitMB))
}
if memLimit > MaxHardMemoryLimitMB {
validationErrors = append(validationErrors, fmt.Sprintf("resourceLimits.hardMemoryLimitMB (%d) exceeds maximum (%d)", memLimit, MaxHardMemoryLimitMB))
}
}
if len(validationErrors) > 0 {
return utils.NewStructuredError(
utils.ErrorTypeConfiguration,
utils.CodeConfigValidation,
"configuration validation failed: "+strings.Join(validationErrors, "; "),
"",
map[string]interface{}{"validation_errors": validationErrors},
)
}
return nil
}
// ValidateFileSize checks if a file size is within the configured limit.
// It returns nil when size is at or below the limit, and a structured
// validation error otherwise.
func ValidateFileSize(size int64) error {
	limit := GetFileSizeLimit()
	if size <= limit {
		return nil
	}
	return utils.NewStructuredError(
		utils.ErrorTypeValidation,
		utils.CodeValidationSize,
		fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", size, limit),
		"",
		map[string]interface{}{"file_size": size, "size_limit": limit},
	)
}
// ValidateOutputFormat checks if an output format is valid.
// It returns nil for json, yaml and markdown, and a structured
// validation error for anything else.
func ValidateOutputFormat(format string) error {
	if IsValidFormat(format) {
		return nil
	}
	return utils.NewStructuredError(
		utils.ErrorTypeValidation,
		utils.CodeValidationFormat,
		fmt.Sprintf("unsupported output format: %s (supported: json, yaml, markdown)", format),
		"",
		map[string]interface{}{"format": format},
	)
}
// ValidateConcurrency checks if a concurrency level is valid.
// A value below 1 is always rejected; when maxConcurrency is configured,
// values above it are rejected as well.
func ValidateConcurrency(concurrency int) error {
	if concurrency < 1 {
		return utils.NewStructuredError(
			utils.ErrorTypeValidation,
			utils.CodeValidationFormat,
			fmt.Sprintf("concurrency (%d) must be at least 1", concurrency),
			"",
			map[string]interface{}{"concurrency": concurrency},
		)
	}
	if !viper.IsSet("maxConcurrency") {
		return nil
	}
	maxConcurrency := GetMaxConcurrency()
	if concurrency <= maxConcurrency {
		return nil
	}
	return utils.NewStructuredError(
		utils.ErrorTypeValidation,
		utils.CodeValidationFormat,
		fmt.Sprintf("concurrency (%d) exceeds maximum (%d)", concurrency, maxConcurrency),
		"",
		map[string]interface{}{"concurrency": concurrency, "max_concurrency": maxConcurrency},
	)
}

245
config/validation_test.go Normal file
View File

@@ -0,0 +1,245 @@
package config_test
import (
	"errors"
	"strings"
	"testing"

	"github.com/spf13/viper"

	"github.com/ivuorinen/gibidify/config"
	"github.com/ivuorinen/gibidify/utils"
)
// TestValidateConfig tests the configuration validation functionality.
// Each case seeds viper with a partial configuration, fills the rest
// via config.LoadConfig, and checks that ValidateConfig accepts or
// rejects it with the expected structured error.
func TestValidateConfig(t *testing.T) {
tests := []struct {
name string
// config holds the viper keys to set before validation.
config map[string]interface{}
wantErr bool
// errContains, when non-empty, must appear in the error message.
errContains string
}{
{
name: "valid default config",
config: map[string]interface{}{
"fileSizeLimit": config.DefaultFileSizeLimit,
"ignoreDirectories": []string{"node_modules", ".git"},
},
wantErr: false,
},
{
name: "file size limit too small",
config: map[string]interface{}{
"fileSizeLimit": config.MinFileSizeLimit - 1,
},
wantErr: true,
errContains: "fileSizeLimit",
},
{
name: "file size limit too large",
config: map[string]interface{}{
"fileSizeLimit": config.MaxFileSizeLimit + 1,
},
wantErr: true,
errContains: "fileSizeLimit",
},
{
name: "empty ignore directory",
config: map[string]interface{}{
"ignoreDirectories": []string{"node_modules", "", ".git"},
},
wantErr: true,
errContains: "ignoreDirectories",
},
{
name: "ignore directory with path separator",
config: map[string]interface{}{
"ignoreDirectories": []string{"node_modules", "src/build", ".git"},
},
wantErr: true,
errContains: "path separator",
},
{
name: "invalid supported format",
config: map[string]interface{}{
"supportedFormats": []string{"json", "xml", "yaml"},
},
wantErr: true,
errContains: "not a valid format",
},
{
name: "invalid max concurrency",
config: map[string]interface{}{
"maxConcurrency": 0,
},
wantErr: true,
errContains: "maxConcurrency",
},
{
name: "valid comprehensive config",
config: map[string]interface{}{
"fileSizeLimit": config.DefaultFileSizeLimit,
"ignoreDirectories": []string{"node_modules", ".git", ".vscode"},
"supportedFormats": []string{"json", "yaml", "markdown"},
"maxConcurrency": 8,
"filePatterns": []string{"*.go", "*.js", "*.py"},
},
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Reset viper for each test
viper.Reset()
// Set test configuration
for key, value := range tt.config {
viper.Set(key, value)
}
// Load defaults for missing values
// NOTE(review): LoadConfig also searches the default config paths
// (XDG/home/cwd); presumably no config file is present there when
// the suite runs — verify, or tests may pick up a stray file.
config.LoadConfig()
err := config.ValidateConfig()
if tt.wantErr {
if err == nil {
t.Errorf("Expected error but got none")
return
}
if tt.errContains != "" && !strings.Contains(err.Error(), tt.errContains) {
t.Errorf("Expected error to contain %q, got %q", tt.errContains, err.Error())
}
// Check that it's a structured error
var structErr *utils.StructuredError
if !errorAs(err, &structErr) {
t.Errorf("Expected structured error, got %T", err)
return
}
if structErr.Type != utils.ErrorTypeConfiguration {
t.Errorf("Expected error type %v, got %v", utils.ErrorTypeConfiguration, structErr.Type)
}
if structErr.Code != utils.CodeConfigValidation {
t.Errorf("Expected error code %v, got %v", utils.CodeConfigValidation, structErr.Code)
}
} else {
if err != nil {
t.Errorf("Expected no error but got: %v", err)
}
}
})
}
}
// TestValidationFunctions tests individual validation functions
// (IsValidFormat, ValidateFileSize, ValidateOutputFormat and
// ValidateConcurrency), each in its own subtest.
func TestValidationFunctions(t *testing.T) {
// IsValidFormat: case-insensitive, whitespace-trimmed format check.
t.Run("IsValidFormat", func(t *testing.T) {
tests := []struct {
format string
valid bool
}{
{"json", true},
{"yaml", true},
{"markdown", true},
{"JSON", true},
{"xml", false},
{"txt", false},
{"", false},
{" json ", true},
}
for _, tt := range tests {
result := config.IsValidFormat(tt.format)
if result != tt.valid {
t.Errorf("IsValidFormat(%q) = %v, want %v", tt.format, result, tt.valid)
}
}
})
// ValidateFileSize: boundary behavior around the configured limit
// (at-limit is accepted, one past it is rejected).
t.Run("ValidateFileSize", func(t *testing.T) {
viper.Reset()
viper.Set("fileSizeLimit", config.DefaultFileSizeLimit)
tests := []struct {
name string
size int64
wantErr bool
}{
{"size within limit", config.DefaultFileSizeLimit - 1, false},
{"size at limit", config.DefaultFileSizeLimit, false},
{"size exceeds limit", config.DefaultFileSizeLimit + 1, true},
{"zero size", 0, false},
}
for _, tt := range tests {
err := config.ValidateFileSize(tt.size)
if (err != nil) != tt.wantErr {
t.Errorf("%s: ValidateFileSize(%d) error = %v, wantErr %v", tt.name, tt.size, err, tt.wantErr)
}
}
})
// ValidateOutputFormat: error for anything but json/yaml/markdown.
t.Run("ValidateOutputFormat", func(t *testing.T) {
tests := []struct {
format string
wantErr bool
}{
{"json", false},
{"yaml", false},
{"markdown", false},
{"xml", true},
{"txt", true},
{"", true},
}
for _, tt := range tests {
err := config.ValidateOutputFormat(tt.format)
if (err != nil) != tt.wantErr {
t.Errorf("ValidateOutputFormat(%q) error = %v, wantErr %v", tt.format, err, tt.wantErr)
}
}
})
// ValidateConcurrency: lower bound always enforced; upper bound only
// when maxConcurrency is explicitly set (setMax).
t.Run("ValidateConcurrency", func(t *testing.T) {
tests := []struct {
name string
concurrency int
maxConcurrency int
setMax bool
wantErr bool
}{
{"valid concurrency", 4, 0, false, false},
{"minimum concurrency", 1, 0, false, false},
{"zero concurrency", 0, 0, false, true},
{"negative concurrency", -1, 0, false, true},
{"concurrency within max", 4, 8, true, false},
{"concurrency exceeds max", 16, 8, true, true},
}
for _, tt := range tests {
viper.Reset()
if tt.setMax {
viper.Set("maxConcurrency", tt.maxConcurrency)
}
err := config.ValidateConcurrency(tt.concurrency)
if (err != nil) != tt.wantErr {
t.Errorf("%s: ValidateConcurrency(%d) error = %v, wantErr %v", tt.name, tt.concurrency, err, tt.wantErr)
}
}
})
}
// errorAs extracts a *utils.StructuredError from err into target, which
// must be a **utils.StructuredError; it reports whether the extraction
// succeeded.
//
// Fix over the previous version: the old direct type assertion only
// matched a StructuredError at the top level, so errors wrapped with
// fmt.Errorf("...: %w", err) were missed. Delegating to errors.As walks
// the wrap chain.
func errorAs(err error, target interface{}) bool {
	if err == nil {
		return false
	}
	ptr, ok := target.(**utils.StructuredError)
	if !ok {
		return false
	}
	return errors.As(err, ptr)
}

196
fileproc/backpressure.go Normal file
View File

@@ -0,0 +1,196 @@
// Package fileproc provides back-pressure management for memory optimization.
package fileproc
import (
"context"
"runtime"
"sync"
"sync/atomic"
"time"
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/config"
)
// BackpressureManager manages memory usage and applies back-pressure when needed.
//
// Configuration limits are read once in NewBackpressureManager and not
// changed afterwards. filesProcessed is updated with sync/atomic; mu
// guards memoryWarningLogged and lastMemoryCheck.
type BackpressureManager struct {
// enabled comes from config "backpressure.enabled"; when false,
// ShouldApplyBackpressure/ApplyBackpressure are no-ops and
// CreateChannels returns unbuffered channels.
enabled bool
// maxMemoryUsage is the heap-allocation threshold in bytes compared
// against runtime.MemStats.Alloc.
maxMemoryUsage int64
// memoryCheckInterval is how many processed files pass between
// memory samples in ShouldApplyBackpressure.
memoryCheckInterval int
// maxPendingFiles and maxPendingWrites size the buffered channels
// created by CreateChannels.
maxPendingFiles int
maxPendingWrites int
// filesProcessed counts ShouldApplyBackpressure calls (atomic access).
filesProcessed int64
mu sync.RWMutex
// memoryWarningLogged deduplicates the over-limit warning until
// usage drops back under 80% of the limit.
memoryWarningLogged bool
lastMemoryCheck time.Time
}
// NewBackpressureManager creates a new back-pressure manager, snapshotting
// the relevant settings from the config package.
func NewBackpressureManager() *BackpressureManager {
	mgr := &BackpressureManager{}
	mgr.enabled = config.GetBackpressureEnabled()
	mgr.maxMemoryUsage = config.GetMaxMemoryUsage()
	mgr.memoryCheckInterval = config.GetMemoryCheckInterval()
	mgr.maxPendingFiles = config.GetMaxPendingFiles()
	mgr.maxPendingWrites = config.GetMaxPendingWrites()
	mgr.lastMemoryCheck = time.Now()
	return mgr
}
// CreateChannels creates properly sized channels based on back-pressure
// configuration: buffered with the configured limits when enabled,
// unbuffered (the default behavior) otherwise.
func (bp *BackpressureManager) CreateChannels() (chan string, chan WriteRequest) {
	if !bp.enabled {
		logrus.Debug("Created unbuffered channels (back-pressure disabled)")
		return make(chan string), make(chan WriteRequest)
	}
	logrus.Debugf("Created buffered channels: files=%d, writes=%d", bp.maxPendingFiles, bp.maxPendingWrites)
	return make(chan string, bp.maxPendingFiles), make(chan WriteRequest, bp.maxPendingWrites)
}
// ShouldApplyBackpressure checks if back-pressure should be applied.
//
// Every call counts one processed file; memory is only sampled every
// memoryCheckInterval files, so most calls return false without reading
// runtime.MemStats. ctx is currently unused but kept for signature
// consistency with ApplyBackpressure.
//
// Fixes over the previous version: a zero/negative memoryCheckInterval
// no longer panics (modulo by zero), and the counter is compared in
// int64 so it cannot be truncated by an int conversion on 32-bit
// platforms.
func (bp *BackpressureManager) ShouldApplyBackpressure(ctx context.Context) bool {
	if !bp.enabled {
		return false
	}
	filesProcessed := atomic.AddInt64(&bp.filesProcessed, 1)
	// Guard against a misconfigured interval; treat it as "never sample".
	if bp.memoryCheckInterval <= 0 {
		return false
	}
	if filesProcessed%int64(bp.memoryCheckInterval) != 0 {
		return false
	}
	// Get current memory usage.
	var m runtime.MemStats
	runtime.ReadMemStats(&m)
	currentMemory := int64(m.Alloc)
	bp.mu.Lock()
	defer bp.mu.Unlock()
	bp.lastMemoryCheck = time.Now()
	// Check if we're over the memory limit.
	if currentMemory > bp.maxMemoryUsage {
		if !bp.memoryWarningLogged {
			logrus.Warnf("Memory usage (%d bytes) exceeds limit (%d bytes), applying back-pressure",
				currentMemory, bp.maxMemoryUsage)
			bp.memoryWarningLogged = true
		}
		return true
	}
	// Reset warning flag once we're back under 80% of the limit so a
	// later spike is logged again.
	if bp.memoryWarningLogged && currentMemory < bp.maxMemoryUsage*8/10 {
		logrus.Infof("Memory usage normalized (%d bytes), removing back-pressure", currentMemory)
		bp.memoryWarningLogged = false
	}
	return false
}
// ApplyBackpressure applies back-pressure by triggering garbage
// collection and pausing briefly so freed memory can be returned.
// It returns immediately when back-pressure is disabled or ctx is done.
func (bp *BackpressureManager) ApplyBackpressure(ctx context.Context) {
	if !bp.enabled {
		return
	}
	// Force garbage collection to free up memory.
	runtime.GC()
	// Brief pause so the collector has a chance to complete.
	select {
	case <-time.After(10 * time.Millisecond):
	case <-ctx.Done():
		return
	}
	// Log memory usage after GC.
	var stats runtime.MemStats
	runtime.ReadMemStats(&stats)
	logrus.Debugf("Applied back-pressure: memory after GC = %d bytes", stats.Alloc)
}
// GetStats returns current back-pressure statistics.
// The memory sample is taken before acquiring the read lock; the counter is
// read atomically and the remaining fields under the read lock.
func (bp *BackpressureManager) GetStats() BackpressureStats {
	var mem runtime.MemStats
	runtime.ReadMemStats(&mem)

	bp.mu.RLock()
	defer bp.mu.RUnlock()
	stats := BackpressureStats{
		Enabled:             bp.enabled,
		FilesProcessed:      atomic.LoadInt64(&bp.filesProcessed),
		CurrentMemoryUsage:  int64(mem.Alloc),
		MaxMemoryUsage:      bp.maxMemoryUsage,
		MemoryWarningActive: bp.memoryWarningLogged,
		LastMemoryCheck:     bp.lastMemoryCheck,
		MaxPendingFiles:     bp.maxPendingFiles,
		MaxPendingWrites:    bp.maxPendingWrites,
	}
	return stats
}
// BackpressureStats represents back-pressure manager statistics.
// It is a read-only snapshot produced by GetStats, JSON-serializable for
// reporting.
type BackpressureStats struct {
	Enabled             bool      `json:"enabled"`               // whether back-pressure is active at all
	FilesProcessed      int64     `json:"files_processed"`       // total files counted so far
	CurrentMemoryUsage  int64     `json:"current_memory_usage"`  // heap allocation (bytes) at snapshot time
	MaxMemoryUsage      int64     `json:"max_memory_usage"`      // configured memory limit in bytes
	MemoryWarningActive bool      `json:"memory_warning_active"` // true while usage exceeds the limit
	LastMemoryCheck     time.Time `json:"last_memory_check"`     // time of the most recent memory sample
	MaxPendingFiles     int       `json:"max_pending_files"`     // file channel buffer capacity
	MaxPendingWrites    int       `json:"max_pending_writes"`    // write channel buffer capacity
}
// WaitForChannelSpace waits briefly when either channel is above 90% of its
// configured capacity, giving consumers a chance to drain it. It is a no-op
// when back-pressure is disabled.
func (bp *BackpressureManager) WaitForChannelSpace(ctx context.Context, fileCh chan string, writeCh chan WriteRequest) {
	if !bp.enabled {
		return
	}
	bp.waitIfNearCapacity(ctx, len(fileCh), bp.maxPendingFiles, "File")
	bp.waitIfNearCapacity(ctx, len(writeCh), bp.maxPendingWrites, "Write")
}

// waitIfNearCapacity sleeps up to 5ms when used exceeds 90% of capacity,
// returning immediately if ctx is cancelled. A non-positive capacity is
// treated as "never full"; this also avoids the division by zero the
// previous inline version could hit in the fill-percentage log when a
// buffered channel was paired with a zero configured limit.
func (bp *BackpressureManager) waitIfNearCapacity(ctx context.Context, used, capacity int, label string) {
	if capacity <= 0 || used <= capacity*9/10 {
		return
	}
	logrus.Debugf("%s channel is %d%% full, waiting for space", label, used*100/capacity)
	// Wait a bit for the channel to drain.
	select {
	case <-ctx.Done():
	case <-time.After(5 * time.Millisecond):
	}
}
// LogBackpressureInfo logs back-pressure configuration and status.
func (bp *BackpressureManager) LogBackpressureInfo() {
	if !bp.enabled {
		logrus.Info("Back-pressure disabled")
		return
	}
	logrus.Infof("Back-pressure enabled: maxMemory=%dMB, fileBuffer=%d, writeBuffer=%d, checkInterval=%d",
		bp.maxMemoryUsage/1024/1024, bp.maxPendingFiles, bp.maxPendingWrites, bp.memoryCheckInterval)
}

127
fileproc/cache.go Normal file
View File

@@ -0,0 +1,127 @@
package fileproc
// getNormalizedExtension efficiently extracts and normalizes the file
// extension, memoizing the result per filename.
//
// Fix: the cache is re-checked after taking the write lock, so two
// goroutines that miss concurrently no longer both insert (and possibly both
// trigger an eviction) for the same filename.
func (r *FileTypeRegistry) getNormalizedExtension(filename string) string {
	// Fast path: cached (read lock only).
	r.cacheMutex.RLock()
	if ext, exists := r.extCache[filename]; exists {
		r.cacheMutex.RUnlock()
		return ext
	}
	r.cacheMutex.RUnlock()

	// Compute the normalized extension outside the lock.
	ext := normalizeExtension(filename)

	r.cacheMutex.Lock()
	defer r.cacheMutex.Unlock()
	// Double-check: another goroutine may have filled the entry in between.
	if cached, exists := r.extCache[filename]; exists {
		return cached
	}
	// Check cache size and clean if needed. The 2x factor is presumably
	// because this cache is keyed by full filename, not extension —
	// TODO confirm intent.
	if len(r.extCache) >= r.maxCacheSize*2 {
		r.clearExtCache()
		r.stats.CacheEvictions++
	}
	r.extCache[filename] = ext
	return ext
}
// getFileTypeResult gets cached file type detection result or computes it.
//
// Lookup order: normalize the extension (separately cached), consult the
// per-extension result cache, and on a miss build the result from the
// image/binary/language tables. updateStats acquires the cache mutex itself,
// so it is always called here while no cache lock is held.
func (r *FileTypeRegistry) getFileTypeResult(filename string) FileTypeResult {
	ext := r.getNormalizedExtension(filename)

	// Every call counts as a lookup, hit or miss.
	r.updateStats(func() {
		r.stats.TotalLookups++
	})

	// Try cache first (read lock).
	r.cacheMutex.RLock()
	if result, exists := r.resultCache[ext]; exists {
		r.cacheMutex.RUnlock()
		r.updateStats(func() {
			r.stats.CacheHits++
		})
		return result
	}
	r.cacheMutex.RUnlock()

	// Cache miss.
	r.updateStats(func() {
		r.stats.CacheMisses++
	})

	// Compute result from the registry tables.
	// NOTE(review): imageExts/binaryExts/languageMap are read here without a
	// lock; this looks safe only if the tables are not mutated concurrently —
	// confirm against the Add*/Disable* call sites.
	result := FileTypeResult{
		Extension: ext,
		IsImage:   r.imageExts[ext],
		IsBinary:  r.binaryExts[ext],
		Language:  r.languageMap[ext],
	}

	// Handle special cases for binary detection (like .DS_Store).
	if !result.IsBinary && isSpecialFile(filename, r.binaryExts) {
		result.IsBinary = true
	}

	// Cache the result (write lock); evict part of the cache at capacity.
	r.cacheMutex.Lock()
	if len(r.resultCache) >= r.maxCacheSize {
		r.clearResultCache()
		r.stats.CacheEvictions++
	}
	r.resultCache[ext] = result
	r.cacheMutex.Unlock()

	return result
}
// clearExtCache evicts part of the extension cache by delegating to
// clearCache. Despite the original "LRU-like" description, Go map iteration
// order is random, so an arbitrary half of the entries is retained.
// Caller must hold cacheMutex.
func (r *FileTypeRegistry) clearExtCache() {
	r.clearCache(&r.extCache, r.maxCacheSize)
}
// clearResultCache shrinks the result cache, retaining at most
// maxCacheSize/2 entries — an arbitrary subset, since map iteration order is
// random. Caller must hold cacheMutex.
func (r *FileTypeRegistry) clearResultCache() {
	kept := make(map[string]FileTypeResult, r.maxCacheSize)
	limit := r.maxCacheSize / 2
	for key, val := range r.resultCache {
		if len(kept) >= limit {
			break
		}
		kept[key] = val
	}
	r.resultCache = kept
}
// clearCache replaces *cache with a new map containing at most maxSize/2 of
// the old entries (an arbitrary subset, as map iteration order is random).
// Caller must hold cacheMutex.
func (r *FileTypeRegistry) clearCache(cache *map[string]string, maxSize int) {
	kept := make(map[string]string, maxSize)
	limit := maxSize / 2
	for key, val := range *cache {
		if len(kept) >= limit {
			break
		}
		kept[key] = val
	}
	*cache = kept
}
// invalidateCache drops both caches entirely. It is called after the
// registry's extension tables change so stale detection results are not
// served; the eviction counter records the flush.
func (r *FileTypeRegistry) invalidateCache() {
	r.cacheMutex.Lock()
	r.extCache = make(map[string]string, r.maxCacheSize)
	r.resultCache = make(map[string]FileTypeResult, r.maxCacheSize)
	r.stats.CacheEvictions++
	r.cacheMutex.Unlock()
}
// updateStats runs fn while holding the cache mutex so statistics updates
// are race-free. fn must not touch cacheMutex itself.
func (r *FileTypeRegistry) updateStats(fn func()) {
	r.cacheMutex.Lock()
	defer r.cacheMutex.Unlock()
	fn()
}

View File

@@ -4,6 +4,6 @@ package fileproc
// CollectFiles scans the given root directory using the default walker (ProdWalker)
// and returns a slice of file paths.
func CollectFiles(root string) ([]string, error) {
var w Walker = ProdWalker{}
w := NewProdWalker()
return w.Walk(root)
}

View File

@@ -1,8 +1,10 @@
package fileproc
package fileproc_test
import (
"os"
"testing"
"github.com/ivuorinen/gibidify/fileproc"
)
func TestCollectFilesWithFakeWalker(t *testing.T) {
@@ -11,7 +13,7 @@ func TestCollectFilesWithFakeWalker(t *testing.T) {
"/path/to/file1.txt",
"/path/to/file2.go",
}
fake := FakeWalker{
fake := fileproc.FakeWalker{
Files: expectedFiles,
Err: nil,
}
@@ -35,7 +37,7 @@ func TestCollectFilesWithFakeWalker(t *testing.T) {
func TestCollectFilesError(t *testing.T) {
// Fake walker returns an error.
fake := FakeWalker{
fake := fileproc.FakeWalker{
Files: nil,
Err: os.ErrNotExist,
}

40
fileproc/config.go Normal file
View File

@@ -0,0 +1,40 @@
package fileproc
import "strings"
// ApplyCustomExtensions applies custom extensions from configuration.
// Keys are lowercased; empty extensions or language names are ignored.
func (r *FileTypeRegistry) ApplyCustomExtensions(customImages, customBinary []string, customLanguages map[string]string) {
	// Register custom image and binary extensions.
	r.addExtensions(customImages, r.AddImageExtension)
	r.addExtensions(customBinary, r.AddBinaryExtension)

	// Register custom language mappings, skipping blank entries.
	for ext, lang := range customLanguages {
		if ext == "" || lang == "" {
			continue
		}
		r.AddLanguageMapping(strings.ToLower(ext), lang)
	}
}
// addExtensions feeds every non-empty extension, lowercased, to adder.
func (r *FileTypeRegistry) addExtensions(extensions []string, adder func(string)) {
	for _, ext := range extensions {
		if ext == "" {
			continue
		}
		adder(strings.ToLower(ext))
	}
}
// ConfigureFromSettings applies configuration settings to the registry.
// This function is called from main.go after config is loaded to avoid
// circular imports. Custom additions are applied before disables, so a
// disabled extension wins if it appears in both lists.
func ConfigureFromSettings(
	customImages, customBinary []string,
	customLanguages map[string]string,
	disabledImages, disabledBinary, disabledLanguages []string,
) {
	registry := GetDefaultRegistry()
	registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)
	registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages)
}

99
fileproc/detection.go Normal file
View File

@@ -0,0 +1,99 @@
package fileproc
import "strings"
// Package-level detection functions delegating to the process-wide registry.

// IsImage checks if the file extension indicates an image file.
func IsImage(filename string) bool {
	return getRegistry().IsImage(filename)
}

// IsBinary checks if the file extension indicates a binary file.
func IsBinary(filename string) bool {
	return getRegistry().IsBinary(filename)
}

// GetLanguage returns the language identifier for the given filename based on its extension.
// An empty string is returned for unknown or missing extensions.
func GetLanguage(filename string) string {
	return getRegistry().GetLanguage(filename)
}
// Registry methods for detection.

// IsImage checks if the file extension indicates an image file.
func (r *FileTypeRegistry) IsImage(filename string) bool {
	result := r.getFileTypeResult(filename)
	return result.IsImage
}

// IsBinary checks if the file extension indicates a binary file.
func (r *FileTypeRegistry) IsBinary(filename string) bool {
	result := r.getFileTypeResult(filename)
	return result.IsBinary
}

// GetLanguage returns the language identifier for the given filename based on its extension.
// Filenames shorter than minExtensionLength cannot carry a usable extension
// and short-circuit to "" without touching the cache.
func (r *FileTypeRegistry) GetLanguage(filename string) string {
	if len(filename) < minExtensionLength {
		return ""
	}
	result := r.getFileTypeResult(filename)
	return result.Language
}
// Extension management methods.

// AddImageExtension adds a new image extension to the registry.
// The extension is lowercased and the lookup caches are invalidated.
func (r *FileTypeRegistry) AddImageExtension(ext string) {
	r.addExtension(ext, r.imageExts)
}

// AddBinaryExtension adds a new binary extension to the registry.
func (r *FileTypeRegistry) AddBinaryExtension(ext string) {
	r.addExtension(ext, r.binaryExts)
}

// AddLanguageMapping adds a new language mapping to the registry.
func (r *FileTypeRegistry) AddLanguageMapping(ext, language string) {
	r.languageMap[strings.ToLower(ext)] = language
	r.invalidateCache()
}

// addExtension is a helper to add extensions to a map.
func (r *FileTypeRegistry) addExtension(ext string, target map[string]bool) {
	target[strings.ToLower(ext)] = true
	r.invalidateCache()
}

// removeExtension is a helper to remove extensions from a map.
// Note: unlike addExtension it does not invalidate the caches itself;
// DisableExtensions invalidates once after the whole batch of removals.
func (r *FileTypeRegistry) removeExtension(ext string, target map[string]bool) {
	delete(target, strings.ToLower(ext))
}
// DisableExtensions removes specified extensions from the registry.
// Empty entries are ignored; the lookup caches are invalidated once, after
// all removals.
func (r *FileTypeRegistry) DisableExtensions(disabledImages, disabledBinary, disabledLanguages []string) {
	for _, ext := range disabledImages {
		if ext == "" {
			continue
		}
		r.removeExtension(ext, r.imageExts)
	}
	for _, ext := range disabledBinary {
		if ext == "" {
			continue
		}
		r.removeExtension(ext, r.binaryExts)
	}
	for _, ext := range disabledLanguages {
		if ext == "" {
			continue
		}
		delete(r.languageMap, strings.ToLower(ext))
	}
	// Invalidate cache after all modifications.
	r.invalidateCache()
}

161
fileproc/extensions.go Normal file
View File

@@ -0,0 +1,161 @@
package fileproc
// getImageExtensions returns the default image file extensions
// (lowercase, including the leading dot).
func getImageExtensions() map[string]bool {
	defaults := []string{
		".png", ".jpg", ".jpeg", ".gif", ".bmp",
		".tiff", ".tif", ".svg", ".webp", ".ico",
	}
	exts := make(map[string]bool, len(defaults))
	for _, ext := range defaults {
		exts[ext] = true
	}
	return exts
}
// getBinaryExtensions returns the default binary file extensions
// (lowercase, including the leading dot), grouped by category.
func getBinaryExtensions() map[string]bool {
	groups := [][]string{
		// Executables and libraries
		{".exe", ".dll", ".so", ".dylib", ".bin", ".o", ".a", ".lib"},
		// Compiled bytecode
		{".jar", ".class", ".pyc", ".pyo"},
		// Data files
		{".dat", ".db", ".sqlite", ".ds_store"},
		// Documents
		{".pdf"},
		// Archives
		{".zip", ".tar", ".gz", ".bz2", ".xz", ".7z", ".rar"},
		// Fonts
		{".ttf", ".otf", ".woff", ".woff2"},
		// Media files
		{".mp3", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm", ".ogg", ".wav", ".flac"},
	}
	exts := make(map[string]bool, 38)
	for _, group := range groups {
		for _, ext := range group {
			exts[ext] = true
		}
	}
	return exts
}
// getLanguageMap returns the default mappings from file extension
// (lowercase, including the leading dot) to language identifier.
func getLanguageMap() map[string]string {
	langs := make(map[string]string, 64)
	add := func(language string, exts ...string) {
		for _, ext := range exts {
			langs[ext] = language
		}
	}

	// Systems programming
	add("go", ".go")
	add("c", ".c", ".h")
	add("cpp", ".cpp", ".hpp")
	add("rust", ".rs")
	// Scripting languages
	add("python", ".py")
	add("ruby", ".rb")
	add("perl", ".pl")
	add("lua", ".lua")
	add("php", ".php")
	// Web technologies
	add("javascript", ".js", ".jsx")
	add("typescript", ".ts", ".tsx")
	add("html", ".html", ".htm")
	add("css", ".css")
	add("scss", ".scss")
	add("sass", ".sass")
	add("less", ".less")
	add("vue", ".vue")
	// JVM languages
	add("java", ".java")
	add("scala", ".scala")
	add("kotlin", ".kt")
	add("clojure", ".clj")
	// .NET languages
	add("csharp", ".cs")
	add("vbnet", ".vb")
	add("fsharp", ".fs")
	// Apple platforms
	add("swift", ".swift")
	add("objc", ".m")
	add("objcpp", ".mm")
	// Shell scripts
	add("bash", ".sh", ".bash")
	add("zsh", ".zsh")
	add("fish", ".fish")
	add("powershell", ".ps1")
	add("batch", ".bat", ".cmd")
	// Data formats
	add("json", ".json")
	add("yaml", ".yaml", ".yml")
	add("toml", ".toml")
	add("xml", ".xml")
	add("sql", ".sql")
	// Documentation
	add("markdown", ".md")
	add("rst", ".rst")
	add("latex", ".tex")
	// Functional languages
	add("haskell", ".hs")
	add("ocaml", ".ml", ".mli")
	add("elm", ".elm")
	add("elixir", ".ex", ".exs")
	add("erlang", ".erl", ".hrl")
	// Other languages
	add("r", ".r")
	add("dart", ".dart")
	add("nim", ".nim", ".nims")

	return langs
}

View File

@@ -3,12 +3,12 @@ package fileproc
// FakeWalker implements Walker for testing purposes.
type FakeWalker struct {
Files []string
Err error
Files []string
}
// Walk returns predetermined file paths or an error, depending on FakeWalker's configuration.
func (fw FakeWalker) Walk(root string) ([]string, error) {
func (fw FakeWalker) Walk(_ string) ([]string, error) {
if fw.Err != nil {
return nil, fw.Err
}

55
fileproc/file_filters.go Normal file
View File

@@ -0,0 +1,55 @@
package fileproc
import (
"os"
"github.com/ivuorinen/gibidify/config"
)
// FileFilter defines filtering criteria for files and directories.
type FileFilter struct {
	ignoredDirs []string // directory names that are skipped entirely
	sizeLimit   int64    // maximum file size in bytes; larger files are skipped
}

// NewFileFilter creates a new file filter with current configuration.
// The values are snapshotted from the config package at construction time;
// later config changes do not affect an existing filter.
func NewFileFilter() *FileFilter {
	return &FileFilter{
		ignoredDirs: config.GetIgnoredDirectories(),
		sizeLimit:   config.GetFileSizeLimit(),
	}
}
// shouldSkipEntry determines if an entry should be skipped based on ignore rules and filters.
// Directories are judged only by name; files by size/type and then by the
// ignore rules.
func (f *FileFilter) shouldSkipEntry(entry os.DirEntry, fullPath string, rules []ignoreRule) bool {
	switch {
	case entry.IsDir():
		return f.shouldSkipDirectory(entry)
	case f.shouldSkipFile(entry, fullPath):
		return true
	default:
		return matchesIgnoreRules(fullPath, rules)
	}
}
// shouldSkipDirectory reports whether the directory's name appears in the
// configured ignored-directories list.
func (f *FileFilter) shouldSkipDirectory(entry os.DirEntry) bool {
	name := entry.Name()
	for _, ignored := range f.ignoredDirs {
		if name == ignored {
			return true
		}
	}
	return false
}
// shouldSkipFile checks if a file should be skipped based on size limit and file type.
//
// NOTE(review): if entry.Info() fails, the size check is silently bypassed
// and the file may still be processed — confirm that best-effort fallback is
// intended.
func (f *FileFilter) shouldSkipFile(entry os.DirEntry, fullPath string) bool {
	// Check if file exceeds the configured size limit.
	if info, err := entry.Info(); err == nil && info.Size() > f.sizeLimit {
		return true
	}
	// Apply the default filter to ignore binary and image files.
	return IsBinary(fullPath) || IsImage(fullPath)
}

View File

@@ -0,0 +1,105 @@
package fileproc
import (
"fmt"
"sync"
"testing"
)
// TestFileTypeRegistry_ThreadSafety tests thread safety of the FileTypeRegistry.
// It exercises concurrent reads against the shared registry, concurrent
// singleton initialization, and concurrent modification of independent
// registry instances. Run with -race for meaningful coverage.
func TestFileTypeRegistry_ThreadSafety(t *testing.T) {
	const numGoroutines = 100
	const numOperationsPerGoroutine = 100

	var wg sync.WaitGroup

	// Test concurrent read operations
	t.Run("ConcurrentReads", func(t *testing.T) {
		for i := 0; i < numGoroutines; i++ {
			wg.Add(1)
			go func(id int) {
				defer wg.Done()
				registry := GetDefaultRegistry()
				for j := 0; j < numOperationsPerGoroutine; j++ {
					// Test various file detection operations
					_ = registry.IsImage("test.png")
					_ = registry.IsBinary("test.exe")
					_ = registry.GetLanguage("test.go")
					// Test global functions too
					_ = IsImage("image.jpg")
					_ = IsBinary("binary.dll")
					_ = GetLanguage("script.py")
				}
			}(i)
		}
		wg.Wait()
	})

	// Test concurrent registry access (singleton creation)
	t.Run("ConcurrentRegistryAccess", func(t *testing.T) {
		// Reset the registry to test concurrent initialization
		// Note: This is not safe in a real application, but needed for testing
		registryOnce = sync.Once{}
		registry = nil

		registries := make([]*FileTypeRegistry, numGoroutines)
		for i := 0; i < numGoroutines; i++ {
			wg.Add(1)
			go func(id int) {
				defer wg.Done()
				registries[id] = GetDefaultRegistry()
			}(i)
		}
		wg.Wait()

		// Verify all goroutines got the same registry instance
		firstRegistry := registries[0]
		for i := 1; i < numGoroutines; i++ {
			if registries[i] != firstRegistry {
				t.Errorf("Registry %d is different from registry 0", i)
			}
		}
	})

	// Test concurrent modifications on separate registry instances
	t.Run("ConcurrentModifications", func(t *testing.T) {
		// Create separate registry instances for each goroutine to test modification thread safety
		for i := 0; i < numGoroutines; i++ {
			wg.Add(1)
			go func(id int) {
				defer wg.Done()
				// Create a new registry instance for this goroutine
				registry := &FileTypeRegistry{
					imageExts:   make(map[string]bool),
					binaryExts:  make(map[string]bool),
					languageMap: make(map[string]string),
				}
				for j := 0; j < numOperationsPerGoroutine; j++ {
					// Add unique extensions for this goroutine
					extSuffix := fmt.Sprintf("_%d_%d", id, j)
					registry.AddImageExtension(".img" + extSuffix)
					registry.AddBinaryExtension(".bin" + extSuffix)
					registry.AddLanguageMapping(".lang"+extSuffix, "lang"+extSuffix)

					// Verify the additions worked
					if !registry.IsImage("test.img" + extSuffix) {
						t.Errorf("Failed to add image extension .img%s", extSuffix)
					}
					if !registry.IsBinary("test.bin" + extSuffix) {
						t.Errorf("Failed to add binary extension .bin%s", extSuffix)
					}
					if registry.GetLanguage("test.lang"+extSuffix) != "lang"+extSuffix {
						t.Errorf("Failed to add language mapping .lang%s", extSuffix)
					}
				}
			}(i)
		}
		wg.Wait()
	})
}

View File

@@ -0,0 +1,258 @@
package fileproc
import (
"sync"
"testing"
)
// TestFileTypeRegistry_Configuration tests the configuration functionality:
// custom extension application, disabling, empty-value handling, and
// case-insensitive normalization. The first two subtests share one registry
// instance; the later subtests shadow it with fresh instances.
func TestFileTypeRegistry_Configuration(t *testing.T) {
	// Create a new registry instance for testing
	registry := &FileTypeRegistry{
		imageExts:   make(map[string]bool),
		binaryExts:  make(map[string]bool),
		languageMap: make(map[string]string),
	}

	// Test ApplyCustomExtensions
	t.Run("ApplyCustomExtensions", func(t *testing.T) {
		customImages := []string{".webp", ".avif", ".heic"}
		customBinary := []string{".custom", ".mybin"}
		customLanguages := map[string]string{
			".zig":  "zig",
			".odin": "odin",
			".v":    "vlang",
		}

		registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)

		// Test custom image extensions
		for _, ext := range customImages {
			if !registry.IsImage("test" + ext) {
				t.Errorf("Expected %s to be recognized as image", ext)
			}
		}

		// Test custom binary extensions
		for _, ext := range customBinary {
			if !registry.IsBinary("test" + ext) {
				t.Errorf("Expected %s to be recognized as binary", ext)
			}
		}

		// Test custom language mappings
		for ext, expectedLang := range customLanguages {
			if lang := registry.GetLanguage("test" + ext); lang != expectedLang {
				t.Errorf("Expected %s to map to %s, got %s", ext, expectedLang, lang)
			}
		}
	})

	// Test DisableExtensions
	t.Run("DisableExtensions", func(t *testing.T) {
		// Add some extensions first
		registry.AddImageExtension(".png")
		registry.AddImageExtension(".jpg")
		registry.AddBinaryExtension(".exe")
		registry.AddBinaryExtension(".dll")
		registry.AddLanguageMapping(".go", "go")
		registry.AddLanguageMapping(".py", "python")

		// Verify they work
		if !registry.IsImage("test.png") {
			t.Error("Expected .png to be image before disabling")
		}
		if !registry.IsBinary("test.exe") {
			t.Error("Expected .exe to be binary before disabling")
		}
		if registry.GetLanguage("test.go") != "go" {
			t.Error("Expected .go to map to go before disabling")
		}

		// Disable some extensions
		disabledImages := []string{".png"}
		disabledBinary := []string{".exe"}
		disabledLanguages := []string{".go"}

		registry.DisableExtensions(disabledImages, disabledBinary, disabledLanguages)

		// Test that disabled extensions no longer work
		if registry.IsImage("test.png") {
			t.Error("Expected .png to not be image after disabling")
		}
		if registry.IsBinary("test.exe") {
			t.Error("Expected .exe to not be binary after disabling")
		}
		if registry.GetLanguage("test.go") != "" {
			t.Error("Expected .go to not map to language after disabling")
		}

		// Test that non-disabled extensions still work
		if !registry.IsImage("test.jpg") {
			t.Error("Expected .jpg to still be image after disabling .png")
		}
		if !registry.IsBinary("test.dll") {
			t.Error("Expected .dll to still be binary after disabling .exe")
		}
		if registry.GetLanguage("test.py") != "python" {
			t.Error("Expected .py to still map to python after disabling .go")
		}
	})

	// Test empty values handling
	t.Run("EmptyValuesHandling", func(t *testing.T) {
		registry := &FileTypeRegistry{
			imageExts:   make(map[string]bool),
			binaryExts:  make(map[string]bool),
			languageMap: make(map[string]string),
		}

		// Test with empty values
		customImages := []string{"", ".valid", ""}
		customBinary := []string{"", ".valid"}
		customLanguages := map[string]string{
			"":       "invalid",
			".valid": "",
			".good":  "good",
		}

		registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)

		// Only valid entries should be added
		if registry.IsImage("test.") {
			t.Error("Expected empty extension to not be added as image")
		}
		if !registry.IsImage("test.valid") {
			t.Error("Expected .valid to be added as image")
		}
		if registry.IsBinary("test.") {
			t.Error("Expected empty extension to not be added as binary")
		}
		if !registry.IsBinary("test.valid") {
			t.Error("Expected .valid to be added as binary")
		}
		if registry.GetLanguage("test.") != "" {
			t.Error("Expected empty extension to not be added as language")
		}
		if registry.GetLanguage("test.valid") != "" {
			t.Error("Expected .valid with empty language to not be added")
		}
		if registry.GetLanguage("test.good") != "good" {
			t.Error("Expected .good to map to good")
		}
	})

	// Test case insensitive handling
	t.Run("CaseInsensitiveHandling", func(t *testing.T) {
		registry := &FileTypeRegistry{
			imageExts:   make(map[string]bool),
			binaryExts:  make(map[string]bool),
			languageMap: make(map[string]string),
		}

		customImages := []string{".WEBP", ".Avif"}
		customBinary := []string{".CUSTOM", ".MyBin"}
		customLanguages := map[string]string{
			".ZIG":  "zig",
			".Odin": "odin",
		}

		registry.ApplyCustomExtensions(customImages, customBinary, customLanguages)

		// Test that both upper and lower case work
		if !registry.IsImage("test.webp") {
			t.Error("Expected .webp (lowercase) to work after adding .WEBP")
		}
		if !registry.IsImage("test.WEBP") {
			t.Error("Expected .WEBP (uppercase) to work")
		}
		if !registry.IsBinary("test.custom") {
			t.Error("Expected .custom (lowercase) to work after adding .CUSTOM")
		}
		if !registry.IsBinary("test.CUSTOM") {
			t.Error("Expected .CUSTOM (uppercase) to work")
		}
		if registry.GetLanguage("test.zig") != "zig" {
			t.Error("Expected .zig (lowercase) to work after adding .ZIG")
		}
		if registry.GetLanguage("test.ZIG") != "zig" {
			t.Error("Expected .ZIG (uppercase) to work")
		}
	})
}
// TestConfigureFromSettings tests the global configuration function.
// It resets the package-level singleton so configuration is applied to a
// pristine registry, then verifies custom additions, disables, untouched
// defaults, and that repeated calls are additive rather than overriding.
func TestConfigureFromSettings(t *testing.T) {
	// Reset registry to ensure clean state
	registryOnce = sync.Once{}
	registry = nil

	// Test configuration application
	customImages := []string{".webp", ".avif"}
	customBinary := []string{".custom"}
	customLanguages := map[string]string{".zig": "zig"}
	disabledImages := []string{".gif"}  // Disable default extension
	disabledBinary := []string{".exe"}  // Disable default extension
	disabledLanguages := []string{".rb"} // Disable default extension

	ConfigureFromSettings(
		customImages,
		customBinary,
		customLanguages,
		disabledImages,
		disabledBinary,
		disabledLanguages,
	)

	// Test that custom extensions work
	if !IsImage("test.webp") {
		t.Error("Expected custom image extension .webp to work")
	}
	if !IsBinary("test.custom") {
		t.Error("Expected custom binary extension .custom to work")
	}
	if GetLanguage("test.zig") != "zig" {
		t.Error("Expected custom language .zig to work")
	}

	// Test that disabled extensions don't work
	if IsImage("test.gif") {
		t.Error("Expected disabled image extension .gif to not work")
	}
	if IsBinary("test.exe") {
		t.Error("Expected disabled binary extension .exe to not work")
	}
	if GetLanguage("test.rb") != "" {
		t.Error("Expected disabled language extension .rb to not work")
	}

	// Test that non-disabled defaults still work
	if !IsImage("test.png") {
		t.Error("Expected non-disabled image extension .png to still work")
	}
	if !IsBinary("test.dll") {
		t.Error("Expected non-disabled binary extension .dll to still work")
	}
	if GetLanguage("test.go") != "go" {
		t.Error("Expected non-disabled language extension .go to still work")
	}

	// Test multiple calls don't override previous configuration
	ConfigureFromSettings(
		[]string{".extra"},
		[]string{},
		map[string]string{},
		[]string{},
		[]string{},
		[]string{},
	)

	// Previous configuration should still work
	if !IsImage("test.webp") {
		t.Error("Expected previous configuration to persist")
	}
	// New configuration should also work
	if !IsImage("test.extra") {
		t.Error("Expected new configuration to be applied")
	}
}

View File

@@ -0,0 +1,226 @@
package fileproc
import (
"testing"
)
// TestFileTypeRegistry_LanguageDetection tests the language detection
// functionality of the default registry via a table of filename/expected
// pairs, including case-insensitivity and unknown-extension cases.
func TestFileTypeRegistry_LanguageDetection(t *testing.T) {
	registry := GetDefaultRegistry()

	tests := []struct {
		filename string
		expected string
	}{
		// Programming languages
		{"main.go", "go"},
		{"script.py", "python"},
		{"app.js", "javascript"},
		{"component.tsx", "typescript"},
		{"service.ts", "typescript"},
		{"App.java", "java"},
		{"program.c", "c"},
		{"program.cpp", "cpp"},
		{"header.h", "c"},
		{"header.hpp", "cpp"},
		{"main.rs", "rust"},
		{"script.rb", "ruby"},
		{"index.php", "php"},
		{"app.swift", "swift"},
		{"MainActivity.kt", "kotlin"},
		{"Main.scala", "scala"},
		{"analysis.r", "r"},
		{"ViewController.m", "objc"},
		{"ViewController.mm", "objcpp"},
		{"Program.cs", "csharp"},
		{"Module.vb", "vbnet"},
		{"program.fs", "fsharp"},
		{"script.lua", "lua"},
		{"script.pl", "perl"},
		// Shell scripts
		{"script.sh", "bash"},
		{"script.bash", "bash"},
		{"script.zsh", "zsh"},
		{"script.fish", "fish"},
		{"script.ps1", "powershell"},
		{"script.bat", "batch"},
		{"script.cmd", "batch"},
		// Data and markup
		{"query.sql", "sql"},
		{"index.html", "html"},
		{"page.htm", "html"},
		{"data.xml", "xml"},
		{"style.css", "css"},
		{"style.scss", "scss"},
		{"style.sass", "sass"},
		{"style.less", "less"},
		{"config.json", "json"},
		{"config.yaml", "yaml"},
		{"config.yml", "yaml"},
		{"data.toml", "toml"},
		{"page.md", "markdown"},
		// Only ".md" is mapped; ".markdown" is intentionally unmapped.
		{"readme.markdown", ""},
		{"doc.rst", "rst"},
		{"book.tex", "latex"},
		// Configuration files (no extension, so no language)
		{"Dockerfile", ""},
		{"Makefile", ""},
		{"GNUmakefile", ""},
		// Case sensitivity tests
		{"MAIN.GO", "go"},
		{"SCRIPT.PY", "python"},
		{"APP.JS", "javascript"},
		// Unknown extensions
		{"unknown.xyz", ""},
		{"file.unknown", ""},
		{"noextension", ""},
		{"", ""},
	}

	for _, tt := range tests {
		t.Run(tt.filename, func(t *testing.T) {
			result := registry.GetLanguage(tt.filename)
			if result != tt.expected {
				t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected)
			}
		})
	}
}
// TestFileTypeRegistry_ImageDetection tests the image detection functionality
// against default extensions, uppercase variants, non-images, and edge-case
// filenames.
func TestFileTypeRegistry_ImageDetection(t *testing.T) {
	registry := GetDefaultRegistry()

	tests := []struct {
		filename string
		expected bool
	}{
		// Common image formats
		{"photo.png", true},
		{"image.jpg", true},
		{"picture.jpeg", true},
		{"animation.gif", true},
		{"bitmap.bmp", true},
		{"image.tiff", true},
		{"scan.tif", true},
		{"vector.svg", true},
		{"modern.webp", true},
		{"favicon.ico", true},
		// Case sensitivity tests
		{"PHOTO.PNG", true},
		{"IMAGE.JPG", true},
		{"PICTURE.JPEG", true},
		// Non-image files
		{"document.txt", false},
		{"script.js", false},
		{"data.json", false},
		{"archive.zip", false},
		{"executable.exe", false},
		// Edge cases
		{"", false},             // Empty filename
		{"image", false},        // No extension
		{".png", true},          // Just extension
		{"file.png.bak", false}, // Multiple extensions
		{"image.unknown", false}, // Unknown extension
	}

	for _, tt := range tests {
		t.Run(tt.filename, func(t *testing.T) {
			result := registry.IsImage(tt.filename)
			if result != tt.expected {
				t.Errorf("IsImage(%q) = %t, expected %t", tt.filename, result, tt.expected)
			}
		})
	}
}
// TestFileTypeRegistry_BinaryDetection tests the binary detection
// functionality across all default binary categories, the .DS_Store special
// case, uppercase variants, and edge-case filenames.
func TestFileTypeRegistry_BinaryDetection(t *testing.T) {
	registry := GetDefaultRegistry()

	tests := []struct {
		filename string
		expected bool
	}{
		// Executable files
		{"program.exe", true},
		{"library.dll", true},
		{"libfoo.so", true},
		{"framework.dylib", true},
		{"data.bin", true},
		// Object and library files
		{"object.o", true},
		{"archive.a", true},
		{"library.lib", true},
		{"application.jar", true},
		{"bytecode.class", true},
		{"compiled.pyc", true},
		{"optimized.pyo", true},
		// System files (special-cased by isSpecialFile)
		{".DS_Store", true},
		// Document files (treated as binary)
		{"document.pdf", true},
		// Archive files
		{"archive.zip", true},
		{"backup.tar", true},
		{"compressed.gz", true},
		{"data.bz2", true},
		{"package.xz", true},
		{"archive.7z", true},
		{"backup.rar", true},
		// Font files
		{"font.ttf", true},
		{"font.otf", true},
		{"font.woff", true},
		{"font.woff2", true},
		// Media files (video/audio)
		{"video.mp4", true},
		{"movie.avi", true},
		{"clip.mov", true},
		{"song.mp3", true},
		{"audio.wav", true},
		{"music.flac", true},
		// Case sensitivity tests
		{"PROGRAM.EXE", true},
		{"LIBRARY.DLL", true},
		{"ARCHIVE.ZIP", true},
		// Non-binary files
		{"document.txt", false},
		{"script.py", false},
		{"config.json", false},
		{"style.css", false},
		{"page.html", false},
		// Edge cases
		{"", false},             // Empty filename
		{"binary", false},       // No extension
		{".exe", true},          // Just extension
		{"file.exe.txt", false}, // Multiple extensions
		{"file.unknown", false}, // Unknown extension
	}

	for _, tt := range tests {
		t.Run(tt.filename, func(t *testing.T) {
			result := registry.IsBinary(tt.filename)
			if result != tt.expected {
				t.Errorf("IsBinary(%q) = %t, expected %t", tt.filename, result, tt.expected)
			}
		})
	}
}

View File

@@ -0,0 +1,128 @@
package fileproc
import (
"testing"
)
// TestFileTypeRegistry_EdgeCases tests edge cases and boundary conditions.
// These are smoke tests: the detection calls must simply not panic on odd
// filenames; return values are deliberately ignored.
func TestFileTypeRegistry_EdgeCases(t *testing.T) {
	registry := GetDefaultRegistry()

	// Test various edge cases for filename handling
	edgeCases := []struct {
		name     string
		filename string
		desc     string
	}{
		{"empty", "", "empty filename"},
		{"single_char", "a", "single character filename"},
		{"just_dot", ".", "just a dot"},
		{"double_dot", "..", "double dot"},
		{"hidden_file", ".hidden", "hidden file"},
		{"hidden_with_ext", ".hidden.txt", "hidden file with extension"},
		{"multiple_dots", "file.tar.gz", "multiple extensions"},
		{"trailing_dot", "file.", "trailing dot"},
		{"unicode", "файл.txt", "unicode filename"},
		{"spaces", "my file.txt", "filename with spaces"},
		{"special_chars", "file@#$.txt", "filename with special characters"},
		{"very_long", "very_long_filename_with_many_characters_in_it.extension", "very long filename"},
		{"no_basename", ".gitignore", "dotfile with no basename"},
		{"case_mixed", "FiLe.ExT", "mixed case"},
	}

	for _, tc := range edgeCases {
		t.Run(tc.name, func(t *testing.T) {
			// These should not panic
			_ = registry.IsImage(tc.filename)
			_ = registry.IsBinary(tc.filename)
			_ = registry.GetLanguage(tc.filename)

			// Global functions should also not panic
			_ = IsImage(tc.filename)
			_ = IsBinary(tc.filename)
			_ = GetLanguage(tc.filename)
		})
	}
}
// TestFileTypeRegistry_MinimumExtensionLength tests the minimum extension
// length requirement: filenames shorter than minExtensionLength yield "".
func TestFileTypeRegistry_MinimumExtensionLength(t *testing.T) {
	registry := GetDefaultRegistry()

	tests := []struct {
		filename string
		expected string
	}{
		{"", ""},            // Empty filename
		{"a", ""},           // Single character (less than minExtensionLength)
		{"ab", ""},          // Two characters, no extension
		{"a.b", ""},         // Extension too short, but filename too short anyway
		{"ab.c", "c"},       // Valid: filename >= minExtensionLength and .c is valid extension
		{"a.go", "go"},      // Valid extension
		{"ab.py", "python"}, // Valid extension
		{"a.unknown", ""},   // Valid length but unknown extension
	}

	for _, tt := range tests {
		t.Run(tt.filename, func(t *testing.T) {
			result := registry.GetLanguage(tt.filename)
			if result != tt.expected {
				t.Errorf("GetLanguage(%q) = %q, expected %q", tt.filename, result, tt.expected)
			}
		})
	}
}
// Benchmark tests for performance validation

// BenchmarkFileTypeRegistry_IsImage measures a single image-extension lookup
// on the shared default registry.
func BenchmarkFileTypeRegistry_IsImage(b *testing.B) {
	registry := GetDefaultRegistry()
	filename := "test.png"
	b.ResetTimer() // exclude registry initialization from the measurement
	for i := 0; i < b.N; i++ {
		_ = registry.IsImage(filename)
	}
}
// BenchmarkFileTypeRegistry_IsBinary measures a single binary-extension lookup
// on the shared default registry.
func BenchmarkFileTypeRegistry_IsBinary(b *testing.B) {
	registry := GetDefaultRegistry()
	filename := "test.exe"
	b.ResetTimer() // exclude registry initialization from the measurement
	for i := 0; i < b.N; i++ {
		_ = registry.IsBinary(filename)
	}
}
// BenchmarkFileTypeRegistry_GetLanguage measures a single language lookup on
// the shared default registry.
func BenchmarkFileTypeRegistry_GetLanguage(b *testing.B) {
	registry := GetDefaultRegistry()
	filename := "test.go"
	b.ResetTimer() // exclude registry initialization from the measurement
	for i := 0; i < b.N; i++ {
		_ = registry.GetLanguage(filename)
	}
}
// BenchmarkFileTypeRegistry_GlobalFunctions measures the package-level wrapper
// functions, which include the cost of resolving the singleton registry.
func BenchmarkFileTypeRegistry_GlobalFunctions(b *testing.B) {
	filename := "test.go"
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = IsImage(filename)
		_ = IsBinary(filename)
		_ = GetLanguage(filename)
	}
}
// BenchmarkFileTypeRegistry_ConcurrentAccess measures the global lookup
// functions under parallel load, exercising the registry's internal locking.
func BenchmarkFileTypeRegistry_ConcurrentAccess(b *testing.B) {
	filename := "test.go"
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			_ = IsImage(filename)
			_ = IsBinary(filename)
			_ = GetLanguage(filename)
		}
	})
}

View File

@@ -0,0 +1,137 @@
package fileproc
import (
"testing"
)
// TestFileTypeRegistry_ModificationMethods tests the modification methods of FileTypeRegistry.
// Each sub-test verifies three properties of its Add* method: a plain addition
// works, additions are case-insensitive on both sides, and an extension given
// without a leading dot is NOT matched (callers must pass ".ext").
func TestFileTypeRegistry_ModificationMethods(t *testing.T) {
	// Create a new registry instance for testing.
	// NOTE(review): extCache/resultCache are left nil here; the lookup methods
	// presumably tolerate nil caches — confirm against the implementation.
	registry := &FileTypeRegistry{
		imageExts:   make(map[string]bool),
		binaryExts:  make(map[string]bool),
		languageMap: make(map[string]string),
	}
	// Test AddImageExtension
	t.Run("AddImageExtension", func(t *testing.T) {
		// Add a new image extension
		registry.AddImageExtension(".webp")
		if !registry.IsImage("test.webp") {
			t.Errorf("Expected .webp to be recognized as image after adding")
		}
		// Test case insensitive addition
		registry.AddImageExtension(".AVIF")
		if !registry.IsImage("test.avif") {
			t.Errorf("Expected .avif to be recognized as image after adding .AVIF")
		}
		if !registry.IsImage("test.AVIF") {
			t.Errorf("Expected .AVIF to be recognized as image")
		}
		// Extension added without the leading dot must not match
		registry.AddImageExtension("heic")
		if registry.IsImage("test.heic") {
			t.Errorf("Expected extension without dot to not work")
		}
		// Test with proper dot prefix
		registry.AddImageExtension(".heic")
		if !registry.IsImage("test.heic") {
			t.Errorf("Expected .heic to be recognized as image")
		}
	})
	// Test AddBinaryExtension
	t.Run("AddBinaryExtension", func(t *testing.T) {
		// Add a new binary extension
		registry.AddBinaryExtension(".custom")
		if !registry.IsBinary("file.custom") {
			t.Errorf("Expected .custom to be recognized as binary after adding")
		}
		// Test case insensitive addition
		registry.AddBinaryExtension(".SPECIAL")
		if !registry.IsBinary("file.special") {
			t.Errorf("Expected .special to be recognized as binary after adding .SPECIAL")
		}
		if !registry.IsBinary("file.SPECIAL") {
			t.Errorf("Expected .SPECIAL to be recognized as binary")
		}
		// Extension added without the leading dot must not match
		registry.AddBinaryExtension("bin")
		if registry.IsBinary("file.bin") {
			t.Errorf("Expected extension without dot to not work")
		}
		// Test with proper dot prefix
		registry.AddBinaryExtension(".bin")
		if !registry.IsBinary("file.bin") {
			t.Errorf("Expected .bin to be recognized as binary")
		}
	})
	// Test AddLanguageMapping
	t.Run("AddLanguageMapping", func(t *testing.T) {
		// Add a new language mapping
		registry.AddLanguageMapping(".xyz", "CustomLang")
		if lang := registry.GetLanguage("file.xyz"); lang != "CustomLang" {
			t.Errorf("Expected CustomLang, got %s", lang)
		}
		// Test case insensitive addition
		registry.AddLanguageMapping(".ABC", "UpperLang")
		if lang := registry.GetLanguage("file.abc"); lang != "UpperLang" {
			t.Errorf("Expected UpperLang, got %s", lang)
		}
		if lang := registry.GetLanguage("file.ABC"); lang != "UpperLang" {
			t.Errorf("Expected UpperLang for uppercase, got %s", lang)
		}
		// Mapping added without the leading dot must not match
		registry.AddLanguageMapping("nolang", "NoLang")
		if lang := registry.GetLanguage("file.nolang"); lang == "NoLang" {
			t.Errorf("Expected extension without dot to not work")
		}
		// Test with proper dot prefix
		registry.AddLanguageMapping(".nolang", "NoLang")
		if lang := registry.GetLanguage("file.nolang"); lang != "NoLang" {
			t.Errorf("Expected NoLang, got %s", lang)
		}
		// Re-adding an existing extension overrides the previous mapping
		registry.AddLanguageMapping(".xyz", "NewCustomLang")
		if lang := registry.GetLanguage("file.xyz"); lang != "NewCustomLang" {
			t.Errorf("Expected NewCustomLang after override, got %s", lang)
		}
	})
}
// TestFileTypeRegistry_DefaultRegistryConsistency tests default registry behavior.
func TestFileTypeRegistry_DefaultRegistryConsistency(t *testing.T) {
	registry := GetDefaultRegistry()
	// Basic classifications must hold on the shared default registry.
	if !registry.IsImage("test.png") {
		t.Error("Expected .png to be recognized as image")
	}
	if !registry.IsBinary("test.exe") {
		t.Error("Expected .exe to be recognized as binary")
	}
	lang := registry.GetLanguage("test.go")
	if lang != "go" {
		t.Errorf("Expected go, got %s", lang)
	}
	// Repeated lookups must stay stable (exercises any internal caching).
	for i := 0; i < 5; i++ {
		if !registry.IsImage("test.jpg") {
			t.Errorf("Iteration %d: Expected .jpg to be recognized as image", i)
		}
		if registry.IsBinary("test.txt") {
			t.Errorf("Iteration %d: Expected .txt to not be recognized as binary", i)
		}
	}
}

28
fileproc/formats.go Normal file
View File

@@ -0,0 +1,28 @@
package fileproc
// FileData represents a single file's path and content.
type FileData struct {
	Path     string `json:"path" yaml:"path"`         // path as provided by the processor (relative to the scanned root)
	Content  string `json:"content" yaml:"content"`   // full file content
	Language string `json:"language" yaml:"language"` // detected language; empty when the extension is unknown
}
// OutputData represents the full output structure.
type OutputData struct {
	Prefix string     `json:"prefix,omitempty" yaml:"prefix,omitempty"` // optional text emitted before the files
	Suffix string     `json:"suffix,omitempty" yaml:"suffix,omitempty"` // optional text emitted after the files
	Files  []FileData `json:"files" yaml:"files"`                       // collected file entries
}
// FormatWriter defines the interface for format-specific writers.
// Start is called once before any files, WriteFile once per file, and Close
// once after all files.
// NOTE(review): MarkdownWriter.Close takes a suffix argument and therefore
// does not satisfy this interface as written — confirm whether that is
// intended.
type FormatWriter interface {
	Start(prefix, suffix string) error
	WriteFile(req WriteRequest) error
	Close() error
}
// detectLanguage tries to infer the code block language from the file extension.
func detectLanguage(filePath string) string {
	return GetDefaultRegistry().GetLanguage(filePath)
}

66
fileproc/ignore_rules.go Normal file
View File

@@ -0,0 +1,66 @@
package fileproc
import (
"os"
"path/filepath"
ignore "github.com/sabhiram/go-gitignore"
)
// ignoreRule holds an ignore matcher along with the base directory where it was loaded.
type ignoreRule struct {
	gi   *ignore.GitIgnore // compiled matcher for the ignore file's patterns
	base string            // directory containing the ignore file; matches are evaluated relative to it
}
// loadIgnoreRules loads ignore rules from currentDir and appends them to the
// rules inherited from parent directories.
func loadIgnoreRules(currentDir string, parentRules []ignoreRule) []ignoreRule {
	// Room for the inherited rules plus at most one .gitignore and one .ignore.
	const expectedIgnoreFiles = 2
	combined := make([]ignoreRule, 0, len(parentRules)+expectedIgnoreFiles)
	combined = append(combined, parentRules...)
	for _, name := range []string{".gitignore", ".ignore"} {
		rule := tryLoadIgnoreFile(currentDir, name)
		if rule != nil {
			combined = append(combined, *rule)
		}
	}
	return combined
}
// tryLoadIgnoreFile attempts to load an ignore file from dir. It returns nil
// when the file is absent, is a directory, or fails to compile.
func tryLoadIgnoreFile(dir, fileName string) *ignoreRule {
	ignorePath := filepath.Join(dir, fileName)
	info, err := os.Stat(ignorePath)
	if err != nil || info.IsDir() {
		return nil
	}
	gi, err := ignore.CompileIgnoreFile(ignorePath)
	if err != nil {
		return nil
	}
	return &ignoreRule{base: dir, gi: gi}
}
// matchesIgnoreRules reports whether fullPath is matched by any ignore rule.
func matchesIgnoreRules(fullPath string, rules []ignoreRule) bool {
	for i := range rules {
		if matchesRule(fullPath, rules[i]) {
			return true
		}
	}
	return false
}
// matchesRule reports whether fullPath is matched by rule. The path is first
// made relative to the directory the rule was loaded from, since gitignore
// patterns apply relative to their own file.
func matchesRule(fullPath string, rule ignoreRule) bool {
	rel, err := filepath.Rel(rule.base, fullPath)
	if err != nil {
		// Path cannot be expressed relative to the rule's base: no match.
		return false
	}
	return rule.gi.MatchesPath(rel)
}

158
fileproc/json_writer.go Normal file
View File

@@ -0,0 +1,158 @@
package fileproc
import (
"encoding/json"
"fmt"
"io"
"os"
"github.com/ivuorinen/gibidify/utils"
)
// JSONWriter handles JSON format output with streaming support.
type JSONWriter struct {
	outFile   *os.File // destination stream
	firstFile bool     // true until the first file entry is written; controls comma separators
}
// NewJSONWriter creates a new JSON writer emitting to outFile, starting in the
// "no file written yet" state so element separators are placed correctly.
func NewJSONWriter(outFile *os.File) *JSONWriter {
	w := &JSONWriter{outFile: outFile}
	w.firstFile = true
	return w
}
// Start writes the JSON header: the opening object with the escaped prefix and
// suffix fields, up to and including the opening bracket of the files array.
// The JSON text is assembled by hand (not via encoding/json) so file entries
// can later be streamed into the array one at a time.
func (w *JSONWriter) Start(prefix, suffix string) error {
	// Start JSON structure
	if _, err := w.outFile.WriteString(`{"prefix":"`); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON start")
	}
	// Write escaped prefix
	escapedPrefix := utils.EscapeForJSON(prefix)
	if err := utils.WriteWithErrorWrap(w.outFile, escapedPrefix, "failed to write JSON prefix", ""); err != nil {
		return err
	}
	if _, err := w.outFile.WriteString(`","suffix":"`); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON middle")
	}
	// Write escaped suffix
	escapedSuffix := utils.EscapeForJSON(suffix)
	if err := utils.WriteWithErrorWrap(w.outFile, escapedSuffix, "failed to write JSON suffix", ""); err != nil {
		return err
	}
	// Open the files array; WriteFile appends entries, Close terminates it.
	if _, err := w.outFile.WriteString(`","files":[`); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON files start")
	}
	return nil
}
// WriteFile writes a file entry in JSON format, emitting a comma separator
// before every entry except the first. Streamed requests are chunked to disk;
// small requests are marshaled in one piece.
func (w *JSONWriter) WriteFile(req WriteRequest) error {
	needSeparator := !w.firstFile
	w.firstFile = false
	if needSeparator {
		if _, err := w.outFile.WriteString(","); err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON separator")
		}
	}
	if req.IsStream {
		return w.writeStreaming(req)
	}
	return w.writeInline(req)
}
// Close terminates the files array and the top-level JSON object opened by Start.
func (w *JSONWriter) Close() error {
	_, err := w.outFile.WriteString("]}")
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON end")
	}
	return nil
}
// writeStreaming writes a large file as JSON in streaming chunks, escaping the
// content as it is copied so the whole file never needs to be in memory.
// The request's reader is always closed.
func (w *JSONWriter) writeStreaming(req WriteRequest) error {
	defer utils.SafeCloseReader(req.Reader, req.Path)
	language := detectLanguage(req.Path)
	// Write file start
	// NOTE(review): language is interpolated unescaped; values come from the
	// registry and look JSON-safe — confirm they can never contain quotes.
	escapedPath := utils.EscapeForJSON(req.Path)
	if _, err := fmt.Fprintf(w.outFile, `{"path":"%s","language":"%s","content":"`, escapedPath, language); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file start").WithFilePath(req.Path)
	}
	// Stream content with JSON escaping
	if err := w.streamJSONContent(req.Reader, req.Path); err != nil {
		return err
	}
	// Close the content string and the file object.
	if _, err := w.outFile.WriteString(`"}`); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file end").WithFilePath(req.Path)
	}
	return nil
}
// writeInline writes a small file directly as JSON using encoding/json, which
// handles all escaping for path, language, and content in one shot.
func (w *JSONWriter) writeInline(req WriteRequest) error {
	language := detectLanguage(req.Path)
	fileData := FileData{
		Path:     req.Path,
		Content:  req.Content,
		Language: language,
	}
	encoded, err := json.Marshal(fileData)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingEncode, "failed to marshal JSON").WithFilePath(req.Path)
	}
	if _, err := w.outFile.Write(encoded); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write JSON file").WithFilePath(req.Path)
	}
	return nil
}
// streamJSONContent copies content from reader to the output in fixed-size
// chunks, JSON-escaping each chunk before it is written.
func (w *JSONWriter) streamJSONContent(reader io.Reader, path string) error {
	escape := func(chunk []byte) []byte {
		return []byte(utils.EscapeForJSON(string(chunk)))
	}
	return utils.StreamContent(reader, w.outFile, StreamChunkSize, path, escape)
}
// startJSONWriter handles JSON format output with streaming support. It is
// intended to run as a goroutine: it drains writeCh until the channel is
// closed and signals completion by closing done. Per-file write errors are
// logged and processing continues with the next request.
// NOTE(review): if Start fails, the function returns without draining writeCh;
// confirm senders cannot block forever in that case.
func startJSONWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
	defer close(done)
	writer := NewJSONWriter(outFile)
	// Start writing
	if err := writer.Start(prefix, suffix); err != nil {
		utils.LogError("Failed to write JSON start", err)
		return
	}
	// Process files
	for req := range writeCh {
		if err := writer.WriteFile(req); err != nil {
			utils.LogError("Failed to write JSON file", err)
		}
	}
	// Close writer
	if err := writer.Close(); err != nil {
		utils.LogError("Failed to write JSON end", err)
	}
}

139
fileproc/markdown_writer.go Normal file
View File

@@ -0,0 +1,139 @@
package fileproc
import (
"fmt"
"io"
"os"
"github.com/ivuorinen/gibidify/utils"
)
// MarkdownWriter handles markdown format output with streaming support.
// Unlike JSONWriter it keeps no per-file state; each entry is self-contained.
type MarkdownWriter struct {
	outFile *os.File // destination stream
}
// NewMarkdownWriter creates a markdown writer that emits to outFile.
func NewMarkdownWriter(outFile *os.File) *MarkdownWriter {
	return &MarkdownWriter{
		outFile: outFile,
	}
}
// Start writes the markdown header. A non-empty prefix becomes a top-level
// heading; the suffix parameter is accepted for symmetry with other writers
// but is only consumed by Close.
func (w *MarkdownWriter) Start(prefix, suffix string) error {
	if prefix != "" {
		// Heading followed by a blank line so the first file entry is separated.
		if _, err := fmt.Fprintf(w.outFile, "# %s\n\n", prefix); err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write prefix")
		}
	}
	return nil
}
// WriteFile writes a single file entry, streaming large files from their
// reader and writing small ones inline from their buffered content.
func (w *MarkdownWriter) WriteFile(req WriteRequest) error {
	if !req.IsStream {
		return w.writeInline(req)
	}
	return w.writeStreaming(req)
}
// Close writes the markdown footer: a non-empty suffix becomes a trailing
// top-level heading.
// NOTE(review): this signature takes the suffix explicitly and therefore does
// not satisfy FormatWriter's Close() error — confirm whether MarkdownWriter is
// meant to implement that interface.
func (w *MarkdownWriter) Close(suffix string) error {
	if suffix != "" {
		if _, err := fmt.Fprintf(w.outFile, "\n# %s\n", suffix); err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write suffix")
		}
	}
	return nil
}
// writeStreaming writes a large file as a fenced markdown code block, copying
// the content from req.Reader in chunks instead of holding it in memory.
// The reader is always closed via the shared utils.SafeCloseReader helper so
// cleanup behaves identically to JSONWriter.writeStreaming (previously this
// used a bespoke closeReader with the same semantics).
func (w *MarkdownWriter) writeStreaming(req WriteRequest) error {
	defer utils.SafeCloseReader(req.Reader, req.Path)
	language := detectLanguage(req.Path)
	// Write the per-file heading and open the fenced code block.
	if _, err := fmt.Fprintf(w.outFile, "## File: `%s`\n```%s\n", req.Path, language); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file header").WithFilePath(req.Path)
	}
	// Stream file content in chunks
	if err := w.streamContent(req.Reader, req.Path); err != nil {
		return err
	}
	// Close the fenced code block.
	if _, err := w.outFile.WriteString("\n```\n\n"); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write file footer").WithFilePath(req.Path)
	}
	return nil
}
// writeInline writes a small file directly from its buffered content as one
// fenced code block.
func (w *MarkdownWriter) writeInline(req WriteRequest) error {
	language := detectLanguage(req.Path)
	_, err := fmt.Fprintf(w.outFile, "## File: `%s`\n```%s\n%s\n```\n\n", req.Path, language, req.Content)
	if err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write inline content").WithFilePath(req.Path)
	}
	return nil
}
// streamContent copies file content to the output in StreamChunkSize chunks,
// wrapping read and write failures with the file's path.
func (w *MarkdownWriter) streamContent(reader io.Reader, path string) error {
	chunk := make([]byte, StreamChunkSize)
	for {
		n, readErr := reader.Read(chunk)
		if n > 0 {
			if _, err := w.outFile.Write(chunk[:n]); err != nil {
				return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write chunk").WithFilePath(path)
			}
		}
		switch {
		case readErr == io.EOF:
			// Normal end of input.
			return nil
		case readErr != nil:
			return utils.WrapError(readErr, utils.ErrorTypeIO, utils.CodeIORead, "failed to read chunk").WithFilePath(path)
		}
	}
}
// closeReader safely closes a reader if it implements io.Closer. Close
// failures are logged rather than returned, since by the time this runs the
// content has already been written.
func (w *MarkdownWriter) closeReader(reader io.Reader, path string) {
	if closer, ok := reader.(io.Closer); ok {
		if err := closer.Close(); err != nil {
			utils.LogError(
				"Failed to close file reader",
				utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
			)
		}
	}
}
// startMarkdownWriter handles markdown format output with streaming support.
// It is intended to run as a goroutine: it drains writeCh until the channel is
// closed and signals completion by closing done. Per-file write errors are
// logged and processing continues with the next request.
// NOTE(review): if Start fails, the function returns without draining writeCh;
// confirm senders cannot block forever in that case.
func startMarkdownWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
	defer close(done)
	writer := NewMarkdownWriter(outFile)
	// Start writing
	if err := writer.Start(prefix, suffix); err != nil {
		utils.LogError("Failed to write markdown prefix", err)
		return
	}
	// Process files
	for req := range writeCh {
		if err := writer.WriteFile(req); err != nil {
			utils.LogError("Failed to write markdown file", err)
		}
	}
	// Close writer
	if err := writer.Close(suffix); err != nil {
		utils.LogError("Failed to write markdown suffix", err)
	}
}

View File

@@ -2,35 +2,308 @@
package fileproc
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"time"
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/utils"
)
const (
	// StreamChunkSize is the size of chunks when streaming large files (64 KiB).
	StreamChunkSize = 65536
	// StreamThreshold is the file size above which we use streaming (1 MiB);
	// files at or below it are read fully into memory.
	StreamThreshold = 1048576
	// MaxMemoryBuffer is the maximum memory to use for buffering content (10 MiB).
	MaxMemoryBuffer = 10485760
)
// WriteRequest represents the content to be written.
type WriteRequest struct {
Path string
Content string
Path string
Content string
IsStream bool
Reader io.Reader
}
// FileProcessor handles file processing operations for a single root path.
type FileProcessor struct {
	rootPath        string           // root used to compute relative output paths
	sizeLimit       int64            // per-file size limit from config.GetFileSizeLimit
	resourceMonitor *ResourceMonitor // enforces concurrency, memory, and timeout limits
}
// NewFileProcessor creates a file processor rooted at rootPath with its own
// dedicated resource monitor and the configured file size limit.
func NewFileProcessor(rootPath string) *FileProcessor {
	monitor := NewResourceMonitor()
	return &FileProcessor{
		rootPath:        rootPath,
		sizeLimit:       config.GetFileSizeLimit(),
		resourceMonitor: monitor,
	}
}
// NewFileProcessorWithMonitor creates a file processor rooted at rootPath that
// shares the given resource monitor instead of creating its own.
func NewFileProcessorWithMonitor(rootPath string, monitor *ResourceMonitor) *FileProcessor {
	p := &FileProcessor{rootPath: rootPath, resourceMonitor: monitor}
	p.sizeLimit = config.GetFileSizeLimit()
	return p
}
// ProcessFile reads the file at filePath and sends a formatted output to outCh.
// It automatically chooses between loading the entire file or streaming based on file size.
func ProcessFile(filePath string, outCh chan<- WriteRequest, rootPath string) {
content, err := os.ReadFile(filePath)
if err != nil {
logrus.Errorf("Failed to read file %s: %v", filePath, err)
processor := NewFileProcessor(rootPath)
ctx := context.Background()
processor.ProcessWithContext(ctx, filePath, outCh)
}
// ProcessFileWithMonitor processes a file using a shared resource monitor.
// It builds a one-shot FileProcessor around the monitor and delegates to
// ProcessWithContext; failures are logged there rather than returned.
func ProcessFileWithMonitor(ctx context.Context, filePath string, outCh chan<- WriteRequest, rootPath string, monitor *ResourceMonitor) {
	processor := NewFileProcessorWithMonitor(rootPath, monitor)
	processor.ProcessWithContext(ctx, filePath, outCh)
}
// Process handles file processing with the configured settings using a
// background context (no external cancellation or deadline).
func (p *FileProcessor) Process(filePath string, outCh chan<- WriteRequest) {
	ctx := context.Background()
	p.ProcessWithContext(ctx, filePath, outCh)
}
// ProcessWithContext handles file processing with context and resource monitoring.
func (p *FileProcessor) ProcessWithContext(ctx context.Context, filePath string, outCh chan<- WriteRequest) {
// Create file processing context with timeout
fileCtx, fileCancel := p.resourceMonitor.CreateFileProcessingContext(ctx)
defer fileCancel()
// Wait for rate limiting
if err := p.resourceMonitor.WaitForRateLimit(fileCtx); err != nil {
if err == context.DeadlineExceeded {
utils.LogErrorf(
utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing timeout during rate limiting", filePath, nil),
"File processing timeout during rate limiting: %s", filePath,
)
}
return
}
// Compute path relative to rootPath, so /a/b/c/d.c becomes c/d.c
relPath, err := filepath.Rel(rootPath, filePath)
// Validate file and check resource limits
fileInfo, err := p.validateFileWithLimits(fileCtx, filePath)
if err != nil {
// Fallback if something unexpected happens
relPath = filePath
return // Error already logged
}
// Format: separator, then relative path, then content
formatted := fmt.Sprintf("\n---\n%s\n%s\n", relPath, string(content))
outCh <- WriteRequest{Path: relPath, Content: formatted}
// Acquire read slot for concurrent processing
if err := p.resourceMonitor.AcquireReadSlot(fileCtx); err != nil {
if err == context.DeadlineExceeded {
utils.LogErrorf(
utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing timeout waiting for read slot", filePath, nil),
"File processing timeout waiting for read slot: %s", filePath,
)
}
return
}
defer p.resourceMonitor.ReleaseReadSlot()
// Check hard memory limits before processing
if err := p.resourceMonitor.CheckHardMemoryLimit(); err != nil {
utils.LogErrorf(err, "Hard memory limit check failed for file: %s", filePath)
return
}
// Get relative path
relPath := p.getRelativePath(filePath)
// Process file with timeout
processStart := time.Now()
defer func() {
// Record successful processing
p.resourceMonitor.RecordFileProcessed(fileInfo.Size())
logrus.Debugf("File processed in %v: %s", time.Since(processStart), filePath)
}()
// Choose processing strategy based on file size
if fileInfo.Size() <= StreamThreshold {
p.processInMemoryWithContext(fileCtx, filePath, relPath, outCh)
} else {
p.processStreamingWithContext(fileCtx, filePath, relPath, outCh)
}
}
// validateFileWithLimits checks whether filePath may be processed, enforcing
// (in order) context cancellation, stat-ability, the configured per-file size
// limit, and the resource monitor's limits. It returns the file's os.FileInfo
// on success; on failure the error has already been logged and the caller
// should skip the file.
func (p *FileProcessor) validateFileWithLimits(ctx context.Context, filePath string) (os.FileInfo, error) {
	// Bail out early if the caller's context was cancelled or timed out.
	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	default:
	}
	fileInfo, err := os.Stat(filePath)
	if err != nil {
		structErr := utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to stat file").WithFilePath(filePath)
		utils.LogErrorf(structErr, "Failed to stat file %s", filePath)
		return nil, err
	}
	// Check traditional size limit.
	if fileInfo.Size() > p.sizeLimit {
		// Renamed from "context": the original local shadowed the imported
		// context package within this scope.
		errContext := map[string]interface{}{
			"file_size":  fileInfo.Size(),
			"size_limit": p.sizeLimit,
		}
		utils.LogErrorf(
			utils.NewStructuredError(
				utils.ErrorTypeValidation,
				utils.CodeValidationSize,
				fmt.Sprintf("file size (%d bytes) exceeds limit (%d bytes)", fileInfo.Size(), p.sizeLimit),
				filePath,
				errContext,
			),
			"Skipping large file %s", filePath,
		)
		return nil, fmt.Errorf("file too large")
	}
	// Check resource limits (memory, totals, etc.).
	if err := p.resourceMonitor.ValidateFileProcessing(filePath, fileInfo.Size()); err != nil {
		utils.LogErrorf(err, "Resource limit validation failed for file: %s", filePath)
		return nil, err
	}
	return fileInfo, nil
}
// getRelativePath returns filePath relative to the processor's root, falling
// back to the unmodified path when a relative form cannot be computed.
func (p *FileProcessor) getRelativePath(filePath string) string {
	if rel, err := filepath.Rel(p.rootPath, filePath); err == nil {
		return rel
	}
	return filePath
}
// processInMemoryWithContext loads the entire file into memory and sends it as
// a non-streaming WriteRequest. Cancellation is checked at three points: before
// the read, after the read, and while sending on outCh (so the goroutine never
// blocks forever on a full channel once the context is done). Errors are
// logged, not returned.
func (p *FileProcessor) processInMemoryWithContext(ctx context.Context, filePath, relPath string, outCh chan<- WriteRequest) {
	// Check context before reading
	select {
	case <-ctx.Done():
		utils.LogErrorf(
			utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing cancelled", filePath, nil),
			"File processing cancelled: %s", filePath,
		)
		return
	default:
	}
	content, err := os.ReadFile(filePath) // #nosec G304 - filePath is validated by walker
	if err != nil {
		structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to read file").WithFilePath(filePath)
		utils.LogErrorf(structErr, "Failed to read file %s", filePath)
		return
	}
	// Check context again after reading
	select {
	case <-ctx.Done():
		utils.LogErrorf(
			utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing cancelled after read", filePath, nil),
			"File processing cancelled after read: %s", filePath,
		)
		return
	default:
	}
	// Try to send the result, but respect context cancellation
	select {
	case <-ctx.Done():
		utils.LogErrorf(
			utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "file processing cancelled before output", filePath, nil),
			"File processing cancelled before output: %s", filePath,
		)
		return
	case outCh <- WriteRequest{
		Path:     relPath,
		Content:  p.formatContent(relPath, string(content)),
		IsStream: false,
	}:
	}
}
// processStreamingWithContext handles large files by sending a WriteRequest
// whose Reader streams the header plus file content; Content is left empty.
// Cancellation is checked before creating the reader and while sending on
// outCh. The opened file is closed later by the consuming writer, not here.
func (p *FileProcessor) processStreamingWithContext(ctx context.Context, filePath, relPath string, outCh chan<- WriteRequest) {
	// Check context before creating reader
	select {
	case <-ctx.Done():
		utils.LogErrorf(
			utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "streaming processing cancelled", filePath, nil),
			"Streaming processing cancelled: %s", filePath,
		)
		return
	default:
	}
	reader := p.createStreamReaderWithContext(ctx, filePath, relPath)
	if reader == nil {
		return // Error already logged
	}
	// Try to send the result, but respect context cancellation
	select {
	case <-ctx.Done():
		utils.LogErrorf(
			utils.NewStructuredError(utils.ErrorTypeValidation, utils.CodeResourceLimitTimeout, "streaming processing cancelled before output", filePath, nil),
			"Streaming processing cancelled before output: %s", filePath,
		)
		return
	case outCh <- WriteRequest{
		Path:     relPath,
		Content:  "", // Empty since content is in Reader
		IsStream: true,
		Reader:   reader,
	}:
	}
}
// createStreamReaderWithContext opens filePath and returns a reader that
// yields the separator header followed by the raw file content. It returns
// nil (after logging) on cancellation or open failure.
// NOTE(review): if the context is cancelled between the reader being created
// and the writer consuming it, the writer is still responsible for closing
// the file — confirm that path always runs.
func (p *FileProcessor) createStreamReaderWithContext(ctx context.Context, filePath, relPath string) io.Reader {
	// Check context before opening file
	select {
	case <-ctx.Done():
		return nil
	default:
	}
	file, err := os.Open(filePath) // #nosec G304 - filePath is validated by walker
	if err != nil {
		structErr := utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingFileRead, "failed to open file for streaming").WithFilePath(filePath)
		utils.LogErrorf(structErr, "Failed to open file for streaming %s", filePath)
		return nil
	}
	// Note: file will be closed by the writer
	header := p.formatHeader(relPath)
	return io.MultiReader(header, file)
}
// formatContent prepends the standard "---" separator header to the content.
func (p *FileProcessor) formatContent(relPath, content string) string {
	var b strings.Builder
	b.WriteString("\n---\n")
	b.WriteString(relPath)
	b.WriteString("\n")
	b.WriteString(content)
	b.WriteString("\n")
	return b.String()
}
// formatHeader creates a reader that yields the separator header for relPath.
func (p *FileProcessor) formatHeader(relPath string) io.Reader {
	header := fmt.Sprintf("\n---\n%s\n", relPath)
	return strings.NewReader(header)
}

View File

@@ -1,15 +1,20 @@
package fileproc
package fileproc_test
import (
"os"
"strings"
"sync"
"testing"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/testutil"
)
func TestProcessFile(t *testing.T) {
// Reset and load default config to ensure proper file size limits
testutil.ResetViperConfig(t, "")
// Create a temporary file with known content.
tmpFile, err := os.CreateTemp("", "testfile")
tmpFile, err := os.CreateTemp(t.TempDir(), "testfile")
if err != nil {
t.Fatal(err)
}
@@ -30,12 +35,12 @@ func TestProcessFile(t *testing.T) {
return
}
ch := make(chan WriteRequest, 1)
ch := make(chan fileproc.WriteRequest, 1)
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
ProcessFile(tmpFile.Name(), ch, "")
fileproc.ProcessFile(tmpFile.Name(), ch, "")
}()
wg.Wait()
close(ch)

107
fileproc/registry.go Normal file
View File

@@ -0,0 +1,107 @@
// Package fileproc provides file processing utilities.
package fileproc
import (
"path/filepath"
"strings"
"sync"
)
// minExtensionLength is the minimum length a filename must have before an
// extension lookup is attempted.
// NOTE(review): exact enforcement lives in the lookup methods — per the tests,
// "a" and "a.b" yield no language while "ab.c" does; confirm the rule there.
const minExtensionLength = 2

var (
	registry     *FileTypeRegistry // lazily-created singleton; guarded by registryOnce
	registryOnce sync.Once
)
// FileTypeRegistry manages file type detection and classification.
type FileTypeRegistry struct {
	imageExts   map[string]bool   // extensions (".png" form, lowercase) classified as images
	binaryExts  map[string]bool   // extensions classified as binary
	languageMap map[string]string // extension -> language name for code fences
	// Cache for frequent lookups to avoid repeated string operations
	extCache     map[string]string         // filename -> normalized extension
	resultCache  map[string]FileTypeResult // extension -> cached result
	cacheMutex   sync.RWMutex              // guards both caches and stats
	maxCacheSize int                       // eviction threshold for resultCache
	// Performance statistics
	stats RegistryStats
}
// RegistryStats tracks performance metrics for the registry.
type RegistryStats struct {
	TotalLookups   uint64 // total lookup calls served
	CacheHits      uint64 // lookups answered from resultCache
	CacheMisses    uint64 // lookups that had to compute a fresh result
	CacheEvictions uint64 // entries dropped to honor maxCacheSize
}
// FileTypeResult represents cached file type detection results, bundling all
// classifications for one extension so a single cache hit answers every query.
type FileTypeResult struct {
	IsImage   bool
	IsBinary  bool
	Language  string // empty when the extension has no language mapping
	Extension string // the normalized extension this result was computed for
}
// initRegistry initializes the default file type registry with common
// extensions and pre-sized lookup caches.
func initRegistry() *FileTypeRegistry {
	return &FileTypeRegistry{
		imageExts:    getImageExtensions(),
		binaryExts:   getBinaryExtensions(),
		languageMap:  getLanguageMap(),
		extCache:     make(map[string]string, 1000),        // Cache for extension normalization
		resultCache:  make(map[string]FileTypeResult, 500), // Cache for type results
		maxCacheSize: 500,                                  // matches resultCache's initial sizing
	}
}
// getRegistry returns the singleton file type registry, creating it exactly
// once on first use.
func getRegistry() *FileTypeRegistry {
	registryOnce.Do(func() { registry = initRegistry() })
	return registry
}
// GetDefaultRegistry returns the default file type registry.
// It is the exported accessor for the package-wide singleton and always
// returns the same instance.
func GetDefaultRegistry() *FileTypeRegistry {
	return getRegistry()
}
// GetStats returns a copy of the current registry statistics, taken under the
// read lock so the counters are mutually consistent.
func (r *FileTypeRegistry) GetStats() RegistryStats {
	r.cacheMutex.RLock()
	defer r.cacheMutex.RUnlock()
	return r.stats
}
// GetCacheInfo returns the current sizes of the extension and result caches
// plus the configured maximum, taken together under the read lock.
func (r *FileTypeRegistry) GetCacheInfo() (extCacheSize, resultCacheSize, maxCacheSize int) {
	r.cacheMutex.RLock()
	defer r.cacheMutex.RUnlock()
	return len(r.extCache), len(r.resultCache), r.maxCacheSize
}
// ResetRegistryForTesting resets the registry to its initial state so the next
// getRegistry call rebuilds it.
// This function should only be used in tests; it is not synchronized against
// concurrent getRegistry callers.
func ResetRegistryForTesting() {
	registryOnce = sync.Once{}
	registry = nil
}
// normalizeExtension returns filename's extension (including the leading dot)
// folded to lower case, or "" when the name has no extension.
func normalizeExtension(filename string) string {
	ext := filepath.Ext(filename)
	return strings.ToLower(ext)
}
// isSpecialFile checks if the filename matches special-cased names such as
// .DS_Store that are identified by their full lowercased base name rather
// than by a regular extension.
//
// Fix: filepath.Ext reports the entire name as the "extension" for
// leading-dot files (Ext(".DS_Store") == ".DS_Store"), so the previous
// ext == "" guard could never match the dotfiles this helper documents.
// Treat both "no extension" and "name is all extension" as special-name
// candidates; names with a real extension are still rejected.
func isSpecialFile(filename string, extensions map[string]bool) bool {
	base := filepath.Base(filename)
	ext := filepath.Ext(base)
	if ext == "" || ext == base {
		return extensions[strings.ToLower(base)]
	}
	return false
}

View File

@@ -0,0 +1,59 @@
package fileproc
import (
"context"
"sync/atomic"
"time"
)
// AcquireReadSlot attempts to acquire a slot for concurrent file reading.
// It spins with a short sleep while all slots are taken, returning the
// context's error if cancellation or a deadline arrives first. When the
// monitor is disabled the call succeeds immediately.
func (rm *ResourceMonitor) AcquireReadSlot(ctx context.Context) error {
	if !rm.enabled {
		return nil
	}
	for {
		inFlight := atomic.LoadInt64(&rm.concurrentReads)
		if inFlight >= int64(rm.maxConcurrentReads) {
			// All slots busy: wait briefly, then re-check.
			select {
			case <-ctx.Done():
				return ctx.Err()
			case <-time.After(time.Millisecond):
			}
			continue
		}
		// Claim the slot; on CAS failure another goroutine raced us, so retry.
		if atomic.CompareAndSwapInt64(&rm.concurrentReads, inFlight, inFlight+1) {
			return nil
		}
	}
}
// ReleaseReadSlot releases a concurrent reading slot previously acquired via
// AcquireReadSlot. It is a no-op when the monitor is disabled.
func (rm *ResourceMonitor) ReleaseReadSlot() {
	if !rm.enabled {
		return
	}
	atomic.AddInt64(&rm.concurrentReads, -1)
}
// CreateFileProcessingContext creates a context with the per-file processing
// timeout applied. When the monitor is disabled or the timeout is
// non-positive, it returns the parent unchanged with a no-op cancel func, so
// callers can always defer the cancel unconditionally.
func (rm *ResourceMonitor) CreateFileProcessingContext(parent context.Context) (context.Context, context.CancelFunc) {
	if !rm.enabled || rm.fileProcessingTimeout <= 0 {
		return parent, func() {}
	}
	return context.WithTimeout(parent, rm.fileProcessingTimeout)
}
// CreateOverallProcessingContext creates a context with the overall processing
// timeout applied. When the monitor is disabled or the timeout is
// non-positive, it returns the parent unchanged with a no-op cancel func, so
// callers can always defer the cancel unconditionally.
func (rm *ResourceMonitor) CreateOverallProcessingContext(parent context.Context) (context.Context, context.CancelFunc) {
	if !rm.enabled || rm.overallTimeout <= 0 {
		return parent, func() {}
	}
	return context.WithTimeout(parent, rm.overallTimeout)
}

View File

@@ -0,0 +1,95 @@
package fileproc
import (
"context"
"testing"
"time"
"github.com/spf13/viper"
"github.com/ivuorinen/gibidify/testutil"
)
// TestResourceMonitor_ConcurrentReadsLimit verifies that AcquireReadSlot
// admits up to maxConcurrentReads callers, blocks (until context deadline)
// beyond that, and admits again after ReleaseReadSlot.
func TestResourceMonitor_ConcurrentReadsLimit(t *testing.T) {
	testutil.ResetViperConfig(t, "")
	// Set a low concurrent reads limit for testing
	viper.Set("resourceLimits.enabled", true)
	viper.Set("resourceLimits.maxConcurrentReads", 2)
	rm := NewResourceMonitor()
	defer rm.Close()
	// Short deadline: the third acquire below is expected to hit it.
	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel()
	// First read slot should succeed
	err := rm.AcquireReadSlot(ctx)
	if err != nil {
		t.Errorf("Expected no error for first read slot, got %v", err)
	}
	// Second read slot should succeed
	err = rm.AcquireReadSlot(ctx)
	if err != nil {
		t.Errorf("Expected no error for second read slot, got %v", err)
	}
	// Third read slot should timeout (context deadline exceeded)
	err = rm.AcquireReadSlot(ctx)
	if err == nil {
		t.Error("Expected timeout error for third read slot, got nil")
	}
	// Release one slot and try again
	rm.ReleaseReadSlot()
	// Fresh context: the previous one is already past its deadline.
	ctx2, cancel2 := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel2()
	err = rm.AcquireReadSlot(ctx2)
	if err != nil {
		t.Errorf("Expected no error after releasing a slot, got %v", err)
	}
	// Clean up remaining slots
	rm.ReleaseReadSlot()
	rm.ReleaseReadSlot()
}
// TestResourceMonitor_TimeoutContexts verifies that the per-file and overall
// processing contexts carry deadlines matching the configured timeouts
// (checked with a 100ms tolerance for scheduling slack).
func TestResourceMonitor_TimeoutContexts(t *testing.T) {
	testutil.ResetViperConfig(t, "")
	// Set short timeouts for testing
	viper.Set("resourceLimits.enabled", true)
	viper.Set("resourceLimits.fileProcessingTimeoutSec", 1) // 1 second
	viper.Set("resourceLimits.overallTimeoutSec", 2)        // 2 seconds
	rm := NewResourceMonitor()
	defer rm.Close()
	parentCtx := context.Background()
	// Test file processing context
	fileCtx, fileCancel := rm.CreateFileProcessingContext(parentCtx)
	defer fileCancel()
	deadline, ok := fileCtx.Deadline()
	if !ok {
		t.Error("Expected file processing context to have a deadline")
	} else if time.Until(deadline) > time.Second+100*time.Millisecond {
		t.Error("File processing timeout appears to be too long")
	}
	// Test overall processing context
	overallCtx, overallCancel := rm.CreateOverallProcessingContext(parentCtx)
	defer overallCancel()
	deadline, ok = overallCtx.Deadline()
	if !ok {
		t.Error("Expected overall processing context to have a deadline")
	} else if time.Until(deadline) > 2*time.Second+100*time.Millisecond {
		t.Error("Overall processing timeout appears to be too long")
	}
}

View File

@@ -0,0 +1,81 @@
package fileproc
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/spf13/viper"
"github.com/ivuorinen/gibidify/testutil"
)
// TestResourceMonitor_Integration drives the full per-file workflow
// (validate -> acquire slot -> memory check -> record -> release) over a
// small set of real temp files and then checks the aggregated metrics.
func TestResourceMonitor_Integration(t *testing.T) {
	// Create temporary test directory
	tempDir := t.TempDir()
	// Create test files
	testFiles := []string{"test1.txt", "test2.txt", "test3.txt"}
	for _, filename := range testFiles {
		testutil.CreateTestFile(t, tempDir, filename, []byte("test content"))
	}
	testutil.ResetViperConfig(t, "")
	// Configure resource limits generously so the workflow never trips them.
	viper.Set("resourceLimits.enabled", true)
	viper.Set("resourceLimits.maxFiles", 5)
	viper.Set("resourceLimits.maxTotalSize", 1024*1024) // 1MB
	viper.Set("resourceLimits.fileProcessingTimeoutSec", 10)
	viper.Set("resourceLimits.maxConcurrentReads", 3)
	rm := NewResourceMonitor()
	defer rm.Close()
	ctx := context.Background()
	// Test file processing workflow
	for _, filename := range testFiles {
		filePath := filepath.Join(tempDir, filename)
		fileInfo, err := os.Stat(filePath)
		if err != nil {
			t.Fatalf("Failed to stat test file %s: %v", filePath, err)
		}
		// Validate file can be processed
		err = rm.ValidateFileProcessing(filePath, fileInfo.Size())
		if err != nil {
			t.Errorf("Failed to validate file %s: %v", filePath, err)
			continue
		}
		// Acquire read slot
		err = rm.AcquireReadSlot(ctx)
		if err != nil {
			t.Errorf("Failed to acquire read slot for %s: %v", filePath, err)
			continue
		}
		// Check memory limits
		err = rm.CheckHardMemoryLimit()
		if err != nil {
			t.Errorf("Memory limit check failed for %s: %v", filePath, err)
		}
		// Record processing
		rm.RecordFileProcessed(fileInfo.Size())
		// Release read slot
		rm.ReleaseReadSlot()
	}
	// Verify final metrics
	metrics := rm.GetMetrics()
	if metrics.FilesProcessed != int64(len(testFiles)) {
		t.Errorf("Expected %d files processed, got %d", len(testFiles), metrics.FilesProcessed)
	}
	// Test resource limit logging (smoke test: must not panic)
	rm.LogResourceInfo()
}

View File

@@ -0,0 +1,79 @@
package fileproc
import (
"runtime"
"sync/atomic"
"time"
"github.com/sirupsen/logrus"
)
// RecordFileProcessed records that a file has been successfully processed,
// updating the atomic file and byte counters. No-op when limits are disabled.
func (rm *ResourceMonitor) RecordFileProcessed(fileSize int64) {
	if !rm.enabled {
		return
	}
	atomic.AddInt64(&rm.filesProcessed, 1)
	atomic.AddInt64(&rm.totalSizeProcessed, fileSize)
}
// GetMetrics returns current resource usage metrics.
// Returns the zero value when resource monitoring is disabled.
// The read lock guards violationLogged, degradationActive and
// emergencyStopRequested; the counters are read atomically.
func (rm *ResourceMonitor) GetMetrics() ResourceMetrics {
	if !rm.enableResourceMon {
		return ResourceMetrics{}
	}
	rm.mu.RLock()
	defer rm.mu.RUnlock()
	var m runtime.MemStats
	runtime.ReadMemStats(&m)
	filesProcessed := atomic.LoadInt64(&rm.filesProcessed)
	totalSize := atomic.LoadInt64(&rm.totalSizeProcessed)
	duration := time.Since(rm.startTime)
	// Guard derived averages against division by zero.
	avgFileSize := float64(0)
	if filesProcessed > 0 {
		avgFileSize = float64(totalSize) / float64(filesProcessed)
	}
	processingRate := float64(0)
	if duration.Seconds() > 0 {
		processingRate = float64(filesProcessed) / duration.Seconds()
	}
	// Collect violations (map iteration order is unspecified).
	violations := make([]string, 0, len(rm.violationLogged))
	for violation := range rm.violationLogged {
		violations = append(violations, violation)
	}
	return ResourceMetrics{
		FilesProcessed:      filesProcessed,
		TotalSizeProcessed:  totalSize,
		ConcurrentReads:     atomic.LoadInt64(&rm.concurrentReads),
		ProcessingDuration:  duration,
		AverageFileSize:     avgFileSize,
		ProcessingRate:      processingRate,
		MemoryUsageMB:       int64(m.Alloc) / 1024 / 1024,
		MaxMemoryUsageMB:    int64(rm.hardMemoryLimitMB),
		ViolationsDetected:  violations,
		DegradationActive:   rm.degradationActive,
		EmergencyStopActive: rm.emergencyStopRequested,
		LastUpdated:         time.Now(),
	}
}
// LogResourceInfo logs current resource limit configuration, or a single
// line noting that resource limits are disabled.
func (rm *ResourceMonitor) LogResourceInfo() {
	if !rm.enabled {
		logrus.Info("Resource limits disabled")
		return
	}
	logrus.Infof("Resource limits enabled: maxFiles=%d, maxTotalSize=%dMB, fileTimeout=%ds, overallTimeout=%ds",
		rm.maxFiles, rm.maxTotalSize/1024/1024, int(rm.fileProcessingTimeout.Seconds()), int(rm.overallTimeout.Seconds()))
	logrus.Infof("Resource limits: maxConcurrentReads=%d, rateLimitFPS=%d, hardMemoryMB=%d",
		rm.maxConcurrentReads, rm.rateLimitFilesPerSec, rm.hardMemoryLimitMB)
	logrus.Infof("Resource features: gracefulDegradation=%v, monitoring=%v",
		rm.enableGracefulDegr, rm.enableResourceMon)
}

View File

@@ -0,0 +1,49 @@
package fileproc
import (
"testing"
"time"
"github.com/spf13/viper"
"github.com/ivuorinen/gibidify/testutil"
)
// TestResourceMonitor_Metrics verifies that GetMetrics aggregates recorded
// file counts, byte totals, and derived averages correctly.
func TestResourceMonitor_Metrics(t *testing.T) {
	testutil.ResetViperConfig(t, "")
	viper.Set("resourceLimits.enabled", true)
	viper.Set("resourceLimits.enableResourceMonitoring", true)
	rm := NewResourceMonitor()
	defer rm.Close()
	// Process some files to generate metrics
	rm.RecordFileProcessed(1000)
	rm.RecordFileProcessed(2000)
	rm.RecordFileProcessed(500)
	metrics := rm.GetMetrics()
	// Verify metrics
	if metrics.FilesProcessed != 3 {
		t.Errorf("Expected 3 files processed, got %d", metrics.FilesProcessed)
	}
	if metrics.TotalSizeProcessed != 3500 {
		t.Errorf("Expected total size 3500, got %d", metrics.TotalSizeProcessed)
	}
	// Exact float comparison is safe: both sides compute 3500/3 identically.
	expectedAvgSize := float64(3500) / float64(3)
	if metrics.AverageFileSize != expectedAvgSize {
		t.Errorf("Expected average file size %.2f, got %.2f", expectedAvgSize, metrics.AverageFileSize)
	}
	if metrics.ProcessingRate <= 0 {
		t.Error("Expected positive processing rate")
	}
	if !metrics.LastUpdated.After(time.Now().Add(-time.Second)) {
		t.Error("Expected recent LastUpdated timestamp")
	}
}

View File

@@ -0,0 +1,36 @@
package fileproc
import (
"context"
"time"
"github.com/sirupsen/logrus"
)
// WaitForRateLimit waits for a rate-limit token when rate limiting is
// enabled. It returns nil immediately when limits or the rate limiter are
// disabled, returns ctx.Err() on cancellation, and after one second gives
// up waiting and proceeds without a token (logging a warning).
func (rm *ResourceMonitor) WaitForRateLimit(ctx context.Context) error {
	if !rm.enabled || rm.rateLimitFilesPerSec <= 0 {
		return nil
	}
	select {
	case <-rm.rateLimitChan:
		// Got a token.
		return nil
	case <-ctx.Done():
		return ctx.Err()
	case <-time.After(time.Second):
		// Fallback timeout: never stall indefinitely on the limiter.
		logrus.Warn("Rate limiting timeout exceeded, continuing without rate limit")
		return nil
	}
}
// rateLimiterRefill refills the rate limiting channel periodically.
// One token is made available per ticker tick; when the bucket is already
// full the tick is dropped (non-blocking send).
//
// NOTE(review): time.Ticker.Stop does not close the ticker's channel, so
// this loop never terminates after Close() — the goroutine stays parked on
// the receive forever (goroutine leak). A fix needs a separate quit signal
// in the ResourceMonitor; confirm before relying on Close for full cleanup.
func (rm *ResourceMonitor) rateLimiterRefill() {
	for range rm.rateLimiter.C {
		select {
		case rm.rateLimitChan <- struct{}{}:
		default:
			// Channel is full, skip
		}
	}
}

View File

@@ -0,0 +1,40 @@
package fileproc
import (
"context"
"testing"
"time"
"github.com/spf13/viper"
"github.com/ivuorinen/gibidify/testutil"
)
// TestResourceMonitor_RateLimiting exercises WaitForRateLimit with a low
// configured rate. The timing check is only logged, not asserted: the token
// channel is pre-filled in NewResourceMonitor, so an initial burst up to the
// per-second rate is expected to pass without delay.
func TestResourceMonitor_RateLimiting(t *testing.T) {
	testutil.ResetViperConfig(t, "")
	// Enable rate limiting with a low rate for testing
	viper.Set("resourceLimits.enabled", true)
	viper.Set("resourceLimits.rateLimitFilesPerSec", 5) // 5 files per second
	rm := NewResourceMonitor()
	defer rm.Close()
	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
	defer cancel()
	// First few requests should succeed quickly
	start := time.Now()
	for i := 0; i < 3; i++ {
		err := rm.WaitForRateLimit(ctx)
		if err != nil {
			t.Errorf("Expected no error for rate limit wait %d, got %v", i, err)
		}
	}
	// Should have taken some time due to rate limiting
	duration := time.Since(start)
	if duration < 200*time.Millisecond {
		t.Logf("Rate limiting may not be working as expected, took only %v", duration)
	}
}

View File

@@ -0,0 +1,22 @@
package fileproc
// IsEmergencyStopActive returns whether emergency stop is active.
func (rm *ResourceMonitor) IsEmergencyStopActive() bool {
	rm.mu.RLock()
	active := rm.emergencyStopRequested
	rm.mu.RUnlock()
	return active
}
// IsDegradationActive returns whether degradation mode is active.
func (rm *ResourceMonitor) IsDegradationActive() bool {
	rm.mu.RLock()
	active := rm.degradationActive
	rm.mu.RUnlock()
	return active
}
// Close cleans up the resource monitor by stopping the rate-limit ticker.
//
// NOTE(review): Ticker.Stop does not close the ticker's channel, so the
// rateLimiterRefill goroutine started in NewResourceMonitor remains blocked
// on its receive after Close — a goroutine leak. Fixing this requires a
// separate quit signal; confirm whether monitors are long-lived enough for
// this to matter in practice.
func (rm *ResourceMonitor) Close() {
	if rm.rateLimiter != nil {
		rm.rateLimiter.Stop()
	}
}

View File

@@ -0,0 +1,108 @@
package fileproc
import (
"sync"
"time"
"github.com/ivuorinen/gibidify/config"
)
// ResourceMonitor monitors resource usage and enforces limits to prevent DoS attacks.
// Configuration fields are read once in NewResourceMonitor; counters are
// updated atomically; the map and boolean state fields are guarded by mu.
type ResourceMonitor struct {
	enabled               bool          // master switch: most methods no-op when false
	maxFiles              int           // maximum number of files to process
	maxTotalSize          int64         // maximum cumulative bytes to process
	fileProcessingTimeout time.Duration // per-file processing deadline
	overallTimeout        time.Duration // whole-run processing deadline
	maxConcurrentReads    int           // cap on simultaneous read slots
	rateLimitFilesPerSec  int           // token refill rate; 0 disables rate limiting
	hardMemoryLimitMB     int           // hard heap limit in MB; 0 disables the check
	enableGracefulDegr    bool          // try GC before emergency stop on memory pressure
	enableResourceMon     bool          // gate for GetMetrics reporting

	// Current state tracking (counters accessed via sync/atomic).
	filesProcessed       int64
	totalSizeProcessed   int64
	concurrentReads      int64
	startTime            time.Time
	lastRateLimitCheck   time.Time
	hardMemoryLimitBytes int64 // hardMemoryLimitMB converted to bytes

	// Rate limiting: ticker-driven token bucket.
	rateLimiter   *time.Ticker
	rateLimitChan chan struct{}

	// Synchronization for the fields below.
	mu                     sync.RWMutex
	violationLogged        map[string]bool // violation keys already logged (dedupe)
	degradationActive      bool
	emergencyStopRequested bool
}
// ResourceMetrics holds comprehensive resource usage metrics.
// It is a point-in-time snapshot produced by ResourceMonitor.GetMetrics.
type ResourceMetrics struct {
	FilesProcessed      int64         `json:"files_processed"`
	TotalSizeProcessed  int64         `json:"total_size_processed"`
	ConcurrentReads     int64         `json:"concurrent_reads"`
	ProcessingDuration  time.Duration `json:"processing_duration"`
	AverageFileSize     float64       `json:"average_file_size"`
	ProcessingRate      float64       `json:"processing_rate_files_per_sec"`
	MemoryUsageMB       int64         `json:"memory_usage_mb"`
	MaxMemoryUsageMB    int64         `json:"max_memory_usage_mb"`
	ViolationsDetected  []string      `json:"violations_detected"`
	DegradationActive   bool          `json:"degradation_active"`
	EmergencyStopActive bool          `json:"emergency_stop_active"`
	LastUpdated         time.Time     `json:"last_updated"`
}
// ResourceViolation represents a detected resource limit violation.
// Current and Limit are interface{} so the same shape can carry counts,
// byte sizes, or durations depending on the violation Type.
type ResourceViolation struct {
	Type      string                 `json:"type"`
	Message   string                 `json:"message"`
	Current   interface{}            `json:"current"`
	Limit     interface{}            `json:"limit"`
	Timestamp time.Time              `json:"timestamp"`
	Context   map[string]interface{} `json:"context"`
}
// NewResourceMonitor creates a new resource monitor from the current
// configuration. When rate limiting is enabled it also starts the token
// bucket: a pre-filled channel (allowing an initial burst of up to one
// second's worth of files) refilled by a background ticker goroutine.
func NewResourceMonitor() *ResourceMonitor {
	rm := &ResourceMonitor{
		enabled:               config.GetResourceLimitsEnabled(),
		maxFiles:              config.GetMaxFiles(),
		maxTotalSize:          config.GetMaxTotalSize(),
		fileProcessingTimeout: time.Duration(config.GetFileProcessingTimeoutSec()) * time.Second,
		overallTimeout:        time.Duration(config.GetOverallTimeoutSec()) * time.Second,
		maxConcurrentReads:    config.GetMaxConcurrentReads(),
		rateLimitFilesPerSec:  config.GetRateLimitFilesPerSec(),
		hardMemoryLimitMB:     config.GetHardMemoryLimitMB(),
		enableGracefulDegr:    config.GetEnableGracefulDegradation(),
		enableResourceMon:     config.GetEnableResourceMonitoring(),
		startTime:             time.Now(),
		lastRateLimitCheck:    time.Now(),
		violationLogged:       make(map[string]bool),
		hardMemoryLimitBytes:  int64(config.GetHardMemoryLimitMB()) * 1024 * 1024,
	}
	// Initialize rate limiter if rate limiting is enabled.
	if rm.enabled && rm.rateLimitFilesPerSec > 0 {
		interval := time.Second / time.Duration(rm.rateLimitFilesPerSec)
		rm.rateLimiter = time.NewTicker(interval)
		rm.rateLimitChan = make(chan struct{}, rm.rateLimitFilesPerSec)
		// Pre-fill the token channel. The channel's capacity equals the
		// number of sends, so these can never block (the previous
		// select/default with a goto label was dead code).
		for i := 0; i < rm.rateLimitFilesPerSec; i++ {
			rm.rateLimitChan <- struct{}{}
		}
		// Start rate limiter refill goroutine.
		go rm.rateLimiterRefill()
	}
	return rm
}

View File

@@ -0,0 +1,74 @@
package fileproc
import (
"context"
"testing"
"time"
"github.com/spf13/viper"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/testutil"
)
// TestResourceMonitor_NewResourceMonitor verifies that a monitor built from
// default (reset) configuration picks up the config package's defaults.
func TestResourceMonitor_NewResourceMonitor(t *testing.T) {
	// Reset viper for clean test state
	testutil.ResetViperConfig(t, "")
	rm := NewResourceMonitor()
	if rm == nil {
		t.Fatal("NewResourceMonitor() returned nil")
	}
	// Test default values are set correctly
	if !rm.enabled {
		t.Error("Expected resource monitor to be enabled by default")
	}
	if rm.maxFiles != config.DefaultMaxFiles {
		t.Errorf("Expected maxFiles to be %d, got %d", config.DefaultMaxFiles, rm.maxFiles)
	}
	if rm.maxTotalSize != config.DefaultMaxTotalSize {
		t.Errorf("Expected maxTotalSize to be %d, got %d", config.DefaultMaxTotalSize, rm.maxTotalSize)
	}
	if rm.fileProcessingTimeout != time.Duration(config.DefaultFileProcessingTimeoutSec)*time.Second {
		t.Errorf("Expected fileProcessingTimeout to be %v, got %v",
			time.Duration(config.DefaultFileProcessingTimeoutSec)*time.Second, rm.fileProcessingTimeout)
	}
	// Clean up
	rm.Close()
}
// TestResourceMonitor_DisabledResourceLimits verifies that every enforcement
// entry point (validation, read slots, rate limiting) becomes a no-op when
// resourceLimits.enabled is false.
func TestResourceMonitor_DisabledResourceLimits(t *testing.T) {
	// Reset viper for clean test state
	testutil.ResetViperConfig(t, "")
	// Set resource limits disabled
	viper.Set("resourceLimits.enabled", false)
	rm := NewResourceMonitor()
	defer rm.Close()
	// Test that validation passes when disabled
	err := rm.ValidateFileProcessing("/tmp/test.txt", 1000)
	if err != nil {
		t.Errorf("Expected no error when resource limits disabled, got %v", err)
	}
	// Test that read slot acquisition works when disabled
	ctx := context.Background()
	err = rm.AcquireReadSlot(ctx)
	if err != nil {
		t.Errorf("Expected no error when acquiring read slot with disabled limits, got %v", err)
	}
	rm.ReleaseReadSlot()
	// Test that rate limiting is bypassed when disabled
	err = rm.WaitForRateLimit(ctx)
	if err != nil {
		t.Errorf("Expected no error when rate limiting disabled, got %v", err)
	}
}

View File

@@ -0,0 +1,148 @@
package fileproc
import (
"runtime"
"sync/atomic"
"time"
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/utils"
)
// ValidateFileProcessing checks if a file can be processed based on resource
// limits. It is a pre-flight check: it reads the atomic counters but does not
// reserve anything, so callers record usage separately via
// RecordFileProcessed. Checks run in order: emergency stop, file count,
// total size, overall timeout; the first violated limit is returned as a
// structured validation error. Returns nil when limits are disabled.
func (rm *ResourceMonitor) ValidateFileProcessing(filePath string, fileSize int64) error {
	if !rm.enabled {
		return nil
	}
	// RLock guards emergencyStopRequested; counters use atomic loads.
	rm.mu.RLock()
	defer rm.mu.RUnlock()
	// Check if emergency stop is active
	if rm.emergencyStopRequested {
		return utils.NewStructuredError(
			utils.ErrorTypeValidation,
			utils.CodeResourceLimitMemory,
			"processing stopped due to emergency memory condition",
			filePath,
			map[string]interface{}{
				"emergency_stop_active": true,
			},
		)
	}
	// Check file count limit
	currentFiles := atomic.LoadInt64(&rm.filesProcessed)
	if int(currentFiles) >= rm.maxFiles {
		return utils.NewStructuredError(
			utils.ErrorTypeValidation,
			utils.CodeResourceLimitFiles,
			"maximum file count limit exceeded",
			filePath,
			map[string]interface{}{
				"current_files": currentFiles,
				"max_files":     rm.maxFiles,
			},
		)
	}
	// Check total size limit (would this file push us over the cap?)
	currentTotalSize := atomic.LoadInt64(&rm.totalSizeProcessed)
	if currentTotalSize+fileSize > rm.maxTotalSize {
		return utils.NewStructuredError(
			utils.ErrorTypeValidation,
			utils.CodeResourceLimitTotalSize,
			"maximum total size limit would be exceeded",
			filePath,
			map[string]interface{}{
				"current_total_size": currentTotalSize,
				"file_size":          fileSize,
				"max_total_size":     rm.maxTotalSize,
			},
		)
	}
	// Check overall timeout
	if time.Since(rm.startTime) > rm.overallTimeout {
		return utils.NewStructuredError(
			utils.ErrorTypeValidation,
			utils.CodeResourceLimitTimeout,
			"overall processing timeout exceeded",
			filePath,
			map[string]interface{}{
				"processing_duration": time.Since(rm.startTime),
				"overall_timeout":     rm.overallTimeout,
			},
		)
	}
	return nil
}
// CheckHardMemoryLimit checks if the hard memory limit is exceeded and takes
// action. When graceful degradation is enabled it forces a GC and re-checks:
// if memory is still over the limit it latches emergencyStopRequested
// (honored by ValidateFileProcessing) and returns an error; if the GC freed
// enough it latches degradationActive and returns nil. Without graceful
// degradation any overage is an immediate error. The violation is logged
// only once per run via the violationLogged map. Returns nil when limits or
// the memory cap are disabled.
func (rm *ResourceMonitor) CheckHardMemoryLimit() error {
	if !rm.enabled || rm.hardMemoryLimitMB <= 0 {
		return nil
	}
	var m runtime.MemStats
	runtime.ReadMemStats(&m)
	currentMemory := int64(m.Alloc)
	if currentMemory > rm.hardMemoryLimitBytes {
		// Write lock: we may mutate violationLogged, degradationActive,
		// and emergencyStopRequested below.
		rm.mu.Lock()
		defer rm.mu.Unlock()
		// Log violation if not already logged
		violationKey := "hard_memory_limit"
		if !rm.violationLogged[violationKey] {
			logrus.Errorf("Hard memory limit exceeded: %dMB > %dMB",
				currentMemory/1024/1024, rm.hardMemoryLimitMB)
			rm.violationLogged[violationKey] = true
		}
		if rm.enableGracefulDegr {
			// Force garbage collection
			runtime.GC()
			// Check again after GC
			runtime.ReadMemStats(&m)
			currentMemory = int64(m.Alloc)
			if currentMemory > rm.hardMemoryLimitBytes {
				// Still over limit, activate emergency stop
				rm.emergencyStopRequested = true
				return utils.NewStructuredError(
					utils.ErrorTypeValidation,
					utils.CodeResourceLimitMemory,
					"hard memory limit exceeded, emergency stop activated",
					"",
					map[string]interface{}{
						"current_memory_mb": currentMemory / 1024 / 1024,
						"limit_mb":          rm.hardMemoryLimitMB,
						"emergency_stop":    true,
					},
				)
			} else {
				// Memory freed by GC, continue with degradation
				rm.degradationActive = true
				logrus.Info("Memory freed by garbage collection, continuing with degradation mode")
			}
		} else {
			// No graceful degradation, hard stop
			return utils.NewStructuredError(
				utils.ErrorTypeValidation,
				utils.CodeResourceLimitMemory,
				"hard memory limit exceeded",
				"",
				map[string]interface{}{
					"current_memory_mb": currentMemory / 1024 / 1024,
					"limit_mb":          rm.hardMemoryLimitMB,
				},
			)
		}
	}
	return nil
}

View File

@@ -0,0 +1,88 @@
package fileproc
import (
"testing"
"github.com/spf13/viper"
"github.com/ivuorinen/gibidify/testutil"
"github.com/ivuorinen/gibidify/utils"
)
// TestResourceMonitor_FileCountLimit verifies that ValidateFileProcessing
// rejects files once the recorded count reaches resourceLimits.maxFiles and
// that the rejection carries the CodeResourceLimitFiles structured error.
func TestResourceMonitor_FileCountLimit(t *testing.T) {
	testutil.ResetViperConfig(t, "")
	// Set a very low file count limit for testing
	viper.Set("resourceLimits.enabled", true)
	viper.Set("resourceLimits.maxFiles", 2)
	rm := NewResourceMonitor()
	defer rm.Close()
	// First file should pass
	err := rm.ValidateFileProcessing("/tmp/file1.txt", 100)
	if err != nil {
		t.Errorf("Expected no error for first file, got %v", err)
	}
	rm.RecordFileProcessed(100)
	// Second file should pass
	err = rm.ValidateFileProcessing("/tmp/file2.txt", 100)
	if err != nil {
		t.Errorf("Expected no error for second file, got %v", err)
	}
	rm.RecordFileProcessed(100)
	// Third file should fail
	err = rm.ValidateFileProcessing("/tmp/file3.txt", 100)
	if err == nil {
		t.Error("Expected error for third file (exceeds limit), got nil")
	}
	// Verify it's the correct error type
	// NOTE(review): a direct type assertion works here because the error is
	// returned unwrapped; errors.As would be more robust if wrapping is added.
	structErr, ok := err.(*utils.StructuredError)
	if !ok {
		t.Errorf("Expected StructuredError, got %T", err)
	} else if structErr.Code != utils.CodeResourceLimitFiles {
		t.Errorf("Expected error code %s, got %s", utils.CodeResourceLimitFiles, structErr.Code)
	}
}
// TestResourceMonitor_TotalSizeLimit verifies that ValidateFileProcessing
// rejects a file whose size would push the cumulative total past
// resourceLimits.maxTotalSize, with the CodeResourceLimitTotalSize error.
func TestResourceMonitor_TotalSizeLimit(t *testing.T) {
	testutil.ResetViperConfig(t, "")
	// Set a low total size limit for testing (1KB)
	viper.Set("resourceLimits.enabled", true)
	viper.Set("resourceLimits.maxTotalSize", 1024)
	rm := NewResourceMonitor()
	defer rm.Close()
	// First small file should pass (500 of 1024 bytes)
	err := rm.ValidateFileProcessing("/tmp/small.txt", 500)
	if err != nil {
		t.Errorf("Expected no error for small file, got %v", err)
	}
	rm.RecordFileProcessed(500)
	// Second small file should pass (900 of 1024 bytes)
	err = rm.ValidateFileProcessing("/tmp/small2.txt", 400)
	if err != nil {
		t.Errorf("Expected no error for second small file, got %v", err)
	}
	rm.RecordFileProcessed(400)
	// Large file that would exceed limit should fail (900 + 200 > 1024)
	err = rm.ValidateFileProcessing("/tmp/large.txt", 200)
	if err == nil {
		t.Error("Expected error for file that would exceed size limit, got nil")
	}
	// Verify it's the correct error type
	structErr, ok := err.(*utils.StructuredError)
	if !ok {
		t.Errorf("Expected StructuredError, got %T", err)
	} else if structErr.Code != utils.CodeResourceLimitTotalSize {
		t.Errorf("Expected error code %s, got %s", utils.CodeResourceLimitTotalSize, structErr.Code)
	}
}

View File

@@ -4,10 +4,8 @@ package fileproc
import (
"os"
"path/filepath"
"strings"
"github.com/ivuorinen/gibidify/config"
ignore "github.com/sabhiram/go-gitignore"
"github.com/ivuorinen/gibidify/utils"
)
// Walker defines an interface for scanning directories.
@@ -18,22 +16,25 @@ type Walker interface {
// ProdWalker implements Walker using a custom directory walker that
// respects .gitignore and .ignore files, configuration-defined ignore directories,
// and ignores binary and image files by default.
type ProdWalker struct{}
type ProdWalker struct {
filter *FileFilter
}
// ignoreRule holds an ignore matcher along with the base directory where it was loaded.
type ignoreRule struct {
base string
gi *ignore.GitIgnore
// NewProdWalker creates a new production walker with current configuration.
func NewProdWalker() *ProdWalker {
return &ProdWalker{
filter: NewFileFilter(),
}
}
// Walk scans the given root directory recursively and returns a slice of file paths
// that are not ignored based on .gitignore/.ignore files, the configuration, or the default binary/image filter.
func (pw ProdWalker) Walk(root string) ([]string, error) {
absRoot, err := filepath.Abs(root)
func (w *ProdWalker) Walk(root string) ([]string, error) {
absRoot, err := utils.GetAbsolutePath(root)
if err != nil {
return nil, err
return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSPathResolution, "failed to resolve root path").WithFilePath(root)
}
return walkDir(absRoot, absRoot, []ignoreRule{})
return w.walkDir(absRoot, []ignoreRule{})
}
// walkDir recursively walks the directory tree starting at currentDir.
@@ -41,122 +42,34 @@ func (pw ProdWalker) Walk(root string) ([]string, error) {
// appends the corresponding rules to the inherited list. Each file/directory is
// then checked against the accumulated ignore rules, the configuration's list of ignored directories,
// and a default filter that ignores binary and image files.
func walkDir(root string, currentDir string, parentRules []ignoreRule) ([]string, error) {
func (w *ProdWalker) walkDir(currentDir string, parentRules []ignoreRule) ([]string, error) {
var results []string
entries, err := os.ReadDir(currentDir)
if err != nil {
return nil, err
return nil, utils.WrapError(err, utils.ErrorTypeFileSystem, utils.CodeFSAccess, "failed to read directory").WithFilePath(currentDir)
}
// Start with the parent's ignore rules.
rules := make([]ignoreRule, len(parentRules))
copy(rules, parentRules)
// Check for .gitignore and .ignore files in the current directory.
for _, fileName := range []string{".gitignore", ".ignore"} {
ignorePath := filepath.Join(currentDir, fileName)
if info, err := os.Stat(ignorePath); err == nil && !info.IsDir() {
gi, err := ignore.CompileIgnoreFile(ignorePath)
if err == nil {
rules = append(rules, ignoreRule{
base: currentDir,
gi: gi,
})
}
}
}
// Get the list of directories to ignore from configuration.
ignoredDirs := config.GetIgnoredDirectories()
sizeLimit := config.GetFileSizeLimit() // e.g., 5242880 for 5 MB
rules := loadIgnoreRules(currentDir, parentRules)
for _, entry := range entries {
fullPath := filepath.Join(currentDir, entry.Name())
// For directories, check if its name is in the config ignore list.
if entry.IsDir() {
for _, d := range ignoredDirs {
if entry.Name() == d {
// Skip this directory entirely.
goto SkipEntry
}
}
} else {
// Check if file exceeds the configured size limit.
info, err := entry.Info()
if err == nil && info.Size() > sizeLimit {
goto SkipEntry
}
// For files, apply the default filter to ignore binary and image files.
if isBinaryOrImage(fullPath) {
goto SkipEntry
}
if w.filter.shouldSkipEntry(entry, fullPath, rules) {
continue
}
// Check accumulated ignore rules.
for _, rule := range rules {
// Compute the path relative to the base where the ignore rule was defined.
rel, err := filepath.Rel(rule.base, fullPath)
if err != nil {
continue
}
// If the rule matches, skip this entry.
if rule.gi.MatchesPath(rel) {
goto SkipEntry
}
}
// If not ignored, then process the entry.
// Process entry
if entry.IsDir() {
subFiles, err := walkDir(root, fullPath, rules)
subFiles, err := w.walkDir(fullPath, rules)
if err != nil {
return nil, err
return nil, utils.WrapError(err, utils.ErrorTypeProcessing, utils.CodeProcessingTraversal, "failed to traverse subdirectory").WithFilePath(fullPath)
}
results = append(results, subFiles...)
} else {
results = append(results, fullPath)
}
SkipEntry:
continue
}
return results, nil
}
// isBinaryOrImage checks if a file should be considered binary or an image based on its extension.
// The check is case-insensitive.
func isBinaryOrImage(filePath string) bool {
ext := strings.ToLower(filepath.Ext(filePath))
// Common image file extensions.
imageExtensions := map[string]bool{
".png": true,
".jpg": true,
".jpeg": true,
".gif": true,
".bmp": true,
".tiff": true,
".ico": true,
".svg": true,
".webp": true,
}
// Common binary file extensions.
binaryExtensions := map[string]bool{
".exe": true,
".dll": true,
".so": true,
".bin": true,
".dat": true,
".zip": true,
".tar": true,
".gz": true,
".7z": true,
".rar": true,
".DS_Store": true,
}
if imageExtensions[ext] || binaryExtensions[ext] {
return true
}
return false
}

103
fileproc/walker_test.go Normal file
View File

@@ -0,0 +1,103 @@
package fileproc_test
import (
"path/filepath"
"testing"
"github.com/spf13/viper"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/ivuorinen/gibidify/testutil"
)
// TestProdWalkerWithIgnore verifies that Walk honors both .gitignore rules
// (*.txt and the .gitignore file itself) and the configured ignoreDirectories
// list ("vendor"), leaving only file1.go in the results.
func TestProdWalkerWithIgnore(t *testing.T) {
	// Create a temporary directory structure.
	rootDir := t.TempDir()
	subDir := testutil.CreateTestDirectory(t, rootDir, "vendor")
	// Write sample files
	testutil.CreateTestFiles(t, rootDir, []testutil.FileSpec{
		{Name: "file1.go", Content: "content"},
		{Name: "file2.txt", Content: "content"},
	})
	testutil.CreateTestFile(t, subDir, "file_in_vendor.txt", []byte("content")) // should be ignored
	// .gitignore that ignores *.txt and itself
	gitignoreContent := `*.txt
.gitignore
`
	testutil.CreateTestFile(t, rootDir, ".gitignore", []byte(gitignoreContent))
	// Initialize config to ignore "vendor" directory
	testutil.ResetViperConfig(t, "")
	viper.Set("ignoreDirectories", []string{"vendor"})
	// Run walker
	w := fileproc.NewProdWalker()
	found, err := w.Walk(rootDir)
	testutil.MustSucceed(t, err, "walking directory")
	// We expect only file1.go to appear
	if len(found) != 1 {
		t.Errorf("Expected 1 file to pass filters, got %d: %v", len(found), found)
	}
	if len(found) == 1 && filepath.Base(found[0]) != "file1.go" {
		t.Errorf("Expected file1.go, got %s", found[0])
	}
}
// TestProdWalkerBinaryCheck verifies that Walk filters out files with binary
// extensions (.exe) while keeping source files (.go).
func TestProdWalkerBinaryCheck(t *testing.T) {
	rootDir := t.TempDir()
	// Create test files
	testutil.CreateTestFiles(t, rootDir, []testutil.FileSpec{
		{Name: "somefile.exe", Content: "fake-binary-content"},
		{Name: "keep.go", Content: "package main"},
	})
	// Reset and load default config
	testutil.ResetViperConfig(t, "")
	// Reset FileTypeRegistry to ensure clean state
	fileproc.ResetRegistryForTesting()
	// Run walker
	w := fileproc.NewProdWalker()
	found, err := w.Walk(rootDir)
	testutil.MustSucceed(t, err, "walking directory")
	// Only "keep.go" should be returned
	if len(found) != 1 {
		t.Errorf("Expected 1 file, got %d: %v", len(found), found)
	}
	if len(found) == 1 && filepath.Base(found[0]) != "keep.go" {
		t.Errorf("Expected keep.go in results, got %s", found[0])
	}
}
// TestProdWalkerSizeLimit verifies that Walk skips files larger than the
// configured size limit (default 5 MB per the comment below) while keeping
// small files.
func TestProdWalkerSizeLimit(t *testing.T) {
	rootDir := t.TempDir()
	// Create test files: one over the limit, one well under it.
	largeFileData := make([]byte, 6*1024*1024) // 6 MB
	testutil.CreateTestFile(t, rootDir, "largefile.txt", largeFileData)
	testutil.CreateTestFile(t, rootDir, "smallfile.go", []byte("package main"))
	// Reset and load default config, which sets size limit to 5 MB
	testutil.ResetViperConfig(t, "")
	w := fileproc.NewProdWalker()
	found, err := w.Walk(rootDir)
	if err != nil {
		t.Fatalf("Walk returned error: %v", err)
	}
	// We should only get the small file
	if len(found) != 1 {
		t.Errorf("Expected 1 file under size limit, got %d", len(found))
	}
	if len(found) == 1 && filepath.Base(found[0]) != "smallfile.go" {
		t.Errorf("Expected smallfile.go, got %s", found[0])
	}
}

View File

@@ -1,94 +1,34 @@
// Package fileproc provides a writer for the output of the file processor.
package fileproc
import (
"encoding/json"
"fmt"
"os"
"github.com/sirupsen/logrus"
"gopkg.in/yaml.v3"
"github.com/ivuorinen/gibidify/utils"
)
// FileData represents a single file's path and content.
type FileData struct {
Path string `json:"path" yaml:"path"`
Content string `json:"content" yaml:"content"`
}
// OutputData represents the full output structure.
type OutputData struct {
Prefix string `json:"prefix,omitempty" yaml:"prefix,omitempty"`
Files []FileData `json:"files" yaml:"files"`
Suffix string `json:"suffix,omitempty" yaml:"suffix,omitempty"`
}
// StartWriter writes the output in the specified format.
func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format string, prefix, suffix string) {
var files []FileData
// Read from channel until closed
for req := range writeCh {
files = append(files, FileData{Path: req.Path, Content: req.Content})
}
// Create output struct
output := OutputData{Prefix: prefix, Files: files, Suffix: suffix}
// Serialize based on format
var outputData []byte
var err error
// StartWriter writes the output in the specified format with memory optimization.
func StartWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, format, prefix, suffix string) {
switch format {
case "json":
outputData, err = json.MarshalIndent(output, "", " ")
case "yaml":
outputData, err = yaml.Marshal(output)
case "markdown":
outputData = []byte(formatMarkdown(output))
startMarkdownWriter(outFile, writeCh, done, prefix, suffix)
case "json":
startJSONWriter(outFile, writeCh, done, prefix, suffix)
case "yaml":
startYAMLWriter(outFile, writeCh, done, prefix, suffix)
default:
err = fmt.Errorf("unsupported format: %s", format)
}
if err != nil {
logrus.Errorf("Error encoding output: %v", err)
context := map[string]interface{}{
"format": format,
}
err := utils.NewStructuredError(
utils.ErrorTypeValidation,
utils.CodeValidationFormat,
fmt.Sprintf("unsupported format: %s", format),
"",
context,
)
utils.LogError("Failed to encode output", err)
close(done)
return
}
// Write to file
if _, err := outFile.Write(outputData); err != nil {
logrus.Errorf("Error writing to file: %v", err)
}
close(done)
}
func formatMarkdown(output OutputData) string {
markdown := "# " + output.Prefix + "\n\n"
for _, file := range output.Files {
markdown += fmt.Sprintf("## File: `%s`\n```%s\n%s\n```\n\n", file.Path, detectLanguage(file.Path), file.Content)
}
markdown += "# " + output.Suffix
return markdown
}
// detectLanguage tries to infer code block language from file extension.
func detectLanguage(filename string) string {
if len(filename) < 3 {
return ""
}
switch {
case len(filename) >= 3 && filename[len(filename)-3:] == ".go":
return "go"
case len(filename) >= 3 && filename[len(filename)-3:] == ".py":
return "python"
case len(filename) >= 2 && filename[len(filename)-2:] == ".c":
return "c"
case len(filename) >= 3 && filename[len(filename)-3:] == ".js":
return "javascript"
default:
return ""
}
}

View File

@@ -1,45 +1,127 @@
package fileproc
package fileproc_test
import (
"encoding/json"
"os"
"strings"
"sync"
"testing"
"gopkg.in/yaml.v3"
"github.com/ivuorinen/gibidify/fileproc"
)
func TestStartWriter_JSONOutput(t *testing.T) {
outFile, err := os.CreateTemp("", "output.json")
if err != nil {
t.Fatal(err)
}
defer func(name string) {
err := os.Remove(name)
if err != nil {
t.Fatal(err)
}
}(outFile.Name())
writeCh := make(chan WriteRequest)
done := make(chan struct{})
go StartWriter(outFile, writeCh, done, "json", "Prefix", "Suffix")
writeCh <- WriteRequest{Path: "file1.go", Content: "package main"}
writeCh <- WriteRequest{Path: "file2.py", Content: "def hello(): print('Hello')"}
close(writeCh)
<-done
data, err := os.ReadFile(outFile.Name())
if err != nil {
t.Fatal(err)
func TestStartWriter_Formats(t *testing.T) {
// Define table-driven test cases
tests := []struct {
name string
format string
expectError bool
}{
{"JSON format", "json", false},
{"YAML format", "yaml", false},
{"Markdown format", "markdown", false},
{"Invalid format", "invalid", true},
}
var output OutputData
if err := json.Unmarshal(data, &output); err != nil {
t.Fatalf("JSON output is invalid: %v", err)
}
if len(output.Files) != 2 {
t.Errorf("Expected 2 files, got %d", len(output.Files))
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
data := runWriterTest(t, tc.format)
if tc.expectError {
verifyErrorOutput(t, data)
} else {
verifyValidOutput(t, data, tc.format)
verifyPrefixSuffix(t, data)
}
})
}
}
// runWriterTest executes the writer with the given format and returns the output data.
func runWriterTest(t *testing.T, format string) []byte {
	t.Helper()
	tmp, err := os.CreateTemp(t.TempDir(), "gibidify_test_output")
	if err != nil {
		t.Fatalf("Failed to create temp file: %v", err)
	}
	defer func() {
		if cerr := tmp.Close(); cerr != nil {
			t.Errorf("close temp file: %v", cerr)
		}
		if rerr := os.Remove(tmp.Name()); rerr != nil {
			t.Errorf("remove temp file: %v", rerr)
		}
	}()

	// Buffer both sample requests up front (capacity 2) and close the
	// channel before the writer starts; the writer then simply drains it.
	requests := make(chan fileproc.WriteRequest, 2)
	requests <- fileproc.WriteRequest{Path: "sample.go", Content: "package main"}
	requests <- fileproc.WriteRequest{Path: "example.py", Content: "def foo(): pass"}
	close(requests)

	finished := make(chan struct{})
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		fileproc.StartWriter(tmp, requests, finished, format, "PREFIX", "SUFFIX")
	}()
	// Wait for the goroutine, then for the writer's own done signal.
	wg.Wait()
	<-finished

	out, readErr := os.ReadFile(tmp.Name())
	if readErr != nil {
		t.Fatalf("Error reading output file: %v", readErr)
	}
	return out
}
// verifyErrorOutput checks that error cases produce no output.
func verifyErrorOutput(t *testing.T, data []byte) {
	t.Helper()
	if len(data) == 0 {
		return
	}
	t.Errorf("Expected no output for invalid format, got:\n%s", data)
}
// verifyValidOutput checks format-specific output validity.
func verifyValidOutput(t *testing.T, data []byte, format string) {
	t.Helper()
	switch format {
	case "json":
		var decoded fileproc.OutputData
		if err := json.Unmarshal(data, &decoded); err != nil {
			t.Errorf("JSON unmarshal failed: %v", err)
		}
	case "yaml":
		var decoded fileproc.OutputData
		if err := yaml.Unmarshal(data, &decoded); err != nil {
			t.Errorf("YAML unmarshal failed: %v", err)
		}
	case "markdown":
		// Markdown output is not parsed; just require fenced code blocks.
		if !strings.Contains(string(data), "```") {
			t.Error("Expected markdown code fences not found")
		}
	}
}
// verifyPrefixSuffix checks that output contains expected prefix and suffix.
func verifyPrefixSuffix(t *testing.T, data []byte) {
	t.Helper()
	text := string(data)
	checks := []struct{ marker, label string }{
		{"PREFIX", "prefix"},
		{"SUFFIX", "suffix"},
	}
	for _, c := range checks {
		if !strings.Contains(text, c.marker) {
			t.Errorf("Missing "+c.label+" in output: %s", data)
		}
	}
}

148
fileproc/yaml_writer.go Normal file
View File

@@ -0,0 +1,148 @@
package fileproc
import (
"bufio"
"fmt"
"io"
"os"
"strings"
"github.com/ivuorinen/gibidify/utils"
)
// YAMLWriter handles YAML format output with streaming support.
// YAML is emitted by hand (no encoder) so large file contents can be
// streamed to the output file without being buffered fully in memory.
type YAMLWriter struct {
	outFile *os.File // destination for all YAML output
}
// NewYAMLWriter creates a new YAML writer that emits to outFile.
func NewYAMLWriter(outFile *os.File) *YAMLWriter {
	w := YAMLWriter{outFile: outFile}
	return &w
}
// Start writes the YAML header: the quoted prefix and suffix scalars
// followed by the "files:" sequence key.
func (w *YAMLWriter) Start(prefix, suffix string) error {
	header := fmt.Sprintf("prefix: %s\nsuffix: %s\nfiles:\n", yamlQuoteString(prefix), yamlQuoteString(suffix))
	if _, err := w.outFile.WriteString(header); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML header")
	}
	return nil
}
// WriteFile writes a file entry in YAML format, dispatching on whether
// the request carries inline content or a streaming reader.
func (w *YAMLWriter) WriteFile(req WriteRequest) error {
	if !req.IsStream {
		return w.writeInline(req)
	}
	return w.writeStreaming(req)
}
// Close writes the YAML footer (no footer needed for YAML).
// It exists to complete the writer lifecycle driven by startYAMLWriter
// and always succeeds.
func (w *YAMLWriter) Close() error {
	return nil
}
// writeStreaming writes a large file as YAML in streaming chunks.
//
// The entry header (path, language, "content: |" literal block scalar
// marker) is written first; the reader's contents are then copied line
// by line via streamYAMLContent. The reader is always closed on return,
// even when a write fails.
func (w *YAMLWriter) writeStreaming(req WriteRequest) error {
	defer w.closeReader(req.Reader, req.Path)
	language := detectLanguage(req.Path)
	// Write YAML file entry start
	if _, err := fmt.Fprintf(w.outFile, " - path: %s\n language: %s\n content: |\n", yamlQuoteString(req.Path), language); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML file start").WithFilePath(req.Path)
	}
	// Stream content with YAML indentation
	return w.streamYAMLContent(req.Reader, req.Path)
}
// writeInline writes a small file directly as YAML.
//
// The content is already fully in memory (req.Content), so it is split
// into lines and each line is re-emitted indented under the entry's
// "content: |" literal block scalar.
func (w *YAMLWriter) writeInline(req WriteRequest) error {
	language := detectLanguage(req.Path)
	fileData := FileData{
		Path: req.Path,
		Content: req.Content,
		Language: language,
	}
	// Write YAML entry
	if _, err := fmt.Fprintf(w.outFile, " - path: %s\n language: %s\n content: |\n", yamlQuoteString(fileData.Path), fileData.Language); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML entry start").WithFilePath(req.Path)
	}
	// Write indented content.
	// NOTE(review): content ending in "\n" yields a trailing "" element
	// from strings.Split and therefore an extra blank output line —
	// confirm this round-trips as intended.
	lines := strings.Split(fileData.Content, "\n")
	for _, line := range lines {
		if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML content line").WithFilePath(req.Path)
		}
	}
	return nil
}
// streamYAMLContent streams content with YAML indentation.
//
// Each input line is re-emitted indented so it nests under the entry's
// "content: |" literal block scalar. Read and write failures are wrapped
// with the file path for context.
func (w *YAMLWriter) streamYAMLContent(reader io.Reader, path string) error {
	scanner := bufio.NewScanner(reader)
	// bufio.Scanner's default token limit is 64KiB; this streaming path
	// exists precisely for large files, so a single long line (e.g.
	// minified JS) would previously abort with bufio.ErrTooLong. Raise
	// the maximum token size to 1MiB.
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for scanner.Scan() {
		line := scanner.Text()
		if _, err := fmt.Fprintf(w.outFile, " %s\n", line); err != nil {
			return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOWrite, "failed to write YAML line").WithFilePath(path)
		}
	}
	if err := scanner.Err(); err != nil {
		return utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIORead, "failed to scan YAML content").WithFilePath(path)
	}
	return nil
}
// closeReader safely closes a reader if it implements io.Closer.
// Close failures are logged rather than returned, since by this point
// the entry has already been written.
func (w *YAMLWriter) closeReader(reader io.Reader, path string) {
	closer, ok := reader.(io.Closer)
	if !ok {
		return
	}
	if err := closer.Close(); err != nil {
		utils.LogError(
			"Failed to close file reader",
			utils.WrapError(err, utils.ErrorTypeIO, utils.CodeIOClose, "failed to close file reader").WithFilePath(path),
		)
	}
}
// yamlQuoteString quotes a string for YAML output if needed.
//
// Plain strings are returned as-is; empty strings and strings containing
// YAML-significant characters are wrapped in double quotes. Inside the
// quotes, backslash, double quote, newline, carriage return, and tab are
// escaped so the result is always a single valid YAML double-quoted
// scalar. (The original escaped only `"`, which produced invalid YAML
// for values containing literal newlines, tabs, or backslashes.)
func yamlQuoteString(s string) string {
	if s == "" {
		return `""`
	}
	// Fast path: nothing special, no quoting required.
	if !strings.ContainsAny(s, "\n\r\t:\"'\\") {
		return s
	}
	// Escape backslash first so the other escapes are not double-escaped.
	escaper := strings.NewReplacer(
		`\`, `\\`,
		`"`, `\"`,
		"\n", `\n`,
		"\r", `\r`,
		"\t", `\t`,
	)
	return `"` + escaper.Replace(s) + `"`
}
// startYAMLWriter handles YAML format output with streaming support.
// It drains writeCh, logging (not propagating) per-file failures, and
// closes done when finished.
func startYAMLWriter(outFile *os.File, writeCh <-chan WriteRequest, done chan<- struct{}, prefix, suffix string) {
	defer close(done)

	w := NewYAMLWriter(outFile)
	if err := w.Start(prefix, suffix); err != nil {
		utils.LogError("Failed to write YAML header", err)
		return
	}
	for req := range writeCh {
		if writeErr := w.WriteFile(req); writeErr != nil {
			utils.LogError("Failed to write YAML file", writeErr)
		}
	}
	if err := w.Close(); err != nil {
		utils.LogError("Failed to write YAML end", err)
	}
}

39
go.mod
View File

@@ -1,33 +1,32 @@
module github.com/ivuorinen/gibidify
go 1.23
go 1.24.1
require (
github.com/fatih/color v1.18.0
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
github.com/schollz/progressbar/v3 v3.18.0
github.com/sirupsen/logrus v1.9.3
github.com/spf13/viper v1.19.0
github.com/spf13/viper v1.20.0
gopkg.in/yaml.v3 v3.0.1
)
require (
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
github.com/sagikazarmark/locafero v0.4.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/fsnotify/fsnotify v1.8.0 // indirect
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/pelletier/go-toml/v2 v2.2.3 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/sagikazarmark/locafero v0.8.0 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
github.com/spf13/afero v1.11.0 // indirect
github.com/spf13/cast v1.6.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/spf13/afero v1.14.0 // indirect
github.com/spf13/cast v1.7.1 // indirect
github.com/spf13/pflag v1.0.6 // indirect
github.com/subosito/gotenv v1.6.0 // indirect
go.uber.org/atomic v1.9.0 // indirect
go.uber.org/multierr v1.9.0 // indirect
golang.org/x/crypto v0.31.0 // indirect
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
golang.org/x/net v0.33.0 // indirect
golang.org/x/sys v0.29.0 // indirect
golang.org/x/text v0.21.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/sys v0.31.0 // indirect
golang.org/x/term v0.28.0 // indirect
golang.org/x/text v0.23.0 // indirect
)

106
go.sum
View File

@@ -1,86 +1,78 @@
github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM=
github.com/chengxilo/virtualterm v1.0.4/go.mod h1:DyxxBZz/x1iqJjFxTFcr6/x+jSpqN0iwWCOK1q10rlY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss=
github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M=
github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI=
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs=
github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ=
github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4=
github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE=
github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=
github.com/sagikazarmark/locafero v0.8.0 h1:mXaMVw7IqxNBxfv3LdWt9MDmcWDQ1fagDH918lOdVaQ=
github.com/sagikazarmark/locafero v0.8.0/go.mod h1:UBUyz37V+EdMS3hDF3QWIiVr/2dPrx49OMO0Bn0hJqk=
github.com/schollz/progressbar/v3 v3.18.0 h1:uXdoHABRFmNIjUfte/Ex7WtuyVslrw2wVPQmCN62HpA=
github.com/schollz/progressbar/v3 v3.18.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0=
github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8=
github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY=
github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0=
github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/viper v1.19.0 h1:RWq5SEjt8o25SROyN3z2OrDB9l7RPd3lwTWU8EcEdcI=
github.com/spf13/viper v1.19.0/go.mod h1:GQUN9bilAbhU/jgc1bKs99f/suXKeUMct8Adx5+Ntkg=
github.com/spf13/afero v1.14.0 h1:9tH6MapGnn/j0eb0yIXiLjERO8RB6xIVZRDCX7PtqWA=
github.com/spf13/afero v1.14.0/go.mod h1:acJQ8t0ohCGuMN3O+Pv0V0hgMxNYDlvdk+VTfyZmbYo=
github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y=
github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/viper v1.20.0 h1:zrxIyR3RQIOsarIrgL8+sAvALXul9jeEPa06Y0Ph6vY=
github.com/spf13/viper v1.20.0/go.mod h1:P9Mdzt1zoHIG8m2eZQinpiBjo6kCmZSKBClNNqjJvu4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE=
go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI=
go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ=
golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g=
golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k=
golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg=
golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek=
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,120 +0,0 @@
package main
import (
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"testing"
"time"
)
// TestIntegrationFullCLI simulates a full run of the CLI application using adaptive concurrency.
func TestIntegrationFullCLI(t *testing.T) {
// Create a temporary source directory and populate it with test files.
srcDir, err := ioutil.TempDir("", "gibidify_src")
if err != nil {
t.Fatalf("Failed to create temp source directory: %v", err)
}
defer os.RemoveAll(srcDir)
// Create two test files.
file1 := filepath.Join(srcDir, "file1.txt")
if err := ioutil.WriteFile(file1, []byte("Hello World"), 0644); err != nil {
t.Fatalf("Failed to write file1: %v", err)
}
file2 := filepath.Join(srcDir, "file2.go")
if err := ioutil.WriteFile(file2, []byte("package main\nfunc main() {}"), 0644); err != nil {
t.Fatalf("Failed to write file2: %v", err)
}
// Create a temporary output file.
outFile, err := ioutil.TempFile("", "gibidify_output.txt")
if err != nil {
t.Fatalf("Failed to create temp output file: %v", err)
}
outFilePath := outFile.Name()
outFile.Close()
defer os.Remove(outFilePath)
// Set up CLI arguments.
os.Args = []string{
"gibidify",
"-source", srcDir,
"-destination", outFilePath,
"-prefix", "PREFIX",
"-suffix", "SUFFIX",
"-concurrency", "2", // For testing, set concurrency to 2.
}
// Run the application with a background context.
ctx := context.Background()
if err := Run(ctx); err != nil {
t.Fatalf("Run failed: %v", err)
}
// Verify the output file contains the expected prefix, file contents, and suffix.
data, err := ioutil.ReadFile(outFilePath)
if err != nil {
t.Fatalf("Failed to read output file: %v", err)
}
output := string(data)
if !strings.Contains(output, "PREFIX") {
t.Error("Output missing prefix")
}
if !strings.Contains(output, "Hello World") {
t.Error("Output missing content from file1.txt")
}
if !strings.Contains(output, "SUFFIX") {
t.Error("Output missing suffix")
}
}
// TestIntegrationCancellation verifies that the application correctly cancels processing when the context times out.
func TestIntegrationCancellation(t *testing.T) {
// Create a temporary source directory with many files to simulate a long-running process.
srcDir, err := ioutil.TempDir("", "gibidify_src_long")
if err != nil {
t.Fatalf("Failed to create temp source directory: %v", err)
}
defer os.RemoveAll(srcDir)
// Create a large number of small files.
for i := 0; i < 1000; i++ {
filePath := filepath.Join(srcDir, fmt.Sprintf("file%d.txt", i))
if err := ioutil.WriteFile(filePath, []byte("Content"), 0644); err != nil {
t.Fatalf("Failed to write %s: %v", filePath, err)
}
}
// Create a temporary output file.
outFile, err := ioutil.TempFile("", "gibidify_output.txt")
if err != nil {
t.Fatalf("Failed to create temp output file: %v", err)
}
outFilePath := outFile.Name()
outFile.Close()
defer os.Remove(outFilePath)
// Set up CLI arguments.
os.Args = []string{
"gibidify",
"-source", srcDir,
"-destination", outFilePath,
"-prefix", "PREFIX",
"-suffix", "SUFFIX",
"-concurrency", "2",
}
// Create a context with a very short timeout to force cancellation.
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
defer cancel()
// Run the application; we expect an error due to cancellation.
err = Run(ctx)
if err == nil {
t.Error("Expected Run to fail due to cancellation, but it succeeded")
}
}

4
kics.config Normal file
View File

@@ -0,0 +1,4 @@
# vim: ft=yaml
log-level: WARN
exclude-severities: 'info,low'

151
main.go
View File

@@ -4,139 +4,46 @@ package main
import (
"context"
"flag"
"fmt"
"os"
"path/filepath"
"runtime"
"sync"
"github.com/ivuorinen/gibidify/config"
"github.com/ivuorinen/gibidify/fileproc"
"github.com/sirupsen/logrus"
"github.com/ivuorinen/gibidify/cli"
"github.com/ivuorinen/gibidify/config"
)
var (
sourceDir string
destination string
prefix string
suffix string
concurrency int
format string
)
func main() {
// Initialize UI for error handling
ui := cli.NewUIManager()
errorFormatter := cli.NewErrorFormatter(ui)
func init() {
flag.StringVar(&sourceDir, "source", "", "Source directory to scan recursively")
flag.StringVar(&destination, "destination", "", "Output file to write aggregated code")
flag.StringVar(&prefix, "prefix", "", "Text to add at the beginning of the output file")
flag.StringVar(&suffix, "suffix", "", "Text to add at the end of the output file")
flag.StringVar(&format, "format", "json", "Output format (json, markdown, yaml)")
flag.IntVar(&concurrency, "concurrency", runtime.NumCPU(), "Number of concurrent workers (default: number of CPU cores)")
// In production, use a background context.
if err := run(context.Background()); err != nil {
// Handle errors with better formatting and suggestions
if cli.IsUserError(err) {
errorFormatter.FormatError(err)
os.Exit(1)
} else {
// System errors still go to logrus for debugging
logrus.Errorf("System error: %v", err)
ui.PrintError("An unexpected error occurred. Please check the logs.")
os.Exit(2)
}
}
}
// Run executes the main logic of the CLI application using the provided context.
func Run(ctx context.Context) error {
flag.Parse()
// We need at least a source directory
if sourceDir == "" {
return fmt.Errorf("usage: gibidify -source <source_directory> [--destination <output_file>] [--format=json|yaml|markdown] ")
}
// If destination is not specified, auto-generate it using the base name of sourceDir + "." + format
if destination == "" {
absRoot, err := filepath.Abs(sourceDir)
if err != nil {
return fmt.Errorf("failed to get absolute path for %s: %w", sourceDir, err)
}
baseName := filepath.Base(absRoot)
// If sourceDir ends with a slash, baseName might be "." so handle that case as needed
if baseName == "." || baseName == "" {
baseName = "output"
}
destination = baseName + "." + format
func run(ctx context.Context) error {
// Parse CLI flags
flags, err := cli.ParseFlags()
if err != nil {
return err
}
// Load configuration
config.LoadConfig()
logrus.Infof("Starting gibidify. Format: %s, Source: %s, Destination: %s, Workers: %d", format, sourceDir, destination, concurrency)
// Collect files
files, err := fileproc.CollectFiles(sourceDir)
if err != nil {
return fmt.Errorf("error collecting files: %w", err)
}
logrus.Infof("Found %d files to process", len(files))
// Open output file
outFile, err := os.Create(destination)
if err != nil {
return fmt.Errorf("failed to create output file %s: %w", destination, err)
}
defer func(outFile *os.File) {
if err := outFile.Close(); err != nil {
logrus.Errorf("Error closing output file: %v", err)
}
}(outFile)
// Create channels
fileCh := make(chan string)
writeCh := make(chan fileproc.WriteRequest)
writerDone := make(chan struct{})
// Start writer goroutine
go fileproc.StartWriter(outFile, writeCh, writerDone, format, prefix, suffix)
var wg sync.WaitGroup
// Start worker goroutines with context cancellation
for i := 0; i < concurrency; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for {
select {
case <-ctx.Done():
return
case filePath, ok := <-fileCh:
if !ok {
return
}
// Pass sourceDir to ProcessFile so it knows the 'root'
absRoot, err := filepath.Abs(sourceDir)
if err != nil {
logrus.Errorf("Failed to get absolute path for %s: %v", sourceDir, err)
return
}
fileproc.ProcessFile(filePath, writeCh, absRoot)
}
}
}()
}
// Feed files to worker pool while checking for cancellation
for _, fp := range files {
select {
case <-ctx.Done():
close(fileCh)
return ctx.Err()
case fileCh <- fp:
}
}
close(fileCh)
wg.Wait()
close(writeCh)
<-writerDone
logrus.Infof("Processing completed. Output saved to %s", destination)
return nil
}
func main() {
// In production, use a background context.
if err := Run(context.Background()); err != nil {
fmt.Println("Error:", err)
os.Exit(1)
}
// Create and run processor
processor := cli.NewProcessor(flags)
return processor.Process(ctx)
}

106
main_test.go Normal file
View File

@@ -0,0 +1,106 @@
package main
import (
"context"
"fmt"
"os"
"testing"
"time"
"github.com/ivuorinen/gibidify/testutil"
)
const (
	// testFileCount is the number of small files created to keep the
	// cancellation test busy long enough for its context to expire.
	testFileCount = 1000
)
// TestIntegrationFullCLI simulates a full run of the CLI application using adaptive concurrency.
func TestIntegrationFullCLI(t *testing.T) {
	srcDir := setupTestFiles(t)
	outFilePath := setupOutputFile(t)
	setupCLIArgs(srcDir, outFilePath)

	// Run the application with a background context.
	if err := run(t.Context()); err != nil {
		t.Fatalf("Run failed: %v", err)
	}

	verifyOutput(t, outFilePath)
}
// setupTestFiles creates test files and returns the source directory.
func setupTestFiles(t *testing.T) string {
	t.Helper()
	dir := t.TempDir()
	// Two small fixtures: one plain text, one Go source.
	specs := []testutil.FileSpec{
		{Name: "file1.txt", Content: "Hello World"},
		{Name: "file2.go", Content: "package main\nfunc main() {}"},
	}
	testutil.CreateTestFiles(t, dir, specs)
	return dir
}
// setupOutputFile creates a temporary output file and returns its path.
// The file handle is closed immediately; only the path is needed later.
func setupOutputFile(t *testing.T) string {
	t.Helper()
	f, path := testutil.CreateTempOutputFile(t, "gibidify_output.txt")
	testutil.CloseFile(t, f)
	return path
}
// setupCLIArgs configures the CLI arguments for testing.
// Prefix/suffix markers are fixed and concurrency is pinned to 2 workers.
func setupCLIArgs(srcDir, outFilePath string) {
	testutil.SetupCLIArgs(srcDir, outFilePath, "PREFIX", "SUFFIX", 2)
}
// verifyOutput checks that the output file contains expected content.
func verifyOutput(t *testing.T, outFilePath string) {
	t.Helper()
	raw, readErr := os.ReadFile(outFilePath)
	if readErr != nil {
		t.Fatalf("Failed to read output file: %v", readErr)
	}
	expected := []string{"PREFIX", "Hello World", "SUFFIX"}
	testutil.VerifyContentContains(t, string(raw), expected)
}
// TestIntegrationCancellation verifies that the application correctly cancels processing when the context times out.
func TestIntegrationCancellation(t *testing.T) {
	// A temporary source directory with many small files simulates a
	// long-running process.
	srcDir := t.TempDir()
	for i := range testFileCount {
		testutil.CreateTestFile(t, srcDir, fmt.Sprintf("file%d.txt", i), []byte("Content"))
	}

	// Temporary output file; handle closed immediately, path cleaned up.
	outFile, outFilePath := testutil.CreateTempOutputFile(t, "gibidify_output.txt")
	testutil.CloseFile(t, outFile)
	defer func() {
		if removeErr := os.Remove(outFilePath); removeErr != nil {
			t.Fatalf("cleanup output file: %v", removeErr)
		}
	}()

	// Set up CLI arguments.
	testutil.SetupCLIArgs(srcDir, outFilePath, "PREFIX", "SUFFIX", 2)

	// A very short timeout forces cancellation mid-run.
	ctx, cancel := context.WithTimeout(t.Context(), 1*time.Millisecond)
	defer cancel()

	// Run the application; we expect an error due to cancellation.
	if runErr := run(ctx); runErr == nil {
		t.Error("Expected Run to fail due to cancellation, but it succeeded")
	}
}

25
scripts/help.txt Normal file
View File

@@ -0,0 +1,25 @@
Available targets:
install-tools - Install required linting and development tools
lint - Run all linters (Go, Makefile, shell, YAML)
lint-fix - Run linters with auto-fix enabled
lint-verbose - Run linters with verbose output
test - Run tests
coverage - Run tests with coverage
build - Build the application
clean - Clean build artifacts
all - Run lint, test, and build
Security targets:
security - Run comprehensive security scan
security-full - Run full security analysis with all tools
vuln-check - Check for dependency vulnerabilities
Benchmark targets:
build-benchmark - Build the benchmark binary
benchmark - Run all benchmarks
benchmark-collection - Run file collection benchmarks
benchmark-processing - Run file processing benchmarks
benchmark-concurrency - Run concurrency benchmarks
benchmark-format - Run format benchmarks
Run 'make <target>' to execute a specific target.

14
scripts/lint.sh Executable file
View File

@@ -0,0 +1,14 @@
#!/bin/bash
# Run all project linters; abort on the first failure.
# -e: exit on error, -u: treat unset variables as errors, -o pipefail:
# fail a pipeline if any stage fails — matching scripts/security-scan.sh.
set -euo pipefail

echo "Running golangci-lint..."
golangci-lint run ./...

echo "Running checkmake..."
checkmake --config=.checkmake Makefile

echo "Running shfmt check..."
shfmt -d .

echo "Running yamllint..."
yamllint -c .yamllint .

426
scripts/security-scan.sh Executable file
View File

@@ -0,0 +1,426 @@
#!/bin/bash
set -euo pipefail
# Security Scanning Script for gibidify
# This script runs comprehensive security checks locally and in CI

# Resolve the repository root relative to this script and run from there so
# relative paths (reports, config files) always land in the project root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$PROJECT_ROOT"

echo "🔒 Starting comprehensive security scan for gibidify..."

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# print_status MESSAGE — informational line (blue).
print_status() {
	echo -e "${BLUE}[INFO]${NC} $1"
}

# print_warning MESSAGE — non-fatal finding (yellow).
print_warning() {
	echo -e "${YELLOW}[WARN]${NC} $1"
}

# print_error MESSAGE — fatal finding (red).
print_error() {
	echo -e "${RED}[ERROR]${NC} $1"
}

# print_success MESSAGE — check passed (green).
print_success() {
	echo -e "${GREEN}[SUCCESS]${NC} $1"
}
# Check if required tools are installed, installing Go-based ones on demand.
# Only hard prerequisites (go itself) abort the scan when missing.
check_dependencies() {
	print_status "Checking security scanning dependencies..."
	local missing_tools=()
	if ! command -v go &>/dev/null; then
		missing_tools+=("go")
	fi
	if ! command -v golangci-lint &>/dev/null; then
		print_warning "golangci-lint not found, installing..."
		go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
	fi
	if ! command -v gosec &>/dev/null; then
		print_warning "gosec not found, installing..."
		# gosec's upstream module lives under the securego org; the previous
		# securecodewarrior path is not the real project and fails to install.
		go install github.com/securego/gosec/v2/cmd/gosec@latest
	fi
	if ! command -v govulncheck &>/dev/null; then
		print_warning "govulncheck not found, installing..."
		go install golang.org/x/vuln/cmd/govulncheck@latest
	fi
	if ! command -v checkmake &>/dev/null; then
		print_warning "checkmake not found, installing..."
		go install github.com/mrtazz/checkmake/cmd/checkmake@latest
	fi
	if ! command -v shfmt &>/dev/null; then
		print_warning "shfmt not found, installing..."
		go install mvdan.cc/sh/v3/cmd/shfmt@latest
	fi
	if ! command -v yamllint &>/dev/null; then
		print_warning "yamllint not found, installing..."
		# NOTE(review): this installs a Go port; the common yamllint is a
		# Python tool — confirm this port honors the .yamllint config.
		go install github.com/excilsploft/yamllint@latest
	fi
	if [ ${#missing_tools[@]} -ne 0 ]; then
		print_error "Missing required tools: ${missing_tools[*]}"
		print_error "Please install the missing tools and try again."
		exit 1
	fi
	print_success "All dependencies are available"
}
# Run gosec security scanner; findings fail the check and are saved as JSON.
run_gosec() {
	print_status "Running gosec security scanner..."
	if gosec -fmt=json -out=gosec-report.json -stdout -verbose=text ./...; then
		print_success "gosec scan completed successfully"
	else
		print_error "gosec found security issues!"
		if [ -f "gosec-report.json" ]; then
			echo "Detailed report saved to gosec-report.json"
		fi
		return 1
	fi
}

# Run vulnerability check.
# govulncheck exits 0 in -json mode even when findings exist, so the report
# must be inspected for "finding" entries instead of trusting the exit code
# (the previous else-branch detection was unreachable).
run_govulncheck() {
	print_status "Running govulncheck for dependency vulnerabilities..."
	if ! govulncheck -json ./... >govulncheck-report.json 2>&1; then
		# A non-zero exit with -json means the tool itself failed to run.
		print_error "govulncheck failed to run; see govulncheck-report.json"
		return 1
	fi
	if grep -q '"finding"' govulncheck-report.json 2>/dev/null; then
		print_error "Vulnerabilities found in dependencies!"
		echo "Detailed report saved to govulncheck-report.json"
		return 1
	fi
	print_success "No known vulnerabilities found in dependencies"
}

# Run enhanced golangci-lint with a security-focused linter set.
run_security_lint() {
	print_status "Running security-focused linting..."
	local security_linters="gosec,gocritic,bodyclose,rowserrcheck,misspell,unconvert,unparam,unused,errcheck,ineffassign,staticcheck"
	if golangci-lint run --enable="$security_linters" --timeout=5m; then
		print_success "Security linting passed"
	else
		print_error "Security linting found issues!"
		return 1
	fi
}
# Check for potential secrets committed to Go sources or mentioned in
# recent commit messages. Heuristic only — findings need manual review.
check_secrets() {
	print_status "Scanning for potential secrets and sensitive data..."
	local secrets_found=false
	# Common secret patterns. Use POSIX [[:space:]] rather than \s, which is
	# a GNU grep extension and not portable ERE.
	local patterns=(
		"password[[:space:]]*[:=][[:space:]]*['\"][^'\"]{3,}['\"]"
		"secret[[:space:]]*[:=][[:space:]]*['\"][^'\"]{3,}['\"]"
		"key[[:space:]]*[:=][[:space:]]*['\"][^'\"]{8,}['\"]"
		"token[[:space:]]*[:=][[:space:]]*['\"][^'\"]{8,}['\"]"
		"api_?key[[:space:]]*[:=][[:space:]]*['\"][^'\"]{8,}['\"]"
		"aws_?access_?key"
		"aws_?secret"
		"AKIA[0-9A-Z]{16}" # AWS Access Key pattern
		"github_?token"
		"private_?key"
	)
	for pattern in "${patterns[@]}"; do
		# Matches are printed so the reviewer sees the offending lines.
		if grep -r -i -E "$pattern" --include="*.go" . 2>/dev/null; then
			print_warning "Potential secret pattern found: $pattern"
			secrets_found=true
		fi
	done
	# Check git history for secrets (last 10 commits)
	if git log --oneline -10 | grep -i -E "(password|secret|key|token)" >/dev/null 2>&1; then
		print_warning "Potential secrets mentioned in recent commit messages"
		secrets_found=true
	fi
	if [ "$secrets_found" = true ]; then
		print_warning "Potential secrets detected. Please review manually."
		return 1
	else
		print_success "No obvious secrets detected"
	fi
}

# Check for hardcoded network addresses in Go sources, excluding loopback
# and documentation-style hosts. Each pipeline now runs once and its output
# is reused, instead of the previous duplicated grep invocations.
check_hardcoded_addresses() {
	print_status "Checking for hardcoded network addresses..."
	local addresses_found=false
	local ip_matches url_matches
	# IPv4-looking literals, excluding common safe ones.
	ip_matches="$(grep -r -E "([0-9]{1,3}\.){3}[0-9]{1,3}" --include="*.go" . 2>/dev/null |
		grep -v -E "(127\.0\.0\.1|0\.0\.0\.0|255\.255\.255\.255|localhost)" || true)"
	if [ -n "$ip_matches" ]; then
		print_warning "Hardcoded IP addresses found:"
		echo "$ip_matches"
		addresses_found=true
	fi
	# URLs, excluding documentation examples and shell expansions.
	# [[:space:]] replaces the non-portable \s escape.
	url_matches="$(grep -r -E "https?://[^/[:space:]]+" --include="*.go" . 2>/dev/null |
		grep -v -E "(example\.com|localhost|127\.0\.0\.1|\$\{)" || true)"
	if [ -n "$url_matches" ]; then
		print_warning "Hardcoded URLs found:"
		echo "$url_matches"
		addresses_found=true
	fi
	if [ "$addresses_found" = true ]; then
		print_warning "Hardcoded network addresses detected. Please review."
		return 1
	else
		print_success "No hardcoded network addresses found"
	fi
}
# Check Docker security (if Dockerfile exists)
check_docker_security() {
	if [ -f "Dockerfile" ]; then
		print_status "Checking Docker security..."
		# Basic Dockerfile security checks
		local docker_issues=false
		# Explicit root user is a finding.
		if grep -q "^USER root" Dockerfile; then
			print_warning "Dockerfile runs as root user"
			docker_issues=true
		fi
		# No USER directive at all means the image defaults to root.
		if ! grep -q "^USER " Dockerfile; then
			print_warning "Dockerfile doesn't specify a non-root user"
			docker_issues=true
		fi
		# Heuristic: curl/wget fetched but apparently never removed afterwards.
		if grep -q "RUN.*wget\|RUN.*curl" Dockerfile && ! grep -q "rm.*wget\|rm.*curl" Dockerfile; then
			print_warning "Dockerfile may leave curl/wget installed"
			docker_issues=true
		fi
		if [ "$docker_issues" = true ]; then
			print_warning "Docker security issues detected"
			return 1
		else
			print_success "Docker security check passed"
		fi
	else
		print_status "No Dockerfile found, skipping Docker security check"
	fi
}

# Check file permissions across the repository (excluding .git internals).
check_file_permissions() {
	print_status "Checking file permissions..."
	local perm_issues=false
	# Check for overly permissive (world-writable) files.
	if find . -type f -perm /o+w -not -path "./.git/*" | grep -q .; then
		print_warning "World-writable files found:"
		find . -type f -perm /o+w -not -path "./.git/*" || true
		perm_issues=true
	fi
	# Go sources should never carry an execute bit.
	if find . -type f -name "*.go" -perm /a+x | grep -q .; then
		print_warning "Executable Go files found (should not be executable):"
		find . -type f -name "*.go" -perm /a+x || true
		perm_issues=true
	fi
	if [ "$perm_issues" = true ]; then
		print_warning "File permission issues detected"
		return 1
	else
		print_success "File permissions check passed"
	fi
}
# Check Makefile with checkmake (skipped when no Makefile exists).
check_makefile() {
	if [ -f "Makefile" ]; then
		print_status "Checking Makefile with checkmake..."
		if checkmake --config=.checkmake Makefile; then
			print_success "Makefile check passed"
		else
			print_error "Makefile issues detected!"
			return 1
		fi
	else
		print_status "No Makefile found, skipping checkmake"
	fi
}

# Check shell scripts with shfmt (skipped when no *.sh files exist).
check_shell_scripts() {
	print_status "Checking shell script formatting..."
	if find . -name "*.sh" -type f | head -1 | grep -q .; then
		if shfmt -d .; then
			print_success "Shell script formatting check passed"
		else
			print_error "Shell script formatting issues detected!"
			return 1
		fi
	else
		print_status "No shell scripts found, skipping shfmt check"
	fi
}

# Check YAML files with yamllint (skipped when none exist).
check_yaml_files() {
	print_status "Checking YAML files..."
	# Parenthesize the -name alternatives: without grouping, -type f binds
	# only to the *.yaml branch, so *.yml directories would also match.
	if find . \( -name "*.yml" -o -name "*.yaml" \) -type f | head -1 | grep -q .; then
		if yamllint -c .yamllint .; then
			print_success "YAML files check passed"
		else
			print_error "YAML file issues detected!"
			return 1
		fi
	else
		print_status "No YAML files found, skipping yamllint check"
	fi
}
# Generate security report summarizing the scan into security-report.md.
# The heredoc body is emitted verbatim (only $(date) expands), so it must
# stay flush-left and unmodified.
generate_report() {
	print_status "Generating security scan report..."
	local report_file="security-report.md"
	cat >"$report_file" <<EOF
# Security Scan Report
**Generated:** $(date)
**Project:** gibidify
**Scan Type:** Comprehensive Security Analysis
## Scan Results
### Security Tools Used
- gosec (Go security analyzer)
- govulncheck (Vulnerability database checker)
- golangci-lint (Static analysis with security linters)
- checkmake (Makefile linting)
- shfmt (Shell script formatting)
- yamllint (YAML file validation)
- Custom secret detection
- Custom network address detection
- Docker security checks
- File permission checks
### Files Generated
- \`gosec-report.json\` - Detailed gosec security findings
- \`govulncheck-report.json\` - Dependency vulnerability report
### Recommendations
1. Review all security findings in the generated reports
2. Address any HIGH or MEDIUM severity issues immediately
3. Consider implementing additional security measures for LOW severity issues
4. Regularly update dependencies to patch known vulnerabilities
5. Run security scans before each release
### Next Steps
- Fix any identified vulnerabilities
- Update security scanning in CI/CD pipeline
- Consider adding security testing to the test suite
- Review and update security documentation
---
*This report was generated automatically by the gibidify security scanning script.*
EOF
	print_success "Security report generated: $report_file"
}
# Main execution: run every check, accumulate failures, always produce the
# report, and exit non-zero if anything failed.
main() {
	echo "🔒 gibidify Security Scanner"
	echo "=========================="
	echo
	local exit_code=0
	check_dependencies
	echo
	# Run all security checks. Each failure flips exit_code but never
	# short-circuits, so all reports are still generated.
	run_gosec || exit_code=1
	echo
	run_govulncheck || exit_code=1
	echo
	run_security_lint || exit_code=1
	echo
	check_secrets || exit_code=1
	echo
	check_hardcoded_addresses || exit_code=1
	echo
	check_docker_security || exit_code=1
	echo
	check_file_permissions || exit_code=1
	echo
	check_makefile || exit_code=1
	echo
	check_shell_scripts || exit_code=1
	echo
	check_yaml_files || exit_code=1
	echo
	generate_report
	echo
	if [ $exit_code -eq 0 ]; then
		print_success "🎉 All security checks passed!"
	else
		print_error "❌ Security issues detected. Please review the reports and fix identified issues."
		print_status "Generated reports:"
		print_status "- gosec-report.json (if exists)"
		print_status "- govulncheck-report.json (if exists)"
		print_status "- security-report.md"
	fi
	exit $exit_code
}

# Run main function
main "$@"

View File

@@ -0,0 +1,86 @@
package testutil
import (
"os"
"path/filepath"
"strings"
"testing"
)
// Test thread safety of functions that might be called concurrently
func TestConcurrentOperations(t *testing.T) {
	tempDir := t.TempDir()
	const workers = 5
	done := make(chan bool)
	// Launch file-creating and directory-creating goroutines side by side.
	// NOTE(review): CreateTestFile/CreateTestDirectory call t.Fatalf on
	// failure, which must not run from a non-test goroutine — confirm these
	// paths cannot fail here, or refactor the helpers to Errorf.
	for i := 0; i < workers; i++ {
		go func(n int) {
			CreateTestFile(t, tempDir, string(rune('a'+n))+".txt", []byte("content"))
			done <- true
		}(i)
		go func(n int) {
			CreateTestDirectory(t, tempDir, "dir"+string(rune('0'+n)))
			done <- true
		}(i)
	}
	// Wait for all goroutines to signal completion.
	for i := 0; i < 2*workers; i++ {
		<-done
	}
}
// Benchmarks
func BenchmarkCreateTestFile(b *testing.B) {
	dir := b.TempDir()
	payload := []byte("benchmark content")
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Rotate through 26 names so iterations do not collide on one file.
		name := "bench" + string(rune(n%26+'a')) + ".txt"
		if err := os.WriteFile(filepath.Join(dir, name), payload, FilePermission); err != nil {
			b.Fatalf("Failed to write file: %v", err)
		}
	}
}

func BenchmarkCreateTestFiles(b *testing.B) {
	dir := b.TempDir()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Build specs with per-iteration unique names to avoid conflicts.
		suffix := string(rune(n%26 + 'a'))
		batch := []FileSpec{
			{Name: "file1_" + suffix + ".txt", Content: "content1"},
			{Name: "file2_" + suffix + ".txt", Content: "content2"},
			{Name: "file3_" + suffix + ".txt", Content: "content3"},
		}
		for _, item := range batch {
			target := filepath.Join(dir, item.Name)
			if err := os.WriteFile(target, []byte(item.Content), FilePermission); err != nil {
				b.Fatalf("Failed to write file: %v", err)
			}
		}
	}
}

func BenchmarkVerifyContentContains(b *testing.B) {
	haystack := strings.Repeat("test content with various words ", 100)
	needles := []string{"test", "content", "various", "words"}
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// VerifyContentContains needs a *testing.T, so benchmark its core
		// strings.Contains loop instead.
		for _, needle := range needles {
			_ = strings.Contains(haystack, needle)
		}
	}
}

132
testutil/config_test.go Normal file
View File

@@ -0,0 +1,132 @@
package testutil
import (
	"os"
	"strconv"
	"testing"

	"github.com/spf13/viper"
)
// TestResetViperConfig seeds global viper state, resets it through
// ResetViperConfig, and verifies the previously set keys are gone.
// Note: the cases share viper's package-level state, so order matters.
func TestResetViperConfig(t *testing.T) {
	tests := []struct {
		name       string
		configPath string
		preSetup   func()
		verify     func(t *testing.T)
	}{
		{
			name:       "reset with empty config path",
			configPath: "",
			preSetup: func() {
				viper.Set("test.key", "value")
			},
			verify: func(t *testing.T) {
				if viper.IsSet("test.key") {
					t.Error("Viper config not reset properly")
				}
			},
		},
		{
			name:       "reset with config path",
			configPath: t.TempDir(),
			preSetup: func() {
				viper.Set("test.key", "value")
			},
			verify: func(t *testing.T) {
				if viper.IsSet("test.key") {
					t.Error("Viper config not reset properly")
				}
				// Verify config path was added; an empty result is expected
				// because no config file exists in the temp directory.
				paths := viper.ConfigFileUsed()
				if paths == "" {
					// This is expected as no config file exists
					return
				}
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			tt.preSetup()
			ResetViperConfig(t, tt.configPath)
			tt.verify(t)
		})
	}
}
// TestSetupCLIArgs verifies that SetupCLIArgs rewrites os.Args with the
// expected flag/value layout for several argument shapes.
func TestSetupCLIArgs(t *testing.T) {
	// Save original args; SetupCLIArgs mutates the process-global os.Args.
	originalArgs := os.Args
	defer func() {
		os.Args = originalArgs
	}()
	tests := []struct {
		name        string
		srcDir      string
		outFile     string
		prefix      string
		suffix      string
		concurrency int
		wantLen     int
	}{
		{
			name:        "basic CLI args",
			srcDir:      "/src",
			outFile:     "/out.txt",
			prefix:      "PREFIX",
			suffix:      "SUFFIX",
			concurrency: 4,
			wantLen:     11,
		},
		{
			name:        "empty strings",
			srcDir:      "",
			outFile:     "",
			prefix:      "",
			suffix:      "",
			concurrency: 1,
			wantLen:     11,
		},
		{
			name:        "special characters in args",
			srcDir:      "/path with spaces/src",
			outFile:     "/path/to/output file.txt",
			prefix:      "Prefix with\nnewline",
			suffix:      "Suffix with\ttab",
			concurrency: 8,
			wantLen:     11,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			SetupCLIArgs(tt.srcDir, tt.outFile, tt.prefix, tt.suffix, tt.concurrency)
			if len(os.Args) != tt.wantLen {
				t.Errorf("os.Args length = %d, want %d", len(os.Args), tt.wantLen)
			}
			// Verify specific args
			if os.Args[0] != "gibidify" {
				t.Errorf("Program name = %s, want gibidify", os.Args[0])
			}
			if os.Args[2] != tt.srcDir {
				t.Errorf("Source dir = %s, want %s", os.Args[2], tt.srcDir)
			}
			if os.Args[4] != tt.outFile {
				t.Errorf("Output file = %s, want %s", os.Args[4], tt.outFile)
			}
			if os.Args[6] != tt.prefix {
				t.Errorf("Prefix = %s, want %s", os.Args[6], tt.prefix)
			}
			if os.Args[8] != tt.suffix {
				t.Errorf("Suffix = %s, want %s", os.Args[8], tt.suffix)
			}
			// Use strconv.Itoa to match SetupCLIArgs: the previous
			// string(rune(concurrency+'0')) comparison only handled
			// single-digit concurrency values.
			if os.Args[10] != strconv.Itoa(tt.concurrency) {
				t.Errorf("Concurrency = %s, want %d", os.Args[10], tt.concurrency)
			}
		})
	}
}

View File

@@ -0,0 +1,286 @@
package testutil
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestCreateTestFile exercises CreateTestFile across plain, empty, binary,
// nested, and specially named files, checking existence, mode, permissions,
// and round-tripped content. Note: the loop mutates tt.dir in place when no
// directory is given, so each case must run on its own tempDir.
func TestCreateTestFile(t *testing.T) {
	tests := []struct {
		name     string
		dir      string
		filename string
		content  []byte
		wantErr  bool // currently unused: every case expects success
	}{
		{
			name:     "create simple test file",
			filename: "test.txt",
			content:  []byte("hello world"),
			wantErr:  false,
		},
		{
			name:     "create file with empty content",
			filename: "empty.txt",
			content:  []byte{},
			wantErr:  false,
		},
		{
			name:     "create file with binary content",
			filename: "binary.bin",
			content:  []byte{0x00, 0xFF, 0x42},
			wantErr:  false,
		},
		{
			name:     "create file with subdirectory",
			filename: "subdir/test.txt",
			content:  []byte("nested file"),
			wantErr:  false,
		},
		{
			name:     "create file with special characters",
			filename: "special-file_123.go",
			content:  []byte("package main"),
			wantErr:  false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Use a temporary directory for each test
			tempDir := t.TempDir()
			if tt.dir == "" {
				tt.dir = tempDir
			}
			// Create subdirectory if needed
			if strings.Contains(tt.filename, "/") {
				subdir := filepath.Join(tt.dir, filepath.Dir(tt.filename))
				if err := os.MkdirAll(subdir, DirPermission); err != nil {
					t.Fatalf("Failed to create subdirectory: %v", err)
				}
			}
			// Test CreateTestFile
			filePath := CreateTestFile(t, tt.dir, tt.filename, tt.content)
			// Verify file exists
			info, err := os.Stat(filePath)
			if err != nil {
				t.Fatalf("Created file does not exist: %v", err)
			}
			// Verify it's a regular file
			if !info.Mode().IsRegular() {
				t.Errorf("Created path is not a regular file")
			}
			// Verify permissions
			if info.Mode().Perm() != FilePermission {
				t.Errorf("File permissions = %v, want %v", info.Mode().Perm(), FilePermission)
			}
			// Verify content
			readContent, err := os.ReadFile(filePath)
			if err != nil {
				t.Fatalf("Failed to read created file: %v", err)
			}
			if string(readContent) != string(tt.content) {
				t.Errorf("File content = %q, want %q", readContent, tt.content)
			}
		})
	}
}

// TestCreateTempOutputFile checks that CreateTempOutputFile yields a
// writable regular file for several temp-name patterns.
func TestCreateTempOutputFile(t *testing.T) {
	tests := []struct {
		name    string
		pattern string
	}{
		{
			name:    "simple pattern",
			pattern: "output-*.txt",
		},
		{
			name:    "pattern with prefix only",
			pattern: "test-",
		},
		{
			name:    "pattern with suffix only",
			pattern: "*.json",
		},
		{
			name:    "empty pattern",
			pattern: "",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			file, path := CreateTempOutputFile(t, tt.pattern)
			defer CloseFile(t, file)
			// Verify file exists
			info, err := os.Stat(path)
			if err != nil {
				t.Fatalf("Temp file does not exist: %v", err)
			}
			// Verify it's a regular file
			if !info.Mode().IsRegular() {
				t.Errorf("Created path is not a regular file")
			}
			// Verify we can write to it
			testContent := []byte("test content")
			if _, err := file.Write(testContent); err != nil {
				t.Errorf("Failed to write to temp file: %v", err)
			}
			// Verify the path is in a temp directory (any temp directory)
			// NOTE(review): assumes t.TempDir lives under os.TempDir; true
			// for default TMPDIR setups but environment-dependent — confirm.
			if !strings.Contains(path, os.TempDir()) {
				t.Errorf("Temp file not in temp directory: %s", path)
			}
		})
	}
}
// TestCreateTestDirectory verifies CreateTestDirectory produces a usable
// directory with the expected permissions. Note: for nested names the loop
// rewrites tt.dir and tt.parent in place before calling the helper, so the
// statement order inside the subtest matters.
func TestCreateTestDirectory(t *testing.T) {
	tests := []struct {
		name   string
		parent string
		dir    string
	}{
		{
			name: "simple directory",
			dir:  "testdir",
		},
		{
			name: "directory with special characters",
			dir:  "test-dir_123",
		},
		{
			name: "nested directory name",
			dir:  "nested/dir",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			tempDir := t.TempDir()
			if tt.parent == "" {
				tt.parent = tempDir
			}
			// For nested directories, create parent first
			if strings.Contains(tt.dir, "/") {
				parentPath := filepath.Join(tt.parent, filepath.Dir(tt.dir))
				if err := os.MkdirAll(parentPath, DirPermission); err != nil {
					t.Fatalf("Failed to create parent directory: %v", err)
				}
				tt.dir = filepath.Base(tt.dir)
				tt.parent = parentPath
			}
			dirPath := CreateTestDirectory(t, tt.parent, tt.dir)
			// Verify directory exists
			info, err := os.Stat(dirPath)
			if err != nil {
				t.Fatalf("Created directory does not exist: %v", err)
			}
			// Verify it's a directory
			if !info.IsDir() {
				t.Errorf("Created path is not a directory")
			}
			// Verify permissions
			if info.Mode().Perm() != DirPermission {
				t.Errorf("Directory permissions = %v, want %v", info.Mode().Perm(), DirPermission)
			}
			// Verify we can create files in it
			testFile := filepath.Join(dirPath, "test.txt")
			if err := os.WriteFile(testFile, []byte("test"), FilePermission); err != nil {
				t.Errorf("Cannot create file in directory: %v", err)
			}
		})
	}
}

// TestCreateTestFiles verifies CreateTestFiles creates every spec'd file
// (pre-creating subdirectories itself) and that created paths line up
// one-to-one with the input specs.
func TestCreateTestFiles(t *testing.T) {
	tests := []struct {
		name      string
		fileSpecs []FileSpec
		wantCount int
	}{
		{
			name: "create multiple files",
			fileSpecs: []FileSpec{
				{Name: "file1.txt", Content: "content1"},
				{Name: "file2.go", Content: "package main"},
				{Name: "file3.json", Content: `{"key": "value"}`},
			},
			wantCount: 3,
		},
		{
			name: "create files with subdirectories",
			fileSpecs: []FileSpec{
				{Name: "src/main.go", Content: "package main"},
				{Name: "test/test.go", Content: "package test"},
			},
			wantCount: 2,
		},
		{
			name:      "empty file specs",
			fileSpecs: []FileSpec{},
			wantCount: 0,
		},
		{
			name: "files with empty content",
			fileSpecs: []FileSpec{
				{Name: "empty1.txt", Content: ""},
				{Name: "empty2.txt", Content: ""},
			},
			wantCount: 2,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			rootDir := t.TempDir()
			// Create necessary subdirectories
			for _, spec := range tt.fileSpecs {
				if strings.Contains(spec.Name, "/") {
					subdir := filepath.Join(rootDir, filepath.Dir(spec.Name))
					if err := os.MkdirAll(subdir, DirPermission); err != nil {
						t.Fatalf("Failed to create subdirectory: %v", err)
					}
				}
			}
			createdFiles := CreateTestFiles(t, rootDir, tt.fileSpecs)
			// Verify count
			if len(createdFiles) != tt.wantCount {
				t.Errorf("Created %d files, want %d", len(createdFiles), tt.wantCount)
			}
			// Verify each file (createdFiles is ordered like tt.fileSpecs)
			for i, filePath := range createdFiles {
				content, err := os.ReadFile(filePath)
				if err != nil {
					t.Errorf("Failed to read file %s: %v", filePath, err)
					continue
				}
				if string(content) != tt.fileSpecs[i].Content {
					t.Errorf("File %s content = %q, want %q", filePath, content, tt.fileSpecs[i].Content)
				}
			}
		})
	}
}

117
testutil/testutil.go Normal file
View File

@@ -0,0 +1,117 @@
// Package testutil provides common testing utilities and helper functions.
package testutil
import (
"os"
"path/filepath"
"strconv"
"strings"
"testing"
"github.com/spf13/viper"
"github.com/ivuorinen/gibidify/config"
)
const (
	// FilePermission is the default file permission for test files.
	FilePermission = 0o644
	// DirPermission is the default directory permission for test directories.
	DirPermission = 0o755
)

// CreateTestFile creates a test file with the given content and returns its path.
// The test is aborted immediately if the write fails.
func CreateTestFile(t *testing.T, dir, filename string, content []byte) string {
	t.Helper()
	target := filepath.Join(dir, filename)
	if writeErr := os.WriteFile(target, content, FilePermission); writeErr != nil {
		t.Fatalf("Failed to write file %s: %v", target, writeErr)
	}
	return target
}

// CreateTempOutputFile creates a temporary output file and returns the file handle and path.
// The file lives under t.TempDir and is removed automatically after the test.
func CreateTempOutputFile(t *testing.T, pattern string) (file *os.File, path string) {
	t.Helper()
	file, err := os.CreateTemp(t.TempDir(), pattern)
	if err != nil {
		t.Fatalf("Failed to create temp output file: %v", err)
	}
	return file, file.Name()
}

// CreateTestDirectory creates a test directory and returns its path.
func CreateTestDirectory(t *testing.T, parent, name string) string {
	t.Helper()
	target := filepath.Join(parent, name)
	if mkErr := os.Mkdir(target, DirPermission); mkErr != nil {
		t.Fatalf("Failed to create directory %s: %v", target, mkErr)
	}
	return target
}

// FileSpec represents a file specification for creating test files.
type FileSpec struct {
	Name    string // file name, optionally including subdirectories
	Content string // file body, written verbatim
}

// CreateTestFiles creates multiple test files from specifications and
// returns the created paths in input order.
func CreateTestFiles(t *testing.T, rootDir string, fileSpecs []FileSpec) []string {
	t.Helper()
	paths := make([]string, 0, len(fileSpecs))
	for _, spec := range fileSpecs {
		paths = append(paths, CreateTestFile(t, rootDir, spec.Name, []byte(spec.Content)))
	}
	return paths
}
// ResetViperConfig resets Viper configuration and optionally sets a config path.
// It clears all previously set keys via viper.Reset, registers configPath as
// a config search path when non-empty, and reloads application defaults
// through config.LoadConfig.
func ResetViperConfig(t *testing.T, configPath string) {
	t.Helper()
	viper.Reset()
	if configPath != "" {
		viper.AddConfigPath(configPath)
	}
	config.LoadConfig()
}
// SetupCLIArgs configures os.Args for CLI testing.
// It overwrites the process arguments with a complete gibidify invocation.
func SetupCLIArgs(srcDir, outFilePath, prefix, suffix string, concurrency int) {
	args := []string{"gibidify"}
	args = append(args, "-source", srcDir)
	args = append(args, "-destination", outFilePath)
	args = append(args, "-prefix", prefix)
	args = append(args, "-suffix", suffix)
	args = append(args, "-concurrency", strconv.Itoa(concurrency))
	os.Args = args
}
// VerifyContentContains checks that content contains all expected substrings,
// reporting one error per missing substring.
func VerifyContentContains(t *testing.T, content string, expectedSubstrings []string) {
	t.Helper()
	for _, want := range expectedSubstrings {
		if strings.Contains(content, want) {
			continue
		}
		t.Errorf("Content missing expected substring: %s", want)
	}
}

// MustSucceed fails the test if the error is not nil.
func MustSucceed(t *testing.T, err error, operation string) {
	t.Helper()
	if err == nil {
		return
	}
	t.Fatalf("Operation %s failed: %v", operation, err)
}

// CloseFile closes a file and reports errors to the test.
func CloseFile(t *testing.T, file *os.File) {
	t.Helper()
	if closeErr := file.Close(); closeErr != nil {
		t.Errorf("Failed to close file: %v", closeErr)
	}
}

View File

@@ -0,0 +1,107 @@
package testutil
import (
"errors"
"os"
"testing"
)
func TestVerifyContentContains(t *testing.T) {
// Test successful verification
t.Run("all substrings present", func(t *testing.T) {
content := "This is a test file with multiple lines"
VerifyContentContains(t, content, []string{"test file", "multiple lines"})
// If we get here, the test passed
})
// Test empty expected substrings
t.Run("empty expected substrings", func(t *testing.T) {
content := "Any content"
VerifyContentContains(t, content, []string{})
// Should pass with no expected strings
})
// For failure cases, we'll test indirectly by verifying behavior
t.Run("verify error reporting", func(t *testing.T) {
// We can't easily test the failure case directly since it calls t.Errorf
// But we can at least verify the function doesn't panic
defer func() {
if r := recover(); r != nil {
t.Errorf("VerifyContentContains panicked: %v", r)
}
}()
// This would normally fail but we're just checking it doesn't panic
content := "test"
expected := []string{"not found"}
// Create a sub-test that we expect to fail
t.Run("expected_failure", func(t *testing.T) {
t.Skip("Skipping actual failure test")
VerifyContentContains(t, content, expected)
})
})
}
func TestMustSucceed(t *testing.T) {
// Test with nil error (should succeed)
t.Run("nil error", func(t *testing.T) {
MustSucceed(t, nil, "successful operation")
// If we get here, the test passed
})
// Test error behavior without causing test failure
t.Run("verify error handling", func(t *testing.T) {
// We can't test the failure case directly since it calls t.Fatalf
// But we can verify the function exists and is callable
defer func() {
if r := recover(); r != nil {
t.Errorf("MustSucceed panicked: %v", r)
}
}()
// Create a sub-test that we expect to fail
t.Run("expected_failure", func(t *testing.T) {
t.Skip("Skipping actual failure test")
MustSucceed(t, errors.New("test error"), "failed operation")
})
})
}
func TestCloseFile(t *testing.T) {
// Test closing a normal file
t.Run("close normal file", func(t *testing.T) {
file, err := os.CreateTemp(t.TempDir(), "test")
if err != nil {
t.Fatalf("Failed to create test file: %v", err)
}
CloseFile(t, file)
// Verify file is closed by trying to write to it
_, writeErr := file.Write([]byte("test"))
if writeErr == nil {
t.Error("Expected write to fail on closed file")
}
})
// Test that CloseFile doesn't panic on already closed files
// Note: We can't easily test the error case without causing test failure
// since CloseFile calls t.Errorf, which is the expected behavior
t.Run("verify CloseFile function exists and is callable", func(t *testing.T) {
// This test just verifies the function signature and basic functionality
// The error case is tested in integration tests where failures are expected
file, err := os.CreateTemp(t.TempDir(), "test")
if err != nil {
t.Fatalf("Failed to create test file: %v", err)
}
// Test normal case - file should close successfully
CloseFile(t, file)
// Verify file is closed
_, writeErr := file.Write([]byte("test"))
if writeErr == nil {
t.Error("Expected write to fail on closed file")
}
})
}

240
utils/errors.go Normal file
View File

@@ -0,0 +1,240 @@
// Package utils provides common utility functions.
package utils
import (
"fmt"
"github.com/sirupsen/logrus"
)
// ErrorType represents the category of error.
type ErrorType int

const (
	// ErrorTypeUnknown represents an unknown error type.
	ErrorTypeUnknown ErrorType = iota
	// ErrorTypeCLI represents command-line interface errors.
	ErrorTypeCLI
	// ErrorTypeFileSystem represents file system operation errors.
	ErrorTypeFileSystem
	// ErrorTypeProcessing represents file processing errors.
	ErrorTypeProcessing
	// ErrorTypeConfiguration represents configuration errors.
	ErrorTypeConfiguration
	// ErrorTypeIO represents input/output errors.
	ErrorTypeIO
	// ErrorTypeValidation represents validation errors.
	ErrorTypeValidation
)

// String returns the string representation of the error type, or "Unknown"
// for unrecognized values.
func (e ErrorType) String() string {
	names := map[ErrorType]string{
		ErrorTypeCLI:           "CLI",
		ErrorTypeFileSystem:    "FileSystem",
		ErrorTypeProcessing:    "Processing",
		ErrorTypeConfiguration: "Configuration",
		ErrorTypeIO:            "IO",
		ErrorTypeValidation:    "Validation",
	}
	if name, ok := names[e]; ok {
		return name
	}
	return "Unknown"
}

// StructuredError represents a structured error with type, code, and context.
type StructuredError struct {
	Type     ErrorType
	Code     string
	Message  string
	Cause    error
	Context  map[string]any
	FilePath string
	Line     int
}

// Error implements the error interface. The cause, when present, is
// appended after the "Type [Code]: Message" prefix.
func (e *StructuredError) Error() string {
	base := fmt.Sprintf("%s [%s]: %s", e.Type, e.Code, e.Message)
	if e.Cause == nil {
		return base
	}
	return fmt.Sprintf("%s: %v", base, e.Cause)
}

// Unwrap returns the underlying cause error.
func (e *StructuredError) Unwrap() error {
	return e.Cause
}

// WithContext adds context information to the error and returns it for chaining.
func (e *StructuredError) WithContext(key string, value any) *StructuredError {
	if e.Context == nil {
		e.Context = map[string]any{}
	}
	e.Context[key] = value
	return e
}

// WithFilePath adds file path information to the error and returns it for chaining.
func (e *StructuredError) WithFilePath(filePath string) *StructuredError {
	e.FilePath = filePath
	return e
}

// WithLine adds line number information to the error and returns it for chaining.
func (e *StructuredError) WithLine(line int) *StructuredError {
	e.Line = line
	return e
}
// NewStructuredError creates a new structured error.
// context may be nil and filePath may be empty when no file is involved.
// The parameter type uses `any` for consistency with the rest of the package
// (`any` is an alias of `interface{}`, so callers are unaffected).
func NewStructuredError(errorType ErrorType, code, message, filePath string, context map[string]any) *StructuredError {
	return &StructuredError{
		Type:     errorType,
		Code:     code,
		Message:  message,
		FilePath: filePath,
		Context:  context,
	}
}
// NewStructuredErrorf creates a new structured error with a printf-formatted message.
func NewStructuredErrorf(errorType ErrorType, code, format string, args ...any) *StructuredError {
	message := fmt.Sprintf(format, args...)
	return &StructuredError{Type: errorType, Code: code, Message: message}
}
// WrapError wraps an existing error with structured error information,
// keeping it reachable as Cause for errors.Unwrap.
func WrapError(err error, errorType ErrorType, code, message string) *StructuredError {
	wrapped := &StructuredError{
		Type:    errorType,
		Code:    code,
		Message: message,
		Cause:   err,
	}
	return wrapped
}
// WrapErrorf wraps an existing error with a printf-formatted message.
// It is a convenience layer over WrapError.
func WrapErrorf(err error, errorType ErrorType, code, format string, args ...any) *StructuredError {
	return WrapError(err, errorType, code, fmt.Sprintf(format, args...))
}
// Common error codes for each type.
// These are the machine-readable identifiers stored in StructuredError.Code.
const (
	// CLI Error Codes
	CodeCLIMissingSource = "MISSING_SOURCE"
	CodeCLIInvalidArgs   = "INVALID_ARGS"
	// FileSystem Error Codes
	CodeFSPathResolution = "PATH_RESOLUTION"
	CodeFSPermission     = "PERMISSION_DENIED"
	CodeFSNotFound       = "NOT_FOUND"
	CodeFSAccess         = "ACCESS_DENIED"
	// Processing Error Codes
	CodeProcessingFileRead   = "FILE_READ"
	CodeProcessingCollection = "COLLECTION"
	CodeProcessingTraversal  = "TRAVERSAL"
	CodeProcessingEncode     = "ENCODE"
	// Configuration Error Codes
	CodeConfigValidation = "VALIDATION"
	CodeConfigMissing    = "MISSING"
	// IO Error Codes
	CodeIOFileCreate = "FILE_CREATE"
	CodeIOFileWrite  = "FILE_WRITE"
	CodeIOEncoding   = "ENCODING"
	CodeIOWrite      = "WRITE"
	CodeIORead       = "READ"
	CodeIOClose      = "CLOSE"
	// Validation Error Codes
	CodeValidationFormat   = "FORMAT"
	CodeValidationFileType = "FILE_TYPE"
	CodeValidationSize     = "SIZE_LIMIT"
	CodeValidationRequired = "REQUIRED"
	CodeValidationPath     = "PATH_TRAVERSAL"
	// Resource Limit Error Codes
	// NOTE(review): no dedicated ErrorType for resource limits is visible in
	// this file; presumably these codes are paired with other error types.
	CodeResourceLimitFiles       = "FILE_COUNT_LIMIT"
	CodeResourceLimitTotalSize   = "TOTAL_SIZE_LIMIT"
	CodeResourceLimitTimeout     = "TIMEOUT"
	CodeResourceLimitMemory      = "MEMORY_LIMIT"
	CodeResourceLimitConcurrency = "CONCURRENCY_LIMIT"
	CodeResourceLimitRate        = "RATE_LIMIT"
)
// Predefined error constructors for common error scenarios

// NewCLIMissingSourceError creates a CLI error for missing source argument.
func NewCLIMissingSourceError() *StructuredError {
	// The usage text doubles as the user-facing error message.
	const usage = "usage: gibidify -source <source_directory> [--destination <output_file>] [--format=json|yaml|markdown]"
	return NewStructuredError(ErrorTypeCLI, CodeCLIMissingSource, usage, "", nil)
}
// NewFileSystemError creates a file system error.
func NewFileSystemError(code, message string) *StructuredError {
	return &StructuredError{Type: ErrorTypeFileSystem, Code: code, Message: message}
}
// NewProcessingError creates a processing error.
func NewProcessingError(code, message string) *StructuredError {
	return &StructuredError{Type: ErrorTypeProcessing, Code: code, Message: message}
}
// NewIOError creates an IO error.
func NewIOError(code, message string) *StructuredError {
	return &StructuredError{Type: ErrorTypeIO, Code: code, Message: message}
}
// NewValidationError creates a validation error.
func NewValidationError(code, message string) *StructuredError {
	return &StructuredError{Type: ErrorTypeValidation, Code: code, Message: message}
}
// LogError logs an error with a consistent format if the error is not nil.
// operation describes what was being attempted; when args are supplied it is
// treated as a printf-style format string. Structured errors additionally get
// their type, code, context, file path, and line emitted as logrus fields.
func LogError(operation string, err error, args ...any) {
	if err == nil {
		return
	}
	msg := operation
	if len(args) > 0 {
		msg = fmt.Sprintf(operation, args...)
	}
	structErr, ok := err.(*StructuredError)
	if !ok {
		// NOTE(review): a StructuredError wrapped inside another error is not
		// detected by this type assertion; errors.As would also catch that
		// case — confirm whether callers ever wrap structured errors.
		logrus.Errorf("%s: %v", msg, err)
		return
	}
	logrus.WithFields(logrus.Fields{
		"error_type": structErr.Type.String(),
		"error_code": structErr.Code,
		"context":    structErr.Context,
		"file_path":  structErr.FilePath,
		"line":       structErr.Line,
	}).Errorf("%s: %v", msg, err)
}
// LogErrorf logs an error with a formatted message if the error is not nil.
// It is a thin convenience wrapper around LogError.
func LogErrorf(err error, format string, args ...any) {
	if err == nil {
		return
	}
	LogError(format, err, args...)
}

242
utils/errors_test.go Normal file
View File

@@ -0,0 +1,242 @@
package utils
import (
"bytes"
"errors"
"fmt"
"strings"
"testing"
"github.com/sirupsen/logrus"
)
// captureLogOutput redirects logrus output to a buffer for the duration of f
// and returns whatever was logged.
func captureLogOutput(f func()) string {
	var buf bytes.Buffer
	// Save the current output BEFORE swapping it out. The previous version
	// evaluated logrus.StandardLogger().Out in the defer statement, i.e.
	// after SetOutput(&buf), so the deferred restore re-installed the capture
	// buffer instead of the original destination, leaking the redirection to
	// every later test.
	origOut := logrus.StandardLogger().Out
	logrus.SetOutput(&buf)
	defer logrus.SetOutput(origOut)
	f()
	return buf.String()
}
// TestLogError exercises LogError across nil errors, plain errors,
// printf-style formatting of the operation string, and wrapped errors, and
// verifies the entry is emitted at ERROR level.
func TestLogError(t *testing.T) {
	tests := []struct {
		name      string
		operation string
		err       error
		args      []any
		wantLog   string
		wantEmpty bool
	}{
		{
			name:      "nil error should not log",
			operation: "test operation",
			err:       nil,
			args:      nil,
			wantEmpty: true,
		},
		{
			name:      "basic error logging",
			operation: "failed to read file",
			err:       errors.New("permission denied"),
			args:      nil,
			wantLog:   "failed to read file: permission denied",
		},
		{
			name:      "error with formatting args",
			operation: "failed to process file %s",
			err:       errors.New("file too large"),
			args:      []any{"test.txt"},
			wantLog:   "failed to process file test.txt: file too large",
		},
		{
			name:      "error with multiple formatting args",
			operation: "failed to copy from %s to %s",
			err:       errors.New("disk full"),
			args:      []any{"source.txt", "dest.txt"},
			wantLog:   "failed to copy from source.txt to dest.txt: disk full",
		},
		{
			name:      "wrapped error",
			operation: "database operation failed",
			err:       fmt.Errorf("connection error: %w", errors.New("timeout")),
			args:      nil,
			wantLog:   "database operation failed: connection error: timeout",
		},
		{
			name:      "empty operation string",
			operation: "",
			err:       errors.New("some error"),
			args:      nil,
			wantLog:   ": some error",
		},
		{
			// Without args the operation string must be logged verbatim,
			// even when it contains printf verbs like '%'.
			name:      "operation with percentage sign",
			operation: "processing 50% complete",
			err:       errors.New("interrupted"),
			args:      nil,
			wantLog:   "processing 50% complete: interrupted",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			output := captureLogOutput(func() {
				LogError(tt.operation, tt.err, tt.args...)
			})
			if tt.wantEmpty {
				if output != "" {
					t.Errorf("LogError() logged output when error was nil: %q", output)
				}
				return
			}
			if !strings.Contains(output, tt.wantLog) {
				t.Errorf("LogError() output = %q, want to contain %q", output, tt.wantLog)
			}
			// Verify it's logged at ERROR level
			if !strings.Contains(output, "level=error") {
				t.Errorf("LogError() should log at ERROR level, got: %q", output)
			}
		})
	}
}
// TestLogErrorf verifies LogErrorf's message formatting, nil-error
// short-circuit, and ERROR-level output.
func TestLogErrorf(t *testing.T) {
	tests := []struct {
		name      string
		err       error
		format    string
		args      []any
		wantLog   string
		wantEmpty bool
	}{
		{
			name:      "nil error should not log",
			err:       nil,
			format:    "operation %s failed",
			args:      []any{"test"},
			wantEmpty: true,
		},
		{
			name:    "basic formatted error",
			err:     errors.New("not found"),
			format:  "file %s not found",
			args:    []any{"config.yaml"},
			wantLog: "file config.yaml not found: not found",
		},
		{
			name:    "multiple format arguments",
			err:     errors.New("invalid range"),
			format:  "value %d is not between %d and %d",
			args:    []any{150, 0, 100},
			wantLog: "value 150 is not between 0 and 100: invalid range",
		},
		{
			name:    "no format arguments",
			err:     errors.New("generic error"),
			format:  "operation failed",
			args:    nil,
			wantLog: "operation failed: generic error",
		},
		{
			name:    "format with different types",
			err:     errors.New("type mismatch"),
			format:  "expected %s but got %d",
			args:    []any{"string", 42},
			wantLog: "expected string but got 42: type mismatch",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			output := captureLogOutput(func() {
				LogErrorf(tt.err, tt.format, tt.args...)
			})
			if tt.wantEmpty {
				if output != "" {
					t.Errorf("LogErrorf() logged output when error was nil: %q", output)
				}
				return
			}
			if !strings.Contains(output, tt.wantLog) {
				t.Errorf("LogErrorf() output = %q, want to contain %q", output, tt.wantLog)
			}
			// Verify it's logged at ERROR level
			if !strings.Contains(output, "level=error") {
				t.Errorf("LogErrorf() should log at ERROR level, got: %q", output)
			}
		})
	}
}
// TestLogErrorConcurrency ensures LogError can be called from many
// goroutines at once without racing (run with -race to catch violations).
func TestLogErrorConcurrency(t *testing.T) {
	const workers = 10
	done := make(chan bool, workers)
	for i := 0; i < workers; i++ {
		go func(n int) {
			LogError("concurrent operation", fmt.Errorf("error %d", n))
			done <- true
		}(i)
	}
	// Block until every goroutine has reported completion.
	for i := 0; i < workers; i++ {
		<-done
	}
}
// TestLogErrorfConcurrency ensures LogErrorf can be called from many
// goroutines at once without racing (run with -race to catch violations).
func TestLogErrorfConcurrency(t *testing.T) {
	const workers = 10
	done := make(chan bool, workers)
	for i := 0; i < workers; i++ {
		go func(n int) {
			LogErrorf(fmt.Errorf("error %d", n), "concurrent operation %d", n)
			done <- true
		}(i)
	}
	// Block until every goroutine has reported completion.
	for i := 0; i < workers; i++ {
		<-done
	}
}
// BenchmarkLogError benchmarks the LogError function with output discarded.
func BenchmarkLogError(b *testing.B) {
	err := errors.New("benchmark error")
	// Capture the original output BEFORE redirecting it: the previous code
	// evaluated logrus.StandardLogger().Out in the defer statement after
	// SetOutput, so it "restored" the discard buffer instead of the real
	// output, silencing logs for anything that ran after the benchmark.
	origOut := logrus.StandardLogger().Out
	logrus.SetOutput(bytes.NewBuffer(nil))
	defer logrus.SetOutput(origOut)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		LogError("benchmark operation", err)
	}
}
// BenchmarkLogErrorf benchmarks the LogErrorf function with output discarded.
func BenchmarkLogErrorf(b *testing.B) {
	err := errors.New("benchmark error")
	// Capture the original output BEFORE redirecting it: the previous code
	// evaluated logrus.StandardLogger().Out in the defer statement after
	// SetOutput, so it "restored" the discard buffer instead of the real
	// output, silencing logs for anything that ran after the benchmark.
	origOut := logrus.StandardLogger().Out
	logrus.SetOutput(bytes.NewBuffer(nil))
	defer logrus.SetOutput(origOut)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		LogErrorf(err, "benchmark operation %d", i)
	}
}
// BenchmarkLogErrorNil measures the no-op fast path taken when err is nil.
func BenchmarkLogErrorNil(b *testing.B) {
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		LogError("benchmark operation", nil)
	}
}

167
utils/paths.go Normal file
View File

@@ -0,0 +1,167 @@
// Package utils provides common utility functions.
package utils
import (
"fmt"
"os"
"path/filepath"
"strings"
)
// GetAbsolutePath returns the absolute form of path.
// It wraps filepath.Abs so every caller reports resolution failures with the
// same error message shape.
func GetAbsolutePath(path string) (string, error) {
	resolved, err := filepath.Abs(path)
	if err != nil {
		return "", fmt.Errorf("failed to get absolute path for %s: %w", path, err)
	}
	return resolved, nil
}
// GetBaseName returns the base name for the given path, handling special cases.
// filepath.Base("") is ".", and Base of a root path returns the separator
// itself ("/"); neither is usable as an output file name, so both degenerate
// cases map to "output". The root-path case was previously returned verbatim.
func GetBaseName(absPath string) string {
	baseName := filepath.Base(absPath)
	if baseName == "." || baseName == "" || baseName == string(filepath.Separator) {
		return "output"
	}
	return baseName
}
// ValidateSourcePath validates a source directory path for security.
// It ensures the path exists, is a directory, and doesn't contain path
// traversal attempts. Relative paths must resolve to a location inside the
// current working directory.
func ValidateSourcePath(path string) error {
	if path == "" {
		return NewStructuredError(ErrorTypeValidation, CodeValidationRequired, "source path is required", "", nil)
	}
	// Check for traversal patterns before cleaning; filepath.Clean would
	// collapse ".." segments and hide the attempt.
	if strings.Contains(path, "..") {
		return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "path traversal attempt detected in source path", path, map[string]any{
			"original_path": path,
		})
	}
	// Clean and get absolute path.
	cleaned := filepath.Clean(path)
	abs, err := filepath.Abs(cleaned)
	if err != nil {
		return NewStructuredError(ErrorTypeFileSystem, CodeFSPathResolution, "cannot resolve source path", path, map[string]any{
			"error": err.Error(),
		})
	}
	// For relative paths, ensure the resolved location stays inside the
	// current working directory.
	if !filepath.IsAbs(path) {
		cwd, err := os.Getwd()
		if err != nil {
			return NewStructuredError(ErrorTypeFileSystem, CodeFSPathResolution, "cannot get current working directory", path, map[string]any{
				"error": err.Error(),
			})
		}
		cwdAbs, err := filepath.Abs(cwd)
		if err != nil {
			return NewStructuredError(ErrorTypeFileSystem, CodeFSPathResolution, "cannot resolve current working directory", path, map[string]any{
				"error": err.Error(),
			})
		}
		// Separator-aware containment check: a bare strings.HasPrefix would
		// wrongly accept sibling directories that share a name prefix
		// (e.g. /work-other when cwd is /work).
		if abs != cwdAbs && !strings.HasPrefix(abs, cwdAbs+string(filepath.Separator)) {
			return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "source path attempts to access directories outside current working directory", path, map[string]any{
				"resolved_path": abs,
				"working_dir":   cwdAbs,
			})
		}
	}
	// Check if path exists and is a directory.
	info, err := os.Stat(cleaned)
	if err != nil {
		if os.IsNotExist(err) {
			return NewStructuredError(ErrorTypeFileSystem, CodeFSNotFound, "source directory does not exist", path, nil)
		}
		return NewStructuredError(ErrorTypeFileSystem, CodeFSAccess, "cannot access source directory", path, map[string]any{
			"error": err.Error(),
		})
	}
	if !info.IsDir() {
		return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "source path must be a directory", path, map[string]any{
			"is_file": true,
		})
	}
	return nil
}
// ValidateDestinationPath validates a destination file path for security.
// It ensures the path doesn't contain path traversal attempts, is not an
// existing directory, and that the parent directory exists.
func ValidateDestinationPath(path string) error {
	if path == "" {
		return NewStructuredError(ErrorTypeValidation, CodeValidationRequired, "destination path is required", "", nil)
	}
	// Check for traversal patterns before cleaning; Clean would hide them.
	if strings.Contains(path, "..") {
		return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "path traversal attempt detected in destination path", path, map[string]any{
			"original_path": path,
		})
	}
	// Clean and resolve the path.
	cleaned := filepath.Clean(path)
	abs, err := filepath.Abs(cleaned)
	if err != nil {
		return NewStructuredError(ErrorTypeFileSystem, CodeFSPathResolution, "cannot resolve destination path", path, map[string]any{
			"error": err.Error(),
		})
	}
	// The destination must be a file target, never an existing directory.
	if info, err := os.Stat(abs); err == nil && info.IsDir() {
		return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "destination cannot be a directory", path, map[string]any{
			"is_directory": true,
		})
	}
	// Verify the parent directory exists and is actually a directory.
	// NOTE(review): this only checks existence, not writability; checking
	// write access would require attempting to create the file.
	parentDir := filepath.Dir(abs)
	if parentInfo, err := os.Stat(parentDir); err != nil {
		if os.IsNotExist(err) {
			return NewStructuredError(ErrorTypeFileSystem, CodeFSNotFound, "destination parent directory does not exist", path, map[string]any{
				"parent_dir": parentDir,
			})
		}
		return NewStructuredError(ErrorTypeFileSystem, CodeFSAccess, "cannot access destination parent directory", path, map[string]any{
			"parent_dir": parentDir,
			"error":      err.Error(),
		})
	} else if !parentInfo.IsDir() {
		return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "destination parent is not a directory", path, map[string]any{
			"parent_dir": parentDir,
		})
	}
	return nil
}
// ValidateConfigPath validates a configuration file path for security.
// It ensures the path doesn't contain path traversal attempts.
// An empty path is allowed (no explicit config file was given).
func ValidateConfigPath(path string) error {
	if path == "" {
		return nil // Empty path is allowed for config
	}
	// Check for traversal patterns before any cleaning.
	if strings.Contains(path, "..") {
		return NewStructuredError(ErrorTypeValidation, CodeValidationPath, "path traversal attempt detected in config path", path, map[string]any{
			"original_path": path,
		})
	}
	return nil
}

262
utils/paths_test.go Normal file
View File

@@ -0,0 +1,262 @@
package utils
import (
"os"
"path/filepath"
"runtime"
"strings"
"testing"
)
// TestGetAbsolutePath verifies absolute-path resolution for absolute,
// relative, empty, and oddly-formatted inputs.
func TestGetAbsolutePath(t *testing.T) {
	// Get current working directory for tests
	cwd, err := os.Getwd()
	if err != nil {
		t.Fatalf("Failed to get current directory: %v", err)
	}
	tests := []struct {
		name        string
		path        string
		wantPrefix  string
		wantErr     bool
		wantErrMsg  string
		skipWindows bool
	}{
		{
			name:       "absolute path unchanged",
			path:       cwd,
			wantPrefix: cwd,
			wantErr:    false,
		},
		{
			name:       "relative path current directory",
			path:       ".",
			wantPrefix: cwd,
			wantErr:    false,
		},
		{
			name:       "relative path parent directory",
			path:       "..",
			wantPrefix: filepath.Dir(cwd),
			wantErr:    false,
		},
		{
			name:       "relative path with file",
			path:       "test.txt",
			wantPrefix: filepath.Join(cwd, "test.txt"),
			wantErr:    false,
		},
		{
			name:       "relative path with subdirectory",
			path:       "subdir/file.go",
			wantPrefix: filepath.Join(cwd, "subdir", "file.go"),
			wantErr:    false,
		},
		{
			name:       "empty path",
			path:       "",
			wantPrefix: cwd,
			wantErr:    false,
		},
		{
			// "~" is not expanded by filepath.Abs; it is treated as a
			// literal directory name under cwd.
			name:        "path with tilde",
			path:        "~/test",
			wantPrefix:  filepath.Join(cwd, "~", "test"),
			wantErr:     false,
			skipWindows: false,
		},
		{
			name:       "path with multiple separators",
			path:       "path//to///file",
			wantPrefix: filepath.Join(cwd, "path", "to", "file"),
			wantErr:    false,
		},
		{
			name:       "path with trailing separator",
			path:       "path/",
			wantPrefix: filepath.Join(cwd, "path"),
			wantErr:    false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if tt.skipWindows && runtime.GOOS == "windows" {
				t.Skip("Skipping test on Windows")
			}
			got, err := GetAbsolutePath(tt.path)
			if tt.wantErr {
				if err == nil {
					t.Errorf("GetAbsolutePath() error = nil, wantErr %v", tt.wantErr)
					return
				}
				if tt.wantErrMsg != "" && !strings.Contains(err.Error(), tt.wantErrMsg) {
					t.Errorf("GetAbsolutePath() error = %v, want error containing %v", err, tt.wantErrMsg)
				}
				return
			}
			if err != nil {
				t.Errorf("GetAbsolutePath() unexpected error = %v", err)
				return
			}
			// Clean the expected path for comparison
			wantClean := filepath.Clean(tt.wantPrefix)
			gotClean := filepath.Clean(got)
			if gotClean != wantClean {
				t.Errorf("GetAbsolutePath() = %v, want %v", gotClean, wantClean)
			}
			// Verify the result is actually absolute
			if !filepath.IsAbs(got) {
				t.Errorf("GetAbsolutePath() returned non-absolute path: %v", got)
			}
		})
	}
}
// TestGetAbsolutePathSpecialCases covers symlink handling on Unix systems;
// filepath.Abs does not follow symlinks, so both valid and broken links
// resolve without error.
func TestGetAbsolutePathSpecialCases(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("Skipping Unix-specific tests on Windows")
	}
	tests := []struct {
		name    string
		setup   func() (string, func()) // returns the path to test and a cleanup func
		path    string
		wantErr bool
	}{
		{
			name: "symlink to directory",
			setup: func() (string, func()) {
				tmpDir := t.TempDir()
				target := filepath.Join(tmpDir, "target")
				link := filepath.Join(tmpDir, "link")
				if err := os.Mkdir(target, 0o755); err != nil {
					t.Fatalf("Failed to create target directory: %v", err)
				}
				if err := os.Symlink(target, link); err != nil {
					t.Fatalf("Failed to create symlink: %v", err)
				}
				// t.TempDir is removed automatically, so cleanup is a no-op.
				return link, func() {}
			},
			path:    "",
			wantErr: false,
		},
		{
			name: "broken symlink",
			setup: func() (string, func()) {
				tmpDir := t.TempDir()
				link := filepath.Join(tmpDir, "broken_link")
				if err := os.Symlink("/nonexistent/path", link); err != nil {
					t.Fatalf("Failed to create broken symlink: %v", err)
				}
				return link, func() {}
			},
			path:    "",
			wantErr: false, // filepath.Abs still works with broken symlinks
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			path, cleanup := tt.setup()
			defer cleanup()
			// An empty path in the table means "use the path built by setup".
			if tt.path == "" {
				tt.path = path
			}
			got, err := GetAbsolutePath(tt.path)
			if (err != nil) != tt.wantErr {
				t.Errorf("GetAbsolutePath() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if err == nil && !filepath.IsAbs(got) {
				t.Errorf("GetAbsolutePath() returned non-absolute path: %v", got)
			}
		})
	}
}
// TestGetAbsolutePathConcurrency ensures GetAbsolutePath is safe to call
// from multiple goroutines at once (run with -race to catch violations).
func TestGetAbsolutePathConcurrency(t *testing.T) {
	paths := []string{".", "..", "test.go", "subdir/file.txt", "/tmp/test"}
	done := make(chan bool, len(paths))
	for _, p := range paths {
		go func(path string) {
			_, _ = GetAbsolutePath(path)
			done <- true
		}(p)
	}
	// Block until every goroutine has reported completion.
	for range paths {
		<-done
	}
}
// TestGetAbsolutePathErrorFormatting checks the error-wrapping format used
// when filepath.Abs fails, and the success path otherwise.
func TestGetAbsolutePathErrorFormatting(t *testing.T) {
	// This test verifies error message formatting
	// We need to trigger an actual error from filepath.Abs
	// On Unix systems, we can't easily trigger filepath.Abs errors
	// so we'll just verify the error wrapping works correctly
	// Create a test that would fail if filepath.Abs returns an error
	path := "test/path"
	got, err := GetAbsolutePath(path)
	if err != nil {
		// If we somehow get an error, verify it's properly formatted
		if !strings.Contains(err.Error(), "failed to get absolute path for") {
			t.Errorf("Error message format incorrect: %v", err)
		}
		if !strings.Contains(err.Error(), path) {
			t.Errorf("Error message should contain original path: %v", err)
		}
	} else {
		// Normal case - just verify we got a valid absolute path
		if !filepath.IsAbs(got) {
			t.Errorf("Expected absolute path, got: %v", got)
		}
	}
}
// BenchmarkGetAbsolutePath measures GetAbsolutePath on a relative path.
func BenchmarkGetAbsolutePath(b *testing.B) {
	const relPath = "test/path/file.go"
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_, _ = GetAbsolutePath(relPath)
	}
}
// BenchmarkGetAbsolutePathAbs measures the already-absolute input case.
func BenchmarkGetAbsolutePathAbs(b *testing.B) {
	absPath := "/home/user/test/file.go"
	if runtime.GOOS == "windows" {
		absPath = "C:\\Users\\test\\file.go"
	}
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_, _ = GetAbsolutePath(absPath)
	}
}
// BenchmarkGetAbsolutePathCurrent measures resolving the current directory.
func BenchmarkGetAbsolutePathCurrent(b *testing.B) {
	const dot = "."
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_, _ = GetAbsolutePath(dot)
	}
}

138
utils/writers.go Normal file
View File

@@ -0,0 +1,138 @@
package utils
import (
"encoding/json"
"io"
"strings"
)
// SafeCloseReader closes reader when it also implements io.Closer, logging
// (never returning) any close failure. Non-closable readers are ignored.
// path is attached to the logged error for context.
func SafeCloseReader(reader io.Reader, path string) {
	closer, ok := reader.(io.Closer)
	if !ok {
		return
	}
	if err := closer.Close(); err != nil {
		LogError(
			"Failed to close file reader",
			WrapError(err, ErrorTypeIO, CodeIOClose, "failed to close file reader").WithFilePath(path),
		)
	}
}
// WriteWithErrorWrap writes content to writer, wrapping any failure in a
// structured IO error carrying errorMsg and, when non-empty, filePath.
func WriteWithErrorWrap(writer io.Writer, content, errorMsg, filePath string) error {
	// io.WriteString avoids the []byte copy when the writer implements
	// io.StringWriter, and is equivalent to writer.Write([]byte(content))
	// otherwise.
	if _, err := io.WriteString(writer, content); err != nil {
		wrappedErr := WrapError(err, ErrorTypeIO, CodeIOWrite, errorMsg)
		if filePath != "" {
			wrappedErr = wrappedErr.WithFilePath(filePath)
		}
		return wrappedErr
	}
	return nil
}
// StreamContent copies reader to writer in chunks of chunkSize bytes,
// optionally transforming each chunk with processChunk. filePath, when
// non-empty, is attached to any resulting error for context.
func StreamContent(reader io.Reader, writer io.Writer, chunkSize int, filePath string, processChunk func([]byte) []byte) error {
	// Guard against non-positive sizes: make([]byte, n) panics for n < 0 and
	// a zero-length buffer would read 0 bytes forever without reaching EOF.
	if chunkSize <= 0 {
		chunkSize = 32 * 1024
	}
	buf := make([]byte, chunkSize)
	for {
		n, err := reader.Read(buf)
		// Per the io.Reader contract, consume any bytes returned before
		// inspecting err.
		if n > 0 {
			processed := buf[:n]
			if processChunk != nil {
				processed = processChunk(processed)
			}
			if _, writeErr := writer.Write(processed); writeErr != nil {
				wrappedErr := WrapError(writeErr, ErrorTypeIO, CodeIOWrite, "failed to write content chunk")
				if filePath != "" {
					wrappedErr = wrappedErr.WithFilePath(filePath)
				}
				return wrappedErr
			}
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			wrappedErr := WrapError(err, ErrorTypeIO, CodeIORead, "failed to read content chunk")
			if filePath != "" {
				wrappedErr = wrappedErr.WithFilePath(filePath)
			}
			return wrappedErr
		}
	}
	return nil
}
// EscapeForJSON escapes content for embedding in JSON output, delegating to
// the standard library so all escape rules are handled correctly.
func EscapeForJSON(content string) string {
	// json.Marshal of a string cannot fail (invalid UTF-8 is replaced),
	// so the error is deliberately discarded.
	encoded, _ := json.Marshal(content)
	s := string(encoded)
	// Strip the surrounding quotes Marshal adds; anything else is returned
	// untouched as a defensive fallback.
	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
		return s
	}
	return s[1 : len(s)-1]
}
// EscapeForYAML quotes/escapes content for YAML output if needed.
// Plain scalars are returned unchanged; anything YAML could misinterpret
// (special characters, comment markers, boolean-like words, the empty
// string) is double-quoted with internal escapes.
func EscapeForYAML(content string) string {
	// '#' starts a YAML comment, and '&', '*', '!', '%', '@', '`' are
	// anchor/alias/tag/directive/reserved indicators — all previously
	// unquoted, which could corrupt the emitted document.
	needsQuotes := strings.ContainsAny(content, " \t\n\r:{}[]|>-'\"\\#&*!%@`") ||
		strings.HasPrefix(content, "-") ||
		strings.HasPrefix(content, "?") ||
		strings.HasPrefix(content, ":") ||
		content == "" ||
		content == "true" || content == "false" ||
		content == "null" || content == "~" ||
		// YAML 1.1 parsers resolve these words as booleans, so they must be
		// quoted to stay strings.
		strings.EqualFold(content, "yes") || strings.EqualFold(content, "no") ||
		strings.EqualFold(content, "on") || strings.EqualFold(content, "off")
	if needsQuotes {
		// Use double quotes and escape backslashes and internal quotes.
		escaped := strings.ReplaceAll(content, "\\", "\\\\")
		escaped = strings.ReplaceAll(escaped, "\"", "\\\"")
		return "\"" + escaped + "\""
	}
	return content
}
// StreamLines provides line-based streaming for YAML content.
// The entire reader is buffered in memory, each line is optionally
// transformed by lineProcessor, and the result is written back line by line.
// filePath, when non-empty, is attached to any resulting error for context.
func StreamLines(reader io.Reader, writer io.Writer, filePath string, lineProcessor func(string) string) error {
	// Read all content first (for small files this is fine)
	content, err := io.ReadAll(reader)
	if err != nil {
		wrappedErr := WrapError(err, ErrorTypeIO, CodeIORead, "failed to read content for line processing")
		if filePath != "" {
			wrappedErr = wrappedErr.WithFilePath(filePath)
		}
		return wrappedErr
	}
	// Split into lines and process each
	lines := strings.Split(string(content), "\n")
	for i, line := range lines {
		processedLine := line
		if lineProcessor != nil {
			processedLine = lineProcessor(line)
		}
		// Write line with proper line ending (except for last empty line).
		// The trailing empty element is the artifact of splitting content
		// that ended in "\n", so it gets no newline re-attached.
		// NOTE(review): input whose final line lacks a newline gains one
		// here — confirm that is intended for the YAML output format.
		lineToWrite := processedLine
		if i < len(lines)-1 || line != "" {
			lineToWrite += "\n"
		}
		if _, writeErr := writer.Write([]byte(lineToWrite)); writeErr != nil {
			wrappedErr := WrapError(writeErr, ErrorTypeIO, CodeIOWrite, "failed to write processed line")
			if filePath != "" {
				wrappedErr = wrappedErr.WithFilePath(filePath)
			}
			return wrappedErr
		}
	}
	return nil
}