feat: initial commit

This commit is contained in:
2025-10-19 09:45:39 +03:00
commit 36f25aaff4
55 changed files with 9246 additions and 0 deletions

22
.editorconfig Normal file
View File

@@ -0,0 +1,22 @@
# EditorConfig
# https://editorconfig.org
root = true
[*]
charset = utf-8
end_of_line = lf
indent_size = 4
indent_style = space
insert_final_newline = true
trim_trailing_whitespace = true
max_line_length = 120
[*.{yml,yaml,json,toml}]
indent_size = 2
[*.md]
trim_trailing_whitespace = false
[Makefile]
indent_style = tab

53
.github/workflows/pr.yml vendored Normal file
View File

@@ -0,0 +1,53 @@
---
name: PR
on:
pull_request:
jobs:
test:
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0
- name: Install dependencies
run: uv sync
- name: Run tests
run: uv run -m pytest --cov
- name: Lint
run: uvx ruff check .
- name: Type check
run: uvx mypy .
audit:
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0
- name: Install
run: |
uv sync
uv pip install -e .
- name: Audit workflows
run: uv run ghaw-auditor scan --repo . --output audit-results
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload results
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
if: always()
with:
name: audit-results
path: audit-results/

54
.gitignore vendored Normal file
View File

@@ -0,0 +1,54 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual environments
venv/
ENV/
env/
# IDEs
.vscode/
.idea/
*.swp
*.swo
*~
# Testing
.pytest_cache/
.coverage
htmlcov/
.tox/
# Type checking
.mypy_cache/
.dmypy.json
dmypy.json
# Ruff
.ruff_cache/
# Auditor output
.ghaw-auditor/
# Cache
.cache/
audit-results

77
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,77 @@
---
# Configure pre-commit to use uv for Python hooks
# Pre-commit 3.6.0+ automatically detects and uses uv when available
default_install_hook_types: [pre-commit, commit-msg]
repos:
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.9.2
hooks:
- id: uv-lock
- id: uv-sync
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: requirements-txt-fixer
- id: detect-private-key
- id: destroyed-symlinks
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- id: check-ast
- id: check-case-conflict
- id: check-merge-conflict
- id: check-executables-have-shebangs
- id: check-shebang-scripts-are-executable
- id: check-symlinks
- id: check-toml
- id: check-xml
- id: check-yaml
args: [--allow-multiple-documents]
- id: end-of-file-fixer
- id: mixed-line-ending
args: [--fix=auto]
- id: pretty-format-json
args: [--autofix, --no-sort-keys]
- repo: https://github.com/DavidAnson/markdownlint-cli2
rev: v0.18.1
hooks:
- id: markdownlint-cli2
args: [--fix]
- repo: https://github.com/adrienverge/yamllint
rev: v1.37.1
hooks:
- id: yamllint
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.0
hooks:
# Run the linter with auto-fix
- id: ruff-check
args: [--fix]
# Run the formatter
- id: ruff-format
- repo: https://github.com/rhysd/actionlint
rev: v1.7.8
hooks:
- id: actionlint
args: ["-shellcheck="]
- repo: https://github.com/renovatebot/pre-commit-hooks
rev: 41.149.2
hooks:
- id: renovate-config-validator
- repo: https://github.com/bridgecrewio/checkov.git
rev: "3.2.483"
hooks:
- id: checkov
args:
- "--quiet"
- repo: https://github.com/gitleaks/gitleaks
rev: v8.28.0
hooks:
- id: gitleaks

1
.yamlignore Normal file
View File

@@ -0,0 +1 @@
# Ignore patterns for yamllint

42
CHANGELOG.md Normal file
View File

@@ -0,0 +1,42 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.0.0] - 2025-10-02
### Added
- Initial release
- Full workflow and action scanning
- GitHub API integration with caching and retries
- Action reference resolution (local, GitHub, Docker)
- Monorepo action support (owner/repo/path@ref)
- Diff mode with baseline comparison
- Policy validation with enforcement
- JSON and Markdown report generation
- Comprehensive metadata extraction:
- Triggers, permissions, concurrency
- Jobs, steps, actions used
- Secrets, environment variables
- Containers, services, strategies
- `scan`, `inventory`, and `validate` commands
- uv-based dependency management
- Disk caching with configurable TTL
- Parallel API calls with configurable concurrency
- Reusable workflow detection and contract parsing
- Support for empty workflow_call declarations
- Robust error handling for malformed YAML
### Technical
- Python 3.11+ with type hints
- Pydantic v2 models
- ruamel.yaml parser
- httpx client with tenacity retries
- Rich console output
- Typer CLI framework
- diskcache for persistent caching
- Test coverage with pytest

66
CONTRIBUTING.md Normal file
View File

@@ -0,0 +1,66 @@
# Contributing
## Setup
```bash
uv sync
uv pip install -e .
```
## Development
```bash
# Run locally
uv run ghaw-auditor scan --repo .
# Tests
uv run -m pytest
uv run -m pytest -k test_name
# Coverage
uv run -m pytest --cov --cov-report=html
# Lint & format
uvx ruff check .
uvx ruff format .
# Type check
uvx mypy .
```
## Code Style
- Python 3.11+ with type hints
- Max line length: 120 characters
- Follow PEP 8
- Use Pydantic for models
- Add docstrings to public functions
## Testing
- Write tests for new features
- Maintain coverage ≥ 85%
- Use pytest fixtures
- Mock external API calls
## Pull Requests
1. Fork and create a feature branch
2. Add tests
3. Ensure all checks pass
4. Update CHANGELOG.md
5. Submit PR with clear description
## Commit Messages
Follow conventional commits:
- `feat:` new feature
- `fix:` bug fix
- `docs:` documentation
- `test:` tests
- `refactor:` code refactoring
## Questions?
Open an issue for discussion.

37
Makefile Normal file
View File

@@ -0,0 +1,37 @@
.PHONY: help install test lint format check clean audit
help:
@echo "Available targets:"
@echo " install - Install dependencies with uv"
@echo " test - Run tests with coverage"
@echo " lint - Run ruff linting"
@echo " format - Format code with ruff"
@echo " check - Run all checks (lint, format, type, test)"
@echo " clean - Remove generated files"
@echo " audit - Run auditor on current repo"
install:
uv sync
uv pip install -e .
test:
uv run -m pytest -v --cov=ghaw_auditor --cov-report=term-missing
lint:
uvx ruff check .
format:
uvx ruff format .
typecheck:
uvx mypy .
check: lint format typecheck test
clean:
rm -rf .pytest_cache .mypy_cache .ruff_cache htmlcov .coverage
rm -rf build dist *.egg-info
find . -type d -name __pycache__ -exec rm -rf {} +
audit:
uv run ghaw-auditor scan --repo . --output .ghaw-auditor

476
README.md Normal file
View File

@@ -0,0 +1,476 @@
# GitHub Actions & Workflows Auditor
A Python CLI tool for analyzing, auditing, and tracking
GitHub Actions workflows and actions.
## Features
- **Comprehensive Scanning**: Discovers workflows (`.github/workflows/*.yml`)
and action manifests (`action.yml`)
- **Action Resolution**: Resolves GitHub action references to specific SHAs
via GitHub API
- **Monorepo Support**: Handles monorepo actions like `owner/repo/path@ref`
- **Policy Validation**: Enforces security and best practice policies
- **Diff Mode**: Compare current state against baselines to track changes
over time
- **Multiple Output Formats**: JSON and Markdown reports
- **Fast & Cached**: Uses `uv` for dependency management and disk caching
for API responses
- **Rich Analysis**: Extracts triggers, permissions, secrets, runners,
containers, services, and more
## Usage (Recommended)
Run directly with `uvx` without installation:
```bash
# Scan current directory
uvx ghaw-auditor scan
# Scan specific repository
uvx ghaw-auditor scan --repo /path/to/repo
# With GitHub token for better rate limits
GITHUB_TOKEN=ghp_xxx uvx ghaw-auditor scan --repo /path/to/repo
# List unique actions
uvx ghaw-auditor inventory --repo /path/to/repo
# Validate against policy
uvx ghaw-auditor validate --policy policy.yml --enforce
```
> **Note:** `uvx` runs the tool directly without installation.
> For frequent use or CI pipelines, see
> [Installation](#installation-optional) below.
## Installation (Optional)
### Using uv (recommended)
```bash
# Install uv if you don't have it
curl -LsSf https://astral.sh/uv/install.sh | sh
# Clone and install
git clone <repo-url>
cd ghaw_auditor
uv sync
# Install in editable mode
uv pip install -e .
```
### Using pipx
```bash
pipx install .
```
> **When to install:** Install locally if you use the tool frequently,
> need it in CI pipelines, or want faster execution (no download on each run).
## Commands
> **Note:** Examples use `uvx ghaw-auditor`.
> If installed locally, use `ghaw-auditor` directly.
### `scan` - Full Analysis
Analyzes workflows, resolves actions, generates reports.
```bash
# Basic scan
uvx ghaw-auditor scan --repo .
# Full scan with all options
uvx ghaw-auditor scan \
--repo . \
--output .audit \
--format all \
--token $GITHUB_TOKEN \
--concurrency 8 \
--write-baseline
# Offline mode (no API calls)
uvx ghaw-auditor scan --offline --format md
```
**Options:**
- `--repo <path>` - Repository path (default: `.`)
- `--token <str>` - GitHub token (env: `GITHUB_TOKEN`)
- `--output <dir>` - Output directory (default: `.ghaw-auditor`)
- `--format <json|md|all>` - Output format (default: `all`)
- `--cache-dir <dir>` - Cache directory
- `--offline` - Skip API resolution
- `--concurrency <int>` - API concurrency (default: 4)
- `--verbose`, `--quiet` - Logging levels
### `inventory` - List Actions
Print deduplicated action inventory.
```bash
uvx ghaw-auditor inventory --repo /path/to/repo
# Output:
# Unique Actions: 15
# • actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
# • actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00
# ...
```
### `validate` - Policy Validation
Validate workflows against policies.
```bash
# Validate with default policy
uvx ghaw-auditor validate --repo .
# Validate with custom policy
uvx ghaw-auditor validate --policy policy.yml --enforce
```
**Options:**
- `--policy <file>` - Policy file path
- `--enforce` - Exit non-zero on violations
## Diff Mode
Track changes over time by comparing against baselines.
```bash
# Create initial baseline
uvx ghaw-auditor scan --write-baseline --output .audit
# Later, compare against baseline
uvx ghaw-auditor scan --diff --baseline .audit/baseline
# Output: .audit/diff/report.diff.md
```
**Baseline contents:**
- `baseline/actions.json` - Action inventory snapshot
- `baseline/workflows.json` - Workflow metadata snapshot
- `baseline/meta.json` - Auditor version, commit SHA, timestamp
**Diff reports show:**
- Added/removed/modified workflows
- Added/removed actions
- Changes to permissions, triggers, concurrency, secrets, etc.
## Output
The tool generates structured reports in the output directory:
### JSON Files
- **`actions.json`** - Deduplicated action inventory with manifests
- **`workflows.json`** - Complete workflow metadata
- **`violations.json`** - Policy violations
### Markdown Report
**`report.md`** includes:
- Summary (workflow count, action count, violations)
- Analysis (triggers, runners, secrets, permissions)
- Per-workflow details (jobs, actions used, configuration)
- Action inventory with inputs/outputs
- Policy violations
### Example Output
```text
.ghaw-auditor/
├── actions.json
├── workflows.json
├── violations.json
├── report.md
├── baseline/
│ ├── actions.json
│ ├── workflows.json
│ └── meta.json
└── diff/
├── actions.diff.json
├── workflows.diff.json
└── report.diff.md
```
## Policy Configuration
Create `policy.yml` to enforce policies:
```yaml
require_pinned_actions: true # Actions must use SHA refs
forbid_branch_refs: true # Forbid branch refs (main, master, etc.)
require_concurrency_on_pr: true # PR workflows must have concurrency
allowed_actions: # Whitelist
- actions/*
- github/*
- docker/*
denied_actions: # Blacklist
- dangerous/action
min_permissions: true # Enforce least-privilege
```
**Policy rules:**
- `require_pinned_actions` - Actions must be pinned to SHA (not tags/branches)
- `forbid_branch_refs` - Forbid branch references (main, master, develop)
- `allowed_actions` - Whitelist of allowed actions (glob patterns)
- `denied_actions` - Blacklist of forbidden actions
- `require_concurrency_on_pr` - PR workflows must set concurrency groups
**Enforcement:**
```bash
# Warn on violations
uvx ghaw-auditor validate --policy policy.yml
# Fail CI on violations
uvx ghaw-auditor validate --policy policy.yml --enforce
# Exit code: 0 (pass), 1 (violations), 2 (error)
```
## Extracted Metadata
### Workflows
- Name, path, triggers (push, PR, schedule, etc.)
- Permissions (workflow & job-level)
- Concurrency groups
- Environment variables
- Reusable workflow contracts (inputs, outputs, secrets)
### Jobs
- Runner (`runs-on`)
- Dependencies (`needs`)
- Conditions (`if`)
- Timeouts
- Container & service configurations
- Matrix strategies
- Actions used per job
### Actions
- Type (GitHub, local, Docker)
- Resolved SHAs for GitHub actions
- Input/output definitions
- Runtime (composite, Docker, Node.js)
- Monorepo path support
### Security
- Secrets used (`${{ secrets.* }}`)
- Permissions (contents, packages, issues, etc.)
- Service containers (databases, caches)
- External actions (owner/repo resolution)
## Architecture
**Layers:**
- `cli` - Typer-based CLI interface
- `scanner` - File discovery
- `parser` - YAML parsing (ruamel.yaml)
- `resolver` - GitHub API integration
- `analyzer` - Pattern extraction
- `policy` - Policy validation
- `renderer` - JSON/Markdown reports
- `differ` - Baseline comparison
- `cache` - Disk-based caching
- `github_client` - HTTP client with retries
**Models (Pydantic):**
- `ActionRef`, `ActionManifest`
- `WorkflowMeta`, `JobMeta`
- `Permissions`, `Strategy`, `Container`, `Service`
- `Policy`, `Baseline`, `DiffEntry`
## Development
```bash
# Install dependencies
uv sync
# Run locally
uv run ghaw-auditor scan --repo .
# Run tests
uv run -m pytest
# Lint
uvx ruff check .
# Format
uvx ruff format .
# Type check
uvx mypy .
# Coverage
uv run -m pytest --cov --cov-report=html
```
## CI Integration
### GitHub Actions
```yaml
- name: Audit GitHub Actions
run: |
uvx ghaw-auditor scan --output audit-results
uvx ghaw-auditor validate --policy policy.yml --enforce
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload Audit Results
uses: actions/upload-artifact@v4
with:
name: audit-results
path: audit-results/
```
> **Alternative:** For faster CI runs, cache the installation:
> `pip install ghaw-auditor` then use `ghaw-auditor` directly.
### Baseline Tracking
```yaml
- name: Compare Against Baseline
run: |
uvx ghaw-auditor scan --diff --baseline .audit/baseline
cat .audit/diff/report.diff.md >> $GITHUB_STEP_SUMMARY
```
## Examples
### Analyze a Repository
```bash
uvx ghaw-auditor scan --repo ~/projects/myrepo
```
Output:
```text
Scanning repository...
Found 7 workflows and 2 actions
Parsing workflows...
Found 15 unique action references
Resolving actions...
Analyzing workflows...
Generating reports...
✓ Audit complete! Reports in .ghaw-auditor
```
### Track Changes Over Time
```bash
# Day 1: Create baseline
uvx ghaw-auditor scan --write-baseline
# Day 7: Check for changes
uvx ghaw-auditor scan --diff --baseline .ghaw-auditor/baseline
# View diff
cat .ghaw-auditor/diff/report.diff.md
```
### Validate Security Policies
```bash
# Check for unpinned actions
uvx ghaw-auditor validate --enforce
# Output:
# [ERROR] .github/workflows/ci.yml: Action actions/checkout
# is not pinned to SHA: v4
# Policy enforcement failed: 1 errors
```
### Generate Inventory
```bash
uvx ghaw-auditor inventory --repo . > actions-inventory.txt
```
## Performance
- **Parallel API calls** - Configurable concurrency (default: 4)
- **Disk caching** - API responses cached with TTL
- **Fast parsing** - Efficient YAML parsing with ruamel.yaml
- **Target**: 100+ workflows in < 60 seconds (with warm cache)
## Configuration
Optional `auditor.yaml` in repo root:
```yaml
exclude_paths:
- "**/node_modules/**"
- "**/vendor/**"
cache:
dir: ~/.cache/ghaw-auditor
ttl: 3600 # 1 hour
policies:
require_pinned_actions: true
forbid_branch_refs: true
```
## Troubleshooting
### Rate Limiting
```bash
# Set GitHub token for higher rate limits
export GITHUB_TOKEN=ghp_xxx
uvx ghaw-auditor scan
```
### Large Repositories
```bash
# Increase concurrency
uvx ghaw-auditor scan --concurrency 10
# Use offline mode for local analysis
uvx ghaw-auditor scan --offline
```
### Debugging
```bash
# Verbose output
uvx ghaw-auditor scan --verbose
# JSON logging for CI
uvx ghaw-auditor scan --log-json
```
## License
MIT
## Contributing
Contributions welcome! Please ensure:
- Tests pass: `uv run -m pytest`
- Code formatted: `uvx ruff format .`
- Linting clean: `uvx ruff check .`
- Type hints valid: `uvx mypy .`
- Coverage ≥ 85%

3
ghaw_auditor/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
"""GitHub Actions & Workflows Auditor."""
__version__ = "1.0.0"

95
ghaw_auditor/analyzer.py Normal file
View File

@@ -0,0 +1,95 @@
"""Analyzer for workflows and actions."""
from __future__ import annotations

import logging
from collections import Counter
from typing import Any

from ghaw_auditor.models import ActionManifest, WorkflowMeta
logger = logging.getLogger(__name__)
class Analyzer:
    """Analyzes workflows and actions for patterns and risks.

    All methods are read-only over the supplied models and return plain
    dicts/lists that serialize cleanly to JSON.
    """

    def analyze_workflows(
        self, workflows: dict[str, WorkflowMeta], actions: dict[str, ActionManifest]
    ) -> dict[str, Any]:
        """Analyze workflows for patterns and issues.

        Args:
            workflows: Parsed workflows keyed by repo-relative path.
            actions: Action inventory. Currently unread here; kept in the
                signature for interface stability.

        Returns:
            Summary dict with totals plus per-category breakdowns.
        """
        return {
            "total_workflows": len(workflows),
            "total_jobs": sum(len(w.jobs) for w in workflows.values()),
            "reusable_workflows": sum(1 for w in workflows.values() if w.is_reusable),
            "triggers": self._analyze_triggers(workflows),
            "permissions": self._analyze_permissions(workflows),
            "secrets": self._analyze_secrets(workflows),
            "runners": self._analyze_runners(workflows),
            "containers": self._analyze_containers(workflows),
        }

    def _analyze_triggers(self, workflows: dict[str, WorkflowMeta]) -> dict[str, int]:
        """Count how many workflows use each trigger event."""
        # Counter replaces the manual dict.get(..., 0) + 1 accumulation.
        return dict(Counter(t for w in workflows.values() for t in w.triggers))

    def _analyze_permissions(self, workflows: dict[str, WorkflowMeta]) -> dict[str, Any]:
        """Count workflows and jobs that declare explicit permissions."""
        return {
            "workflows_with_permissions": sum(1 for w in workflows.values() if w.permissions),
            "jobs_with_permissions": sum(
                1 for w in workflows.values() for j in w.jobs.values() if j.permissions
            ),
        }

    def _analyze_secrets(self, workflows: dict[str, WorkflowMeta]) -> dict[str, Any]:
        """Collect the union of secret names referenced across all workflows."""
        all_secrets: set[str] = set()
        for workflow in workflows.values():
            all_secrets.update(workflow.secrets_used)
        return {
            "total_unique_secrets": len(all_secrets),
            "secrets": sorted(all_secrets),
        }

    def _analyze_runners(self, workflows: dict[str, WorkflowMeta]) -> dict[str, int]:
        """Count job runner labels; a list-valued runs-on is stringified as one key."""
        return dict(
            Counter(
                str(job.runs_on) if isinstance(job.runs_on, list) else job.runs_on
                for w in workflows.values()
                for job in w.jobs.values()
            )
        )

    def _analyze_containers(self, workflows: dict[str, WorkflowMeta]) -> dict[str, Any]:
        """Count jobs that run in a container or declare service containers."""
        jobs = [j for w in workflows.values() for j in w.jobs.values()]
        return {
            "jobs_with_containers": sum(1 for j in jobs if j.container),
            "jobs_with_services": sum(1 for j in jobs if j.services),
        }

    def deduplicate_actions(self, all_actions: list[Any]) -> dict[str, Any]:
        """Deduplicate actions by canonical key, keeping the first occurrence."""
        unique_actions: dict[str, Any] = {}
        for action in all_actions:
            unique_actions.setdefault(action.canonical_key(), action)
        return unique_actions

47
ghaw_auditor/cache.py Normal file
View File

@@ -0,0 +1,47 @@
"""Caching layer for GitHub API responses and parsed data."""
from __future__ import annotations
import hashlib
import logging
from pathlib import Path
from typing import Any
import diskcache
from platformdirs import user_cache_dir
logger = logging.getLogger(__name__)
class Cache:
    """Disk-based cache for API responses and parsed objects.

    Backed by ``diskcache.Cache``; entries expire after ``ttl`` seconds
    unless a per-entry TTL is passed to :meth:`set`.
    """

    def __init__(self, cache_dir: str | Path | None = None, ttl: int = 3600) -> None:
        """Initialize cache.

        Args:
            cache_dir: Cache directory; defaults to the platform user cache dir.
            ttl: Default time-to-live for entries, in seconds.
        """
        if cache_dir is None:
            cache_dir = Path(user_cache_dir("ghaw-auditor"))
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.cache = diskcache.Cache(str(self.cache_dir))
        self.ttl = ttl

    def get(self, key: str, default: Any = None) -> Any:
        """Get value from cache, or *default* when the key is absent/expired.

        The *default* parameter is backward-compatible: omitting it keeps the
        original behavior of returning None on a miss.
        """
        return self.cache.get(key, default=default)

    def set(self, key: str, value: Any, ttl: int | None = None) -> None:
        """Set value in cache with a per-entry TTL (falls back to the default).

        NOTE: ttl=0 is falsy and therefore falls back to the default TTL;
        this matches the original behavior.
        """
        self.cache.set(key, value, expire=ttl or self.ttl)

    def make_key(self, *parts: str) -> str:
        """Generate a stable cache key by hashing the ':'-joined parts."""
        combined = ":".join(str(p) for p in parts)
        return hashlib.sha256(combined.encode()).hexdigest()

    def clear(self) -> None:
        """Clear all cache entries."""
        self.cache.clear()

    def close(self) -> None:
        """Close the underlying diskcache store."""
        self.cache.close()

270
ghaw_auditor/cli.py Normal file
View File

@@ -0,0 +1,270 @@
"""CLI interface for GitHub Actions & Workflows Auditor."""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any
import typer
from rich.console import Console
from rich.logging import RichHandler
from ghaw_auditor import __version__
from ghaw_auditor.analyzer import Analyzer
from ghaw_auditor.differ import Differ
from ghaw_auditor.factory import AuditServiceFactory
from ghaw_auditor.models import Policy
from ghaw_auditor.parser import Parser
from ghaw_auditor.policy import PolicyValidator
from ghaw_auditor.renderer import Renderer
from ghaw_auditor.scanner import Scanner
from ghaw_auditor.services import DiffService, ScanResult
# Single Typer application object; commands register themselves via @app.command().
app = typer.Typer(
    name="ghaw-auditor",
    help="GitHub Actions & Workflows Auditor - analyze and audit GitHub Actions ecosystem",
)
# Shared Rich console for all user-facing output (also handed to RichHandler in setup_logging).
console = Console()
def setup_logging(verbose: bool = False, quiet: bool = False, log_json: bool = False) -> None:
    """Configure root logging for the CLI.

    ``quiet`` takes precedence over ``verbose``; the default level is INFO.
    With ``log_json`` the plain stdlib formatting is used instead of the Rich
    handler, keeping output machine-readable.
    """
    level = logging.ERROR if quiet else (logging.DEBUG if verbose else logging.INFO)
    if log_json:
        logging.basicConfig(level=level, format="%(message)s")
        return
    logging.basicConfig(
        level=level,
        format="%(message)s",
        handlers=[RichHandler(console=console, rich_tracebacks=True)],
    )
def _render_reports(
    renderer: Renderer,
    result: ScanResult,
    format_type: str,
) -> None:
    """Write the requested report formats ("json", "md", or "all")."""
    console.print("[cyan]Generating reports...[/cyan]")
    # "all" expands to both concrete formats; anything else selects itself.
    wanted = {"json", "md"} if format_type == "all" else {format_type}
    if "json" in wanted:
        renderer.render_json(result.workflows, result.actions, result.violations)
    if "md" in wanted:
        renderer.render_markdown(result.workflows, result.actions, result.violations, result.analysis)
def _handle_diff_mode(
    result: ScanResult,
    baseline: Path,
    output: Path,
) -> None:
    """Compare *result* against the baseline and write a Markdown diff report.

    The report goes to ``<output>/diff/report.diff.md``. A missing baseline is
    logged as an error but does not abort the command.
    """
    console.print("[cyan]Running diff...[/cyan]")
    diff_service = DiffService(Differ(baseline))
    try:
        workflow_diffs, action_diffs = diff_service.compare(result.workflows, result.actions)
        diff_dir = output / "diff"
        # Bug fix: parents=True — the output directory itself may not exist yet,
        # in which case mkdir(exist_ok=True) alone raises FileNotFoundError.
        diff_dir.mkdir(parents=True, exist_ok=True)
        report_path = diff_dir / "report.diff.md"
        diff_service.differ.render_diff_markdown(workflow_diffs, action_diffs, report_path)
        console.print(f"[green]Diff report written to {report_path}[/green]")
    except FileNotFoundError as e:
        # Lazy %-formatting: message is only built if this record is emitted.
        logging.getLogger(__name__).error("Baseline not found: %s", e)
def _write_baseline(result: ScanResult, baseline_path: Path, commit_sha: str | None = None) -> None:
    """Persist the scan result as a baseline snapshot at *baseline_path*."""
    Differ(baseline_path).save_baseline(result.workflows, result.actions, commit_sha)
    console.print(f"[green]Baseline saved to {baseline_path}[/green]")
def _enforce_policy(violations: list[dict[str, Any]]) -> None:
    """Exit with code 1 when any violation has severity "error"."""
    error_count = sum(1 for v in violations if v.get("severity") == "error")
    if error_count:
        console.print(f"[red]Policy enforcement failed: {error_count} errors[/red]")
        raise typer.Exit(1)
@app.command()
def scan(
    repo: str = typer.Option(".", help="Repository path or URL"),
    token: str | None = typer.Option(None, help="GitHub token", envvar="GITHUB_TOKEN"),
    output: Path = typer.Option(".ghaw-auditor", help="Output directory"),
    format_type: str = typer.Option("all", help="Output format: json, md, or all"),
    cache_dir: Path | None = typer.Option(None, help="Cache directory"),
    offline: bool = typer.Option(False, help="Offline mode (no API calls)"),
    concurrency: int = typer.Option(4, help="Concurrency for API calls"),
    enforce: bool = typer.Option(False, help="Enforce policy (exit non-zero on violations)"),
    policy_file: Path | None = typer.Option(None, help="Policy file path"),
    diff: bool = typer.Option(False, help="Run in diff mode"),
    baseline: Path | None = typer.Option(None, help="Baseline path for diff"),
    write_baseline: bool = typer.Option(False, help="Write baseline after scan"),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Verbose output"),
    quiet: bool = typer.Option(False, "--quiet", "-q", help="Quiet output"),
    log_json: bool = typer.Option(False, help="JSON logging"),
) -> None:
    """Scan repository for GitHub Actions and workflows.

    Exit codes: 0 on success, 1 for a missing repo path or (with --enforce)
    error-severity policy violations, 2 on unexpected failure.
    """
    setup_logging(verbose, quiet, log_json)
    logger = logging.getLogger(__name__)
    try:
        # Validate repository path before doing any work.
        repo_path = Path(repo).resolve()
        if not repo_path.exists():
            console.print(f"[red]Repository not found: {repo_path}[/red]")
            raise typer.Exit(1)
        # Load policy if specified
        policy = None
        if policy_file and policy_file.exists():
            # TODO: Load policy from YAML file
            policy = Policy()
        # Create service via factory
        service = AuditServiceFactory.create(
            repo_path=repo_path,
            token=token,
            offline=offline,
            cache_dir=cache_dir,
            concurrency=concurrency,
            policy=policy,
        )
        # Execute scan
        console.print("[cyan]Scanning repository...[/cyan]")
        result = service.scan(offline=offline)
        # Display summary
        console.print(f"Found {result.workflow_count} workflows and {result.action_count} actions")
        console.print(f"Found {result.unique_action_count} unique action references")
        if result.violations:
            console.print(f"Found {len(result.violations)} policy violations")
        # Render reports
        renderer = Renderer(output)
        _render_reports(renderer, result, format_type)
        # Handle diff mode
        if diff and baseline:
            _handle_diff_mode(result, baseline, output)
        # Write baseline
        if write_baseline:
            baseline_path = baseline or (output / "baseline")
            _write_baseline(result, baseline_path)
        console.print(f"[green]✓ Audit complete! Reports in {output}[/green]")
        # Enforce policy
        if enforce and result.violations:
            _enforce_policy(result.violations)
    except typer.Exit:
        # Bug fix: typer.Exit subclasses Exception (via click's Exit/RuntimeError),
        # so the broad handler below used to swallow deliberate exits (missing
        # repo, policy enforcement), log a spurious traceback, and remap their
        # exit codes to 2. Re-raise them untouched.
        raise
    except Exception as e:
        logger.exception(f"Scan failed: {e}")
        raise typer.Exit(2) from None
@app.command()
def inventory(
    repo: str = typer.Option(".", help="Repository path"),
    verbose: bool = typer.Option(False, "--verbose", "-v"),
) -> None:
    """Print deduplicated action inventory."""
    setup_logging(verbose)
    logger = logging.getLogger(__name__)
    root = Path(repo).resolve()
    scanner = Scanner(root)
    parser = Parser(root)
    collected = []
    for wf_file in scanner.find_workflows():
        try:
            collected.extend(parser.parse_workflow(wf_file).actions_used)
        except Exception as e:
            # One malformed workflow should not abort the inventory.
            logger.error(f"Failed to parse {wf_file}: {e}")
            if verbose:
                logger.exception(e)
    unique_actions = Analyzer().deduplicate_actions(collected)
    console.print(f"\n[cyan]Unique Actions: {len(unique_actions)}[/cyan]\n")
    for key in sorted(unique_actions):
        console.print(f"{key}")
@app.command()
def validate(
    repo: str = typer.Option(".", help="Repository path"),
    policy_file: Path | None = typer.Option(None, help="Policy file"),
    enforce: bool = typer.Option(False, help="Exit non-zero on violations"),
    verbose: bool = typer.Option(False, "--verbose", "-v"),
) -> None:
    """Validate workflows against policy.

    Parses every workflow under *repo*, runs the policy validator over them,
    and prints each violation. With ``--enforce``, exits with code 1 when any
    violation of severity "error" is present.
    """
    setup_logging(verbose)
    logger = logging.getLogger(__name__)
    repo_path = Path(repo).resolve()
    scanner = Scanner(repo_path)
    parser = Parser(repo_path)
    workflow_files = scanner.find_workflows()
    workflows = {}
    all_actions = []
    for wf_file in workflow_files:
        try:
            workflow = parser.parse_workflow(wf_file)
            # Keyed by repo-relative path so violation messages stay portable.
            rel_path = str(wf_file.relative_to(repo_path))
            workflows[rel_path] = workflow
            all_actions.extend(workflow.actions_used)
        except Exception as e:
            # A single malformed workflow should not abort validation of the rest.
            logger.error(f"Failed to parse {wf_file}: {e}")
            if verbose:
                logger.exception(e)
    # Load or use default policy
    policy = Policy()
    if policy_file and policy_file.exists():
        # TODO: Parse YAML policy file here
        pass
    validator = PolicyValidator(policy)
    violations = validator.validate(workflows, all_actions)
    if violations:
        console.print(f"\n[yellow]Found {len(violations)} policy violations:[/yellow]\n")
        for v in violations:
            severity = v.get("severity", "warning").upper()
            # Errors in red, everything else (warnings) in yellow.
            color = "red" if severity == "ERROR" else "yellow"
            console.print(f"[{color}]{severity}[/{color}] {v['workflow']}: {v['message']}")
        if enforce:
            errors = [v for v in violations if v.get("severity") == "error"]
            if errors:
                raise typer.Exit(1)
    else:
        console.print("[green]✓ No policy violations found[/green]")
@app.command()
def version() -> None:
    """Show version information."""
    # __version__ is the single source of truth, defined in ghaw_auditor/__init__.py.
    console.print(f"ghaw-auditor version {__version__}")
# Support direct execution (e.g. `python -m ghaw_auditor.cli`) in addition
# to the installed console-script entry point.
if __name__ == "__main__":  # pragma: no cover
    app()

248
ghaw_auditor/differ.py Normal file
View File

@@ -0,0 +1,248 @@
"""Diff functionality for comparing baselines."""
from __future__ import annotations
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Any
from ghaw_auditor.models import (
ActionDiff,
ActionManifest,
Baseline,
BaselineMeta,
DiffEntry,
WorkflowDiff,
WorkflowMeta,
)
logger = logging.getLogger(__name__)
class Differ:
"""Compares current state against baseline."""
def __init__(self, baseline_path: Path) -> None:
    """Initialize differ.

    Args:
        baseline_path: Directory holding (or to hold) the baseline snapshot
            files ``actions.json``, ``workflows.json`` and ``meta.json``.
    """
    self.baseline_path = baseline_path
def load_baseline(self) -> Baseline:
    """Load a previously saved baseline snapshot from disk.

    Raises:
        FileNotFoundError: if the actions or workflows snapshot is missing.
    """
    actions_file = self.baseline_path / "actions.json"
    workflows_file = self.baseline_path / "workflows.json"
    if not (actions_file.exists() and workflows_file.exists()):
        raise FileNotFoundError(f"Baseline not found at {self.baseline_path}")
    actions_data = json.loads(actions_file.read_text(encoding="utf-8"))
    workflows_data = json.loads(workflows_file.read_text(encoding="utf-8"))
    meta_file = self.baseline_path / "meta.json"
    if meta_file.exists():
        meta = BaselineMeta(**json.loads(meta_file.read_text(encoding="utf-8")))
    else:
        # Fallback metadata for snapshots written without a meta.json.
        meta = BaselineMeta(auditor_version="1.0.0", commit_sha=None, timestamp=datetime.now())
    return Baseline(
        meta=meta,
        actions={key: ActionManifest(**value) for key, value in actions_data.items()},
        workflows={key: WorkflowMeta(**value) for key, value in workflows_data.items()},
    )
def save_baseline(
self, workflows: dict[str, WorkflowMeta], actions: dict[str, ActionManifest], commit_sha: str | None = None
) -> None:
"""Save current state as baseline."""
self.baseline_path.mkdir(parents=True, exist_ok=True)
# Save actions
actions_data = {k: v.model_dump(mode="json") for k, v in actions.items()}
with open(self.baseline_path / "actions.json", "w", encoding="utf-8") as f:
json.dump(actions_data, f, indent=2, default=str)
# Save workflows
workflows_data = {k: v.model_dump(mode="json") for k, v in workflows.items()}
with open(self.baseline_path / "workflows.json", "w", encoding="utf-8") as f:
json.dump(workflows_data, f, indent=2, default=str)
# Save metadata
meta = BaselineMeta(auditor_version="1.0.0", commit_sha=commit_sha, timestamp=datetime.now())
with open(self.baseline_path / "meta.json", "w", encoding="utf-8") as f:
json.dump(meta.model_dump(mode="json"), f, indent=2, default=str)
logger.info(f"Baseline saved to {self.baseline_path}")
def diff_workflows(self, baseline: dict[str, WorkflowMeta], current: dict[str, WorkflowMeta]) -> list[WorkflowDiff]:
"""Compare workflows."""
diffs: list[WorkflowDiff] = []
all_paths = set(baseline.keys()) | set(current.keys())
for path in all_paths:
baseline_wf = baseline.get(path)
current_wf = current.get(path)
if not baseline_wf and current_wf:
# Added
diffs.append(WorkflowDiff(path=path, status="added", changes=[]))
elif baseline_wf and not current_wf:
# Removed
diffs.append(WorkflowDiff(path=path, status="removed", changes=[]))
elif baseline_wf and current_wf:
# Compare
changes = self._compare_workflows(baseline_wf, current_wf)
status = "modified" if changes else "unchanged"
diffs.append(WorkflowDiff(path=path, status=status, changes=changes))
return diffs
def _compare_workflows(self, old: WorkflowMeta, new: WorkflowMeta) -> list[DiffEntry]:
"""Compare two workflows."""
changes: list[DiffEntry] = []
# Compare triggers
if set(old.triggers) != set(new.triggers):
changes.append(
DiffEntry(field="triggers", old_value=old.triggers, new_value=new.triggers, change_type="modified")
)
# Compare permissions
if old.permissions != new.permissions:
changes.append(
DiffEntry(
field="permissions",
old_value=old.permissions.model_dump() if old.permissions else None,
new_value=new.permissions.model_dump() if new.permissions else None,
change_type="modified",
)
)
# Compare concurrency
if old.concurrency != new.concurrency:
changes.append(
DiffEntry(
field="concurrency", old_value=old.concurrency, new_value=new.concurrency, change_type="modified"
)
)
# Compare jobs
if set(old.jobs.keys()) != set(new.jobs.keys()):
changes.append(
DiffEntry(
field="jobs",
old_value=list(old.jobs.keys()),
new_value=list(new.jobs.keys()),
change_type="modified",
)
)
# Compare secrets
if old.secrets_used != new.secrets_used:
changes.append(
DiffEntry(
field="secrets_used",
old_value=sorted(old.secrets_used),
new_value=sorted(new.secrets_used),
change_type="modified",
)
)
return changes
def diff_actions(self, baseline: dict[str, ActionManifest], current: dict[str, ActionManifest]) -> list[ActionDiff]:
"""Compare actions."""
diffs: list[ActionDiff] = []
all_keys = set(baseline.keys()) | set(current.keys())
for key in all_keys:
baseline_action = baseline.get(key)
current_action = current.get(key)
if not baseline_action and current_action:
# Added
diffs.append(ActionDiff(key=key, status="added", changes=[]))
elif baseline_action and not current_action:
# Removed
diffs.append(ActionDiff(key=key, status="removed", changes=[]))
elif baseline_action and current_action:
# Compare (for now, just mark as unchanged)
diffs.append(ActionDiff(key=key, status="unchanged", changes=[]))
return diffs
def _write_workflow_changes(self, f: Any, workflow_diffs: list[WorkflowDiff]) -> None:
"""Write workflow changes section to markdown file."""
f.write("## Workflow Changes\n\n")
added_wfs = [d for d in workflow_diffs if d.status == "added"]
removed_wfs = [d for d in workflow_diffs if d.status == "removed"]
modified_wfs = [d for d in workflow_diffs if d.status == "modified"]
f.write(f"- **Added:** {len(added_wfs)}\n")
f.write(f"- **Removed:** {len(removed_wfs)}\n")
f.write(f"- **Modified:** {len(modified_wfs)}\n\n")
if added_wfs:
f.write("### Added Workflows\n\n")
for diff in added_wfs:
f.write(f"- `{diff.path}`\n")
f.write("\n")
if removed_wfs:
f.write("### Removed Workflows\n\n")
for diff in removed_wfs:
f.write(f"- `{diff.path}`\n")
f.write("\n")
if modified_wfs:
f.write("### Modified Workflows\n\n")
for diff in modified_wfs:
f.write(f"#### {diff.path}\n\n")
for change in diff.changes:
f.write(f"- **{change.field}** changed\n")
if change.old_value is not None:
f.write(f" - Old: `{change.old_value}`\n")
if change.new_value is not None:
f.write(f" - New: `{change.new_value}`\n")
f.write("\n")
def _write_action_changes(self, f: Any, action_diffs: list[ActionDiff]) -> None:
"""Write action changes section to markdown file."""
f.write("## Action Changes\n\n")
added_actions = [d for d in action_diffs if d.status == "added"]
removed_actions = [d for d in action_diffs if d.status == "removed"]
f.write(f"- **Added:** {len(added_actions)}\n")
f.write(f"- **Removed:** {len(removed_actions)}\n\n")
if added_actions:
f.write("### Added Actions\n\n")
for diff in added_actions:
f.write(f"- `{diff.key}`\n")
f.write("\n")
if removed_actions:
f.write("### Removed Actions\n\n")
for diff in removed_actions:
f.write(f"- `{diff.key}`\n")
def render_diff_markdown(
self, workflow_diffs: list[WorkflowDiff], action_diffs: list[ActionDiff], output_path: Path
) -> None:
"""Render diff as Markdown."""
with open(output_path, "w", encoding="utf-8") as f:
f.write("# Audit Diff Report\n\n")
f.write(f"**Generated:** {datetime.now().isoformat()}\n\n")
self._write_workflow_changes(f, workflow_diffs)
self._write_action_changes(f, action_diffs)
logger.info(f"Diff report written to {output_path}")

62
ghaw_auditor/factory.py Normal file
View File

@@ -0,0 +1,62 @@
"""Factory for creating audit services with dependency injection."""
from __future__ import annotations
from pathlib import Path
from ghaw_auditor.analyzer import Analyzer
from ghaw_auditor.cache import Cache
from ghaw_auditor.github_client import GitHubClient
from ghaw_auditor.models import Policy
from ghaw_auditor.parser import Parser
from ghaw_auditor.policy import PolicyValidator
from ghaw_auditor.resolver import Resolver
from ghaw_auditor.scanner import Scanner
from ghaw_auditor.services import AuditService
class AuditServiceFactory:
    """Factory for creating audit services with configured dependencies."""

    @staticmethod
    def create(
        repo_path: Path,
        token: str | None = None,
        offline: bool = False,
        cache_dir: Path | None = None,
        concurrency: int = 4,
        policy: Policy | None = None,
        exclude_patterns: list[str] | None = None,
    ) -> AuditService:
        """Build an AuditService wired with all required collaborators.

        Args:
            repo_path: Path to repository
            token: GitHub API token
            offline: Disable API calls
            cache_dir: Cache directory path
            concurrency: API concurrency level
            policy: Policy configuration
            exclude_patterns: File exclusion patterns

        Returns:
            Configured AuditService instance
        """
        # Mandatory collaborators, always constructed.
        scanner = Scanner(repo_path, exclude_patterns=exclude_patterns or [])
        parser = Parser(repo_path)
        analyzer = Analyzer()
        cache = Cache(cache_dir)

        # The network-backed resolver is wired up only when API access is allowed.
        resolver = None if offline else Resolver(GitHubClient(token), cache, repo_path, concurrency)

        # Policy validation is opt-in: no policy means no validator.
        validator = PolicyValidator(policy) if policy else None

        return AuditService(scanner, parser, analyzer, resolver, validator)

View File

@@ -0,0 +1,128 @@
"""GitHub API client for resolving actions and refs."""
from __future__ import annotations
import logging
from typing import Any
import httpx
from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential
logger = logging.getLogger(__name__)
# Suppress httpx INFO logging (we handle logging ourselves)
logging.getLogger("httpx").setLevel(logging.WARNING)
def should_retry_http_error(exception: BaseException) -> bool:
    """Decide whether a failed HTTP call is worth retrying.

    Retry on:
    - Network errors (RequestError)
    - Server errors (5xx)
    - Rate limiting (429)
    Don't retry on:
    - 404 (not found - won't change on retry)
    - 401/403 (auth errors - won't change on retry)
    - 400 (bad request - won't change on retry)
    """
    # Transport-level failures (DNS, timeouts, resets) are transient.
    if isinstance(exception, httpx.RequestError):
        return True
    # Anything that is not an HTTP status error is not retryable here.
    if not isinstance(exception, httpx.HTTPStatusError):
        return False
    code = exception.response.status_code
    # Rate limiting and server-side errors may succeed on a later attempt;
    # all other client errors are deterministic.
    return code == 429 or 500 <= code < 600
class GitHubClient:
    """GitHub API client with rate limiting and retries.

    Wraps a single httpx.Client; transient failures (network errors, 5xx,
    429) are retried up to 3 times with exponential backoff via tenacity.
    Use as a context manager to ensure the underlying connection closes.
    """
    def __init__(self, token: str | None = None, base_url: str = "https://api.github.com") -> None:
        """Initialize GitHub client.

        Args:
            token: Optional bearer token; without it, unauthenticated
                rate limits apply.
            base_url: API root, overridable for GitHub Enterprise.
        """
        self.base_url = base_url
        self.headers = {
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        }
        if token:
            self.headers["Authorization"] = f"Bearer {token}"
        # follow_redirects is required for raw.githubusercontent.com fetches
        self.client = httpx.Client(headers=self.headers, timeout=30.0, follow_redirects=True)
    @retry(
        retry=retry_if_exception(should_retry_http_error),
        wait=wait_exponential(multiplier=1, min=2, max=30),
        stop=stop_after_attempt(3),
    )
    def get_ref_sha(self, owner: str, repo: str, ref: str) -> str:
        """Resolve a ref (tag/branch) to a SHA.

        Raises:
            httpx.HTTPStatusError: On any non-2xx response (after logging;
                4xx other than 429 are not retried by the decorator).
        """
        url = f"{self.base_url}/repos/{owner}/{repo}/commits/{ref}"
        logger.debug(f"Fetching ref SHA: {owner}/{repo}@{ref}")
        try:
            response = self.client.get(url)
            response.raise_for_status()
            sha = response.json()["sha"]
            logger.debug(f"Resolved {owner}/{repo}@{ref} -> {sha}")
            return sha
        except httpx.HTTPStatusError as e:
            # Log at a severity appropriate to the failure, then re-raise
            # so callers (and the retry decorator) see the original error.
            status_code = e.response.status_code
            if status_code == 404:
                logger.error(f"Action not found: {owner}/{repo}@{ref}")
            elif status_code == 403:
                logger.error(f"Access denied (check token permissions): {owner}/{repo}@{ref}")
            elif status_code == 401:
                logger.error(f"Authentication required: {owner}/{repo}@{ref}")
            elif 400 <= status_code < 600:
                logger.warning(f"HTTP {status_code} fetching {url}")
            raise
    @retry(
        retry=retry_if_exception(should_retry_http_error),
        wait=wait_exponential(multiplier=1, min=2, max=30),
        stop=stop_after_attempt(3),
    )
    def get_file_content(self, owner: str, repo: str, path: str, ref: str) -> str:
        """Fetch raw file content at a specific ref.

        Raises:
            httpx.HTTPStatusError: On any non-2xx response.
        """
        # Use raw.githubusercontent.com for files
        raw_url = f"https://raw.githubusercontent.com/{owner}/{repo}/{ref}/{path}"
        logger.debug(f"Fetching file: {owner}/{repo}/{path}@{ref}")
        try:
            response = self.client.get(raw_url)
            response.raise_for_status()
            content = response.text
            logger.debug(f"Downloaded {path} ({len(content)} bytes)")
            return content
        except httpx.HTTPStatusError as e:
            status_code = e.response.status_code
            # Don't log 404 as warning - it's expected when trying action.yml before action.yaml
            if status_code == 404:
                logger.debug(f"File not found: {path}")
            elif status_code == 403:
                logger.error(f"Access denied (check token permissions): {owner}/{repo}/{path}")
            elif status_code == 401:
                logger.error(f"Authentication required: {owner}/{repo}/{path}")
            elif 400 <= status_code < 600:
                logger.warning(f"HTTP {status_code} fetching {raw_url}")
            raise
    def close(self) -> None:
        """Close the HTTP client."""
        self.client.close()
    def __enter__(self) -> GitHubClient:
        """Context manager entry."""
        return self
    def __exit__(self, *args: Any) -> None:
        """Context manager exit: always release the connection pool."""
        self.close()

254
ghaw_auditor/models.py Normal file
View File

@@ -0,0 +1,254 @@
"""Pydantic models for GitHub Actions and Workflows."""
from __future__ import annotations
from datetime import datetime
from enum import Enum
from typing import Any
from pydantic import BaseModel, Field
class ActionType(str, Enum):
    """Type of action reference."""
    LOCAL = "local"  # ./path action inside the same repository
    GITHUB = "github"  # owner/repo@ref action fetched from GitHub
    DOCKER = "docker"  # docker:// image reference
    REUSABLE_WORKFLOW = "reusable_workflow"  # workflow file invoked via job-level 'uses'
class ActionRef(BaseModel):
    """Reference to an action with version info."""
    type: ActionType
    owner: str | None = None
    repo: str | None = None
    path: str | None = None
    ref: str | None = None  # Tag, branch, or SHA
    resolved_sha: str | None = None
    source_file: str
    source_line: int | None = None
    def canonical_key(self) -> str:
        """Generate unique key for deduplication."""
        # Path-addressed references carry no owner/repo coordinates.
        if self.type == ActionType.LOCAL:
            return f"local:{self.path}"
        if self.type == ActionType.DOCKER:
            return f"docker:{self.path}"
        # Prefer the pinned SHA once resolved, falling back to the raw ref.
        pin = self.resolved_sha or self.ref
        if self.type == ActionType.REUSABLE_WORKFLOW:
            return f"{self.owner}/{self.repo}/{self.path}@{pin}"
        return f"{self.owner}/{self.repo}@{pin}"
class ActionInput(BaseModel):
    """Action input definition (one entry of an action.yml 'inputs' map)."""
    name: str
    description: str | None = None
    required: bool = False
    default: str | bool | int | None = None  # YAML scalars only
class ActionOutput(BaseModel):
    """Action output definition (one entry of an action.yml 'outputs' map)."""
    name: str
    description: str | None = None
class ActionManifest(BaseModel):
    """Parsed action.yml manifest."""
    name: str
    description: str | None = None
    author: str | None = None
    inputs: dict[str, ActionInput] = Field(default_factory=dict)
    outputs: dict[str, ActionOutput] = Field(default_factory=dict)
    runs: dict[str, Any] = Field(default_factory=dict)  # raw 'runs' section, shape varies by action kind
    branding: dict[str, str] | None = None
    # Exactly one of these flags is set by the parser from runs.using
    is_composite: bool = False
    is_docker: bool = False
    is_javascript: bool = False
class PermissionLevel(str, Enum):
    """Permission level for a single GITHUB_TOKEN scope."""
    NONE = "none"
    READ = "read"
    WRITE = "write"
class Permissions(BaseModel):
    """Job or workflow permissions.

    Each field mirrors a GITHUB_TOKEN scope; None means the scope was
    not explicitly set in the YAML.
    """
    actions: PermissionLevel | None = None
    checks: PermissionLevel | None = None
    contents: PermissionLevel | None = None
    deployments: PermissionLevel | None = None
    id_token: PermissionLevel | None = None
    issues: PermissionLevel | None = None
    packages: PermissionLevel | None = None
    pages: PermissionLevel | None = None
    pull_requests: PermissionLevel | None = None
    repository_projects: PermissionLevel | None = None
    security_events: PermissionLevel | None = None
    statuses: PermissionLevel | None = None
class Container(BaseModel):
    """Container configuration for a job's 'container' key."""
    image: str
    credentials: dict[str, str] | None = None  # registry username/password
    env: dict[str, str | int | float | bool] = Field(default_factory=dict)
    ports: list[int] = Field(default_factory=list)
    volumes: list[str] = Field(default_factory=list)
    options: str | None = None  # extra 'docker create' flags, verbatim
class Service(BaseModel):
    """Service container configuration (one entry of a job's 'services' map)."""
    name: str  # service key from the workflow YAML
    image: str
    credentials: dict[str, str] | None = None  # registry username/password
    env: dict[str, str | int | float | bool] = Field(default_factory=dict)
    ports: list[int] = Field(default_factory=list)
    volumes: list[str] = Field(default_factory=list)
    options: str | None = None  # extra 'docker create' flags, verbatim
class Strategy(BaseModel):
    """Job matrix strategy."""
    matrix: dict[str, Any] = Field(default_factory=dict)  # raw matrix definition, may include include/exclude
    fail_fast: bool = True  # GitHub's default
    max_parallel: int | None = None
class JobMeta(BaseModel):
    """Job metadata extracted from a workflow's 'jobs' entry."""
    name: str  # the job's key in the workflow YAML
    runs_on: str | list[str]  # empty string for reusable-workflow call jobs
    needs: list[str] = Field(default_factory=list)
    if_condition: str | None = Field(None, alias="if")  # aliased: 'if' is a Python keyword
    permissions: Permissions | None = None
    environment: str | dict[str, Any] | None = None
    concurrency: str | dict[str, Any] | None = None
    timeout_minutes: int | None = None
    continue_on_error: bool = False
    container: Container | None = None
    services: dict[str, Service] = Field(default_factory=dict)
    strategy: Strategy | None = None
    # Reusable workflow fields
    uses: str | None = None  # Reusable workflow reference
    with_inputs: dict[str, Any] = Field(default_factory=dict)  # Inputs via 'with'
    secrets_passed: dict[str, str] | None = None  # Secrets passed to reusable workflow
    inherit_secrets: bool = False  # Whether secrets: inherit is used
    outputs: dict[str, Any] = Field(default_factory=dict)  # Job outputs
    # Action tracking
    actions_used: list[ActionRef] = Field(default_factory=list)
    secrets_used: set[str] = Field(default_factory=set)  # names from ${{ secrets.NAME }} expressions
    env_vars: dict[str, str | int | float | bool] = Field(default_factory=dict)
class ReusableContract(BaseModel):
    """Reusable workflow contract: the raw 'workflow_call' trigger sections."""
    inputs: dict[str, Any] = Field(default_factory=dict)
    outputs: dict[str, Any] = Field(default_factory=dict)
    secrets: dict[str, Any] = Field(default_factory=dict)
class WorkflowMeta(BaseModel):
    """Workflow metadata parsed from a .github/workflows file."""
    name: str
    path: str  # path relative to the repository root
    triggers: list[str] = Field(default_factory=list)  # event names from 'on'
    permissions: Permissions | None = None
    concurrency: str | dict[str, Any] | None = None
    env: dict[str, str | int | float | bool] = Field(default_factory=dict)
    defaults: dict[str, Any] = Field(default_factory=dict)
    jobs: dict[str, JobMeta] = Field(default_factory=dict)
    is_reusable: bool = False  # True when 'workflow_call' is among the triggers
    reusable_contract: ReusableContract | None = None  # set only when is_reusable
    # Aggregates rolled up from all jobs
    secrets_used: set[str] = Field(default_factory=set)
    actions_used: list[ActionRef] = Field(default_factory=list)
class PolicyRule(BaseModel):
    """Policy rule."""
    name: str
    enabled: bool = True
    severity: str = "warning"  # warning, error
    config: dict[str, Any] = Field(default_factory=dict)  # rule-specific settings
class Policy(BaseModel):
    """Audit policy configuration."""
    min_permissions: bool = True
    require_pinned_actions: bool = True  # actions must be pinned to a full commit SHA
    forbid_branch_refs: bool = False  # reject refs like @main / @master
    allowed_actions: list[str] = Field(default_factory=list)  # patterns; empty list disables the allowlist check
    denied_actions: list[str] = Field(default_factory=list)  # patterns; always enforced when non-empty
    require_concurrency_on_pr: bool = False
    custom_rules: list[PolicyRule] = Field(default_factory=list)
class BaselineMeta(BaseModel):
    """Baseline metadata (provenance of a saved snapshot)."""
    auditor_version: str  # version of the tool that wrote the baseline
    commit_sha: str | None = None
    timestamp: datetime
    schema_version: str = "1.0"  # baseline file format version
class Baseline(BaseModel):
    """Baseline snapshot for diff mode."""
    meta: BaselineMeta
    actions: dict[str, ActionManifest]  # keyed by canonical action key
    workflows: dict[str, WorkflowMeta]  # keyed by workflow path
class DiffEntry(BaseModel):
    """Single field-level diff entry."""
    field: str
    old_value: Any = None
    new_value: Any = None
    change_type: str  # added, removed, modified
class ActionDiff(BaseModel):
    """Action diff."""
    key: str  # canonical action key
    status: str  # added, removed, modified, unchanged
    changes: list[DiffEntry] = Field(default_factory=list)
class WorkflowDiff(BaseModel):
    """Workflow diff."""
    path: str  # workflow path relative to the repository root
    status: str  # added, removed, modified, unchanged
    changes: list[DiffEntry] = Field(default_factory=list)
class AuditReport(BaseModel):
    """Complete audit report."""
    generated_at: datetime
    repository: str
    commit_sha: str | None = None
    actions: dict[str, ActionManifest]  # keyed by canonical action key
    workflows: dict[str, WorkflowMeta]  # keyed by workflow path
    policy_violations: list[dict[str, Any]] = Field(default_factory=list)

373
ghaw_auditor/parser.py Normal file
View File

@@ -0,0 +1,373 @@
"""YAML parser for workflow and action files."""
from __future__ import annotations
import logging
import re
from pathlib import Path
from typing import Any
from ruamel.yaml import YAML
from ghaw_auditor.models import (
ActionInput,
ActionManifest,
ActionOutput,
ActionRef,
ActionType,
Container,
JobMeta,
PermissionLevel,
Permissions,
ReusableContract,
Service,
Strategy,
WorkflowMeta,
)
logger = logging.getLogger(__name__)
class Parser:
    """Parse workflow and action YAML files into the project's Pydantic models.

    Uses ruamel's safe loader; all parsing is best-effort and driven by
    the GitHub Actions YAML schema.
    """
    def __init__(self, repo_path: Path | None = None) -> None:
        """Initialize parser.

        Args:
            repo_path: Repository root used to relativize workflow paths;
                defaults to the current working directory.
        """
        self.yaml = YAML(typ="safe")
        self.repo_path = repo_path or Path.cwd()
    def parse_workflow(self, path: Path) -> WorkflowMeta:
        """Parse a workflow file.

        Raises:
            ValueError: If the file is empty or contains no YAML document.
        """
        with open(path, encoding="utf-8") as f:
            content = f.read()
        data = self.yaml.load(content)
        if not data:
            raise ValueError(f"Empty workflow file: {path}")
        # Fall back to the file stem when the workflow declares no name
        name = data.get("name", path.stem)
        triggers = self._extract_triggers(data.get("on", {}))
        permissions = self._parse_permissions(data.get("permissions"))
        env = data.get("env", {})
        concurrency = data.get("concurrency")
        defaults = data.get("defaults", {})
        # Check if reusable workflow
        is_reusable = "workflow_call" in triggers
        reusable_contract = None
        if is_reusable:
            on_data = data.get("on", {})
            if isinstance(on_data, dict) and "workflow_call" in on_data:
                call_data = on_data["workflow_call"]
                # 'workflow_call:' with no body parses to None
                if call_data is not None:
                    reusable_contract = ReusableContract(
                        inputs=call_data.get("inputs", {}),
                        outputs=call_data.get("outputs", {}),
                        secrets=call_data.get("secrets", {}),
                    )
        # Parse jobs, rolling secrets and action refs up to the workflow level
        jobs = {}
        secrets_used: set[str] = set()
        actions_used: list[ActionRef] = []
        jobs_data = data.get("jobs")
        if jobs_data:
            for job_name, job_data in jobs_data.items():
                job_meta = self._parse_job(job_name, job_data, path, content)
                jobs[job_name] = job_meta
                secrets_used.update(job_meta.secrets_used)
                actions_used.extend(job_meta.actions_used)
        return WorkflowMeta(
            name=name,
            path=str(path.relative_to(self.repo_path)),
            triggers=triggers,
            permissions=permissions,
            concurrency=concurrency,
            env=env,
            defaults=defaults,
            jobs=jobs,
            is_reusable=is_reusable,
            reusable_contract=reusable_contract,
            secrets_used=secrets_used,
            actions_used=actions_used,
        )
    def _extract_triggers(self, on_data: Any) -> list[str]:
        """Extract trigger event names from the 'on' field.

        Handles the three YAML shapes: scalar, list, and mapping.
        """
        if isinstance(on_data, str):
            return [on_data]
        elif isinstance(on_data, list):
            return on_data
        elif isinstance(on_data, dict):
            return list(on_data.keys())
        return []
    def _parse_permissions(self, perms: Any) -> Permissions | None:
        """Parse a workflow- or job-level permissions block.

        NOTE(review): the string forms 'read-all'/'write-all' collapse to an
        empty Permissions() — the global level is not preserved; confirm
        whether downstream checks need it.
        """
        if perms is None:
            return None
        if isinstance(perms, str):
            # Global read-all or write-all
            return Permissions()
        if isinstance(perms, dict):
            # Drop falsy values so PermissionLevel() never sees None/''
            return Permissions(**{k: PermissionLevel(v) for k, v in perms.items() if v})
        return None
    def _parse_job(self, name: str, data: dict[str, Any] | None, path: Path, content: str) -> JobMeta:
        """Parse a job.

        Args:
            name: The job's key in the workflow YAML.
            data: The job's mapping (None for an empty job body).
            path: Workflow file path, recorded on extracted action refs.
            content: Full workflow file text (currently unused here —
                secrets are extracted from str(data) below; TODO confirm
                whether this parameter can be dropped).
        """
        if data is None:
            data = {}
        # Check if this is a reusable workflow call
        uses = data.get("uses")
        is_reusable_call = uses is not None
        # runs-on is optional for reusable workflow calls
        runs_on = data.get("runs-on", "ubuntu-latest" if not is_reusable_call else "")
        needs = data.get("needs", [])
        if isinstance(needs, str):
            needs = [needs]
        permissions = self._parse_permissions(data.get("permissions"))
        environment = data.get("environment")
        concurrency = data.get("concurrency")
        timeout_minutes = data.get("timeout-minutes")
        continue_on_error = data.get("continue-on-error", False)
        container = self._parse_container(data.get("container"))
        services = self._parse_services(data.get("services", {}))
        strategy = self._parse_strategy(data.get("strategy"))
        # Reusable workflow fields
        with_inputs = data.get("with", {})
        outputs = data.get("outputs", {})
        # Parse secrets for reusable workflows: either 'inherit' or a mapping
        secrets_passed = None
        inherit_secrets = False
        secrets_data = data.get("secrets")
        if secrets_data == "inherit":
            inherit_secrets = True
        elif isinstance(secrets_data, dict):
            secrets_passed = secrets_data
        # Extract actions from steps or reusable workflow
        actions_used: list[ActionRef] = []
        secrets_used: set[str] = set()
        if is_reusable_call:
            # Parse reusable workflow reference
            workflow_ref = self._parse_reusable_workflow_ref(uses, path)
            actions_used.append(workflow_ref)
        else:
            # Parse actions from steps
            for step in data.get("steps", []):
                if step is None:
                    continue
                if "uses" in step:
                    action_ref = self._parse_action_ref(step["uses"], path)
                    actions_used.append(action_ref)
        # Extract secrets from entire job content (regex over the dict repr)
        secrets_used.update(self._extract_secrets(str(data)))
        job_data = {
            "name": name,
            "runs_on": runs_on,
            "needs": needs,
            "permissions": permissions,
            "environment": environment,
            "concurrency": concurrency,
            "timeout_minutes": timeout_minutes,
            "continue_on_error": continue_on_error,
            "container": container,
            "services": services,
            "strategy": strategy,
            "uses": uses,
            "with_inputs": with_inputs,
            "secrets_passed": secrets_passed,
            "inherit_secrets": inherit_secrets,
            "outputs": outputs,
            "actions_used": actions_used,
            "secrets_used": secrets_used,
            "env_vars": data.get("env", {}),
        }
        # Use alias for 'if' field ('if' is a Python keyword, aliased on JobMeta)
        if data.get("if") is not None:
            job_data["if"] = data.get("if")
        return JobMeta(**job_data)
    def _parse_action_ref(self, uses: str, source_file: Path) -> ActionRef:
        """Parse a step-level 'uses' string into ActionRef.

        Raises:
            ValueError: If the string matches none of the known forms.
        """
        uses = uses.strip()
        # Local action: ./path or ./.github/actions/name
        if uses.startswith("./"):
            return ActionRef(
                type=ActionType.LOCAL,
                path=uses,
                source_file=str(source_file),
            )
        # Docker action: docker://
        if uses.startswith("docker://"):
            return ActionRef(
                type=ActionType.DOCKER,
                path=uses,
                source_file=str(source_file),
            )
        # GitHub action: owner/repo@ref or owner/repo/path@ref
        match = re.match(r"^([^/]+)/([^/@]+)(?:/([^@]+))?@(.+)$", uses)
        if match:
            owner, repo, path, ref = match.groups()
            return ActionRef(
                type=ActionType.GITHUB,
                owner=owner,
                repo=repo,
                # Default manifest file when no sub-path is given
                path=path or "action.yml",
                ref=ref,
                source_file=str(source_file),
            )
        raise ValueError(f"Invalid action reference: {uses}")
    def _parse_reusable_workflow_ref(self, uses: str, source_file: Path) -> ActionRef:
        """Parse a reusable workflow 'uses' string into ActionRef.

        Format: owner/repo/.github/workflows/workflow.yml@ref
        or: ./.github/workflows/workflow.yml (local)

        Raises:
            ValueError: If the string matches neither form.
        """
        uses = uses.strip()
        # Local reusable workflow
        if uses.startswith("./"):
            return ActionRef(
                type=ActionType.REUSABLE_WORKFLOW,
                path=uses,
                source_file=str(source_file),
            )
        # GitHub reusable workflow: owner/repo/path/to/workflow.yml@ref
        match = re.match(r"^([^/]+)/([^/@]+)/(.+\.ya?ml)@(.+)$", uses)
        if match:
            owner, repo, path, ref = match.groups()
            return ActionRef(
                type=ActionType.REUSABLE_WORKFLOW,
                owner=owner,
                repo=repo,
                path=path,
                ref=ref,
                source_file=str(source_file),
            )
        raise ValueError(f"Invalid reusable workflow reference: {uses}")
    def _parse_container(self, data: Any) -> Container | None:
        """Parse a job's 'container' block (string shorthand or mapping)."""
        if data is None:
            return None
        if isinstance(data, str):
            # String shorthand: just the image name
            return Container(image=data)
        return Container(
            image=data.get("image", ""),
            credentials=data.get("credentials"),
            env=data.get("env", {}),
            ports=data.get("ports", []),
            volumes=data.get("volumes", []),
            options=data.get("options"),
        )
    def _parse_services(self, data: dict[str, Any] | None) -> dict[str, Service]:
        """Parse a job's 'services' map (each entry: string shorthand or mapping)."""
        if data is None:
            return {}
        services = {}
        for name, svc_data in data.items():
            if isinstance(svc_data, str):
                services[name] = Service(name=name, image=svc_data)
            else:
                services[name] = Service(
                    name=name,
                    image=svc_data.get("image", ""),
                    credentials=svc_data.get("credentials"),
                    env=svc_data.get("env", {}),
                    ports=svc_data.get("ports", []),
                    volumes=svc_data.get("volumes", []),
                    options=svc_data.get("options"),
                )
        return services
    def _parse_strategy(self, data: Any) -> Strategy | None:
        """Parse a job's 'strategy' block."""
        if data is None:
            return None
        return Strategy(
            matrix=data.get("matrix", {}),
            fail_fast=data.get("fail-fast", True),
            max_parallel=data.get("max-parallel"),
        )
    def _extract_secrets(self, content: str) -> set[str]:
        """Extract secret names referenced via ${{ secrets.NAME }} expressions."""
        secrets = set()
        # Match ${{ secrets.NAME }}
        pattern = r"\$\{\{\s*secrets\.(\w+)\s*\}\}"
        for match in re.finditer(pattern, content):
            secrets.add(match.group(1))
        return secrets
    def parse_action(self, path: Path) -> ActionManifest:
        """Parse an action.yml file.

        Raises:
            ValueError: If the file is empty or contains no YAML document.
        """
        with open(path, encoding="utf-8") as f:
            data = self.yaml.load(f)
        if not data:
            raise ValueError(f"Empty action file: {path}")
        # Fall back to the containing directory name when unnamed
        name = data.get("name", path.parent.name)
        description = data.get("description")
        author = data.get("author")
        # Parse inputs (non-mapping entries are silently skipped)
        inputs = {}
        for input_name, input_data in data.get("inputs", {}).items():
            if isinstance(input_data, dict):
                inputs[input_name] = ActionInput(
                    name=input_name,
                    description=input_data.get("description"),
                    required=input_data.get("required", False),
                    default=input_data.get("default"),
                )
        # Parse outputs (non-mapping entries are silently skipped)
        outputs = {}
        for output_name, output_data in data.get("outputs", {}).items():
            if isinstance(output_data, dict):
                outputs[output_name] = ActionOutput(
                    name=output_name,
                    description=output_data.get("description"),
                )
        # Classify the action kind from runs.using
        runs = data.get("runs", {})
        is_composite = runs.get("using") == "composite"
        is_docker = runs.get("using") in ("docker", "Dockerfile")
        is_javascript = runs.get("using", "").startswith("node")
        return ActionManifest(
            name=name,
            description=description,
            author=author,
            inputs=inputs,
            outputs=outputs,
            runs=runs,
            branding=data.get("branding"),
            is_composite=is_composite,
            is_docker=is_docker,
            is_javascript=is_javascript,
        )

163
ghaw_auditor/policy.py Normal file
View File

@@ -0,0 +1,163 @@
"""Policy validator for workflows and actions."""
from __future__ import annotations
import logging
import re
from typing import Any
from ghaw_auditor.models import ActionRef, ActionType, Policy, WorkflowMeta
logger = logging.getLogger(__name__)
class PolicyValidator:
    """Validates workflows against policy rules.

    Each check returns a list of violation dicts with keys:
    ``workflow``, ``rule``, ``severity`` and ``message``.
    """

    def __init__(self, policy: Policy) -> None:
        """Initialize validator.

        Args:
            policy: Policy configuration to enforce.
        """
        self.policy = policy

    @staticmethod
    def _collect_actions(workflow: WorkflowMeta) -> list[ActionRef]:
        """Gather workflow-level plus all job-level action references.

        Shared by every per-action check so the collection logic cannot
        drift between them.
        """
        all_actions = workflow.actions_used[:]
        for job in workflow.jobs.values():
            all_actions.extend(job.actions_used)
        return all_actions

    def validate(self, workflows: dict[str, WorkflowMeta], actions: list[ActionRef]) -> list[dict[str, Any]]:
        """Validate workflows and actions against policy.

        Args:
            workflows: Parsed workflows keyed by path.
            actions: Resolved action references (currently unused by the
                per-workflow checks; kept for interface compatibility).

        Returns:
            All policy violations found across all workflows.
        """
        violations: list[dict[str, Any]] = []
        for workflow_path, workflow in workflows.items():
            violations.extend(self._validate_workflow(workflow_path, workflow, actions))
        return violations

    def _validate_workflow(
        self, workflow_path: str, workflow: WorkflowMeta, actions: list[ActionRef]
    ) -> list[dict[str, Any]]:
        """Run every enabled policy check against a single workflow."""
        violations: list[dict[str, Any]] = []
        # Check pinned actions
        if self.policy.require_pinned_actions:
            violations.extend(self._check_pinned_actions(workflow_path, workflow))
        # Check branch refs
        if self.policy.forbid_branch_refs:
            violations.extend(self._check_branch_refs(workflow_path, workflow))
        # Allow/deny lists are always evaluated; empty lists are no-ops
        violations.extend(self._check_action_allowlist(workflow_path, workflow))
        # Check concurrency on PR
        if self.policy.require_concurrency_on_pr:
            violations.extend(self._check_pr_concurrency(workflow_path, workflow))
        return violations

    def _check_pinned_actions(self, workflow_path: str, workflow: WorkflowMeta) -> list[dict[str, Any]]:
        """Check if GitHub actions are pinned to a full commit SHA."""
        violations: list[dict[str, Any]] = []
        for action in self._collect_actions(workflow):
            # A pin is exactly 40 lowercase hex chars (full commit SHA)
            if action.type == ActionType.GITHUB and action.ref and not re.match(r"^[a-f0-9]{40}$", action.ref):
                violations.append(
                    {
                        "workflow": workflow_path,
                        "rule": "require_pinned_actions",
                        "severity": "error",
                        "message": f"Action {action.owner}/{action.repo} is not pinned to SHA: {action.ref}",
                    }
                )
        return violations

    def _check_branch_refs(self, workflow_path: str, workflow: WorkflowMeta) -> list[dict[str, Any]]:
        """Check for well-known mutable branch refs in action references."""
        violations: list[dict[str, Any]] = []
        for action in self._collect_actions(workflow):
            # Common branch names
            if action.type == ActionType.GITHUB and action.ref and action.ref in ("main", "master", "develop", "dev"):
                violations.append(
                    {
                        "workflow": workflow_path,
                        "rule": "forbid_branch_refs",
                        "severity": "error",
                        "message": f"Action {action.owner}/{action.repo} uses branch ref: {action.ref}",
                    }
                )
        return violations

    def _check_action_allowlist(self, workflow_path: str, workflow: WorkflowMeta) -> list[dict[str, Any]]:
        """Check actions against the policy's allowed/denied pattern lists."""
        violations: list[dict[str, Any]] = []
        for action in self._collect_actions(workflow):
            if action.type == ActionType.GITHUB:
                action_id = f"{action.owner}/{action.repo}"
                # Denied list wins and is always enforced when non-empty
                if self.policy.denied_actions:
                    for denied in self.policy.denied_actions:
                        if self._matches_pattern(action_id, denied):
                            violations.append(
                                {
                                    "workflow": workflow_path,
                                    "rule": "denied_actions",
                                    "severity": "error",
                                    "message": f"Action {action_id} is denied by policy",
                                }
                            )
                # Allowed list is opt-in: only enforced when non-empty
                if self.policy.allowed_actions:
                    allowed = any(self._matches_pattern(action_id, pattern) for pattern in self.policy.allowed_actions)
                    if not allowed:
                        violations.append(
                            {
                                "workflow": workflow_path,
                                "rule": "allowed_actions",
                                "severity": "error",
                                "message": f"Action {action_id} is not in allowed list",
                            }
                        )
        return violations

    def _check_pr_concurrency(self, workflow_path: str, workflow: WorkflowMeta) -> list[dict[str, Any]]:
        """Check if PR-triggered workflows declare a concurrency group."""
        violations: list[dict[str, Any]] = []
        # Check if workflow is triggered by PR
        pr_triggers = {"pull_request", "pull_request_target"}
        has_pr_trigger = any(t in pr_triggers for t in workflow.triggers)
        if has_pr_trigger and not workflow.concurrency:
            violations.append(
                {
                    "workflow": workflow_path,
                    "rule": "require_concurrency_on_pr",
                    "severity": "warning",
                    "message": "PR workflow should have concurrency group to prevent resource waste",
                }
            )
        return violations

    def _matches_pattern(self, action_id: str, pattern: str) -> bool:
        """Check if an action ID matches a policy pattern.

        Only ``*`` is a wildcard (matches any run of characters); every
        other character is matched literally. Previously regex
        metacharacters in the pattern (e.g. ``.``) leaked through and
        matched arbitrary characters.
        """
        regex_pattern = re.escape(pattern).replace(r"\*", ".*")
        return bool(re.fullmatch(regex_pattern, action_id))

268
ghaw_auditor/renderer.py Normal file
View File

@@ -0,0 +1,268 @@
"""Renderers for JSON and Markdown reports."""
from __future__ import annotations
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Any
from ghaw_auditor.models import ActionManifest, ActionRef, ActionType, WorkflowMeta
logger = logging.getLogger(__name__)
class Renderer:
    """Renders audit reports in various formats.

    Writes ``workflows.json``, ``actions.json``, ``violations.json`` and
    ``report.md`` into the configured output directory.
    """

    def __init__(self, output_dir: Path) -> None:
        """Initialize renderer.

        The output directory (including parents) is created if missing.
        """
        self.output_dir = output_dir
        self.output_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _create_action_anchor(key: str) -> str:
        """Create markdown-compatible anchor ID from action key.

        Examples:
            "actions/checkout@abc123" -> "actions-checkout"
            "local:./sync-labels" -> "local-sync-labels"
            "docker://alpine:3.8" -> "docker-alpine-3-8"
        """
        # For GitHub actions, remove the @ref/SHA part
        if "@" in key and not key.startswith("docker://"):
            key = key.split("@")[0]
        # Replace special characters with dashes
        anchor = key.replace("/", "-").replace(":", "-").replace(".", "-")
        # Clean up multiple consecutive dashes
        while "--" in anchor:
            anchor = anchor.replace("--", "-")
        return anchor.lower().strip("-")

    @staticmethod
    def _get_action_repo_url(action_ref: ActionRef) -> str | None:
        """Get GitHub repository URL for an action.

        Returns:
            URL string for GitHub actions, None for local/docker actions
        """
        if action_ref.type == ActionType.GITHUB and action_ref.owner and action_ref.repo:
            return f"https://github.com/{action_ref.owner}/{action_ref.repo}"
        return None

    def render_json(
        self, workflows: dict[str, WorkflowMeta], actions: dict[str, ActionManifest], violations: list[dict[str, Any]]
    ) -> None:
        """Render JSON reports (workflows.json, actions.json, violations.json)."""
        # Write workflows
        workflows_data = {k: v.model_dump(mode="json") for k, v in workflows.items()}
        workflows_file = self.output_dir / "workflows.json"
        with open(workflows_file, "w", encoding="utf-8") as f:
            # default=str stringifies values json cannot serialize natively.
            json.dump(workflows_data, f, indent=2, default=str)
        # Write actions
        actions_data = {k: v.model_dump(mode="json") for k, v in actions.items()}
        actions_file = self.output_dir / "actions.json"
        with open(actions_file, "w", encoding="utf-8") as f:
            json.dump(actions_data, f, indent=2, default=str)
        # Write violations
        violations_file = self.output_dir / "violations.json"
        with open(violations_file, "w", encoding="utf-8") as f:
            json.dump(violations, f, indent=2)
        logger.info(f"JSON reports written to {self.output_dir}")

    def _write_summary(
        self,
        f: Any,
        workflows: dict[str, WorkflowMeta],
        actions: dict[str, ActionManifest],
        violations: list[dict[str, Any]],
    ) -> None:
        """Write summary section (top-level counts) to markdown file."""
        f.write("## Summary\n\n")
        f.write(f"- **Workflows:** {len(workflows)}\n")
        f.write(f"- **Actions:** {len(actions)}\n")
        f.write(f"- **Policy Violations:** {len(violations)}\n\n")

    def _write_analysis(self, f: Any, analysis: dict[str, Any]) -> None:
        """Write analysis section (triggers, runners, secrets) to markdown file."""
        if not analysis:
            return
        f.write("## Analysis\n\n")
        f.write(f"- **Total Jobs:** {analysis.get('total_jobs', 0)}\n")
        f.write(f"- **Reusable Workflows:** {analysis.get('reusable_workflows', 0)}\n")
        if "triggers" in analysis:
            f.write("\n### Triggers\n\n")
            for trigger, count in sorted(analysis["triggers"].items()):
                f.write(f"- `{trigger}`: {count}\n")
            f.write("\n")
        if "runners" in analysis:
            f.write("\n### Runners\n\n")
            for runner, count in sorted(analysis["runners"].items()):
                f.write(f"- `{runner}`: {count}\n")
            f.write("\n")
        if "secrets" in analysis:
            f.write("\n### Secrets\n\n")
            f.write(f"Total unique secrets: {analysis['secrets'].get('total_unique_secrets', 0)}\n\n")
            secrets = analysis["secrets"].get("secrets", [])
            if secrets:
                for secret in sorted(secrets):
                    f.write(f"- `{secret}`\n")
                f.write("\n")

    def _write_job_details(self, f: Any, job_name: str, job: Any) -> None:
        """Write one job's details (runner, permissions, actions) to markdown file."""
        f.write(f"- **{job_name}**\n")
        f.write(f" - Runner: `{job.runs_on}`\n")
        if job.permissions:
            # Only show permissions that are explicitly set (non-None).
            active_perms = {k: v for k, v in job.permissions.model_dump(mode="json").items() if v is not None}
            if active_perms:
                f.write(" - Permissions:\n")
                for perm_name, perm_level in sorted(active_perms.items()):
                    # Model fields use underscores; GitHub's YAML uses dashes.
                    display_name = perm_name.replace("_", "-")
                    f.write(f" - `{display_name}`: {perm_level}\n")
        if job.actions_used:
            f.write(" - Actions used:\n")
            for action_ref in job.actions_used:
                action_key = action_ref.canonical_key()
                # Link each action to its entry in the actions inventory.
                anchor = self._create_action_anchor(action_key)
                if action_ref.type == ActionType.GITHUB:
                    type_label = "GitHub"
                    display_name = f"{action_ref.owner}/{action_ref.repo}"
                elif action_ref.type == ActionType.LOCAL:
                    type_label = "Local"
                    display_name = action_ref.path or "local"
                elif action_ref.type == ActionType.DOCKER:
                    type_label = "Docker"
                    display_name = action_ref.path or action_key
                else:
                    type_label = "Reusable Workflow"
                    display_name = action_ref.path or action_key
                f.write(f" - [{display_name}](#{anchor}) ({type_label})\n")

    def _write_workflows(self, f: Any, workflows: dict[str, WorkflowMeta]) -> None:
        """Write workflows section to markdown file."""
        f.write("\n## Workflows\n\n")
        for path, workflow in sorted(workflows.items()):
            f.write(f"### {workflow.name}\n\n")
            f.write(f"**Path:** `{path}`\n\n")
            f.write(f"**Triggers:** {', '.join(f'`{t}`' for t in workflow.triggers)}\n\n")
            f.write(f"**Jobs:** {len(workflow.jobs)}\n\n")
            if workflow.jobs:
                f.write("#### Jobs\n\n")
                for job_name, job in workflow.jobs.items():
                    self._write_job_details(f, job_name, job)
                f.write("\n")

    def _write_action_header(
        self, f: Any, key: str, action: ActionManifest, action_ref_map: dict[str, ActionRef]
    ) -> None:
        """Write action header with key and repository info."""
        anchor = self._create_action_anchor(key)
        # Explicit <a id> so job-detail links resolve regardless of how the
        # markdown renderer slugifies headings.
        f.write(f'### <a id="{anchor}"></a>{action.name}\n\n')
        f.write(f"**Key:** `{key}`\n\n")
        if key in action_ref_map:
            repo_url = self._get_action_repo_url(action_ref_map[key])
            if repo_url:
                f.write(f"**Repository:** [{action_ref_map[key].owner}/{action_ref_map[key].repo}]({repo_url})\n\n")
            elif action_ref_map[key].type == ActionType.LOCAL:
                f.write("**Type:** Local Action\n\n")
        if action.description:
            f.write(f"{action.description}\n\n")

    def _write_workflows_using_action(self, f: Any, key: str, workflows: dict[str, WorkflowMeta]) -> None:
        """Write section showing workflows that use this action."""
        workflows_using_action = []
        for workflow_path, workflow in workflows.items():
            for action_ref in workflow.actions_used:
                if action_ref.canonical_key() == key:
                    workflows_using_action.append((workflow_path, workflow.name))
                    # One entry per workflow is enough.
                    break
        if workflows_using_action:
            f.write("<details>\n")
            f.write("<summary><b>Used in Workflows</b></summary>\n\n")
            for workflow_path, workflow_name in sorted(workflows_using_action):
                # NOTE(review): this slugification differs from
                # _create_action_anchor; presumably it mirrors how the markdown
                # renderer slugifies workflow headings — confirm.
                workflow_anchor = workflow_name.lower().replace(" ", "-").replace(".", "-")
                f.write(f"- [{workflow_name}](#{workflow_anchor}) (`{workflow_path}`)\n")
            f.write("\n</details>\n\n")

    def _write_action_inputs(self, f: Any, action: ActionManifest) -> None:
        """Write action inputs section (collapsible details block)."""
        if action.inputs:
            f.write("<details>\n")
            f.write("<summary><b>Inputs</b></summary>\n\n")
            for inp in action.inputs.values():
                req = "required" if inp.required else "optional"
                f.write(f"- `{inp.name}` ({req}): {inp.description or 'No description'}\n")
            f.write("\n</details>\n\n")
        else:
            f.write("\n")

    def _write_actions_inventory(
        self, f: Any, workflows: dict[str, WorkflowMeta], actions: dict[str, ActionManifest]
    ) -> None:
        """Write actions inventory section to markdown file."""
        f.write("\n## Actions Inventory\n\n")
        # Build mapping of action keys to ActionRef for repo URLs
        # (first occurrence wins).
        action_ref_map: dict[str, ActionRef] = {}
        for workflow in workflows.values():
            for action_ref in workflow.actions_used:
                key = action_ref.canonical_key()
                if key not in action_ref_map:
                    action_ref_map[key] = action_ref
        for key, action in sorted(actions.items()):
            self._write_action_header(f, key, action, action_ref_map)
            self._write_workflows_using_action(f, key, workflows)
            self._write_action_inputs(f, action)

    def _write_violations(self, f: Any, violations: list[dict[str, Any]]) -> None:
        """Write violations section to markdown file (omitted when empty)."""
        if not violations:
            return
        f.write("\n## Policy Violations\n\n")
        for violation in violations:
            severity = violation.get("severity", "warning").upper()
            f.write(f"### [{severity}] {violation['rule']}\n\n")
            f.write(f"**Workflow:** `{violation['workflow']}`\n\n")
            f.write(f"{violation['message']}\n\n")

    def render_markdown(
        self,
        workflows: dict[str, WorkflowMeta],
        actions: dict[str, ActionManifest],
        violations: list[dict[str, Any]],
        analysis: dict[str, Any],
    ) -> None:
        """Render the full Markdown report to ``report.md``."""
        report_file = self.output_dir / "report.md"
        with open(report_file, "w", encoding="utf-8") as f:
            f.write("# GitHub Actions & Workflows Audit Report\n\n")
            f.write(f"**Generated:** {datetime.now().isoformat()}\n\n")
            self._write_summary(f, workflows, actions, violations)
            self._write_analysis(f, analysis)
            self._write_workflows(f, workflows)
            self._write_actions_inventory(f, workflows, actions)
            self._write_violations(f, violations)
        logger.info(f"Markdown report written to {report_file}")

164
ghaw_auditor/resolver.py Normal file
View File

@@ -0,0 +1,164 @@
"""Action resolver for GitHub actions."""
from __future__ import annotations
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from ghaw_auditor.cache import Cache
from ghaw_auditor.github_client import GitHubClient
from ghaw_auditor.models import ActionManifest, ActionRef, ActionType
from ghaw_auditor.parser import Parser
logger = logging.getLogger(__name__)
class Resolver:
    """Resolves action references and fetches manifests.

    Local actions are read from the repository checkout; GitHub-hosted
    actions are resolved to a commit SHA and their manifest fetched via the
    API, with both steps cached.
    """

    def __init__(
        self,
        github_client: GitHubClient,
        cache: Cache,
        repo_path: Path,
        concurrency: int = 4,
    ) -> None:
        """Initialize resolver.

        Args:
            github_client: Client used to query the GitHub API.
            cache: Cache for resolved refs and fetched manifest contents.
            repo_path: Repository root, used to locate local actions.
            concurrency: Maximum number of parallel resolution workers.
        """
        self.github_client = github_client
        self.cache = cache
        self.parser = Parser(repo_path)
        self.repo_path = repo_path
        self.concurrency = concurrency

    def resolve_actions(self, actions: list[ActionRef]) -> dict[str, ActionManifest]:
        """Resolve multiple action references in parallel.

        Returns:
            Mapping of canonical action key -> parsed manifest. References
            that fail to resolve are logged and omitted.
        """
        resolved: dict[str, ActionManifest] = {}
        with ThreadPoolExecutor(max_workers=self.concurrency) as executor:
            futures = {executor.submit(self._resolve_action, action): action for action in actions}
            for future in as_completed(futures):
                action = futures[future]
                try:
                    key, manifest = future.result()
                    if key and manifest:
                        resolved[key] = manifest
                except Exception as e:
                    logger.error(f"Failed to resolve {action.canonical_key()}: {e}")
        return resolved

    def _resolve_action(self, action: ActionRef) -> tuple[str, ActionManifest | None]:
        """Resolve a single action reference by dispatching on its type."""
        if action.type == ActionType.LOCAL:
            return self._resolve_local_action(action)
        elif action.type == ActionType.GITHUB:
            return self._resolve_github_action(action)
        elif action.type == ActionType.DOCKER:
            # Docker actions don't have manifests to parse
            return action.canonical_key(), None
        return "", None

    def _resolve_local_action(self, action: ActionRef) -> tuple[str, ActionManifest | None]:
        """Resolve a local (./path) action by reading its manifest from disk."""
        if not action.path:
            return "", None
        # Remove leading ./ prefix only
        clean_path = action.path[2:] if action.path.startswith("./") else action.path
        action_path = self.repo_path / clean_path
        # If action_path is a directory, look for action.yml/yaml inside it;
        # if it looks like a manifest file path, look in its parent directory.
        if action_path.is_dir():
            search_dir = action_path
        else:
            search_dir = action_path.parent if action_path.name.startswith("action.") else action_path
        for name in ("action.yml", "action.yaml"):
            manifest_path = search_dir / name
            if manifest_path.exists():
                try:
                    return action.canonical_key(), self.parser.parse_action(manifest_path)
                except Exception as e:
                    logger.error(f"Failed to parse local action {manifest_path}: {e}")
                    continue
        logger.warning(f"Local action manifest not found: {action_path}")
        return "", None

    def _resolve_github_action(self, action: ActionRef) -> tuple[str, ActionManifest | None]:
        """Resolve a GitHub-hosted action: pin its ref to a SHA and parse its manifest."""
        if not action.owner or not action.repo or not action.ref:
            return "", None
        # Resolve ref to SHA (cached).
        cache_key = self.cache.make_key("ref", action.owner, action.repo, action.ref)
        sha = self.cache.get(cache_key)
        if not sha:
            try:
                sha = self.github_client.get_ref_sha(action.owner, action.repo, action.ref)
                self.cache.set(cache_key, sha)
            except Exception as e:
                logger.error(f"Failed to resolve ref {action.owner}/{action.repo}@{action.ref}: {e}")
                return "", None
        action.resolved_sha = sha
        # Fetch action manifest (cached). Subdirectory actions carry a path.
        manifest_path = action.path if action.path and action.path != "action.yml" else ""
        manifest_key = self.cache.make_key("manifest", action.owner, action.repo, sha, manifest_path)
        manifest_content = self.cache.get(manifest_key)
        if not manifest_content:
            # Try action.yml first, then action.yaml
            base_path = f"{manifest_path}/" if manifest_path else ""
            for name in ("action.yml", "action.yaml"):
                file_path = f"{base_path}{name}"
                try:
                    manifest_content = self.github_client.get_file_content(action.owner, action.repo, file_path, sha)
                    self.cache.set(manifest_key, manifest_content)
                    break
                except Exception:
                    continue
        if not manifest_content:
            # Only reached when both extensions failed.
            if manifest_path:
                location = f"{action.owner}/{action.repo}/{manifest_path}"
            else:
                location = f"{action.owner}/{action.repo}"
            logger.error(f"Action manifest not found: {location} (tried action.yml and action.yaml)")
            return action.canonical_key(), None
        # Parse manifest via a temp file since the parser works on paths.
        import tempfile

        try:
            # Explicit UTF-8 so non-ASCII manifest content survives on
            # platforms with a non-UTF-8 default encoding.
            with tempfile.NamedTemporaryFile(mode="w", suffix=".yml", delete=False, encoding="utf-8") as f:
                f.write(manifest_content)
                temp_path = Path(f.name)
            try:
                manifest = self.parser.parse_action(temp_path)
            finally:
                # Always remove the temp file — the previous version leaked
                # it whenever parse_action raised.
                temp_path.unlink(missing_ok=True)
            return action.canonical_key(), manifest
        except Exception as e:
            logger.error(f"Failed to parse manifest for {action.canonical_key()}: {e}")
            return "", None

84
ghaw_auditor/scanner.py Normal file
View File

@@ -0,0 +1,84 @@
"""File scanner for discovering GitHub Actions and workflows."""
from __future__ import annotations
import logging
from pathlib import Path
logger = logging.getLogger(__name__)
class Scanner:
    """Scans repository for workflow and action files."""

    WORKFLOW_PATTERNS = [
        ".github/workflows/*.yml",
        ".github/workflows/*.yaml",
    ]
    ACTION_PATTERNS = [
        "**/action.yml",
        "**/action.yaml",
        ".github/actions/*/action.yml",
        ".github/actions/*/action.yaml",
    ]

    def __init__(self, repo_path: str | Path, exclude_patterns: list[str] | None = None) -> None:
        """Initialize scanner.

        Args:
            repo_path: Repository root to scan (resolved to an absolute path).
            exclude_patterns: Optional glob patterns, matched against paths
                relative to the repo root, for files to skip.
        """
        self.repo_path = Path(repo_path).resolve()
        self.exclude_patterns = exclude_patterns or []

    def _should_exclude(self, path: Path) -> bool:
        """Check if path should be excluded by any configured pattern."""
        rel_path = path.relative_to(self.repo_path)
        return any(rel_path.match(pattern) for pattern in self.exclude_patterns)

    def find_workflows(self) -> list[Path]:
        """Find all workflow files under .github/workflows."""
        workflows: list[Path] = []
        workflow_dir = self.repo_path / ".github" / "workflows"
        if not workflow_dir.exists():
            logger.warning(f"Workflow directory not found: {workflow_dir}")
            return workflows
        for pattern in ("*.yml", "*.yaml"):
            for file_path in workflow_dir.glob(pattern):
                if not self._should_exclude(file_path):
                    workflows.append(file_path)
        logger.info(f"Found {len(workflows)} workflow files")
        return sorted(workflows)

    @staticmethod
    def _in_workflows_dir(rel_parts: tuple[str, ...]) -> bool:
        """Return True when the relative path contains a .github/workflows segment.

        Compares path components rather than the string form so the check
        also works on Windows, where the separator is a backslash.
        """
        return any(rel_parts[i : i + 2] == (".github", "workflows") for i in range(len(rel_parts) - 1))

    def find_actions(self) -> list[Path]:
        """Find all action manifest files.

        Supports multiple action discovery patterns:
        - .github/actions/*/action.yml (standard GitHub location)
        - ./action-name/action.yml (monorepo root-level actions)
        - Any depth: path/to/action/action.yml (recursive search)

        Excludes .github/workflows directory to avoid false positives.
        """
        actions: list[Path] = []
        # Check .github/actions directory first (standard location).
        actions_dir = self.repo_path / ".github" / "actions"
        if actions_dir.exists():
            for action_file in actions_dir.rglob("action.y*ml"):
                # rglob("action.y*ml") can match more than the two manifest
                # names, so filter explicitly.
                if action_file.name in ("action.yml", "action.yaml") and not self._should_exclude(action_file):
                    actions.append(action_file)
                    logger.debug(f"Found action: {action_file.relative_to(self.repo_path)}")
        # Check for action files in root and subdirectories (supports monorepo structure)
        for name in ("action.yml", "action.yaml"):
            for action_file in self.repo_path.rglob(name):
                # Skip if in .github/workflows (the previous substring check
                # assumed "/" separators and missed this on Windows).
                if self._in_workflows_dir(action_file.relative_to(self.repo_path).parts):
                    continue
                if not self._should_exclude(action_file) and action_file not in actions:
                    actions.append(action_file)
                    logger.debug(f"Found action: {action_file.relative_to(self.repo_path)}")
        logger.info(f"Found {len(actions)} action files")
        return sorted(actions)

118
ghaw_auditor/services.py Normal file
View File

@@ -0,0 +1,118 @@
"""Service layer for orchestrating audit operations."""
from __future__ import annotations
import logging
from dataclasses import dataclass
from typing import Any
from ghaw_auditor.analyzer import Analyzer
from ghaw_auditor.differ import Differ
from ghaw_auditor.models import (
ActionDiff,
ActionManifest,
WorkflowDiff,
WorkflowMeta,
)
from ghaw_auditor.parser import Parser
from ghaw_auditor.policy import PolicyValidator
from ghaw_auditor.resolver import Resolver
from ghaw_auditor.scanner import Scanner
logger = logging.getLogger(__name__)
@dataclass
class ScanResult:
    """Result of a scan operation."""

    # Parsed workflow metadata, keyed by repo-relative workflow path.
    workflows: dict[str, WorkflowMeta]
    # Resolved action manifests, keyed by canonical action key.
    actions: dict[str, ActionManifest]
    # Policy violations found by the validator (empty when no validator ran).
    violations: list[dict[str, Any]]
    # Aggregate statistics produced by the analyzer.
    analysis: dict[str, Any]
    # Number of workflow files discovered on disk.
    workflow_count: int
    # Number of action manifest files discovered on disk.
    action_count: int
    # Number of distinct actions referenced across all workflows.
    unique_action_count: int
class AuditService:
    """Orchestrates the audit workflow."""

    def __init__(
        self,
        scanner: Scanner,
        parser: Parser,
        analyzer: Analyzer,
        resolver: Resolver | None = None,
        validator: PolicyValidator | None = None,
    ) -> None:
        """Initialize audit service with its collaborating components."""
        self.scanner = scanner
        self.parser = parser
        self.analyzer = analyzer
        self.resolver = resolver
        self.validator = validator

    def scan(self, offline: bool = False) -> ScanResult:
        """Execute scan workflow and return results."""
        # Discover workflow and action files on disk.
        workflow_files = self.scanner.find_workflows()
        action_files = self.scanner.find_actions()

        # Parse every workflow, collecting the action references it uses.
        workflows: dict[str, WorkflowMeta] = {}
        all_actions: list = []
        for wf_file in workflow_files:
            try:
                parsed = self.parser.parse_workflow(wf_file)
                rel_path = str(wf_file.relative_to(self.scanner.repo_path))
                workflows[rel_path] = parsed
                all_actions.extend(parsed.actions_used)
            except Exception as e:
                logger.error(f"Failed to parse workflow {wf_file}: {e}")

        # Collapse duplicate references to the same action.
        unique_actions = self.analyzer.deduplicate_actions(all_actions)

        # Fetch manifests for the unique actions unless running offline.
        actions: dict[str, ActionManifest] = {}
        if self.resolver and not offline:
            actions = self.resolver.resolve_actions(list(unique_actions.values()))

        analysis = self.analyzer.analyze_workflows(workflows, actions)

        # Policy validation is optional.
        violations = self.validator.validate(workflows, all_actions) if self.validator else []

        return ScanResult(
            workflows=workflows,
            actions=actions,
            violations=violations,
            analysis=analysis,
            workflow_count=len(workflow_files),
            action_count=len(action_files),
            unique_action_count=len(unique_actions),
        )
class DiffService:
    """Handles baseline comparison."""

    def __init__(self, differ: Differ) -> None:
        """Initialize diff service."""
        self.differ = differ

    def compare(
        self,
        workflows: dict[str, WorkflowMeta],
        actions: dict[str, ActionManifest],
    ) -> tuple[list[WorkflowDiff], list[ActionDiff]]:
        """Compare current state with baseline."""
        # Load the previously recorded baseline, then diff both inventories
        # against it.
        snapshot = self.differ.load_baseline()
        workflow_changes = self.differ.diff_workflows(snapshot.workflows, workflows)
        action_changes = self.differ.diff_actions(snapshot.actions, actions)
        return workflow_changes, action_changes

62
pyproject.toml Normal file
View File

@@ -0,0 +1,62 @@
[project]
name = "ghaw-auditor"
version = "1.0.0"
description = "GitHub Actions & Workflows Auditor - analyze and audit GitHub Actions ecosystem"
readme = "README.md"
requires-python = ">=3.11"
license = {text = "MIT"}
authors = [
{name = "Ismo Vuorinen", email = "ismo@ivuorinen.net"}
]
dependencies = [
"typer>=0.12.0",
"rich>=13.7.0",
"httpx>=0.27.0",
"pydantic>=2.6.0",
"ruamel.yaml>=0.18.0",
"platformdirs>=4.2.0",
"diskcache>=5.6.0",
"packaging>=24.0",
"tenacity>=8.2.0",
]
[project.optional-dependencies]
dev = [
"pytest>=8.0.0",
"pytest-cov>=4.1.0",
"vcrpy>=6.0.0",
"mypy>=1.8.0",
"ruff>=0.3.0",
"types-PyYAML",
]
[project.scripts]
ghaw-auditor = "ghaw_auditor.cli:app"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.ruff]
line-length = 120
target-version = "py311"
[tool.ruff.lint]
select = ["E", "F", "I", "N", "W", "UP", "B", "SIM", "C90"]
ignore = ["E501"]
[tool.ruff.lint.per-file-ignores]
"ghaw_auditor/cli.py" = ["B008"] # Typer uses function calls in defaults
[tool.mypy]
python_version = "3.11"
strict = true
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = "test_*.py"
python_functions = "test_*"
addopts = "-v --cov=ghaw_auditor --cov-report=term-missing --cov-fail-under=70"

1
tests/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Tests for ghaw-auditor."""

23
tests/fixtures/action-with-defaults.yml vendored Normal file
View File

@@ -0,0 +1,23 @@
---
name: 'Action with Various Defaults'
description: 'Tests different input default types'
inputs:
string-input:
description: 'String input'
default: 'hello'
boolean-input:
description: 'Boolean input'
default: true
number-input:
description: 'Number input'
default: 42
no-default:
description: 'Input without default'
required: true
runs:
using: composite
steps:
- run: echo "test"
shell: bash

12
tests/fixtures/basic-workflow.yml vendored Normal file
View File

@@ -0,0 +1,12 @@
---
name: Basic Workflow
on: push
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Run tests
run: echo "Testing"

89
tests/fixtures/complex-workflow.yml vendored Normal file
View File

@@ -0,0 +1,89 @@
---
name: Complex Workflow
on:
push:
branches: [main, develop]
pull_request:
workflow_dispatch:
permissions:
contents: read
issues: write
pull_requests: write
env:
NODE_ENV: production
API_URL: https://api.example.com
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
defaults:
run:
shell: bash
working-directory: ./src
jobs:
build:
runs-on: ubuntu-latest
timeout-minutes: 30
permissions:
contents: read
environment:
name: production
url: https://example.com
env:
BUILD_ENV: production
steps:
- uses: actions/checkout@v4
- name: Build
run: npm run build
env:
API_KEY: ${{ secrets.API_KEY }}
TOKEN: ${{ secrets.GITHUB_TOKEN }}
test:
runs-on: ubuntu-latest
needs: build
if: github.event_name == 'pull_request'
container:
image: node:20-alpine
credentials:
username: ${{ secrets.DOCKER_USER }}
password: ${{ secrets.DOCKER_PASS }}
env:
NODE_ENV: test
ports:
- 8080
volumes:
- /tmp:/tmp
options: --cpus 2
services:
postgres:
image: postgres:15
credentials:
username: ${{ secrets.DOCKER_USER }}
password: ${{ secrets.DOCKER_PASS }}
env:
POSTGRES_PASSWORD: ${{ secrets.DB_PASSWORD }}
ports:
- 5432
options: --health-cmd pg_isready
strategy:
matrix:
node-version: [18, 20]
os: [ubuntu-latest, windows-latest]
fail-fast: false
max-parallel: 2
continue-on-error: true
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
- name: Test
run: npm test
env:
DATABASE_URL: ${{ secrets.DATABASE_URL }}

33
tests/fixtures/composite-action.yml vendored Normal file
View File

@@ -0,0 +1,33 @@
---
name: 'Composite Action'
description: 'A composite action example'
author: 'Test Author'
inputs:
message:
description: 'Message to display'
required: true
debug:
description: 'Enable debug mode'
required: false
default: 'false'
outputs:
result:
description: 'Action result'
value: ${{ steps.output.outputs.result }}
runs:
using: composite
steps:
- name: Display message
run: echo "${{ inputs.message }}"
shell: bash
- name: Set output
id: output
run: echo "result=success" >> $GITHUB_OUTPUT
shell: bash
branding:
icon: 'check'
color: 'green'

21
tests/fixtures/docker-action.yml vendored Normal file
View File

@@ -0,0 +1,21 @@
---
name: 'Docker Action'
description: 'A Docker action example'
inputs:
dockerfile:
description: 'Path to Dockerfile'
required: false
default: 'Dockerfile'
outputs:
image-id:
description: 'Built image ID'
runs:
using: docker
image: Dockerfile
args:
- ${{ inputs.dockerfile }}
env:
BUILD_ENV: production

11
tests/fixtures/empty-workflow-call.yml vendored Normal file
View File

@@ -0,0 +1,11 @@
---
name: Empty Workflow Call
on:
workflow_call:
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

1
tests/fixtures/invalid-action.yml vendored Normal file
View File

@@ -0,0 +1 @@
# Empty action file

1
tests/fixtures/invalid-workflow.yml vendored Normal file
View File

@@ -0,0 +1 @@
# Empty file

27
tests/fixtures/javascript-action.yml vendored Normal file
View File

@@ -0,0 +1,27 @@
---
name: 'JavaScript Action'
description: 'A Node.js action example'
author: 'GitHub'
inputs:
token:
description: 'GitHub token'
required: true
timeout:
description: 'Timeout in seconds'
required: false
default: '60'
outputs:
status:
description: 'Action status'
runs:
using: node20
main: dist/index.js
pre: dist/setup.js
post: dist/cleanup.js
branding:
icon: 'code'
color: 'blue'

27
tests/fixtures/job-with-outputs.yml vendored Normal file
View File

@@ -0,0 +1,27 @@
---
name: Job with Outputs
on: push
jobs:
build:
runs-on: ubuntu-latest
outputs:
version: ${{ steps.version.outputs.version }}
artifact-url: ${{ steps.upload.outputs.url }}
status: success
steps:
- uses: actions/checkout@v4
- name: Get version
id: version
run: echo "version=1.0.0" >> $GITHUB_OUTPUT
- name: Upload artifact
id: upload
run: echo "url=https://example.com/artifact" >> $GITHUB_OUTPUT
deploy:
runs-on: ubuntu-latest
needs: build
steps:
- name: Deploy
run: echo "Deploying version ${{ needs.build.outputs.version }}"

View File

@@ -0,0 +1,26 @@
---
name: Reusable Workflow Caller
on:
push:
branches: [main]
jobs:
call-workflow:
uses: owner/repo/.github/workflows/deploy.yml@v1
with:
environment: production
debug: false
version: 1.2.3
secrets:
deploy-token: ${{ secrets.DEPLOY_TOKEN }}
api-key: ${{ secrets.API_KEY }}
call-workflow-inherit:
uses: owner/repo/.github/workflows/test.yml@main
secrets: inherit
call-local-workflow:
uses: ./.github/workflows/shared.yml
with:
config: custom

39
tests/fixtures/reusable-workflow.yml vendored Normal file
View File

@@ -0,0 +1,39 @@
---
name: Reusable Workflow
on:
workflow_call:
inputs:
environment:
description: 'Deployment environment'
required: true
type: string
debug:
description: 'Enable debug mode'
required: false
type: boolean
default: false
outputs:
deployment-id:
description: 'Deployment ID'
value: ${{ jobs.deploy.outputs.id }}
secrets:
deploy-token:
description: 'Deployment token'
required: true
api-key:
required: false
jobs:
deploy:
runs-on: ubuntu-latest
outputs:
id: ${{ steps.deploy.outputs.id }}
steps:
- uses: actions/checkout@v4
- name: Deploy
id: deploy
run: echo "id=12345" >> $GITHUB_OUTPUT
env:
TOKEN: ${{ secrets.deploy-token }}
API_KEY: ${{ secrets.api-key }}

30
tests/golden/actions.json Normal file
View File

@@ -0,0 +1,30 @@
{
"actions/checkout@abc123": {
"name": "Checkout",
"description": "Checkout a Git repository",
"author": "GitHub",
"inputs": {
"repository": {
"name": "repository",
"description": "Repository name with owner",
"required": false,
"default": null
},
"ref": {
"name": "ref",
"description": "The branch, tag or SHA to checkout",
"required": false,
"default": null
}
},
"outputs": {},
"runs": {
"using": "node20",
"main": "dist/index.js"
},
"branding": null,
"is_composite": false,
"is_docker": false,
"is_javascript": true
}
}

57
tests/golden/report.md Normal file
View File

@@ -0,0 +1,57 @@
# GitHub Actions & Workflows Audit Report
**Generated:** 2025-10-02T00:00:00.000000
## Summary
- **Workflows:** 1
- **Actions:** 1
- **Policy Violations:** 0
## Analysis
- **Total Jobs:** 1
- **Reusable Workflows:** 0
### Triggers
- `pull_request`: 1
- `push`: 1
### Runners
- `ubuntu-latest`: 1
### Secrets
Total unique secrets: 1
- `GITHUB_TOKEN`
## Workflows
### Test Workflow
**Path:** `test.yml`
**Triggers:** `push`, `pull_request`
**Jobs:** 1
#### Jobs
- **test**
- Runner: `ubuntu-latest`
## Actions Inventory
### Checkout
**Key:** `actions/checkout@abc123`
Checkout a Git repository
**Inputs:**
- `repository` (optional): Repository name with owner
- `ref` (optional): The branch, tag or SHA to checkout

View File

@@ -0,0 +1,41 @@
{
"test.yml": {
"name": "Test Workflow",
"path": "test.yml",
"triggers": [
"push",
"pull_request"
],
"permissions": null,
"concurrency": null,
"env": {},
"defaults": {},
"jobs": {
"test": {
"name": "test",
"runs_on": "ubuntu-latest",
"needs": [],
"if_condition": null,
"permissions": null,
"environment": null,
"concurrency": null,
"timeout_minutes": null,
"continue_on_error": false,
"container": null,
"services": {},
"strategy": null,
"actions_used": [],
"secrets_used": [
"GITHUB_TOKEN"
],
"env_vars": {}
}
},
"is_reusable": false,
"reusable_contract": null,
"secrets_used": [
"GITHUB_TOKEN"
],
"actions_used": []
}
}

144
tests/test_analyzer.py Normal file
View File

@@ -0,0 +1,144 @@
"""Tests for analyzer module."""
from ghaw_auditor.analyzer import Analyzer
from ghaw_auditor.models import ActionRef, ActionType, JobMeta, WorkflowMeta
def test_analyzer_initialization() -> None:
    """Analyzer should construct with no arguments."""
    instance = Analyzer()
    assert instance is not None
def test_deduplicate_actions() -> None:
    """Duplicate references to the same action collapse to one entry."""
    analyzer = Analyzer()
    refs = [
        ActionRef(type=ActionType.GITHUB, owner="actions", repo="checkout", ref="v4", source_file="test.yml"),
        ActionRef(type=ActionType.GITHUB, owner="actions", repo="checkout", ref="v4", source_file="test2.yml"),
        ActionRef(type=ActionType.GITHUB, owner="actions", repo="setup-node", ref="v4", source_file="test.yml"),
    ]
    deduped = analyzer.deduplicate_actions(refs)
    # checkout appears twice, so only two unique actions remain.
    assert len(deduped) == 2
def test_analyze_workflows() -> None:
    """Analysis aggregates workflow, job, trigger and secret counts."""
    analyzer = Analyzer()
    workflow = WorkflowMeta(
        name="Test Workflow",
        path="test.yml",
        triggers=["push", "pull_request"],
        jobs={"test": JobMeta(name="test", runs_on="ubuntu-latest")},
        secrets_used={"SECRET1", "SECRET2"},
    )
    report = analyzer.analyze_workflows({"test.yml": workflow}, {})
    assert report["total_workflows"] == 1
    assert report["total_jobs"] == 1
    # The push trigger is present and counted exactly once.
    assert report["triggers"].get("push") == 1
    assert report["secrets"]["total_unique_secrets"] == 2
def test_analyze_runners_with_list() -> None:
    """Test runner analysis with list runner.

    A list-valued ``runs_on`` (matrix runner) should be stringified in the
    runners breakdown.
    """
    # Analyzer/JobMeta/WorkflowMeta are already imported at module level;
    # the previous local re-imports were redundant.
    analyzer = Analyzer()
    # Job with list runner (matrix runner)
    job = JobMeta(
        name="test",
        runs_on=["ubuntu-latest", "macos-latest"],
    )
    workflow = WorkflowMeta(
        name="Test Workflow",
        path="test.yml",
        triggers=["push"],
        jobs={"test": job},
    )
    workflows = {"test.yml": workflow}
    analysis = analyzer.analyze_workflows(workflows, {})
    # List runner should be converted to string
    assert "['ubuntu-latest', 'macos-latest']" in analysis["runners"]
def test_analyze_containers_and_services() -> None:
"""Test container and service analysis."""
from ghaw_auditor.analyzer import Analyzer
from ghaw_auditor.models import Container, JobMeta, Service, WorkflowMeta
analyzer = Analyzer()
# Job with container
job1 = JobMeta(
name="with-container",
runs_on="ubuntu-latest",
container=Container(image="node:18"),
)
# Job with services
job2 = JobMeta(
name="with-services",
runs_on="ubuntu-latest",
services={"postgres": Service(name="postgres", image="postgres:14")},
)
# Job with both
job3 = JobMeta(
name="with-both",
runs_on="ubuntu-latest",
container=Container(image="node:18"),
services={"redis": Service(name="redis", image="redis:7")},
)
workflow = WorkflowMeta(
name="Test Workflow",
path="test.yml",
triggers=["push"],
jobs={
"with-container": job1,
"with-services": job2,
"with-both": job3,
},
)
workflows = {"test.yml": workflow}
analysis = analyzer.analyze_workflows(workflows, {})
# Should count containers and services
assert analysis["containers"]["jobs_with_containers"] == 2
assert analysis["containers"]["jobs_with_services"] == 2

58
tests/test_cache.py Normal file
View File

@@ -0,0 +1,58 @@
"""Tests for cache module."""
from pathlib import Path
from ghaw_auditor.cache import Cache
def test_cache_initialization(tmp_path: Path) -> None:
    """Creating a Cache materialises its directory on disk."""
    cache = Cache(tmp_path / "cache")
    assert cache.cache_dir.exists()
    cache.close()


def test_cache_set_get(tmp_path: Path) -> None:
    """A stored value can be read back under the same key."""
    cache = Cache(tmp_path / "cache")
    cache.set("test_key", "test_value")
    value = cache.get("test_key")
    assert value == "test_value"
    cache.close()


def test_cache_make_key(tmp_path: Path) -> None:
    """make_key is deterministic and distinguishes different inputs."""
    # Use a temporary directory so the test never touches the user's real
    # default cache location (the original used the parameterless Cache()).
    cache = Cache(tmp_path / "cache")
    key1 = cache.make_key("part1", "part2", "part3")
    key2 = cache.make_key("part1", "part2", "part3")
    key3 = cache.make_key("different", "parts")
    assert key1 == key2
    assert key1 != key3
    cache.close()


def test_cache_clear(tmp_path: Path) -> None:
    """clear() removes all previously stored entries."""
    cache = Cache(tmp_path / "cache")
    # Add some values.
    cache.set("key1", "value1")
    cache.set("key2", "value2")
    # Verify they exist.
    assert cache.get("key1") == "value1"
    assert cache.get("key2") == "value2"
    # Clear cache.
    cache.clear()
    # Verify values are gone.
    assert cache.get("key1") is None
    assert cache.get("key2") is None
    cache.close()

584
tests/test_cli.py Normal file
View File

@@ -0,0 +1,584 @@
"""Integration tests for CLI commands."""
from pathlib import Path
from unittest.mock import Mock, patch
from typer.testing import CliRunner
from ghaw_auditor.cli import app
runner = CliRunner()
def test_scan_command_basic(tmp_path: Path) -> None:
    """Offline scan of an empty repository succeeds and reports progress."""
    output_dir = tmp_path / "output"
    with patch("ghaw_auditor.cli.Scanner") as scanner_cls:
        fake = scanner_cls.return_value
        fake.find_workflows.return_value = []
        fake.find_actions.return_value = []
        cli_args = ["scan", "--repo", str(tmp_path), "--output", str(output_dir), "--offline"]
        result = runner.invoke(app, cli_args)
    assert result.exit_code == 0
    assert "Scanning repository" in result.stdout


def test_scan_command_with_token(tmp_path: Path) -> None:
    """Passing an explicit GitHub token does not break an offline scan."""
    with patch("ghaw_auditor.cli.Scanner") as scanner_cls:
        fake = scanner_cls.return_value
        fake.find_workflows.return_value = []
        fake.find_actions.return_value = []
        cli_args = ["scan", "--repo", str(tmp_path), "--token", "test_token", "--offline"]
        result = runner.invoke(app, cli_args)
    assert result.exit_code == 0


def test_inventory_command(tmp_path: Path) -> None:
    """Inventory of an empty repository still renders the actions table."""
    with patch("ghaw_auditor.cli.Scanner") as scanner_cls:
        scanner_cls.return_value.find_workflows.return_value = []
        result = runner.invoke(app, ["inventory", "--repo", str(tmp_path)])
    assert result.exit_code == 0
    assert "Unique Actions" in result.stdout


def test_validate_command(tmp_path: Path) -> None:
    """Validation of an empty repository succeeds."""
    with patch("ghaw_auditor.cli.Scanner") as scanner_cls:
        scanner_cls.return_value.find_workflows.return_value = []
        result = runner.invoke(app, ["validate", "--repo", str(tmp_path)])
    assert result.exit_code == 0


def test_version_command() -> None:
    """The version command prints the tool name and version string."""
    result = runner.invoke(app, ["version"])
    assert result.exit_code == 0
    assert "ghaw-auditor version" in result.stdout


def test_scan_command_verbose(tmp_path: Path) -> None:
    """The --verbose flag is accepted by scan."""
    with patch("ghaw_auditor.cli.Scanner") as scanner_cls:
        fake = scanner_cls.return_value
        fake.find_workflows.return_value = []
        fake.find_actions.return_value = []
        result = runner.invoke(app, ["scan", "--repo", str(tmp_path), "--verbose", "--offline"])
    assert result.exit_code == 0


def test_scan_command_quiet(tmp_path: Path) -> None:
    """The --quiet flag is accepted by scan."""
    with patch("ghaw_auditor.cli.Scanner") as scanner_cls:
        fake = scanner_cls.return_value
        fake.find_workflows.return_value = []
        fake.find_actions.return_value = []
        result = runner.invoke(app, ["scan", "--repo", str(tmp_path), "--quiet", "--offline"])
    assert result.exit_code == 0


def test_scan_command_nonexistent_repo() -> None:
    """Scanning a missing path fails with a non-zero exit code."""
    result = runner.invoke(app, ["scan", "--repo", "/nonexistent/path"])
    assert result.exit_code in (1, 2)  # Either repo not found or other error
    assert "Repository not found" in result.stdout or result.exit_code == 2


def test_scan_command_with_log_json(tmp_path: Path) -> None:
    """The --log-json flag is accepted by scan."""
    with patch("ghaw_auditor.cli.Scanner") as scanner_cls:
        fake = scanner_cls.return_value
        fake.find_workflows.return_value = []
        fake.find_actions.return_value = []
        result = runner.invoke(app, ["scan", "--repo", str(tmp_path), "--log-json", "--offline"])
    assert result.exit_code == 0
def test_scan_command_with_policy_file(tmp_path: Path) -> None:
    """Test scan with policy file."""
    # Minimal policy: only require actions to be SHA-pinned.
    policy_file = tmp_path / "policy.yml"
    policy_file.write_text("require_pinned_actions: true")
    with patch("ghaw_auditor.cli.Scanner") as mock_scanner:
        mock_scanner.return_value.find_workflows.return_value = []
        mock_scanner.return_value.find_actions.return_value = []
        result = runner.invoke(
            app,
            [
                "scan",
                "--repo",
                str(tmp_path),
                "--policy-file",
                str(policy_file),
                "--offline",
            ],
        )
        assert result.exit_code == 0


def test_scan_command_with_violations(tmp_path: Path) -> None:
    """Test scan with policy violations."""
    # Create workflow with unpinned action (branch ref, not a 40-char SHA).
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text(
        """
name: CI
on: push
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@main
"""
    )
    policy_file = tmp_path / "policy.yml"
    policy_file.write_text("require_pinned_actions: true")
    result = runner.invoke(
        app,
        [
            "scan",
            "--repo",
            str(tmp_path),
            "--policy-file",
            str(policy_file),
            "--offline",
        ],
    )
    # Without --enforce, violations are reported but do not fail the run.
    assert result.exit_code == 0
    assert "policy violations" in result.stdout


def test_scan_command_with_enforcement(tmp_path: Path) -> None:
    """Test scan with policy enforcement."""
    # Create workflow with unpinned action
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text(
        """
name: CI
on: push
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@main
"""
    )
    policy_file = tmp_path / "policy.yml"
    policy_file.write_text("require_pinned_actions: true")
    result = runner.invoke(
        app,
        [
            "scan",
            "--repo",
            str(tmp_path),
            "--policy-file",
            str(policy_file),
            "--enforce",
            "--offline",
        ],
    )
    # Should exit with error due to violations
    assert result.exit_code in (1, 2)  # Exit code 1 from policy, or 2 from exception handling
    # Check that enforcement was triggered
    assert "policy violations" in result.stdout or "Policy enforcement failed" in result.stdout
def test_scan_command_with_diff_mode(tmp_path: Path) -> None:
    """Test scan in diff mode."""
    # Create baseline on disk for the diff to compare against.
    baseline_dir = tmp_path / "baseline"
    baseline_dir.mkdir()
    # Differ/WorkflowMeta are only needed by the diff-mode tests.
    from ghaw_auditor.differ import Differ
    from ghaw_auditor.models import WorkflowMeta

    differ = Differ(baseline_dir)
    workflow = WorkflowMeta(name="Test", path="test.yml", triggers=["push"], jobs={})
    differ.save_baseline({"test.yml": workflow}, {})
    # Create workflow
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "test.yml").write_text("name: Test\non: push\njobs: {}")
    output_dir = tmp_path / "output"
    result = runner.invoke(
        app,
        [
            "scan",
            "--repo",
            str(tmp_path),
            "--diff",
            "--baseline",
            str(baseline_dir),
            "--output",
            str(output_dir),
            "--offline",
        ],
    )
    assert result.exit_code == 0
    assert "Running diff" in result.stdout


def test_scan_command_with_write_baseline(tmp_path: Path) -> None:
    """Test scan with baseline writing."""
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text("name: CI\non: push\njobs:\n  test:\n    runs-on: ubuntu-latest")
    baseline_dir = tmp_path / "baseline"
    result = runner.invoke(
        app,
        [
            "scan",
            "--repo",
            str(tmp_path),
            "--write-baseline",
            "--baseline",
            str(baseline_dir),
            "--offline",
        ],
    )
    assert result.exit_code == 0
    assert "Baseline saved" in result.stdout
    # The baseline directory is created by the write.
    assert baseline_dir.exists()


def test_scan_command_with_format_json(tmp_path: Path) -> None:
    """Test scan with JSON format only."""
    with patch("ghaw_auditor.cli.Scanner") as mock_scanner:
        mock_scanner.return_value.find_workflows.return_value = []
        mock_scanner.return_value.find_actions.return_value = []
        result = runner.invoke(
            app,
            ["scan", "--repo", str(tmp_path), "--format-type", "json", "--offline"],
        )
        assert result.exit_code == 0


def test_scan_command_with_format_md(tmp_path: Path) -> None:
    """Test scan with Markdown format only."""
    with patch("ghaw_auditor.cli.Scanner") as mock_scanner:
        mock_scanner.return_value.find_workflows.return_value = []
        mock_scanner.return_value.find_actions.return_value = []
        result = runner.invoke(
            app,
            ["scan", "--repo", str(tmp_path), "--format-type", "md", "--offline"],
        )
        assert result.exit_code == 0
def test_inventory_command_with_error(tmp_path: Path) -> None:
    """Test inventory command with parse error."""
    # Create invalid workflow
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "invalid.yml").write_text("invalid: yaml: {{{")
    result = runner.invoke(app, ["inventory", "--repo", str(tmp_path)])
    # Parse errors are non-fatal: the command still renders its table.
    assert result.exit_code == 0
    assert "Unique Actions" in result.stdout


def test_inventory_command_verbose_with_error(tmp_path: Path) -> None:
    """Test inventory command verbose mode with error."""
    # Create invalid workflow
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "invalid.yml").write_text("invalid: yaml: {{{")
    result = runner.invoke(app, ["inventory", "--repo", str(tmp_path), "--verbose"])
    assert result.exit_code == 0


def test_validate_command_with_violations(tmp_path: Path) -> None:
    """Test validate command with violations."""
    # Create workflow with unpinned action (branch ref, not a SHA).
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text(
        """
name: CI
on: push
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@main
"""
    )
    result = runner.invoke(app, ["validate", "--repo", str(tmp_path)])
    # Without --enforce, violations are reported but exit code stays 0.
    assert result.exit_code == 0
    assert "policy violations" in result.stdout


def test_validate_command_with_enforcement(tmp_path: Path) -> None:
    """Test validate command with enforcement."""
    # Create workflow with unpinned action
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text(
        """
name: CI
on: push
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@main
"""
    )
    result = runner.invoke(app, ["validate", "--repo", str(tmp_path), "--enforce"])
    # Should exit with error
    assert result.exit_code == 1


def test_validate_command_no_violations(tmp_path: Path) -> None:
    """Test validate command with no violations."""
    # Create workflow with pinned action (valid 40-char SHA)
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text(
        """
name: CI
on: push
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@a81bbbf8298c0fa03ea29cdc473d45769f953675
"""
    )
    result = runner.invoke(app, ["validate", "--repo", str(tmp_path)])
    assert result.exit_code == 0
    assert "No policy violations found" in result.stdout


def test_validate_command_with_error(tmp_path: Path) -> None:
    """Test validate command with parse error."""
    # Create invalid workflow
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "invalid.yml").write_text("invalid: yaml: {{{")
    result = runner.invoke(app, ["validate", "--repo", str(tmp_path)])
    assert result.exit_code == 0


def test_validate_command_verbose_with_error(tmp_path: Path) -> None:
    """Test validate command verbose mode with error."""
    # Create invalid workflow
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "invalid.yml").write_text("invalid: yaml: {{{")
    result = runner.invoke(app, ["validate", "--repo", str(tmp_path), "--verbose"])
    assert result.exit_code == 0
def test_scan_command_diff_baseline_not_found(tmp_path: Path) -> None:
    """Test scan with diff mode when baseline doesn't exist."""
    # Create workflow
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text("name: CI\non: push\njobs:\n  test:\n    runs-on: ubuntu-latest")
    # Non-existent baseline
    baseline_dir = tmp_path / "nonexistent_baseline"
    output_dir = tmp_path / "output"
    result = runner.invoke(
        app,
        [
            "scan",
            "--repo",
            str(tmp_path),
            "--diff",
            "--baseline",
            str(baseline_dir),
            "--output",
            str(output_dir),
            "--offline",
        ],
    )
    # Should complete but log error about missing baseline
    assert result.exit_code == 0
    # Diff should be attempted but baseline not found is logged


def test_scan_command_general_exception(tmp_path: Path) -> None:
    """Test scan command with general exception."""
    # Mock the factory to raise an exception
    with patch("ghaw_auditor.cli.AuditServiceFactory") as mock_factory:
        mock_factory.create.side_effect = RuntimeError("Factory failed")
        result = runner.invoke(
            app,
            ["scan", "--repo", str(tmp_path), "--offline"],
        )
        # Should exit with code 2 (exception)
        assert result.exit_code == 2


def test_inventory_command_parse_error_verbose(tmp_path: Path) -> None:
    """Test inventory command logs exceptions in verbose mode."""
    # Create workflow that will cause parse exception
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "bad.yml").write_text("!!invalid yaml!!")
    result = runner.invoke(
        app,
        ["inventory", "--repo", str(tmp_path), "--verbose"],
    )
    # Should complete (exception is caught)
    assert result.exit_code == 0
    # Check for error message in output or logs


def test_validate_command_parse_error_verbose(tmp_path: Path) -> None:
    """Test validate command logs exceptions in verbose mode."""
    # Create workflow that will cause parse exception
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "bad.yml").write_text("!!invalid yaml!!")
    result = runner.invoke(
        app,
        ["validate", "--repo", str(tmp_path), "--verbose"],
    )
    # Should complete (exception is caught)
    assert result.exit_code == 0


def test_scan_command_with_resolver_exception(tmp_path: Path) -> None:
    """Test scan with resolver that raises exception."""
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text(
        """
name: CI
on: push
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
"""
    )
    # Mock the service so that scan() raises, exercising the CLI's
    # top-level exception handler.
    with patch("ghaw_auditor.cli.AuditServiceFactory") as mock_factory:
        mock_service = Mock()
        mock_service.scan.side_effect = Exception("Resolver error")
        mock_factory.create.return_value = mock_service
        result = runner.invoke(
            app,
            ["scan", "--repo", str(tmp_path), "--offline"],
        )
        # Should exit with code 2
        assert result.exit_code == 2
def test_inventory_command_with_actions(tmp_path: Path) -> None:
    """Inventory lists the actions actually used by a workflow."""
    # Create workflow with two actions.
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text(
        """
name: CI
on: push
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
"""
    )
    result = runner.invoke(app, ["inventory", "--repo", str(tmp_path)])
    assert result.exit_code == 0
    assert "Unique Actions" in result.stdout
    # The discovered action must actually appear in the output.  The previous
    # check ('... or "" in result.stdout') was vacuously true for any output.
    # Use the bare repo name in case the table wraps long cell values.
    assert "checkout" in result.stdout


def test_validate_command_with_policy_file(tmp_path: Path) -> None:
    """validate accepts an explicit --policy-file and loads it."""
    # Create workflow.
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text(
        """
name: CI
on: push
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
"""
    )
    # Create policy file.
    policy_file = tmp_path / "policy.yml"
    policy_file.write_text("require_pinned_actions: true")
    result = runner.invoke(
        app,
        ["validate", "--repo", str(tmp_path), "--policy-file", str(policy_file)],
    )
    # Exercises the explicit policy-file loading branch of the command.
    assert result.exit_code == 0

376
tests/test_differ.py Normal file
View File

@@ -0,0 +1,376 @@
"""Tests for differ module."""
from pathlib import Path
import pytest
from ghaw_auditor.differ import Differ
from ghaw_auditor.models import (
ActionManifest,
JobMeta,
PermissionLevel,
Permissions,
WorkflowMeta,
)
def test_save_and_load_baseline(tmp_path: Path) -> None:
    """A saved baseline round-trips intact through load_baseline()."""
    baseline_dir = tmp_path / "baseline"
    differ = Differ(baseline_dir)
    # Sample data: one workflow and one action manifest.
    wf = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={"test": JobMeta(name="test", runs_on="ubuntu-latest")},
    )
    manifest = ActionManifest(
        name="Checkout",
        description="Checkout code",
    )
    # Save baseline and verify the three artefact files exist.
    differ.save_baseline({"test.yml": wf}, {"actions/checkout@v4": manifest}, "abc123")
    for filename in ("workflows.json", "actions.json", "meta.json"):
        assert (baseline_dir / filename).exists()
    # Load it back.
    baseline = differ.load_baseline()
    assert baseline.meta.commit_sha == "abc123"
    assert len(baseline.workflows) == 1
    assert len(baseline.actions) == 1


def test_diff_workflows(tmp_path: Path) -> None:
    """A trigger change marks an existing workflow as modified."""
    differ = Differ(tmp_path / "baseline")
    before = WorkflowMeta(name="Test", path="test.yml", triggers=["push"], jobs={})
    after = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push", "pull_request"],
        jobs={},
    )
    diffs = differ.diff_workflows({"test.yml": before}, {"test.yml": after})
    assert len(diffs) == 1
    assert diffs[0].status == "modified"
    assert len(diffs[0].changes) > 0


def test_diff_added_workflow(tmp_path: Path) -> None:
    """A workflow present only on the new side is reported as added."""
    differ = Differ(tmp_path / "baseline")
    added = WorkflowMeta(name="New", path="new.yml", triggers=["push"], jobs={})
    diffs = differ.diff_workflows({}, {"new.yml": added})
    assert len(diffs) == 1
    assert diffs[0].status == "added"
    assert diffs[0].path == "new.yml"


def test_diff_removed_workflow(tmp_path: Path) -> None:
    """A workflow present only on the old side is reported as removed."""
    differ = Differ(tmp_path / "baseline")
    removed = WorkflowMeta(name="Old", path="old.yml", triggers=["push"], jobs={})
    diffs = differ.diff_workflows({"old.yml": removed}, {})
    assert len(diffs) == 1
    assert diffs[0].status == "removed"
    assert diffs[0].path == "old.yml"


def test_load_baseline_not_found(tmp_path: Path) -> None:
    """Loading from a directory that was never written raises FileNotFoundError."""
    differ = Differ(tmp_path / "nonexistent")
    with pytest.raises(FileNotFoundError, match="Baseline not found"):
        differ.load_baseline()
def test_load_baseline_without_meta(tmp_path: Path) -> None:
    """Test loading baseline when meta.json doesn't exist."""
    baseline_path = tmp_path / "baseline"
    baseline_path.mkdir()
    # Create only workflows.json and actions.json
    (baseline_path / "workflows.json").write_text("{}")
    (baseline_path / "actions.json").write_text("{}")
    differ = Differ(baseline_path)
    baseline = differ.load_baseline()
    # Should still load with default meta
    assert baseline.meta is not None
    assert baseline.workflows == {}
    assert baseline.actions == {}


def test_diff_workflows_permissions_change(tmp_path: Path) -> None:
    """Test workflow diff with permissions changes."""
    baseline_path = tmp_path / "baseline"
    differ = Differ(baseline_path)
    # Escalation from read to write on `contents` must surface as a change.
    old_workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        permissions=Permissions(contents=PermissionLevel.READ),
        jobs={},
    )
    new_workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        permissions=Permissions(contents=PermissionLevel.WRITE),
        jobs={},
    )
    diffs = differ.diff_workflows({"test.yml": old_workflow}, {"test.yml": new_workflow})
    assert len(diffs) == 1
    assert diffs[0].status == "modified"
    assert any(c.field == "permissions" for c in diffs[0].changes)


def test_diff_workflows_concurrency_change(tmp_path: Path) -> None:
    """Test workflow diff with concurrency changes."""
    baseline_path = tmp_path / "baseline"
    differ = Differ(baseline_path)
    old_workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        concurrency="group1",
        jobs={},
    )
    new_workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        concurrency="group2",
        jobs={},
    )
    diffs = differ.diff_workflows({"test.yml": old_workflow}, {"test.yml": new_workflow})
    assert len(diffs) == 1
    assert diffs[0].status == "modified"
    assert any(c.field == "concurrency" for c in diffs[0].changes)


def test_diff_workflows_jobs_change(tmp_path: Path) -> None:
    """Test workflow diff with job changes."""
    baseline_path = tmp_path / "baseline"
    differ = Differ(baseline_path)
    old_workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={"build": JobMeta(name="build", runs_on="ubuntu-latest")},
    )
    # New side adds a second job -> the "jobs" field must show a change.
    new_workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={
            "build": JobMeta(name="build", runs_on="ubuntu-latest"),
            "test": JobMeta(name="test", runs_on="ubuntu-latest"),
        },
    )
    diffs = differ.diff_workflows({"test.yml": old_workflow}, {"test.yml": new_workflow})
    assert len(diffs) == 1
    assert diffs[0].status == "modified"
    assert any(c.field == "jobs" for c in diffs[0].changes)


def test_diff_workflows_secrets_change(tmp_path: Path) -> None:
    """Test workflow diff with secrets changes."""
    baseline_path = tmp_path / "baseline"
    differ = Differ(baseline_path)
    old_workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={},
        secrets_used={"API_KEY"},
    )
    new_workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={},
        secrets_used={"API_KEY", "DATABASE_URL"},
    )
    diffs = differ.diff_workflows({"test.yml": old_workflow}, {"test.yml": new_workflow})
    assert len(diffs) == 1
    assert diffs[0].status == "modified"
    assert any(c.field == "secrets_used" for c in diffs[0].changes)


def test_diff_workflows_unchanged(tmp_path: Path) -> None:
    """Test workflow diff when unchanged."""
    baseline_path = tmp_path / "baseline"
    differ = Differ(baseline_path)
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={},
    )
    # Identical object on both sides -> an "unchanged" entry with no changes.
    diffs = differ.diff_workflows({"test.yml": workflow}, {"test.yml": workflow})
    assert len(diffs) == 1
    assert diffs[0].status == "unchanged"
    assert len(diffs[0].changes) == 0
def test_diff_actions_added(tmp_path: Path) -> None:
    """Test action diff with added action."""
    baseline_path = tmp_path / "baseline"
    differ = Differ(baseline_path)
    new_action = ActionManifest(name="New Action", description="Test")
    diffs = differ.diff_actions({}, {"actions/new@v1": new_action})
    assert len(diffs) == 1
    assert diffs[0].status == "added"
    assert diffs[0].key == "actions/new@v1"


def test_diff_actions_removed(tmp_path: Path) -> None:
    """Test action diff with removed action."""
    baseline_path = tmp_path / "baseline"
    differ = Differ(baseline_path)
    old_action = ActionManifest(name="Old Action", description="Test")
    diffs = differ.diff_actions({"actions/old@v1": old_action}, {})
    assert len(diffs) == 1
    assert diffs[0].status == "removed"
    assert diffs[0].key == "actions/old@v1"


def test_diff_actions_unchanged(tmp_path: Path) -> None:
    """Test action diff when unchanged."""
    baseline_path = tmp_path / "baseline"
    differ = Differ(baseline_path)
    action = ActionManifest(name="Test Action", description="Test")
    diffs = differ.diff_actions({"actions/test@v1": action}, {"actions/test@v1": action})
    assert len(diffs) == 1
    assert diffs[0].status == "unchanged"
    assert len(diffs[0].changes) == 0


def test_render_diff_markdown(tmp_path: Path) -> None:
    """Test rendering diff as Markdown."""
    # Diff result models are only needed by this test.
    from ghaw_auditor.models import ActionDiff, DiffEntry, WorkflowDiff

    baseline_path = tmp_path / "baseline"
    differ = Differ(baseline_path)
    # One diff of each status so the report covers all sections.
    workflow_diffs = [
        WorkflowDiff(path="added.yml", status="added", changes=[]),
        WorkflowDiff(path="removed.yml", status="removed", changes=[]),
        WorkflowDiff(
            path="modified.yml",
            status="modified",
            changes=[
                DiffEntry(
                    field="triggers",
                    old_value=["push"],
                    new_value=["push", "pull_request"],
                    change_type="modified",
                )
            ],
        ),
    ]
    action_diffs = [
        ActionDiff(key="actions/new@v1", status="added", changes=[]),
        ActionDiff(key="actions/old@v1", status="removed", changes=[]),
    ]
    output_path = tmp_path / "diff.md"
    differ.render_diff_markdown(workflow_diffs, action_diffs, output_path)
    assert output_path.exists()
    content = output_path.read_text()
    # Check content
    assert "# Audit Diff Report" in content
    assert "## Workflow Changes" in content
    assert "## Action Changes" in content
    assert "added.yml" in content
    assert "removed.yml" in content
    assert "modified.yml" in content
    assert "actions/new@v1" in content
    assert "actions/old@v1" in content
    assert "triggers" in content


def test_render_diff_markdown_empty(tmp_path: Path) -> None:
    """Test rendering empty diff."""
    baseline_path = tmp_path / "baseline"
    differ = Differ(baseline_path)
    output_path = tmp_path / "diff.md"
    differ.render_diff_markdown([], [], output_path)
    assert output_path.exists()
    content = output_path.read_text()
    # Summary counters must render even when there are no changes.
    assert "# Audit Diff Report" in content
    assert "**Added:** 0" in content
    assert "**Removed:** 0" in content

81
tests/test_factory.py Normal file
View File

@@ -0,0 +1,81 @@
"""Tests for factory module."""
from pathlib import Path
from ghaw_auditor.factory import AuditServiceFactory
from ghaw_auditor.models import Policy
def test_factory_create_basic(tmp_path: Path) -> None:
    """Offline creation wires the core components and leaves the rest unset."""
    service = AuditServiceFactory.create(repo_path=tmp_path, offline=True)
    assert service.scanner is not None
    assert service.parser is not None
    assert service.analyzer is not None
    # Offline mode means no network resolver; no policy means no validator.
    assert service.resolver is None
    assert service.validator is None


def test_factory_create_with_policy(tmp_path: Path) -> None:
    """Supplying a policy attaches a validator to the service."""
    svc = AuditServiceFactory.create(
        repo_path=tmp_path,
        offline=True,
        policy=Policy(require_pinned_actions=True),
    )
    assert svc.validator is not None


def test_factory_create_with_resolver(tmp_path: Path) -> None:
    """Online mode with a token builds a resolver."""
    svc = AuditServiceFactory.create(
        repo_path=tmp_path,
        offline=False,
        token="test_token",
    )
    assert svc.resolver is not None


def test_factory_create_with_exclude_patterns(tmp_path: Path) -> None:
    """Exclusion globs are forwarded to the scanner."""
    globs = ["**/node_modules/**", "**/dist/**"]
    svc = AuditServiceFactory.create(
        repo_path=tmp_path,
        offline=True,
        exclude_patterns=globs,
    )
    assert len(svc.scanner.exclude_patterns) == 2


def test_factory_create_with_cache_dir(tmp_path: Path) -> None:
    """A custom cache directory is accepted without error."""
    svc = AuditServiceFactory.create(
        repo_path=tmp_path,
        offline=True,
        cache_dir=tmp_path / "custom_cache",
    )
    # Service created successfully.
    assert svc is not None


def test_factory_create_with_concurrency(tmp_path: Path) -> None:
    """A custom concurrency level propagates to the resolver."""
    svc = AuditServiceFactory.create(
        repo_path=tmp_path,
        offline=False,
        concurrency=8,
    )
    assert svc.resolver is not None
    assert svc.resolver.concurrency == 8

399
tests/test_github_client.py Normal file
View File

@@ -0,0 +1,399 @@
"""Tests for GitHub client module."""
from unittest.mock import Mock, patch
import httpx
import pytest
from ghaw_auditor.github_client import GitHubClient, should_retry_http_error
def test_github_client_initialization_no_token() -> None:
    """A token-less client targets api.github.com with no auth header."""
    client = GitHubClient()
    headers = client.headers
    assert client.base_url == "https://api.github.com"
    assert "Accept" in headers
    assert "Authorization" not in headers
    assert client.client is not None
    client.close()


def test_github_client_initialization_with_token() -> None:
    """A provided token is sent as a Bearer Authorization header."""
    client = GitHubClient(token="ghp_test123")
    assert "Authorization" in client.headers
    assert client.headers["Authorization"] == "Bearer ghp_test123"
    client.close()


def test_github_client_custom_base_url() -> None:
    """A custom base URL (e.g. GitHub Enterprise) is preserved verbatim."""
    enterprise_url = "https://github.enterprise.com/api/v3"
    client = GitHubClient(base_url=enterprise_url)
    assert client.base_url == enterprise_url
    client.close()
@patch("httpx.Client")
def test_get_ref_sha_success(mock_client_class: Mock) -> None:
"""Test successful ref SHA resolution."""
# Setup mock
mock_response = Mock()
mock_response.json.return_value = {"sha": "abc123def456"}
mock_response.raise_for_status = Mock()
mock_http_client = Mock()
mock_http_client.get.return_value = mock_response
mock_client_class.return_value = mock_http_client
# Test
client = GitHubClient(token="test")
sha = client.get_ref_sha("actions", "checkout", "v4")
assert sha == "abc123def456"
mock_http_client.get.assert_called_once_with("https://api.github.com/repos/actions/checkout/commits/v4")
@patch("httpx.Client")
def test_get_ref_sha_http_error(mock_client_class: Mock) -> None:
"""Test ref SHA resolution with HTTP error."""
# Setup mock to raise HTTPStatusError
mock_error_response = Mock()
mock_error_response.status_code = 404
mock_response = Mock()
mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
"404 Not Found",
request=Mock(),
response=mock_error_response,
)
mock_http_client = Mock()
mock_http_client.get.return_value = mock_response
mock_client_class.return_value = mock_http_client
# Test - 404 errors should not be retried, so expect HTTPStatusError
client = GitHubClient(token="test")
with pytest.raises(httpx.HTTPStatusError):
client.get_ref_sha("actions", "nonexistent", "v1")
@patch("httpx.Client")
def test_get_file_content_success(mock_client_class: Mock) -> None:
"""Test successful file content retrieval."""
# Setup mock
mock_response = Mock()
mock_response.text = "name: Test Action\\nruns:\\n using: node20"
mock_response.raise_for_status = Mock()
mock_http_client = Mock()
mock_http_client.get.return_value = mock_response
mock_client_class.return_value = mock_http_client
# Test
client = GitHubClient()
content = client.get_file_content("actions", "checkout", "action.yml", "abc123")
assert "Test Action" in content
mock_http_client.get.assert_called_once_with("https://raw.githubusercontent.com/actions/checkout/abc123/action.yml")
@patch("httpx.Client")
def test_get_file_content_http_error(mock_client_class: Mock) -> None:
    """A 404 while downloading a file surfaces as HTTPStatusError."""
    not_found = Mock()
    not_found.status_code = 404

    api_response = Mock()
    api_response.raise_for_status.side_effect = httpx.HTTPStatusError(
        "404 Not Found",
        request=Mock(),
        response=not_found,
    )

    fake_http = Mock()
    fake_http.get.return_value = api_response
    mock_client_class.return_value = fake_http

    # 404s are not retried; the caller sees the raw HTTP error.
    github = GitHubClient()
    with pytest.raises(httpx.HTTPStatusError):
        github.get_file_content("actions", "checkout", "missing.yml", "abc123")
@patch("httpx.Client")
def test_github_client_context_manager(mock_client_class: Mock) -> None:
    """Leaving the `with` block closes the underlying httpx client."""
    fake_http = Mock()
    mock_client_class.return_value = fake_http

    with GitHubClient(token="test") as client:
        assert client is not None
        assert isinstance(client, GitHubClient)

    fake_http.close.assert_called_once()


@patch("httpx.Client")
def test_github_client_close(mock_client_class: Mock) -> None:
    """close() delegates to the underlying httpx client's close()."""
    fake_http = Mock()
    mock_client_class.return_value = fake_http

    GitHubClient().close()
    fake_http.close.assert_called_once()
@patch("httpx.Client")
def test_github_client_logs_successful_ref_sha(mock_client_class: Mock, caplog: pytest.LogCaptureFixture) -> None:
    """Successful ref SHA lookups emit DEBUG lines for request and resolution."""
    import logging

    api_response = Mock()
    api_response.json.return_value = {"sha": "abc123def"}
    fake_http = Mock()
    fake_http.get.return_value = api_response
    mock_client_class.return_value = fake_http

    with caplog.at_level(logging.DEBUG):
        github = GitHubClient(token="test")
        resolved = github.get_ref_sha("actions", "checkout", "v4")
        assert resolved == "abc123def"
        assert "Fetching ref SHA: actions/checkout@v4" in caplog.text
        assert "Resolved actions/checkout@v4 -> abc123def" in caplog.text


@patch("httpx.Client")
def test_github_client_logs_4xx_error(mock_client_class: Mock, caplog: pytest.LogCaptureFixture) -> None:
    """A 404 produces a user-friendly ERROR log naming the missing action."""
    import logging

    api_response = Mock()
    api_response.status_code = 404
    api_response.raise_for_status.side_effect = httpx.HTTPStatusError(
        "Not found", request=Mock(), response=api_response
    )
    fake_http = Mock()
    fake_http.get.return_value = api_response
    mock_client_class.return_value = fake_http

    with caplog.at_level(logging.ERROR):
        github = GitHubClient()
        with pytest.raises(httpx.HTTPStatusError):
            github.get_ref_sha("actions", "nonexistent", "v1")
        # The log should translate the HTTP status into a readable message.
        assert "Action not found" in caplog.text
        assert "actions/nonexistent@v1" in caplog.text


@patch("httpx.Client")
def test_github_client_logs_successful_file_content(mock_client_class: Mock, caplog: pytest.LogCaptureFixture) -> None:
    """Successful file downloads emit DEBUG lines including the byte count."""
    import logging

    api_response = Mock()
    api_response.text = "name: Checkout\ndescription: Test"
    fake_http = Mock()
    fake_http.get.return_value = api_response
    mock_client_class.return_value = fake_http

    with caplog.at_level(logging.DEBUG):
        github = GitHubClient(token="test")
        body = github.get_file_content("actions", "checkout", "action.yml", "v4")
        assert body == "name: Checkout\ndescription: Test"
        assert "Fetching file: actions/checkout/action.yml@v4" in caplog.text
        assert "Downloaded action.yml" in caplog.text
        assert "bytes" in caplog.text
@patch("httpx.Client")
def test_github_client_retries_5xx_errors(mock_client_class: Mock) -> None:
    """5xx responses are retried; exhaustion raises tenacity's RetryError."""
    from tenacity import RetryError

    api_response = Mock()
    api_response.status_code = 500
    api_response.raise_for_status.side_effect = httpx.HTTPStatusError(
        "Server error", request=Mock(), response=api_response
    )
    fake_http = Mock()
    fake_http.get.return_value = api_response
    mock_client_class.return_value = fake_http

    github = GitHubClient()
    with pytest.raises(RetryError):
        github.get_ref_sha("actions", "checkout", "v1")

    # Three attempts total before the retry policy gives up.
    assert fake_http.get.call_count == 3


@patch("httpx.Client")
def test_github_client_logs_5xx_warning(mock_client_class: Mock, caplog: pytest.LogCaptureFixture) -> None:
    """Transient 5xx failures are logged at WARNING with the status code."""
    import logging

    from tenacity import RetryError

    api_response = Mock()
    api_response.status_code = 503
    api_response.raise_for_status.side_effect = httpx.HTTPStatusError(
        "Service unavailable", request=Mock(), response=api_response
    )
    fake_http = Mock()
    fake_http.get.return_value = api_response
    mock_client_class.return_value = fake_http

    with caplog.at_level(logging.WARNING):
        github = GitHubClient()
        with pytest.raises(RetryError):
            github.get_file_content("actions", "checkout", "action.yml", "v4")
        assert "HTTP 503" in caplog.text
def test_should_retry_http_error_network_errors() -> None:
    """Network-level failures are transient, so they qualify for retry."""
    assert should_retry_http_error(httpx.RequestError("Connection failed")) is True


def test_should_retry_http_error_404() -> None:
    """A 404 is permanent: retrying cannot make the resource appear."""
    resp = Mock()
    resp.status_code = 404
    err = httpx.HTTPStatusError("Not found", request=Mock(), response=resp)
    assert should_retry_http_error(err) is False


def test_should_retry_http_error_403() -> None:
    """A 403 indicates a permissions problem, not a transient fault."""
    resp = Mock()
    resp.status_code = 403
    err = httpx.HTTPStatusError("Forbidden", request=Mock(), response=resp)
    assert should_retry_http_error(err) is False


def test_should_retry_http_error_429() -> None:
    """Rate limiting (429) is expected to clear, so it is retried."""
    resp = Mock()
    resp.status_code = 429
    err = httpx.HTTPStatusError("Rate limited", request=Mock(), response=resp)
    assert should_retry_http_error(err) is True


def test_should_retry_http_error_500() -> None:
    """Server-side 5xx failures are treated as transient and retried."""
    resp = Mock()
    resp.status_code = 500
    err = httpx.HTTPStatusError("Server error", request=Mock(), response=resp)
    assert should_retry_http_error(err) is True


def test_should_retry_http_error_other() -> None:
    """Exceptions unrelated to HTTP are never retried."""
    assert should_retry_http_error(ValueError("Some other error")) is False
@patch("httpx.Client")
def test_github_client_logs_403_error(mock_client_class: Mock, caplog: pytest.LogCaptureFixture) -> None:
    """A 403 in get_ref_sha is logged with a token-permissions hint."""
    import logging

    api_response = Mock()
    api_response.status_code = 403
    api_response.raise_for_status.side_effect = httpx.HTTPStatusError(
        "Forbidden", request=Mock(), response=api_response
    )
    fake_http = Mock()
    fake_http.get.return_value = api_response
    mock_client_class.return_value = fake_http

    with caplog.at_level(logging.ERROR):
        github = GitHubClient()
        with pytest.raises(httpx.HTTPStatusError):
            github.get_ref_sha("actions", "checkout", "v1")
        assert "Access denied (check token permissions)" in caplog.text


@patch("httpx.Client")
def test_github_client_logs_401_error(mock_client_class: Mock, caplog: pytest.LogCaptureFixture) -> None:
    """A 401 in get_file_content is logged as an authentication problem."""
    import logging

    api_response = Mock()
    api_response.status_code = 401
    api_response.raise_for_status.side_effect = httpx.HTTPStatusError(
        "Unauthorized", request=Mock(), response=api_response
    )
    fake_http = Mock()
    fake_http.get.return_value = api_response
    mock_client_class.return_value = fake_http

    with caplog.at_level(logging.ERROR):
        github = GitHubClient()
        with pytest.raises(httpx.HTTPStatusError):
            github.get_file_content("actions", "checkout", "action.yml", "abc123")
        assert "Authentication required" in caplog.text


@patch("httpx.Client")
def test_github_client_logs_401_error_get_ref_sha(mock_client_class: Mock, caplog: pytest.LogCaptureFixture) -> None:
    """A 401 in get_ref_sha is logged as an authentication problem."""
    import logging

    api_response = Mock()
    api_response.status_code = 401
    api_response.raise_for_status.side_effect = httpx.HTTPStatusError(
        "Unauthorized", request=Mock(), response=api_response
    )
    fake_http = Mock()
    fake_http.get.return_value = api_response
    mock_client_class.return_value = fake_http

    with caplog.at_level(logging.ERROR):
        github = GitHubClient()
        with pytest.raises(httpx.HTTPStatusError):
            github.get_ref_sha("actions", "checkout", "v1")
        assert "Authentication required" in caplog.text


@patch("httpx.Client")
def test_github_client_logs_403_error_get_file_content(
    mock_client_class: Mock, caplog: pytest.LogCaptureFixture
) -> None:
    """A 403 in get_file_content is logged with a token-permissions hint."""
    import logging

    api_response = Mock()
    api_response.status_code = 403
    api_response.raise_for_status.side_effect = httpx.HTTPStatusError(
        "Forbidden", request=Mock(), response=api_response
    )
    fake_http = Mock()
    fake_http.get.return_value = api_response
    mock_client_class.return_value = fake_http

    with caplog.at_level(logging.ERROR):
        github = GitHubClient()
        with pytest.raises(httpx.HTTPStatusError):
            github.get_file_content("actions", "checkout", "action.yml", "abc123")
        assert "Access denied (check token permissions)" in caplog.text

168
tests/test_golden.py Normal file
View File

@@ -0,0 +1,168 @@
"""Golden file tests for reports."""
import json
from pathlib import Path
from ghaw_auditor.models import (
ActionInput,
ActionManifest,
JobMeta,
WorkflowMeta,
)
from ghaw_auditor.renderer import Renderer
def test_json_workflow_output(tmp_path: Path) -> None:
    """Rendered workflow JSON should agree with the checked-in golden file."""
    renderer = Renderer(tmp_path)
    meta = WorkflowMeta(
        name="Test Workflow",
        path="test.yml",
        triggers=["push", "pull_request"],
        jobs={
            "test": JobMeta(
                name="test",
                runs_on="ubuntu-latest",
                secrets_used={"GITHUB_TOKEN"},
            )
        },
        secrets_used={"GITHUB_TOKEN"},
    )
    renderer.render_json({"test.yml": meta}, {}, [])

    generated = json.loads((tmp_path / "workflows.json").read_text())
    golden_path = Path(__file__).parent / "golden" / "workflows.json"
    golden = json.loads(golden_path.read_text())

    # Compare field by field; list ordering may legitimately differ.
    assert generated["test.yml"]["name"] == golden["test.yml"]["name"]
    assert set(generated["test.yml"]["triggers"]) == set(golden["test.yml"]["triggers"])
    assert generated["test.yml"]["jobs"]["test"]["runs_on"] == golden["test.yml"]["jobs"]["test"]["runs_on"]


def test_json_action_output(tmp_path: Path) -> None:
    """Rendered action JSON should agree with the checked-in golden file."""
    renderer = Renderer(tmp_path)
    manifest = ActionManifest(
        name="Checkout",
        description="Checkout a Git repository",
        author="GitHub",
        inputs={
            "repository": ActionInput(
                name="repository",
                description="Repository name with owner",
                required=False,
            ),
            "ref": ActionInput(
                name="ref",
                description="The branch, tag or SHA to checkout",
                required=False,
            ),
        },
        runs={"using": "node20", "main": "dist/index.js"},
        is_javascript=True,
    )
    renderer.render_json({}, {"actions/checkout@abc123": manifest}, [])

    generated = json.loads((tmp_path / "actions.json").read_text())
    golden_path = Path(__file__).parent / "golden" / "actions.json"
    golden = json.loads(golden_path.read_text())

    assert generated["actions/checkout@abc123"]["name"] == golden["actions/checkout@abc123"]["name"]
    assert generated["actions/checkout@abc123"]["is_javascript"] is True
def test_markdown_report_structure(tmp_path: Path) -> None:
    """The markdown report contains the expected headings and key content."""
    renderer = Renderer(tmp_path)
    workflows = {
        "test.yml": WorkflowMeta(
            name="Test Workflow",
            path="test.yml",
            triggers=["push", "pull_request"],
            jobs={
                "test": JobMeta(
                    name="test",
                    runs_on="ubuntu-latest",
                    secrets_used={"GITHUB_TOKEN"},
                )
            },
            secrets_used={"GITHUB_TOKEN"},
        )
    }
    actions = {
        "actions/checkout@abc123": ActionManifest(
            name="Checkout",
            description="Checkout a Git repository",
            inputs={
                "repository": ActionInput(
                    name="repository",
                    description="Repository name with owner",
                ),
                "ref": ActionInput(
                    name="ref",
                    description="The branch, tag or SHA to checkout",
                ),
            },
        )
    }
    analysis = {
        "total_jobs": 1,
        "reusable_workflows": 0,
        "triggers": {"push": 1, "pull_request": 1},
        "runners": {"ubuntu-latest": 1},
        "secrets": {"total_unique_secrets": 1, "secrets": ["GITHUB_TOKEN"]},
    }
    renderer.render_markdown(workflows, actions, [], analysis)

    report = (tmp_path / "report.md").read_text()

    # Every top-level section must be present...
    for heading in (
        "# GitHub Actions & Workflows Audit Report",
        "## Summary",
        "## Analysis",
        "## Workflows",
        "## Actions Inventory",
    ):
        assert heading in report

    # ...along with the concrete data we fed in.
    for snippet in ("Test Workflow", "Checkout", "GITHUB_TOKEN", "`ubuntu-latest`"):
        assert snippet in report


def test_empty_report_generation(tmp_path: Path) -> None:
    """Rendering with no data still produces all output files."""
    renderer = Renderer(tmp_path)
    renderer.render_json({}, {}, [])
    renderer.render_markdown({}, {}, [], {})

    # Every artifact exists even when there is nothing to report.
    for filename in ("workflows.json", "actions.json", "violations.json", "report.md"):
        assert (tmp_path / filename).exists()

    assert json.loads((tmp_path / "workflows.json").read_text()) == {}

105
tests/test_models.py Normal file
View File

@@ -0,0 +1,105 @@
"""Tests for models."""
from datetime import datetime
from ghaw_auditor.models import (
ActionInput,
ActionManifest,
ActionRef,
ActionType,
BaselineMeta,
PermissionLevel,
Permissions,
)
def test_action_ref_canonical_key_github() -> None:
    """GitHub actions key on owner/repo pinned to the resolved SHA."""
    ref = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="checkout",
        ref="v4",
        resolved_sha="abc123",
        source_file="test.yml",
    )
    assert ref.canonical_key() == "actions/checkout@abc123"


def test_action_ref_canonical_key_local() -> None:
    """Local actions key on their path with a `local:` prefix."""
    ref = ActionRef(
        type=ActionType.LOCAL,
        path="./.github/actions/custom",
        source_file="test.yml",
    )
    assert ref.canonical_key() == "local:./.github/actions/custom"


def test_action_ref_canonical_key_reusable_workflow() -> None:
    """Reusable workflows key on owner/repo/path pinned to the resolved SHA."""
    ref = ActionRef(
        type=ActionType.REUSABLE_WORKFLOW,
        owner="owner",
        repo="repo",
        path=".github/workflows/reusable.yml",
        ref="v1",
        resolved_sha="abc123",
        source_file="test.yml",
    )
    assert ref.canonical_key() == "owner/repo/.github/workflows/reusable.yml@abc123"


def test_action_ref_canonical_key_docker() -> None:
    """Docker actions key on their image reference with a `docker:` prefix."""
    ref = ActionRef(
        type=ActionType.DOCKER,
        path="docker://alpine:3.8",
        source_file="test.yml",
    )
    assert ref.canonical_key() == "docker:docker://alpine:3.8"
def test_permissions_model() -> None:
    """Permission levels round-trip through the Permissions model."""
    perms = Permissions(
        contents=PermissionLevel.READ,
        pull_requests=PermissionLevel.WRITE,
    )
    assert perms.contents == PermissionLevel.READ
    assert perms.pull_requests == PermissionLevel.WRITE


def test_action_manifest() -> None:
    """ActionManifest stores name and keyed inputs as given."""
    manifest = ActionManifest(
        name="Test Action",
        description="A test action",
        inputs={"test-input": ActionInput(name="test-input", required=True)},
    )
    assert manifest.name == "Test Action"
    assert "test-input" in manifest.inputs
    assert manifest.inputs["test-input"].required is True


def test_baseline_meta() -> None:
    """BaselineMeta records audit provenance and defaults schema_version."""
    meta = BaselineMeta(
        auditor_version="1.0.0",
        commit_sha="abc123",
        timestamp=datetime.now(),
    )
    assert meta.auditor_version == "1.0.0"
    assert meta.commit_sha == "abc123"
    assert meta.schema_version == "1.0"  # default supplied by the model

672
tests/test_parser.py Normal file
View File

@@ -0,0 +1,672 @@
"""Tests for parser module."""
from pathlib import Path
import pytest
from ghaw_auditor.models import ActionType, PermissionLevel
from ghaw_auditor.parser import Parser
FIXTURES_DIR = Path(__file__).parent / "fixtures"
def test_parser_initialization() -> None:
    """A freshly constructed parser has its YAML loader ready."""
    assert Parser(Path.cwd()).yaml is not None


# ============================================================================
# Workflow Parsing Tests
# ============================================================================


def test_parse_basic_workflow() -> None:
    """A minimal workflow yields name, trigger, job, and action metadata."""
    workflow = Parser(FIXTURES_DIR).parse_workflow(FIXTURES_DIR / "basic-workflow.yml")

    assert workflow.name == "Basic Workflow"
    assert workflow.path == "basic-workflow.yml"
    assert workflow.triggers == ["push"]

    assert "test" in workflow.jobs
    job = workflow.jobs["test"]
    assert job.runs_on == "ubuntu-latest"
    assert len(job.actions_used) == 1
    assert job.actions_used[0].owner == "actions"
    assert job.actions_used[0].repo == "checkout"
def test_parse_complex_workflow() -> None:
    """A feature-rich workflow fixture exercises every parsed field."""
    parser = Parser(FIXTURES_DIR)
    workflow = parser.parse_workflow(FIXTURES_DIR / "complex-workflow.yml")

    # Top-level metadata
    assert workflow.name == "Complex Workflow"
    assert set(workflow.triggers) == {"push", "pull_request", "workflow_dispatch"}

    # Workflow-level permissions
    perms = workflow.permissions
    assert perms is not None
    assert perms.contents == PermissionLevel.READ
    assert perms.issues == PermissionLevel.WRITE
    assert perms.pull_requests == PermissionLevel.WRITE

    # Environment, concurrency, and run defaults
    assert workflow.env["NODE_ENV"] == "production"
    assert workflow.env["API_URL"] == "https://api.example.com"
    assert workflow.concurrency is not None
    assert workflow.defaults["run"]["shell"] == "bash"

    # Both jobs parsed
    assert "build" in workflow.jobs
    assert "test" in workflow.jobs

    # Build job specifics
    build_job = workflow.jobs["build"]
    assert build_job.timeout_minutes == 30
    assert build_job.permissions is not None
    assert build_job.environment == {"name": "production", "url": "https://example.com"}

    # Test job specifics: dependencies, condition, container
    test_job = workflow.jobs["test"]
    assert test_job.needs == ["build"]
    assert test_job.if_condition == "github.event_name == 'pull_request'"
    assert test_job.container is not None
    assert test_job.container.image == "node:20-alpine"
    assert "NODE_ENV" in test_job.container.env
    assert test_job.continue_on_error is True

    # Sidecar services
    assert "postgres" in test_job.services
    assert test_job.services["postgres"].image == "postgres:15"

    # Matrix strategy
    assert test_job.strategy is not None
    assert test_job.strategy.fail_fast is False
    assert test_job.strategy.max_parallel == 2

    # Secrets referenced anywhere in the workflow are collected
    assert "API_KEY" in workflow.secrets_used
    assert "GITHUB_TOKEN" in workflow.secrets_used
    assert "DATABASE_URL" in workflow.secrets_used
def test_parse_reusable_workflow() -> None:
    """A workflow_call workflow exposes its inputs/outputs/secrets contract."""
    workflow = Parser(FIXTURES_DIR).parse_workflow(FIXTURES_DIR / "reusable-workflow.yml")

    assert workflow.is_reusable is True
    contract = workflow.reusable_contract
    assert contract is not None

    # Declared inputs with required/default metadata
    assert "environment" in contract.inputs
    assert contract.inputs["environment"]["required"] is True
    assert contract.inputs["debug"]["default"] is False

    # Declared outputs
    assert "deployment-id" in contract.outputs

    # Declared secrets
    assert "deploy-token" in contract.secrets
    assert contract.secrets["deploy-token"]["required"] is True


def test_parse_workflow_with_empty_workflow_call() -> None:
    """An empty workflow_call block still marks the workflow reusable."""
    workflow = Parser(FIXTURES_DIR).parse_workflow(FIXTURES_DIR / "empty-workflow-call.yml")

    assert workflow.is_reusable is True
    # With nothing declared, the contract is absent or has no inputs.
    assert workflow.reusable_contract is None or workflow.reusable_contract.inputs == {}


def test_parse_empty_workflow() -> None:
    """An empty workflow file is rejected with a descriptive ValueError."""
    with pytest.raises(ValueError, match="Empty workflow file"):
        Parser(FIXTURES_DIR).parse_workflow(FIXTURES_DIR / "invalid-workflow.yml")
# ============================================================================
# Action Reference Parsing Tests
# ============================================================================


def test_parse_action_ref_github() -> None:
    """`owner/repo@ref` parses into a GitHub action reference."""
    ref = Parser(Path.cwd())._parse_action_ref("actions/checkout@v4", Path("test.yml"))
    assert ref.type == ActionType.GITHUB
    assert ref.owner == "actions"
    assert ref.repo == "checkout"
    assert ref.ref == "v4"


def test_parse_action_ref_github_with_path() -> None:
    """`owner/repo/sub/path@ref` (monorepo action) keeps the subpath."""
    ref = Parser(Path.cwd())._parse_action_ref("owner/repo/path/to/action@v1", Path("test.yml"))
    assert ref.type == ActionType.GITHUB
    assert ref.owner == "owner"
    assert ref.repo == "repo"
    assert ref.path == "path/to/action"
    assert ref.ref == "v1"


def test_parse_action_ref_local() -> None:
    """A `./`-prefixed reference is a local action keyed by path."""
    ref = Parser(Path.cwd())._parse_action_ref("./.github/actions/custom", Path("test.yml"))
    assert ref.type == ActionType.LOCAL
    assert ref.path == "./.github/actions/custom"


def test_parse_action_ref_docker() -> None:
    """A `docker://` reference is a Docker action keyed by image."""
    ref = Parser(Path.cwd())._parse_action_ref("docker://alpine:3.8", Path("test.yml"))
    assert ref.type == ActionType.DOCKER
    assert ref.path == "docker://alpine:3.8"


def test_parse_action_ref_invalid() -> None:
    """A reference matching no known shape raises ValueError."""
    with pytest.raises(ValueError, match="Invalid action reference"):
        Parser(Path.cwd())._parse_action_ref("invalid-ref", Path("test.yml"))


def test_extract_secrets() -> None:
    """Secret names are scraped from `secrets.*` expressions in raw text."""
    content = """
    env:
      TOKEN: ${{ secrets.GITHUB_TOKEN }}
      API_KEY: ${{ secrets.API_KEY }}
    """
    found = Parser(Path.cwd())._extract_secrets(content)
    assert "GITHUB_TOKEN" in found
    assert "API_KEY" in found
    assert len(found) == 2
# ============================================================================
# Trigger Extraction Tests
# ============================================================================


def test_extract_triggers_string() -> None:
    """A bare string trigger becomes a single-element list."""
    assert Parser(Path.cwd())._extract_triggers("push") == ["push"]


def test_extract_triggers_list() -> None:
    """A list of triggers is passed through unchanged."""
    assert Parser(Path.cwd())._extract_triggers(["push", "pull_request"]) == ["push", "pull_request"]


def test_extract_triggers_dict() -> None:
    """A mapping of triggers yields its keys (order not significant)."""
    raw = {
        "push": {"branches": ["main"]},
        "pull_request": None,
        "workflow_dispatch": None,
    }
    extracted = Parser(Path.cwd())._extract_triggers(raw)
    assert set(extracted) == {"push", "pull_request", "workflow_dispatch"}


def test_extract_triggers_empty() -> None:
    """A missing `on:` section (None) produces an empty trigger list."""
    assert Parser(Path.cwd())._extract_triggers(None) == []
# ============================================================================
# Permissions Parsing Tests
# ============================================================================


def test_parse_permissions_none() -> None:
    """No permissions block parses to None."""
    assert Parser(Path.cwd())._parse_permissions(None) is None


def test_parse_permissions_string() -> None:
    """Shorthand string forms like `read-all` still yield a Permissions object."""
    perms = Parser(Path.cwd())._parse_permissions("read-all")
    assert perms is not None


def test_parse_permissions_dict() -> None:
    """A scope->level mapping parses into typed permission levels."""
    perms = Parser(Path.cwd())._parse_permissions(
        {
            "contents": "read",
            "issues": "write",
            "pull_requests": "write",
        }
    )
    assert perms is not None
    assert perms.contents == PermissionLevel.READ
    assert perms.issues == PermissionLevel.WRITE
    assert perms.pull_requests == PermissionLevel.WRITE
# ============================================================================
# Job Parsing Tests
# ============================================================================


def test_parse_job_with_none_data() -> None:
    """A job with no body still parses, falling back to the default runner."""
    job = Parser(Path.cwd())._parse_job("test", None, Path("test.yml"), "")
    assert job.name == "test"
    assert job.runs_on == "ubuntu-latest"  # parser's default runner


def test_parse_job_needs_string_vs_list() -> None:
    """`needs` is normalized to a list whether given as a string or a list."""
    parser = Parser(Path.cwd())

    from_string = parser._parse_job("test", {"needs": "build"}, Path("test.yml"), "")
    assert from_string.needs == ["build"]

    from_list = parser._parse_job("test", {"needs": ["build", "lint"]}, Path("test.yml"), "")
    assert from_list.needs == ["build", "lint"]


def test_parse_job_with_none_steps() -> None:
    """None entries in the steps list are skipped rather than crashing."""
    job = Parser(Path.cwd())._parse_job(
        "test",
        {"steps": [None, {"uses": "actions/checkout@v4"}]},
        Path("test.yml"),
        "",
    )
    assert len(job.actions_used) == 1
    assert job.actions_used[0].repo == "checkout"
# ============================================================================
# Container/Services/Strategy Parsing Tests
# ============================================================================


def test_parse_container_none() -> None:
    """No container block parses to None."""
    assert Parser(Path.cwd())._parse_container(None) is None


def test_parse_container_string() -> None:
    """A bare image string becomes a container with that image."""
    parsed = Parser(Path.cwd())._parse_container("ubuntu:latest")
    assert parsed is not None
    assert parsed.image == "ubuntu:latest"


def test_parse_container_dict() -> None:
    """A full container mapping carries every field through."""
    parsed = Parser(Path.cwd())._parse_container(
        {
            "image": "node:20",
            "credentials": {"username": "user", "password": "pass"},
            "env": {"NODE_ENV": "test"},
            "ports": [8080],
            "volumes": ["/tmp:/tmp"],
            "options": "--cpus 2",
        }
    )
    assert parsed is not None
    assert parsed.image == "node:20"
    assert parsed.credentials == {"username": "user", "password": "pass"}
    assert parsed.env["NODE_ENV"] == "test"
    assert parsed.ports == [8080]
    assert parsed.volumes == ["/tmp:/tmp"]
    assert parsed.options == "--cpus 2"
def test_parse_services_none() -> None:
    """No services block parses to an empty mapping."""
    assert Parser(Path.cwd())._parse_services(None) == {}


def test_parse_services_string_image() -> None:
    """A service given as a bare image string gets name and image filled in."""
    parsed = Parser(Path.cwd())._parse_services({"postgres": "postgres:15"})
    assert "postgres" in parsed
    assert parsed["postgres"].name == "postgres"
    assert parsed["postgres"].image == "postgres:15"


def test_parse_services_dict() -> None:
    """A full service mapping carries image and ports through."""
    parsed = Parser(Path.cwd())._parse_services(
        {
            "redis": {
                "image": "redis:alpine",
                "ports": [6379],
                "options": "--health-cmd 'redis-cli ping'",
            }
        }
    )
    assert "redis" in parsed
    assert parsed["redis"].image == "redis:alpine"
    assert parsed["redis"].ports == [6379]


def test_parse_strategy_none() -> None:
    """No strategy block parses to None."""
    assert Parser(Path.cwd())._parse_strategy(None) is None


def test_parse_strategy_matrix() -> None:
    """A matrix strategy keeps the matrix, fail-fast, and parallelism."""
    parsed = Parser(Path.cwd())._parse_strategy(
        {
            "matrix": {"node-version": [18, 20], "os": ["ubuntu-latest", "windows-latest"]},
            "fail-fast": False,
            "max-parallel": 4,
        }
    )
    assert parsed is not None
    assert parsed.matrix == {"node-version": [18, 20], "os": ["ubuntu-latest", "windows-latest"]}
    assert parsed.fail_fast is False
    assert parsed.max_parallel == 4
# ============================================================================
# Action Manifest Parsing Tests
# ============================================================================


def test_parse_composite_action() -> None:
    """A composite action manifest sets the composite flag and metadata."""
    action = Parser(FIXTURES_DIR).parse_action(FIXTURES_DIR / "composite-action.yml")

    assert action.name == "Composite Action"
    assert action.description == "A composite action example"
    assert action.author == "Test Author"

    # Exactly one action kind should be flagged
    assert action.is_composite is True
    assert action.is_docker is False
    assert action.is_javascript is False

    # Inputs with required/default metadata
    assert "message" in action.inputs
    assert action.inputs["message"].required is True
    assert "debug" in action.inputs
    assert action.inputs["debug"].required is False
    assert action.inputs["debug"].default == "false"

    # Outputs
    assert "result" in action.outputs
    assert action.outputs["result"].description == "Action result"

    # Branding block preserved
    assert action.branding is not None


def test_parse_docker_action() -> None:
    """A Docker action manifest sets only the docker flag."""
    action = Parser(FIXTURES_DIR).parse_action(FIXTURES_DIR / "docker-action.yml")

    assert action.name == "Docker Action"
    assert action.is_docker is True
    assert action.is_composite is False
    assert action.is_javascript is False

    assert "dockerfile" in action.inputs
    assert action.inputs["dockerfile"].default == "Dockerfile"
    assert "image-id" in action.outputs


def test_parse_javascript_action() -> None:
    """A JavaScript action manifest sets only the javascript flag."""
    action = Parser(FIXTURES_DIR).parse_action(FIXTURES_DIR / "javascript-action.yml")

    assert action.name == "JavaScript Action"
    assert action.is_javascript is True
    assert action.is_composite is False
    assert action.is_docker is False

    # The runs block carries the node runtime and entry point
    assert action.runs["using"] == "node20"
    assert action.runs["main"] == "dist/index.js"
def test_parse_action_with_various_defaults() -> None:
    """Input defaults keep their YAML types (string, bool, number)."""
    action = Parser(FIXTURES_DIR).parse_action(FIXTURES_DIR / "action-with-defaults.yml")

    assert action.name == "Action with Various Defaults"
    assert action.inputs["string-input"].default == "hello"
    assert action.inputs["boolean-input"].default is True
    assert action.inputs["number-input"].default == 42
    # An input without a default must be marked required
    assert action.inputs["no-default"].required is True


def test_parse_action_empty_inputs_outputs() -> None:
    """Inputs/outputs collections are always present, never None."""
    action = Parser(FIXTURES_DIR).parse_action(FIXTURES_DIR / "composite-action.yml")
    assert action.inputs is not None
    assert action.outputs is not None


def test_parse_empty_action() -> None:
    """An empty action file is rejected with a descriptive ValueError."""
    with pytest.raises(ValueError, match="Empty action file"):
        Parser(FIXTURES_DIR).parse_action(FIXTURES_DIR / "invalid-action.yml")
# ============================================================================
# Reusable Workflow Tests
# ============================================================================
def test_parse_reusable_workflow_caller() -> None:
    """Jobs that `uses:` a reusable workflow are parsed with secrets and inputs intact."""
    workflow = Parser(FIXTURES_DIR).parse_workflow(FIXTURES_DIR / "reusable-workflow-caller.yml")
    assert workflow.name == "Reusable Workflow Caller"
    for job_id in ("call-workflow", "call-workflow-inherit", "call-local-workflow"):
        assert job_id in workflow.jobs

    # Job passing explicit secrets and `with:` inputs.
    explicit = workflow.jobs["call-workflow"]
    assert explicit.uses == "owner/repo/.github/workflows/deploy.yml@v1"
    assert explicit.with_inputs["environment"] == "production"
    assert explicit.with_inputs["debug"] is False
    assert explicit.with_inputs["version"] == "1.2.3"
    assert explicit.secrets_passed is not None
    assert "deploy-token" in explicit.secrets_passed
    assert explicit.inherit_secrets is False

    # The callee must be tracked as a REUSABLE_WORKFLOW action reference.
    assert len(explicit.actions_used) == 1
    callee = explicit.actions_used[0]
    assert callee.type == ActionType.REUSABLE_WORKFLOW
    assert callee.owner == "owner"
    assert callee.repo == "repo"
    assert callee.path == ".github/workflows/deploy.yml"
    assert callee.ref == "v1"

    # Job using `secrets: inherit` carries no explicit secret list.
    inherited = workflow.jobs["call-workflow-inherit"]
    assert inherited.uses == "owner/repo/.github/workflows/test.yml@main"
    assert inherited.inherit_secrets is True
    assert inherited.secrets_passed is None

    # A repository-local reusable workflow keeps its relative path.
    local = workflow.jobs["call-local-workflow"]
    assert local.uses == "./.github/workflows/shared.yml"
    assert local.actions_used[0].type == ActionType.REUSABLE_WORKFLOW
    assert local.actions_used[0].path == "./.github/workflows/shared.yml"
def test_parse_job_with_outputs() -> None:
    """Job-level outputs are captured as a mapping of output name to value."""
    workflow = Parser(FIXTURES_DIR).parse_workflow(FIXTURES_DIR / "job-with-outputs.yml")
    assert "build" in workflow.jobs
    outputs = workflow.jobs["build"].outputs
    assert outputs is not None
    for key in ("version", "artifact-url", "status"):
        assert key in outputs
    assert outputs["status"] == "success"
def test_parse_reusable_workflow_ref_local() -> None:
    """A `./...` uses-string resolves to a local reusable-workflow reference."""
    uses = "./.github/workflows/deploy.yml"
    ref = Parser(Path.cwd())._parse_reusable_workflow_ref(uses, Path("test.yml"))
    assert ref.type == ActionType.REUSABLE_WORKFLOW
    assert ref.path == "./.github/workflows/deploy.yml"
def test_parse_reusable_workflow_ref_github() -> None:
    """An `owner/repo/path@ref` uses-string is split into its four components."""
    uses = "actions/reusable/.github/workflows/build.yml@v1"
    ref = Parser(Path.cwd())._parse_reusable_workflow_ref(uses, Path("test.yml"))
    assert ref.type == ActionType.REUSABLE_WORKFLOW
    assert ref.owner == "actions"
    assert ref.repo == "reusable"
    assert ref.path == ".github/workflows/build.yml"
    assert ref.ref == "v1"
def test_parse_reusable_workflow_ref_invalid() -> None:
    """A malformed uses-string must raise ValueError with a helpful message."""
    bad_parser = Parser(Path.cwd())
    with pytest.raises(ValueError, match="Invalid reusable workflow reference"):
        bad_parser._parse_reusable_workflow_ref("invalid-workflow-ref", Path("test.yml"))
def test_parse_permissions_invalid_type(tmp_path: Path) -> None:
    """_parse_permissions returns None for every unsupported YAML value type."""
    parser = Parser(tmp_path)
    # bool, int, and list are all invalid shapes for a permissions block.
    for bogus in (True, 123, ["read", "write"]):
        assert parser._parse_permissions(bogus) is None
def test_parse_workflow_with_boolean_and_number_env(tmp_path: Path) -> None:
    """YAML scalar types in the workflow `env:` block survive parsing as Python types."""
    workflow_file = tmp_path / "test.yml"
    workflow_file.write_text(
        """
name: Test
on: push
env:
  STRING_VAR: "hello"
  BOOL_VAR: true
  NUMBER_VAR: 42
  FLOAT_VAR: 3.14
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - run: echo test
"""
    )
    env = Parser(tmp_path).parse_workflow(workflow_file).env
    assert env["STRING_VAR"] == "hello"
    assert env["BOOL_VAR"] is True  # YAML `true` -> Python bool
    assert env["NUMBER_VAR"] == 42  # YAML int stays int
    assert env["FLOAT_VAR"] == 3.14  # YAML float stays float

256
tests/test_policy.py Normal file
View File

@@ -0,0 +1,256 @@
"""Tests for policy validator."""
from ghaw_auditor.models import ActionRef, ActionType, JobMeta, Policy, WorkflowMeta
from ghaw_auditor.policy import PolicyValidator
def test_policy_validator_initialization() -> None:
    """The validator stores the policy it was constructed with."""
    policy = Policy()
    assert PolicyValidator(policy).policy == policy
def test_pinned_actions_validation() -> None:
    """A tag ref like `v4` must be reported when pinning to a SHA is required."""
    validator = PolicyValidator(Policy(require_pinned_actions=True))

    def checkout_v4() -> ActionRef:
        # Tag reference, deliberately not a 40-character commit SHA.
        return ActionRef(
            type=ActionType.GITHUB,
            owner="actions",
            repo="checkout",
            ref="v4",
            source_file="test.yml",
        )

    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={
            "test": JobMeta(
                name="test",
                runs_on="ubuntu-latest",
                actions_used=[checkout_v4()],
            )
        },
        actions_used=[checkout_v4()],
    )
    violations = validator.validate({"test.yml": workflow}, [])
    assert violations
    assert violations[0]["rule"] == "require_pinned_actions"
    assert violations[0]["severity"] == "error"
def test_pinned_actions_with_sha() -> None:
    """A full 40-character commit SHA ref satisfies the pinning requirement."""
    validator = PolicyValidator(Policy(require_pinned_actions=True))
    pinned = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="checkout",
        ref="abc123def456789012345678901234567890abcd",  # full commit SHA
        source_file="test.yml",
    )
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={
            "test": JobMeta(
                name="test",
                runs_on="ubuntu-latest",
                actions_used=[pinned],
            )
        },
        actions_used=[],
    )
    # A SHA-pinned action produces no violations.
    assert len(validator.validate({"test.yml": workflow}, [])) == 0
def test_branch_refs_validation() -> None:
    """Referencing an action by branch name is flagged when branch refs are forbidden."""
    validator = PolicyValidator(Policy(require_pinned_actions=False, forbid_branch_refs=True))

    def checkout_main() -> ActionRef:
        # `main` is a moving branch ref, which the policy forbids.
        return ActionRef(
            type=ActionType.GITHUB,
            owner="actions",
            repo="checkout",
            ref="main",
            source_file="test.yml",
        )

    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={
            "test": JobMeta(
                name="test",
                runs_on="ubuntu-latest",
                actions_used=[checkout_main()],
            )
        },
        actions_used=[checkout_main()],
    )
    violations = validator.validate({"test.yml": workflow}, [])
    assert violations
    assert violations[0]["rule"] == "forbid_branch_refs"
def test_allowed_actions_validation() -> None:
    """Owners outside the allow-list patterns produce an allowed_actions violation."""
    policy = Policy(require_pinned_actions=False, allowed_actions=["actions/*", "github/*"])
    validator = PolicyValidator(policy)

    def offlist_ref() -> ActionRef:
        # `thirdparty` matches neither allowed pattern.
        return ActionRef(
            type=ActionType.GITHUB,
            owner="thirdparty",
            repo="action",
            ref="v1",
            source_file="test.yml",
        )

    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={
            "test": JobMeta(
                name="test",
                runs_on="ubuntu-latest",
                actions_used=[offlist_ref()],
            )
        },
        actions_used=[offlist_ref()],
    )
    violations = validator.validate({"test.yml": workflow}, [])
    assert violations
    assert violations[0]["rule"] == "allowed_actions"
def test_denied_actions_validation() -> None:
    """Owners matching a deny-list pattern produce a denied_actions violation."""
    validator = PolicyValidator(Policy(require_pinned_actions=False, denied_actions=["dangerous/*"]))

    def denied_ref() -> ActionRef:
        # `dangerous` matches the deny pattern.
        return ActionRef(
            type=ActionType.GITHUB,
            owner="dangerous",
            repo="action",
            ref="v1",
            source_file="test.yml",
        )

    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={
            "test": JobMeta(
                name="test",
                runs_on="ubuntu-latest",
                actions_used=[denied_ref()],
            )
        },
        actions_used=[denied_ref()],
    )
    violations = validator.validate({"test.yml": workflow}, [])
    assert violations
    assert violations[0]["rule"] == "denied_actions"
def test_pr_concurrency_validation() -> None:
    """A pull_request workflow without a concurrency group yields a warning."""
    validator = PolicyValidator(Policy(require_concurrency_on_pr=True))
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["pull_request"],
        concurrency=None,  # deliberately missing
        jobs={},
    )
    violations = validator.validate({"test.yml": workflow}, [])
    assert violations
    assert violations[0]["rule"] == "require_concurrency_on_pr"
    assert violations[0]["severity"] == "warning"
def test_pr_concurrency_with_group() -> None:
    """A pull_request workflow that defines a concurrency group passes cleanly."""
    validator = PolicyValidator(Policy(require_concurrency_on_pr=True))
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["pull_request"],
        concurrency={"group": "${{ github.workflow }}"},
        jobs={},
    )
    assert len(validator.validate({"test.yml": workflow}, [])) == 0
def test_matches_pattern() -> None:
    """Glob-style patterns match only when the owner prefix agrees."""
    validator = PolicyValidator(Policy())
    cases = [
        ("actions/checkout", "actions/*", True),
        ("github/codeql-action", "github/*", True),
        ("thirdparty/action", "actions/*", False),
    ]
    for candidate, pattern, expected in cases:
        assert validator._matches_pattern(candidate, pattern) is expected

755
tests/test_renderer.py Normal file
View File

@@ -0,0 +1,755 @@
"""Tests for renderer."""
import json
from pathlib import Path
from ghaw_auditor.models import ActionManifest, JobMeta, WorkflowMeta
from ghaw_auditor.renderer import Renderer
def test_renderer_initialization(tmp_path: Path) -> None:
    """The renderer records its output directory and ensures it exists."""
    renderer = Renderer(tmp_path)
    # Directory is stored as-is and created eagerly by the constructor.
    assert renderer.output_dir == tmp_path
    assert renderer.output_dir.exists()
def test_render_json(tmp_path: Path) -> None:
    """render_json writes workflows/actions/violations JSON artifacts faithfully."""
    workflows = {
        "test.yml": WorkflowMeta(
            name="Test",
            path="test.yml",
            triggers=["push"],
            jobs={"test": JobMeta(name="test", runs_on="ubuntu-latest")},
        )
    }
    actions = {"actions/checkout@v4": ActionManifest(name="Checkout", description="Checkout code")}
    violations = [
        {
            "workflow": "test.yml",
            "rule": "test_rule",
            "severity": "error",
            "message": "Test violation",
        }
    ]
    Renderer(tmp_path).render_json(workflows, actions, violations)

    # All three artifacts must be written.
    for filename in ("workflows.json", "actions.json", "violations.json"):
        assert (tmp_path / filename).exists()

    # And each must round-trip the input data.
    workflows_data = json.loads((tmp_path / "workflows.json").read_text())
    assert "test.yml" in workflows_data
    assert workflows_data["test.yml"]["name"] == "Test"
    actions_data = json.loads((tmp_path / "actions.json").read_text())
    assert "actions/checkout@v4" in actions_data
    violations_data = json.loads((tmp_path / "violations.json").read_text())
    assert len(violations_data) == 1
    assert violations_data[0]["rule"] == "test_rule"
def test_render_markdown(tmp_path: Path) -> None:
    """render_markdown emits a report covering workflows, actions, and violations."""
    workflows = {
        "test.yml": WorkflowMeta(
            name="Test Workflow",
            path="test.yml",
            triggers=["push", "pull_request"],
            jobs={"test": JobMeta(name="test", runs_on="ubuntu-latest")},
        )
    }
    actions = {"actions/checkout@v4": ActionManifest(name="Checkout", description="Checkout repository")}
    violations = [
        {
            "workflow": "test.yml",
            "rule": "require_pinned_actions",
            "severity": "error",
            "message": "Action not pinned to SHA",
        }
    ]
    analysis = {
        "total_jobs": 1,
        "reusable_workflows": 0,
        "triggers": {"push": 1, "pull_request": 1},
        "runners": {"ubuntu-latest": 1},
        "secrets": {"total_unique_secrets": 0, "secrets": []},
    }
    Renderer(tmp_path).render_markdown(workflows, actions, violations, analysis)

    report_path = tmp_path / "report.md"
    assert report_path.exists()
    report = report_path.read_text()
    # Title, workflow/action names, violation rule, and triggers all appear.
    for expected in (
        "# GitHub Actions & Workflows Audit Report",
        "Test Workflow",
        "Checkout",
        "require_pinned_actions",
        "push",
        "pull_request",
    ):
        assert expected in report
def test_render_empty_data(tmp_path: Path) -> None:
    """Empty inputs still produce all three JSON artifacts with empty payloads."""
    Renderer(tmp_path).render_json({}, {}, [])
    for filename in ("workflows.json", "actions.json", "violations.json"):
        assert (tmp_path / filename).exists()
    assert json.loads((tmp_path / "workflows.json").read_text()) == {}
    assert json.loads((tmp_path / "violations.json").read_text()) == []
def test_render_markdown_with_actions_used(tmp_path: Path) -> None:
    """A job's actions_used list is rendered as anchor links in the report."""
    from ghaw_auditor.models import ActionRef, ActionType

    checkout = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="checkout",
        ref="v4",
        source_file="test.yml",
    )
    workflows = {
        "test.yml": WorkflowMeta(
            name="Test Workflow",
            path="test.yml",
            triggers=["push"],
            jobs={"test": JobMeta(name="test", runs_on="ubuntu-latest", actions_used=[checkout])},
        )
    }
    Renderer(tmp_path).render_markdown(workflows, {}, [], {})

    report_path = tmp_path / "report.md"
    assert report_path.exists()
    report = report_path.read_text()
    # The action is listed and linked to its inventory anchor.
    assert "Actions used:" in report
    assert "[actions/checkout](#actions-checkout)" in report
def test_render_markdown_with_secrets(tmp_path: Path) -> None:
    """Secret names listed in the analysis summary must appear in the report."""
    workflows = {
        "test.yml": WorkflowMeta(
            name="Test Workflow",
            path="test.yml",
            triggers=["push"],
            jobs={},
        )
    }
    analysis = {
        "total_jobs": 0,
        "reusable_workflows": 0,
        "secrets": {
            "total_unique_secrets": 2,
            "secrets": ["API_KEY", "DATABASE_URL"],
        },
    }
    Renderer(tmp_path).render_markdown(workflows, {}, [], analysis)
    report = (tmp_path / "report.md").read_text()
    # Both secret names come through verbatim.
    assert "API_KEY" in report
    assert "DATABASE_URL" in report
def test_render_markdown_with_action_inputs(tmp_path: Path) -> None:
    """Action inputs appear with descriptions and required/optional markers."""
    from ghaw_auditor.models import ActionInput

    manifest = ActionManifest(
        name="Test Action",
        description="A test action",
        inputs={
            "token": ActionInput(name="token", description="GitHub token", required=True),
            "debug": ActionInput(name="debug", description="Enable debug mode", required=False),
        },
    )
    Renderer(tmp_path).render_markdown({}, {"test/action@v1": manifest}, [], {})
    report = (tmp_path / "report.md").read_text()
    # Each input name, its required/optional status, and description are present.
    for expected in ("token", "required", "debug", "optional", "GitHub token", "Enable debug mode"):
        assert expected in report
def test_render_markdown_with_action_anchors(tmp_path: Path) -> None:
    """Each action section gets an HTML anchor so jobs can deep-link to it."""
    from ghaw_auditor.models import ActionRef, ActionType

    checkout = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="checkout",
        ref="v4",
        resolved_sha="abc123",
        source_file="test.yml",
    )
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={},
        actions_used=[checkout],
    )
    manifest = ActionManifest(name="Checkout", description="Checkout code")
    Renderer(tmp_path).render_markdown({"test.yml": workflow}, {"actions/checkout@abc123": manifest}, [], {})
    report = (tmp_path / "report.md").read_text()
    # The anchor id is the slugged owner-repo pair.
    assert '<a id="actions-checkout"></a>' in report
def test_render_markdown_with_repo_urls(tmp_path: Path) -> None:
    """GitHub actions in the inventory link back to their repository on github.com."""
    from ghaw_auditor.models import ActionRef, ActionType

    setup_node = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="setup-node",
        ref="v4",
        resolved_sha="def456",
        source_file="test.yml",
    )
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={},
        actions_used=[setup_node],
    )
    manifest = ActionManifest(name="Setup Node", description="Setup Node.js")
    Renderer(tmp_path).render_markdown({"test.yml": workflow}, {"actions/setup-node@def456": manifest}, [], {})
    report = (tmp_path / "report.md").read_text()
    # Raw URL and the markdown link form both appear.
    assert "https://github.com/actions/setup-node" in report
    assert "[actions/setup-node](https://github.com/actions/setup-node)" in report
def test_render_markdown_with_details_tags(tmp_path: Path) -> None:
    """Action inputs are collapsed inside a <details>/<summary> block."""
    from ghaw_auditor.models import ActionInput, ActionRef, ActionType

    checkout = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="checkout",
        ref="v4",
        source_file="test.yml",
    )
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={},
        actions_used=[checkout],
    )
    manifest = ActionManifest(
        name="Checkout",
        description="Checkout code",
        inputs={
            "token": ActionInput(name="token", description="GitHub token", required=False),
        },
    )
    Renderer(tmp_path).render_markdown({"test.yml": workflow}, {"actions/checkout@v4": manifest}, [], {})
    report = (tmp_path / "report.md").read_text()
    # The inputs section is wrapped in a collapsible details element.
    for tag in ("<details>", "<summary><b>Inputs</b></summary>", "</details>"):
        assert tag in report
def test_render_markdown_with_job_action_links(tmp_path: Path) -> None:
    """Job listings link each used action to its inventory section, with type label."""
    from ghaw_auditor.models import ActionRef, ActionType

    checkout = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="checkout",
        ref="v4",
        source_file="test.yml",
    )
    build_job = JobMeta(name="test", runs_on="ubuntu-latest", actions_used=[checkout])
    workflow = WorkflowMeta(
        name="CI",
        path="ci.yml",
        triggers=["push"],
        jobs={"test": build_job},
        actions_used=[checkout],
    )
    manifest = ActionManifest(name="Checkout", description="Checkout code")
    Renderer(tmp_path).render_markdown({"ci.yml": workflow}, {"actions/checkout@v4": manifest}, [], {})
    report = (tmp_path / "report.md").read_text()
    # The jobs section links the action to its anchor and labels its type.
    assert "Actions used:" in report
    assert "[actions/checkout](#actions-checkout) (GitHub)" in report
def test_create_action_anchor() -> None:
    """Anchor slugs are derived from the action key with ref/SHA stripped."""
    cases = {
        # GitHub action with short ref
        "actions/checkout@abc123": "actions-checkout",
        # Local action
        "local:./sync-labels": "local-sync-labels",
        # Docker image (colons and dots become dashes)
        "docker://alpine:3.8": "docker-alpine-3-8",
        # Full 40-character SHA ref
        "actions/setup-node@1234567890abcdef1234567890abcdef12345678": "actions-setup-node",
    }
    for key, anchor in cases.items():
        assert Renderer._create_action_anchor(key) == anchor
def test_get_action_repo_url() -> None:
    """Only GitHub-hosted actions yield a repository URL; local/Docker give None."""
    from ghaw_auditor.models import ActionRef, ActionType

    github_ref = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="checkout",
        ref="v4",
        source_file="test.yml",
    )
    assert Renderer._get_action_repo_url(github_ref) == "https://github.com/actions/checkout"

    # Local actions have no remote repository.
    local_ref = ActionRef(type=ActionType.LOCAL, path="./my-action", source_file="test.yml")
    assert Renderer._get_action_repo_url(local_ref) is None

    # Docker images are not GitHub repositories either.
    docker_ref = ActionRef(type=ActionType.DOCKER, path="docker://alpine:3.8", source_file="test.yml")
    assert Renderer._get_action_repo_url(docker_ref) is None
def test_render_markdown_with_docker_action(tmp_path: Path) -> None:
    """Docker actions referenced by jobs are labelled `(Docker)` in the report."""
    from ghaw_auditor.models import ActionRef, ActionType

    docker_ref = ActionRef(type=ActionType.DOCKER, path="docker://alpine:3.8", source_file="test.yml")
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={"test": JobMeta(name="test", runs_on="ubuntu-latest", actions_used=[docker_ref])},
    )
    Renderer(tmp_path).render_markdown({"test.yml": workflow}, {}, [], {})
    report = (tmp_path / "report.md").read_text()
    # The image reference and its type label both appear in the job listing.
    assert "Actions used:" in report
    assert "(Docker)" in report
    assert "docker://alpine:3.8" in report
def test_render_markdown_with_reusable_workflow(tmp_path: Path) -> None:
    """Reusable-workflow references are labelled `(Reusable Workflow)` in job listings."""
    from ghaw_auditor.models import ActionRef, ActionType

    reusable_ref = ActionRef(
        type=ActionType.REUSABLE_WORKFLOW,
        owner="org",
        repo="workflows",
        path=".github/workflows/reusable.yml",
        ref="main",
        source_file="test.yml",
    )
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={"test": JobMeta(name="test", runs_on="ubuntu-latest", actions_used=[reusable_ref])},
    )
    Renderer(tmp_path).render_markdown({"test.yml": workflow}, {}, [], {})
    report = (tmp_path / "report.md").read_text()
    # The workflow path and its type label both appear in the job listing.
    assert "Actions used:" in report
    assert "(Reusable Workflow)" in report
    assert ".github/workflows/reusable.yml" in report
def test_render_markdown_with_docker_action_in_inventory(tmp_path: Path) -> None:
    """Test Markdown rendering with Docker action in inventory."""
    from ghaw_auditor.models import ActionRef, ActionType

    renderer = Renderer(tmp_path)
    # Docker image reference recorded at the workflow level (not inside a job).
    docker_action_ref = ActionRef(
        type=ActionType.DOCKER,
        path="docker://node:18-alpine",
        source_file="test.yml",
    )
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={},
        actions_used=[docker_action_ref],
    )
    action_manifest = ActionManifest(
        name="Node Alpine",
        description="Node.js on Alpine Linux",
    )
    # Inventory keys for Docker actions carry a "docker:" prefix.
    renderer.render_markdown({"test.yml": workflow}, {"docker:docker://node:18-alpine": action_manifest}, [], {})
    report_file = tmp_path / "report.md"
    content = report_file.read_text()
    # Docker actions shouldn't have repository links or Local Action type
    # NOTE(review): the `or` below makes the assertion pass whenever EITHER
    # substring is absent, so it only weakly verifies "no repository link for
    # Docker entries" — consider tightening it to inspect the Docker section
    # specifically.
    assert "**Repository:**" not in content or "node:18-alpine" not in content
    assert "Node Alpine" in content
def test_render_markdown_with_local_action_without_path(tmp_path: Path) -> None:
    """A LOCAL action whose path is None is displayed with the fallback name `local`."""
    from ghaw_auditor.models import ActionRef, ActionType

    pathless = ActionRef(type=ActionType.LOCAL, path=None, source_file="test.yml")
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={"test": JobMeta(name="test", runs_on="ubuntu-latest", actions_used=[pathless])},
    )
    Renderer(tmp_path).render_markdown({"test.yml": workflow}, {}, [], {})
    report = (tmp_path / "report.md").read_text()
    # With no path available, "local" is the display name and "none" the slug.
    assert "Actions used:" in report
    assert "[local](#local-none) (Local)" in report
def test_render_markdown_with_local_action_in_inventory(tmp_path: Path) -> None:
    """Local actions in the inventory carry an explicit `Type: Local Action` label."""
    from ghaw_auditor.models import ActionRef, ActionType

    local_ref = ActionRef(type=ActionType.LOCAL, path="./my-custom-action", source_file="test.yml")
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={},
        actions_used=[local_ref],
    )
    manifest = ActionManifest(name="My Custom Action", description="A custom local action")
    Renderer(tmp_path).render_markdown({"test.yml": workflow}, {"local:./my-custom-action": manifest}, [], {})
    report = (tmp_path / "report.md").read_text()
    assert "**Type:** Local Action" in report
    assert "My Custom Action" in report
def test_render_markdown_with_job_permissions(tmp_path: Path) -> None:
    """Job permissions render as `name: level` pairs with kebab-case names."""
    from ghaw_auditor.models import PermissionLevel, Permissions

    secured_job = JobMeta(
        name="test",
        runs_on="ubuntu-latest",
        permissions=Permissions(
            contents=PermissionLevel.READ,
            issues=PermissionLevel.WRITE,
            security_events=PermissionLevel.WRITE,
        ),
    )
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={"test": secured_job},
    )
    Renderer(tmp_path).render_markdown({"test.yml": workflow}, {}, [], {})
    report = (tmp_path / "report.md").read_text()
    assert "Permissions:" in report
    assert "`contents`: read" in report
    assert "`issues`: write" in report
    # snake_case model attribute renders with a dash, matching workflow syntax.
    assert "`security-events`: write" in report
def test_render_markdown_without_job_permissions(tmp_path: Path) -> None:
    """No permissions block is rendered when a job defines none."""
    bare_job = JobMeta(name="test", runs_on="ubuntu-latest", permissions=None)
    workflow = WorkflowMeta(
        name="Test",
        path="test.yml",
        triggers=["push"],
        jobs={"test": bare_job},
    )
    Renderer(tmp_path).render_markdown({"test.yml": workflow}, {}, [], {})
    report = (tmp_path / "report.md").read_text()
    assert "Permissions:" not in report
def test_render_markdown_with_workflows_using_action(tmp_path: Path) -> None:
    """Each action section lists — and links to — every workflow that uses it."""
    from ghaw_auditor.models import ActionRef, ActionType

    checkout = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="checkout",
        ref="v4",
        source_file=".github/workflows/ci.yml",
    )
    # Two distinct workflows reference the same action.
    workflows = {
        ".github/workflows/ci.yml": WorkflowMeta(
            name="CI Workflow",
            path=".github/workflows/ci.yml",
            triggers=["push"],
            actions_used=[checkout],
        ),
        ".github/workflows/deploy.yml": WorkflowMeta(
            name="Deploy Workflow",
            path=".github/workflows/deploy.yml",
            triggers=["push"],
            actions_used=[checkout],
        ),
    }
    actions = {"actions/checkout@v4": ActionManifest(name="Checkout", description="Checkout repository")}
    Renderer(tmp_path).render_markdown(workflows, actions, [], {})
    report = (tmp_path / "report.md").read_text()
    assert "Used in Workflows" in report
    # Names, paths, and anchor links for both consumers must appear.
    for expected in (
        "CI Workflow",
        "Deploy Workflow",
        ".github/workflows/ci.yml",
        ".github/workflows/deploy.yml",
        "[CI Workflow](#ci-workflow)",
        "[Deploy Workflow](#deploy-workflow)",
    ):
        assert expected in report

531
tests/test_resolver.py Normal file
View File

@@ -0,0 +1,531 @@
"""Tests for resolver with mocked API."""
from pathlib import Path
from unittest.mock import Mock, patch
import pytest
from ghaw_auditor.cache import Cache
from ghaw_auditor.github_client import GitHubClient
from ghaw_auditor.models import ActionRef, ActionType
from ghaw_auditor.resolver import Resolver
@pytest.fixture
def mock_github_client() -> Mock:
    """GitHub client stub: fixed SHA lookups plus a minimal node20 manifest."""
    client = Mock(spec=GitHubClient)
    # Every ref resolves to the same fake commit.
    client.get_ref_sha.return_value = "abc123def456"
    # Every file fetch yields this minimal JavaScript action manifest.
    client.get_file_content.return_value = """
name: Test Action
description: A test action
runs:
  using: node20
  main: index.js
"""
    return client
@pytest.fixture
def temp_cache(tmp_path: Path) -> Cache:
    """Provide a Cache rooted in a per-test temporary directory."""
    cache_dir = tmp_path / "cache"
    return Cache(cache_dir)
def test_resolver_initialization(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """The resolver keeps references to its client, cache, and repo path."""
    resolver = Resolver(mock_github_client, temp_cache, tmp_path)
    # All three collaborators are stored unchanged.
    assert resolver.github_client == mock_github_client
    assert resolver.cache == temp_cache
    assert resolver.repo_path == tmp_path
def test_resolve_github_action(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """A GitHub action resolves to a SHA-keyed entry with its parsed manifest."""
    resolver = Resolver(mock_github_client, temp_cache, tmp_path)
    checkout = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="checkout",
        ref="v4",
        source_file="test.yml",
    )
    key, manifest = resolver._resolve_github_action(checkout)
    # The key embeds the resolved SHA rather than the original tag.
    assert key == "actions/checkout@abc123def456"
    assert manifest is not None
    assert manifest.name == "Test Action"
    # The ref object is updated in place with the resolved SHA.
    assert checkout.resolved_sha == "abc123def456"
def test_resolve_local_action(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """A local composite action is read from disk and keyed with a `local:` prefix."""
    # Lay out a minimal composite action inside the fake repository.
    action_dir = tmp_path / ".github" / "actions" / "custom"
    action_dir.mkdir(parents=True)
    (action_dir / "action.yml").write_text(
        """name: Custom Action
description: Local action
runs:
  using: composite
  steps:
    - name: Test step
      run: echo test
      shell: bash
"""
    )
    resolver = Resolver(mock_github_client, temp_cache, tmp_path)
    local_ref = ActionRef(
        type=ActionType.LOCAL,
        path="./.github/actions/custom",  # leading ./ must be handled
        source_file="test.yml",
    )
    key, manifest = resolver._resolve_local_action(local_ref)
    assert key == "local:./.github/actions/custom"
    assert manifest is not None
    assert manifest.name == "Custom Action"
    assert manifest.is_composite is True
def test_resolve_docker_action(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """Docker actions get a `docker:` key and no manifest."""
    resolver = Resolver(mock_github_client, temp_cache, tmp_path)
    docker_ref = ActionRef(
        type=ActionType.DOCKER,
        path="docker://alpine:3.8",
        source_file="test.yml",
    )
    key, manifest = resolver._resolve_action(docker_ref)
    assert key == "docker:docker://alpine:3.8"
    # Docker images carry no action.yml, so no manifest is produced.
    assert manifest is None
def test_resolve_actions_parallel(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """Multiple distinct actions resolve with one SHA lookup each."""
    resolver = Resolver(mock_github_client, temp_cache, tmp_path, concurrency=2)
    refs = [
        ActionRef(
            type=ActionType.GITHUB,
            owner="actions",
            repo=repo_name,
            ref="v4",
            source_file="test.yml",
        )
        for repo_name in ("checkout", "setup-node")
    ]
    resolved = resolver.resolve_actions(refs)
    assert len(resolved) == 2
    # One ref-to-SHA lookup per distinct action.
    assert mock_github_client.get_ref_sha.call_count == 2
def test_resolve_action_with_cache(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """Resolving the same action twice should hit the cache on the second pass."""
    resolver = Resolver(mock_github_client, temp_cache, tmp_path)
    checkout = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="checkout",
        ref="v4",
        source_file="test.yml",
    )
    first_key, _ = resolver._resolve_github_action(checkout)
    mock_github_client.reset_mock()  # forget the first round of API calls
    second_key, _ = resolver._resolve_github_action(checkout)
    assert first_key == second_key
    # The cache means at most one fresh SHA lookup on the repeat call.
    assert mock_github_client.get_ref_sha.call_count <= 1
def test_resolve_action_api_error(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """API failures are absorbed: the resolver returns an empty key and no manifest."""
    mock_github_client.get_ref_sha.side_effect = Exception("API Error")
    resolver = Resolver(mock_github_client, temp_cache, tmp_path)
    failing = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="checkout",
        ref="v4",
        source_file="test.yml",
    )
    key, manifest = resolver._resolve_github_action(failing)
    assert key == ""
    assert manifest is None
def test_resolve_monorepo_action(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """Resolving a monorepo action fetches action.yml from the sub-directory path.

    Only the fetch call is asserted here; the returned key/manifest are already
    covered by test_resolve_github_action.
    """
    resolver = Resolver(mock_github_client, temp_cache, tmp_path)
    action = ActionRef(
        type=ActionType.GITHUB,
        owner="owner",
        repo="repo",
        path="subdir/action",  # action lives in a sub-directory of the repo
        ref="v1",
        source_file="test.yml",
    )
    # Return value intentionally discarded; the fetch path is what matters here.
    resolver._resolve_github_action(action)
    # The manifest must be requested at <path>/action.yml pinned to the resolved SHA.
    mock_github_client.get_file_content.assert_called_with("owner", "repo", "subdir/action/action.yml", "abc123def456")
def test_resolve_action_unknown_type(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """Action types the resolver does not handle yield an empty key and no manifest."""
    # ActionType is already imported at module top; the redundant function-local
    # import was removed.
    resolver = Resolver(mock_github_client, temp_cache, tmp_path)
    # REUSABLE_WORKFLOW refs are resolved elsewhere, so _resolve_action skips them.
    action = ActionRef(
        type=ActionType.REUSABLE_WORKFLOW,
        owner="owner",
        repo="repo",
        path=".github/workflows/test.yml",
        ref="v1",
        source_file="test.yml",
    )
    key, manifest = resolver._resolve_action(action)
    assert key == ""
    assert manifest is None
def test_resolve_local_action_no_path(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """A local action reference without a path cannot be resolved."""
    under_test = Resolver(mock_github_client, temp_cache, tmp_path)
    pathless = ActionRef(
        type=ActionType.LOCAL,
        path=None,
        source_file="test.yml",
    )
    assert under_test._resolve_local_action(pathless) == ("", None)
def test_resolve_local_action_not_found(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """A local action pointing at a missing directory resolves to nothing."""
    under_test = Resolver(mock_github_client, temp_cache, tmp_path)
    missing = ActionRef(
        type=ActionType.LOCAL,
        path="./.github/actions/nonexistent",
        source_file="test.yml",
    )
    assert under_test._resolve_local_action(missing) == ("", None)
def test_resolve_local_action_invalid_yaml(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """A local action whose manifest is unparseable resolves to nothing."""
    broken_dir = tmp_path / ".github" / "actions" / "broken"
    broken_dir.mkdir(parents=True)
    (broken_dir / "action.yml").write_text("invalid: yaml: content: {{{")

    under_test = Resolver(mock_github_client, temp_cache, tmp_path)
    target = ActionRef(
        type=ActionType.LOCAL,
        path="./.github/actions/broken",
        source_file="test.yml",
    )
    # The parse failure must be handled gracefully, not raised.
    assert under_test._resolve_local_action(target) == ("", None)
def test_resolve_github_action_missing_fields(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """A GitHub action reference missing its owner cannot be resolved."""
    under_test = Resolver(mock_github_client, temp_cache, tmp_path)
    # Owner intentionally absent.
    ownerless = ActionRef(
        type=ActionType.GITHUB,
        owner=None,
        repo="checkout",
        ref="v4",
        source_file="test.yml",
    )
    assert under_test._resolve_github_action(ownerless) == ("", None)
def test_resolve_github_action_manifest_not_found(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """When the manifest fetch fails, a pinned key is still produced without a manifest."""
    # SHA lookup succeeds, but fetching the manifest content raises.
    mock_github_client.get_ref_sha.return_value = "abc123"
    mock_github_client.get_file_content.side_effect = Exception("404 Not Found")

    under_test = Resolver(mock_github_client, temp_cache, tmp_path)
    target = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="missing",
        ref="v1",
        source_file="test.yml",
    )
    key, manifest = under_test._resolve_github_action(target)
    # The pinned key survives even though no manifest could be fetched.
    assert "actions/missing@abc123" in key
    assert manifest is None
def test_resolve_monorepo_action_manifest_not_found(
    mock_github_client: Mock, temp_cache: Cache, tmp_path: Path, caplog: pytest.LogCaptureFixture
) -> None:
    """A monorepo action whose manifest fetch fails is keyed but logged as an error."""
    import logging

    # Fetching fails for both action.yml and action.yaml candidates.
    mock_github_client.get_ref_sha.return_value = "abc123"
    mock_github_client.get_file_content.side_effect = Exception("404 Not Found")

    under_test = Resolver(mock_github_client, temp_cache, tmp_path)
    target = ActionRef(
        type=ActionType.GITHUB,
        owner="owner",
        repo="repo",
        path="subdir/action",
        ref="v1",
        source_file="test.yml",
    )
    with caplog.at_level(logging.ERROR):
        key, manifest = under_test._resolve_github_action(target)

    # The pinned key is produced even though no manifest could be fetched.
    assert "owner/repo@abc123" in key
    assert manifest is None
    # The error log must mention the full action path and both manifest names tried.
    assert "owner/repo/subdir/action" in caplog.text
    assert "(tried action.yml and action.yaml)" in caplog.text
def test_resolve_github_action_invalid_manifest(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """Unparseable remote manifest content is handled gracefully."""
    # The client returns content that is not valid YAML.
    mock_github_client.get_ref_sha.return_value = "abc123"
    mock_github_client.get_file_content.return_value = "invalid: yaml: {{{: bad"

    under_test = Resolver(mock_github_client, temp_cache, tmp_path)
    target = ActionRef(
        type=ActionType.GITHUB,
        owner="actions",
        repo="broken",
        ref="v1",
        source_file="test.yml",
    )
    assert under_test._resolve_github_action(target) == ("", None)
def test_resolve_actions_with_exception(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """Parallel resolution drops actions that fail instead of aborting the batch."""

    def _fake_get_ref_sha(owner: str, repo: str, ref: str) -> str:
        # Only the "fail" repo raises; everything else resolves to a fixed SHA.
        if repo == "fail":
            raise Exception("API Error")
        return "abc123"

    mock_github_client.get_ref_sha.side_effect = _fake_get_ref_sha
    under_test = Resolver(mock_github_client, temp_cache, tmp_path, concurrency=2)
    batch = [
        ActionRef(
            type=ActionType.GITHUB,
            owner="actions",
            repo="checkout",
            ref="v4",
            source_file="test.yml",
        ),
        ActionRef(
            type=ActionType.GITHUB,
            owner="actions",
            repo="fail",
            ref="v1",
            source_file="test.yml",
        ),
    ]
    resolved = under_test.resolve_actions(batch)
    # Only the action whose SHA lookup succeeded should be present.
    assert len(resolved) == 1
    assert "actions/checkout" in next(iter(resolved))
def test_resolve_actions_logs_exception(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """Exceptions raised inside a resolution worker are caught, not propagated."""
    under_test = Resolver(mock_github_client, temp_cache, tmp_path)
    batch = [
        ActionRef(
            type=ActionType.GITHUB,
            owner="actions",
            repo="broken",
            ref="v1",
            source_file="test.yml",
        ),
    ]
    # Force _resolve_action itself to raise so the error surfaces via
    # future.result() and exercises the executor's exception handler.
    with patch.object(under_test, "_resolve_action", side_effect=RuntimeError("Unexpected error")):
        resolved = under_test.resolve_actions(batch)
    # The failure is handled gracefully and nothing is resolved.
    assert not resolved
def test_resolve_local_action_file_path_parse_error(
    mock_github_client: Mock, temp_cache: Cache, tmp_path: Path
) -> None:
    """A file-style local reference whose parent manifest is broken yields nothing."""
    # The parent directory holds an action.yml that cannot be parsed.
    bad_dir = tmp_path / "my-action"
    bad_dir.mkdir()
    (bad_dir / "action.yml").write_text("invalid: yaml: content: {{{")

    under_test = Resolver(mock_github_client, temp_cache, tmp_path)
    # A path whose filename starts with "action." routes resolution through
    # the file-path (else) branch, which looks for a manifest in the parent
    # directory (my-action/) and fails to parse it.
    target = ActionRef(
        type=ActionType.LOCAL,
        path="./my-action/action.custom.yml",
        source_file="test.yml",
    )
    key, manifest = under_test._resolve_local_action(target)
    assert key == ""
    assert manifest is None
def test_resolve_action_local_type(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """_resolve_action dispatches LOCAL references to the local resolver."""
    # A valid composite action living inside the repository.
    composite_dir = tmp_path / "my-action"
    composite_dir.mkdir()
    (composite_dir / "action.yml").write_text(
        "name: My Action\n"
        "description: Test action\n"
        "runs:\n"
        "  using: composite\n"
        "  steps:\n"
        "    - run: echo test\n"
        "      shell: bash\n"
    )
    under_test = Resolver(mock_github_client, temp_cache, tmp_path)
    target = ActionRef(
        type=ActionType.LOCAL,
        path="./my-action",
        source_file="test.yml",
    )
    # Going through _resolve_action exercises the LOCAL dispatch branch.
    key, manifest = under_test._resolve_action(target)
    assert key == "local:./my-action"
    assert manifest is not None
    assert manifest.name == "My Action"
def test_resolve_local_action_file_path_success(mock_github_client: Mock, temp_cache: Cache, tmp_path: Path) -> None:
    """A file-style local reference with a valid parent manifest resolves fully."""
    good_dir = tmp_path / "my-action"
    good_dir.mkdir()
    (good_dir / "action.yml").write_text(
        "name: File Path Action\n"
        "description: Test action via file path\n"
        "runs:\n"
        "  using: composite\n"
        "  steps:\n"
        "    - run: echo test\n"
        "      shell: bash\n"
    )
    under_test = Resolver(mock_github_client, temp_cache, tmp_path)
    # Referencing the manifest file directly (name starts with "action.")
    # exercises the file-path (else) branch, which parses the manifest found
    # in the parent directory.
    target = ActionRef(
        type=ActionType.LOCAL,
        path="./my-action/action.yml",
        source_file="test.yml",
    )
    key, manifest = under_test._resolve_local_action(target)
    assert key == "local:./my-action/action.yml"
    assert manifest is not None
    assert manifest.name == "File Path Action"

205
tests/test_scanner.py Normal file
View File

@@ -0,0 +1,205 @@
"""Tests for scanner module."""
from pathlib import Path
from ghaw_auditor.scanner import Scanner
def test_scanner_initialization() -> None:
    """A scanner rooted at the current directory points at an existing path."""
    under_test = Scanner(".")
    assert under_test.repo_path.exists()
def test_scanner_initialization_with_exclusions() -> None:
    """Exclusion patterns passed at construction are retained."""
    patterns = ["**/node_modules/**", "**/dist/**"]
    under_test = Scanner(".", exclude_patterns=patterns)
    assert len(under_test.exclude_patterns) == 2
    assert "**/node_modules/**" in under_test.exclude_patterns
def test_scanner_should_exclude(tmp_path: Path) -> None:
    """_should_exclude matches files against the configured glob patterns."""
    # Glob patterns must match the full path including the file component.
    under_test = Scanner(tmp_path, exclude_patterns=["node_modules/**/*", ".git/**/*"])

    excluded_action = tmp_path / "node_modules" / "test" / "action.yml"
    excluded_action.parent.mkdir(parents=True)
    excluded_action.touch()

    excluded_hook = tmp_path / ".git" / "hooks" / "pre-commit"
    excluded_hook.parent.mkdir(parents=True)
    excluded_hook.touch()

    kept = tmp_path / ".github" / "actions" / "test" / "action.yml"
    kept.parent.mkdir(parents=True)
    kept.touch()

    assert under_test._should_exclude(excluded_action) is True
    assert under_test._should_exclude(excluded_hook) is True
    assert under_test._should_exclude(kept) is False
def test_find_workflows_empty_dir(tmp_path: Path) -> None:
    """An empty repository yields no workflow files."""
    found = Scanner(tmp_path).find_workflows()
    assert not found
def test_find_workflows_with_files(tmp_path: Path) -> None:
    """Both .yml and .yaml workflow files are found; other files are ignored.

    Names are compared as a set so the test does not depend on the
    platform-specific ordering of directory listings (the original indexed
    ``workflows[0]``/``workflows[1]``, which is fragile).
    """
    # Create workflow directory
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    # Create workflow files
    (workflows_dir / "ci.yml").write_text("name: CI\non: push")
    (workflows_dir / "release.yaml").write_text("name: Release\non: push")
    (workflows_dir / "README.md").write_text("# Workflows")  # Should be ignored
    scanner = Scanner(tmp_path)
    workflows = scanner.find_workflows()
    assert len(workflows) == 2
    assert {w.name for w in workflows} == {"ci.yml", "release.yaml"}
def test_find_workflows_with_exclusions(tmp_path: Path) -> None:
    """Workflows matching an exclusion pattern are filtered out."""
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text("name: CI")
    (workflows_dir / "test.yml").write_text("name: Test")

    under_test = Scanner(tmp_path, exclude_patterns=["**test.yml"])
    found = under_test.find_workflows()
    assert len(found) == 1
    assert found[0].name == "ci.yml"
def test_find_actions_empty_dir(tmp_path: Path) -> None:
    """An empty repository yields no action manifests."""
    found = Scanner(tmp_path).find_actions()
    assert not found
def test_find_actions_in_github_directory(tmp_path: Path) -> None:
    """Actions under .github/actions are discovered, including nested ones."""
    actions_dir = tmp_path / ".github" / "actions"

    # Two flat actions, one per manifest extension.
    first = actions_dir / "action1"
    first.mkdir(parents=True)
    (first / "action.yml").write_text("name: Action 1")
    second = actions_dir / "action2"
    second.mkdir(parents=True)
    (second / "action.yaml").write_text("name: Action 2")

    # One action nested a level deeper.
    nested = actions_dir / "group" / "nested"
    nested.mkdir(parents=True)
    (nested / "action.yml").write_text("name: Nested Action")

    found = [str(p) for p in Scanner(tmp_path).find_actions()]
    assert len(found) == 3
    assert any("action1" in p for p in found)
    assert any("action2" in p for p in found)
    assert any("nested" in p for p in found)
def test_find_actions_in_root(tmp_path: Path) -> None:
    """A manifest sitting at the repository root is discovered."""
    (tmp_path / "action.yml").write_text("name: Root Action")
    found = Scanner(tmp_path).find_actions()
    assert len(found) == 1
    assert found[0].name == "action.yml"
def test_find_actions_excludes_workflows_dir(tmp_path: Path) -> None:
    """An action.yml inside .github/workflows is not treated as an action."""
    # A file named action.yml inside the workflows directory must be ignored.
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "action.yml").write_text("name: Not an action")

    # A genuine action under .github/actions.
    real_dir = tmp_path / ".github" / "actions" / "real"
    real_dir.mkdir(parents=True)
    (real_dir / "action.yml").write_text("name: Real Action")

    found = Scanner(tmp_path).find_actions()
    # Only the manifest under .github/actions counts.
    assert len(found) == 1
    assert "actions/real" in str(found[0])
def test_find_actions_with_exclusions(tmp_path: Path) -> None:
    """Actions matching an exclusion pattern are filtered out."""
    actions_dir = tmp_path / ".github" / "actions"

    kept_dir = actions_dir / "include-me"
    kept_dir.mkdir(parents=True)
    (kept_dir / "action.yml").write_text("name: Include")

    dropped_dir = actions_dir / "exclude-me"
    dropped_dir.mkdir(parents=True)
    (dropped_dir / "action.yml").write_text("name: Exclude")

    found = Scanner(tmp_path, exclude_patterns=["**/exclude-me/**"]).find_actions()
    assert len(found) == 1
    assert "include-me" in str(found[0])
def test_find_actions_deduplication(tmp_path: Path) -> None:
    """Each action manifest appears exactly once in the results."""
    action_dir = tmp_path / ".github" / "actions" / "my-action"
    action_dir.mkdir(parents=True)
    manifest = action_dir / "action.yml"
    manifest.write_text("name: My Action")

    found = Scanner(tmp_path).find_actions()
    # Exactly one entry, and it is the manifest we created.
    assert found == [manifest]
def test_find_actions_monorepo_structure(tmp_path: Path) -> None:
    """Root-level action directories in a monorepo layout are all discovered."""
    # Layout: ./sync-labels/, ./deploy-action/, ./test-action/, each with a manifest.
    for name in ["sync-labels", "deploy-action", "test-action"]:
        action_dir = tmp_path / name
        action_dir.mkdir()
        (action_dir / "action.yml").write_text(f"name: {name}\ndescription: Test action")

    found = [str(p) for p in Scanner(tmp_path).find_actions()]
    assert len(found) == 3
    for expected in ("sync-labels", "deploy-action", "test-action"):
        assert any(expected in p for p in found)

227
tests/test_services.py Normal file
View File

@@ -0,0 +1,227 @@
"""Tests for service layer."""
from pathlib import Path
from unittest.mock import Mock
import pytest
from ghaw_auditor.analyzer import Analyzer
from ghaw_auditor.differ import Differ
from ghaw_auditor.models import (
ActionManifest,
Policy,
WorkflowMeta,
)
from ghaw_auditor.parser import Parser
from ghaw_auditor.policy import PolicyValidator
from ghaw_auditor.scanner import Scanner
from ghaw_auditor.services import AuditService, DiffService
def test_audit_service_scan_basic(tmp_path: Path) -> None:
    """Scanning an empty repository reports zero workflows, actions, and violations."""
    service = AuditService(Scanner(tmp_path), Parser(tmp_path), Analyzer())
    result = service.scan(offline=True)

    assert result.workflow_count == 0
    assert result.action_count == 0
    assert result.unique_action_count == 0
    assert not result.workflows
    assert not result.actions
    assert not result.violations
def test_audit_service_scan_with_workflow(tmp_path: Path) -> None:
    """A single workflow with one action is counted and keyed by its path."""
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text(
        "name: CI\n"
        "on: push\n"
        "jobs:\n"
        "  test:\n"
        "    runs-on: ubuntu-latest\n"
        "    steps:\n"
        "      - uses: actions/checkout@v4\n"
    )
    service = AuditService(Scanner(tmp_path), Parser(tmp_path), Analyzer())
    result = service.scan(offline=True)

    assert result.workflow_count == 1
    assert len(result.workflows) == 1
    assert ".github/workflows/ci.yml" in result.workflows
    assert result.unique_action_count == 1
def test_audit_service_scan_with_policy_violations(tmp_path: Path) -> None:
    """A branch-pinned action triggers the require_pinned_actions policy."""
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    # actions/checkout@main references a branch, which the pinning policy forbids.
    (workflows_dir / "ci.yml").write_text(
        "name: CI\n"
        "on: push\n"
        "jobs:\n"
        "  test:\n"
        "    runs-on: ubuntu-latest\n"
        "    steps:\n"
        "      - uses: actions/checkout@main\n"
    )
    validator = PolicyValidator(Policy(require_pinned_actions=True))
    service = AuditService(Scanner(tmp_path), Parser(tmp_path), Analyzer(), validator=validator)
    result = service.scan(offline=True)

    assert result.violations
    assert any("pinned" in v["message"].lower() for v in result.violations)
def test_audit_service_scan_parse_error(tmp_path: Path) -> None:
    """An unparseable workflow is counted but excluded from the parsed set."""
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "invalid.yml").write_text("invalid: yaml: {{{")

    service = AuditService(Scanner(tmp_path), Parser(tmp_path), Analyzer())
    result = service.scan(offline=True)

    # The scan continues despite the parse error.
    assert result.workflow_count == 1
    assert len(result.workflows) == 0  # Workflow not parsed
def test_audit_service_scan_with_resolver(tmp_path: Path) -> None:
    """When a resolver is supplied and offline=False, it is invoked for actions."""
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text(
        "name: CI\n"
        "on: push\n"
        "jobs:\n"
        "  test:\n"
        "    runs-on: ubuntu-latest\n"
        "    steps:\n"
        "      - uses: actions/checkout@v4\n"
    )
    # A stand-in resolver returning a single pinned manifest.
    fake_resolver = Mock()
    fake_resolver.resolve_actions.return_value = {
        "actions/checkout@abc123": ActionManifest(
            name="Checkout",
            description="Checkout code",
        )
    }
    service = AuditService(Scanner(tmp_path), Parser(tmp_path), Analyzer(), resolver=fake_resolver)
    result = service.scan(offline=False)

    # Online scans must delegate to the resolver.
    assert fake_resolver.resolve_actions.called
    assert len(result.actions) == 1
def test_audit_service_scan_analysis(tmp_path: Path) -> None:
    """Scan results include trigger analysis for the discovered workflows."""
    workflows_dir = tmp_path / ".github" / "workflows"
    workflows_dir.mkdir(parents=True)
    (workflows_dir / "ci.yml").write_text(
        "name: CI\n"
        "on:\n"
        "  - push\n"
        "  - pull_request\n"
        "jobs:\n"
        "  test:\n"
        "    runs-on: ubuntu-latest\n"
        "    steps:\n"
        "      - run: echo test\n"
    )
    service = AuditService(Scanner(tmp_path), Parser(tmp_path), Analyzer())
    analysis = service.scan(offline=True).analysis

    assert "total_workflows" in analysis
    assert analysis["total_workflows"] == 1
    assert "triggers" in analysis
    assert "push" in analysis["triggers"]
    assert "pull_request" in analysis["triggers"]
def test_diff_service_compare(tmp_path: Path) -> None:
    """Changing a workflow's triggers between baseline and current marks it modified."""
    baseline_dir = tmp_path / "baseline"
    baseline_dir.mkdir()
    differ = Differ(baseline_dir)

    # Persist a baseline containing a single workflow.
    before = WorkflowMeta(
        name="Old",
        path="test.yml",
        triggers=["push"],
        jobs={},
    )
    differ.save_baseline({"test.yml": before}, {})

    # Compare a changed version of the same workflow against that baseline.
    after = WorkflowMeta(
        name="New",
        path="test.yml",
        triggers=["push", "pull_request"],
        jobs={},
    )
    workflow_diffs, action_diffs = DiffService(differ).compare({"test.yml": after}, {})

    assert len(workflow_diffs) == 1
    assert workflow_diffs[0].status == "modified"
def test_diff_service_compare_no_baseline(tmp_path: Path) -> None:
    """Comparing against a missing baseline directory raises FileNotFoundError."""
    missing_differ = Differ(tmp_path / "nonexistent")
    with pytest.raises(FileNotFoundError):
        DiffService(missing_differ).compare({}, {})

1079
uv.lock generated Normal file

File diff suppressed because it is too large Load Diff