# Files
# ghaw-auditor/ghaw_auditor/scanner.py
# 2025-10-19 09:52:13 +03:00
#
# 85 lines
# 3.1 KiB
# Python

"""File scanner for discovering GitHub Actions and workflows."""
from __future__ import annotations
import logging
from pathlib import Path
logger = logging.getLogger(__name__)


class Scanner:
    """Scan a repository for GitHub Actions workflow and action manifest files.

    Paths returned by the ``find_*`` methods are located under the resolved
    repository root and are filtered through the configured exclude patterns.
    """

    # Glob patterns for workflow files. Not referenced by the methods of this
    # class (they use their own literals); kept as public class attributes.
    WORKFLOW_PATTERNS = [
        ".github/workflows/*.yml",
        ".github/workflows/*.yaml",
    ]

    # Glob patterns for action manifests. Same remark as WORKFLOW_PATTERNS.
    ACTION_PATTERNS = [
        "**/action.yml",
        "**/action.yaml",
        ".github/actions/*/action.yml",
        ".github/actions/*/action.yaml",
    ]

    # File names that identify an action manifest.
    _ACTION_FILE_NAMES = ("action.yml", "action.yaml")

    def __init__(self, repo_path: str | Path, exclude_patterns: list[str] | None = None) -> None:
        """Initialize the scanner.

        Args:
            repo_path: Repository root; stored resolved (absolute).
            exclude_patterns: Glob patterns matched against repository-relative
                paths; ``None`` or an empty list disables exclusion.
        """
        self.repo_path = Path(repo_path).resolve()
        self.exclude_patterns = exclude_patterns or []

    def _should_exclude(self, path: Path) -> bool:
        """Return True if ``path`` matches any configured exclude pattern.

        Matching uses ``PurePath.match`` on the path relative to the repository
        root. Per the pathlib documentation, relative patterns are matched from
        the right and ``**`` is not treated as a recursive wildcard by ``match``.

        Raises:
            ValueError: If ``path`` is not located under the repository root.
        """
        rel_path = path.relative_to(self.repo_path)
        # ``PurePath.match("")`` raises ValueError; an empty pattern excludes nothing.
        return any(pattern and rel_path.match(pattern) for pattern in self.exclude_patterns)

    @staticmethod
    def _in_workflows_dir(rel_path: Path) -> bool:
        """Return True if ``rel_path`` contains a ``.github/workflows`` directory pair.

        Components are compared instead of the string form so the result does
        not depend on the platform path separator and does not trigger on
        look-alike names such as ``my.github/workflows-old``.
        """
        parts = rel_path.parts
        return any(
            parent == ".github" and child == "workflows"
            for parent, child in zip(parts, parts[1:])
        )

    def _record_action(self, action_file: Path, found: set[Path]) -> None:
        """Add ``action_file`` to ``found`` unless already present, not a file, or excluded."""
        if action_file in found:
            return
        # A directory that happens to be named like a manifest is not an action.
        if not action_file.is_file() or self._should_exclude(action_file):
            return
        found.add(action_file)
        logger.debug("Found action: %s", action_file.relative_to(self.repo_path))

    def find_workflows(self) -> list[Path]:
        """Find all workflow files directly inside ``.github/workflows``.

        Returns:
            Sorted list of ``*.yml`` / ``*.yaml`` files that are not excluded.
            Empty (with a warning logged) when the directory does not exist.
        """
        workflow_dir = self.repo_path / ".github" / "workflows"
        if not workflow_dir.is_dir():
            logger.warning("Workflow directory not found: %s", workflow_dir)
            return []

        workflows: list[Path] = []
        for pattern in ("*.yml", "*.yaml"):
            workflows.extend(
                file_path
                for file_path in workflow_dir.glob(pattern)
                if file_path.is_file() and not self._should_exclude(file_path)
            )

        logger.info("Found %d workflow files", len(workflows))
        return sorted(workflows)

    def find_actions(self) -> list[Path]:
        """Find all action manifest files.

        Supports multiple action discovery patterns:
        - .github/actions/*/action.yml (standard GitHub location)
        - ./action-name/action.yml (monorepo root-level actions)
        - Any depth: path/to/action/action.yml (recursive search)

        During the repository-wide search, files located under a
        ``.github/workflows`` directory are skipped to avoid false positives.

        Returns:
            Sorted list of unique, non-excluded ``action.yml`` / ``action.yaml`` files.
        """
        # A set gives O(1) duplicate detection across the two passes.
        found: set[Path] = set()

        # Pass 1: dedicated search rooted at .github/actions.
        actions_dir = self.repo_path / ".github" / "actions"
        if actions_dir.is_dir():
            for name in self._ACTION_FILE_NAMES:
                for action_file in actions_dir.rglob(name):
                    self._record_action(action_file, found)

        # Pass 2: repository-wide search (supports monorepo structure).
        for name in self._ACTION_FILE_NAMES:
            for action_file in self.repo_path.rglob(name):
                if self._in_workflows_dir(action_file.relative_to(self.repo_path)):
                    continue
                self._record_action(action_file, found)

        logger.info("Found %d action files", len(found))
        return sorted(found)