#!/usr/bin/env python3
"""update-validators.py

Automatically generates validation rules for GitHub Actions
by scanning action.yml files and applying convention-based detection.

Usage:
  python update-validators.py [--dry-run] [--action action-name]
"""

from __future__ import annotations

import argparse
import re
import sys
from pathlib import Path
from typing import Any

import yaml  # pylint: disable=import-error

# Per-action validator assignments, keyed by action directory name and then by
# input name. A value of None means "skip validation for this input".
ACTION_OVERRIDES: dict[str, dict[str, str | None]] = {
    "php-version-detect": {"default-version": "php_version"},
    "python-version-detect": {"default-version": "python_version"},
    "python-version-detect-v2": {"default-version": "python_version"},
    "dotnet-version-detect": {"default-version": "dotnet_version"},
    "go-version-detect": {"default-version": "go_version"},
    "npm-publish": {"package-version": "strict_semantic_version"},
    "docker-build": {
        "cache-mode": "cache_mode",
        "sbom-format": "sbom_format",
    },
    "common-file-check": {
        "file-pattern": "file_path",
    },
    "common-retry": {
        "backoff-strategy": "backoff_strategy",
        "shell": "shell_type",
    },
    "docker-publish": {
        "registry": "registry_enum",
        "cache-mode": "cache_mode",
        "platforms": None,  # Skip validation - complex platform format
    },
    "docker-publish-hub": {
        "password": "docker_password",
    },
    "go-lint": {
        "go-version": "go_version",
        "timeout": "timeout_with_unit",
        "only-new-issues": "boolean",
        "enable-linters": "linter_list",
        "disable-linters": "linter_list",
    },
    "prettier-check": {
        "check-only": "boolean",
        "file-pattern": "file_pattern",
        "plugins": "plugin_list",
    },
    "codeql-analysis": {
        "language": "codeql_language",
        "queries": "codeql_queries",
        "packs": "codeql_packs",
        "config": "codeql_config",
        "build-mode": "codeql_build_mode",
        "source-root": "file_path",
        "category": "category_format",
        "token": "github_token",
        "ram": "numeric_range_256_32768",
        "threads": "numeric_range_1_128",
        "output": "file_path",
        "skip-queries": "boolean",
    },
    "biome-lint": {
        "mode": "mode_enum",
    },
    "eslint-lint": {
        "mode": "mode_enum",
    },
    "prettier-lint": {
        "mode": "mode_enum",
    },
}


class ValidationRuleGenerator:
    """Generate validation rules for GitHub Actions automatically.

    This class scans GitHub Action YAML files and generates validation rules
    based on convention-based detection patterns and special case handling.
    """

    def __init__(self, *, dry_run: bool = False, specific_action: str | None = None) -> None:
        """Initialize the validation rule generator.

        Args:
            dry_run: If True, show what would be generated without writing files
            specific_action: If provided, only generate rules for this action
        """
        self.dry_run = dry_run
        self.specific_action = specific_action
        # Action directories are looked up three levels above this script.
        self.actions_dir = Path(__file__).parent.parent.parent.resolve()

        # Convention patterns for automatic detection.
        # Order matters - the first matching pattern wins, so more specific
        # patterns should come first.
        self.conventions = {
            # CodeQL-specific patterns (high priority)
            "codeql_language": re.compile(r"\blanguage\b", re.IGNORECASE),
            "codeql_queries": re.compile(r"\bquer(y|ies)\b", re.IGNORECASE),
            "codeql_packs": re.compile(r"\bpacks?\b", re.IGNORECASE),
            "codeql_build_mode": re.compile(r"\bbuild[_-]?mode\b", re.IGNORECASE),
            "codeql_config": re.compile(r"\bconfig\b", re.IGNORECASE),
            "category_format": re.compile(r"\bcategor(y|ies)\b", re.IGNORECASE),
            # GitHub token patterns (high priority)
            "github_token": re.compile(
                r"\b(github[_-]?token|gh[_-]?token|token|auth[_-]?token|api[_-]?key)\b",
                re.IGNORECASE,
            ),
            # CalVer version patterns (high priority - check before semantic)
            "calver_version": re.compile(
                r"\b(release[_-]?tag|release[_-]?version|monthly[_-]?version|date[_-]?version)\b",
                re.IGNORECASE,
            ),
            # Specific version types (high priority)
            "dotnet_version": re.compile(r"\bdotnet[_-]?version\b", re.IGNORECASE),
            "terraform_version": re.compile(r"\bterraform[_-]?version\b", re.IGNORECASE),
            "node_version": re.compile(r"\bnode[_-]?version\b", re.IGNORECASE),
            # Docker-specific patterns (high priority)
            "docker_image_name": re.compile(r"\bimage[_-]?name\b", re.IGNORECASE),
            "docker_tag": re.compile(r"\b(tags?|image[_-]?tags?)\b", re.IGNORECASE),
            "docker_architectures": re.compile(
                r"\b(arch|architecture|platform)s?\b",
                re.IGNORECASE,
            ),
            # Namespace with lookahead (specific pattern)
            "namespace_with_lookahead": re.compile(r"\bnamespace\b", re.IGNORECASE),
            # Numeric ranges (specific ranges)
            "numeric_range_0_16": re.compile(
                r"\b(parallel[_-]?builds?|builds?[_-]?parallel)\b",
                re.IGNORECASE,
            ),
            "numeric_range_1_10": re.compile(
                r"\b(retry|retries|attempt|attempts|max[_-]?retry)\b",
                re.IGNORECASE,
            ),
            "numeric_range_1_128": re.compile(r"\bthreads?\b", re.IGNORECASE),
            "numeric_range_256_32768": re.compile(r"\bram\b", re.IGNORECASE),
            "numeric_range_0_100": re.compile(r"\b(quality|percent|percentage)\b", re.IGNORECASE),
            # File and path patterns
            "file_path": re.compile(
                r"\b(paths?|files?|dir|directory|config|dockerfile"
                r"|ignore[_-]?file|key[_-]?files?)\b",
                re.IGNORECASE,
            ),
            "file_pattern": re.compile(r"\b(file[_-]?pattern|glob[_-]?pattern)\b", re.IGNORECASE),
            "branch_name": re.compile(r"\b(branch|ref|base[_-]?branch)\b", re.IGNORECASE),
            # User and identity patterns
            "email": re.compile(r"\b(email|mail)\b", re.IGNORECASE),
            "username": re.compile(r"\b(user|username|commit[_-]?user)\b", re.IGNORECASE),
            # URL patterns (high priority)
            "url": re.compile(r"\b(url|registry[_-]?url|api[_-]?url|endpoint)\b", re.IGNORECASE),
            # Scope and namespace patterns
            "scope": re.compile(r"\b(scope|namespace)\b", re.IGNORECASE),
            # Security patterns for text content that could contain injection
            "security_patterns": re.compile(
                r"\b(changelog|notes|message|content|description|body|text|comment|summary|release[_-]?notes)\b",
                re.IGNORECASE,
            ),
            # Regex pattern validation (ReDoS detection)
            "regex_pattern": re.compile(
                r"\b(regex|pattern|validation[_-]?regex|regex[_-]?pattern)\b",
                re.IGNORECASE,
            ),
            # Additional validation types
            "report_format": re.compile(r"\b(report[_-]?format|format)\b", re.IGNORECASE),
            "plugin_list": re.compile(r"\b(plugins?|plugin[_-]?list)\b", re.IGNORECASE),
            "prefix": re.compile(r"\b(prefix|tag[_-]?prefix)\b", re.IGNORECASE),
            # Boolean patterns (broad, should be lower priority)
            "boolean": re.compile(
                r"\b(dry-?run|verbose|enable|disable|auto|skip|force|cache|provenance|sbom|scan|sign|fail[_-]?on[_-]?error|nightly)\b",
                re.IGNORECASE,
            ),
            # File extensions pattern
            "file_extensions": re.compile(r"\b(file[_-]?extensions?|extensions?)\b", re.IGNORECASE),
            # Registry pattern
            "registry": re.compile(r"\bregistry\b", re.IGNORECASE),
            # PHP-specific patterns
            "php_extensions": re.compile(r"\b(extensions?|php[_-]?extensions?)\b", re.IGNORECASE),
            "coverage_driver": re.compile(r"\b(coverage|coverage[_-]?driver)\b", re.IGNORECASE),
            # Generic version pattern (lowest priority - catches remaining version fields)
            "semantic_version": re.compile(r"\bversion\b", re.IGNORECASE),
        }

        # Special cases that need manual handling. These are matched on the
        # exact input name and take precedence over the convention patterns.
        # A value of None means the input is deliberately left unvalidated.
        self.special_cases = {
            # CalVer fields that might not be detected
            "release-tag": "calver_version",
            # Flexible version fields (support both CalVer and SemVer)
            "version": "flexible_version",  # For github-release action
            # File paths that might not be detected
            "pre-commit-config": "file_path",
            "config-file": "file_path",
            "ignore-file": "file_path",
            "readme-file": "file_path",
            "working-directory": "file_path",
            # Numeric fields that need positive integer validation
            "days-before-stale": "positive_integer",
            "days-before-close": "positive_integer",
            # Version fields with specific types
            "buildx-version": "semantic_version",
            "buildkit-version": "semantic_version",
            "tflint-version": "terraform_version",
            "default-version": "semantic_version",
            "force-version": "semantic_version",
            "golangci-lint-version": "semantic_version",
            "prettier-version": "semantic_version",
            "eslint-version": "strict_semantic_version",
            "flake8-version": "semantic_version",
            "autopep8-version": "semantic_version",
            "composer-version": "semantic_version",
            # Tokens and passwords
            "dockerhub-password": "github_token",
            "npm_token": "github_token",
            "password": "github_token",
            # Complex fields that should skip validation
            "build-args": None,  # Can be empty
            "context": None,  # Default handled
            "cache-from": None,  # Complex cache syntax
            "cache-export": None,  # Complex cache syntax
            "cache-import": None,  # Complex cache syntax
            "build-contexts": None,  # Complex syntax
            "secrets": None,  # Complex syntax
            "platform-build-args": None,  # JSON format
            "extensions": None,  # PHP extensions list
            "tools": None,  # PHP tools list
            "args": None,  # Composer args
            "stability": None,  # Composer stability
            "registry-url": "url",  # URL format
            "scope": "scope",  # NPM scope
            "plugins": None,  # Prettier plugins
            "file-extensions": "file_extensions",  # File extension list
            "file-pattern": None,  # Glob pattern
            "enable-linters": None,  # Linter list
            "disable-linters": None,  # Linter list
            "success-codes": None,  # Exit code list
            "retry-codes": None,  # Exit code list
            "ignore-paths": None,  # Path patterns
            "key-files": None,  # Cache key files
            "restore-keys": None,  # Cache restore keys
            "env-vars": None,  # Environment variables
            # Action-specific fields that need special handling
            # Cache type enum (npm, composer, go, etc.) - complex enum, skip validation
            "type": None,
            # File paths for caching (comma-separated) - complex format, skip validation
            "paths": None,
            "command": None,  # Shell command - complex format, skip validation for safety
            "backoff-strategy": None,  # Retry strategy enum - complex enum, skip validation
            "shell": None,  # Shell type enum - simple enum, skip validation
            # Removed image-name and tag - now handled by docker_image_name and docker_tag patterns
            # Numeric inputs with different ranges
            "timeout": "numeric_range_1_3600",  # Timeout should support higher values
            "retry-delay": "numeric_range_1_300",  # Retry delay should support higher values
            "max-warnings": "numeric_range_0_10000",
            # version-file-parser specific fields
            "language": None,  # Simple enum (node, php, python, go, dotnet)
            "tool-versions-key": None,  # Simple string (nodejs, python, php, golang, dotnet)
            "dockerfile-image": None,  # Simple string (node, python, php, golang, dotnet)
            "validation-regex": "regex_pattern",  # Regex pattern - validate for ReDoS
        }

    def get_action_directories(self) -> list[str]:
        """Return the names of all action directories, sorted alphabetically.

        A directory counts as an action when it is not hidden, is not the
        ``validate-inputs`` action itself, and contains an ``action.yml``.
        Sorting keeps processing order and console output deterministic,
        because ``Path.iterdir`` yields entries in arbitrary order.
        """
        return sorted(
            item.name
            for item in self.actions_dir.iterdir()
            if item.is_dir()
            and not item.name.startswith(".")
            and item.name != "validate-inputs"
            and (item / "action.yml").exists()
        )

    def parse_action_file(self, action_name: str) -> dict[str, Any] | None:
        """Parse an action's ``action.yml`` and extract its metadata.

        Args:
            action_name: Name of the action directory below ``actions_dir``.

        Returns:
            A dict with ``name``, ``description`` and ``inputs`` keys, or None
            (after printing a message) when the file cannot be read or parsed,
            or does not have the expected mapping structure.
        """
        action_file = self.actions_dir / action_name / "action.yml"

        try:
            content = action_file.read_text(encoding="utf-8")
            action_data = yaml.safe_load(content)
        except Exception as error:
            # Best-effort boundary: one broken action must not abort the run.
            print(f"Failed to parse {action_file}: {error}")
            return None

        # An empty file loads as None and a scalar/list document is not a
        # valid action definition; report both explicitly.
        if not isinstance(action_data, dict):
            print(f"Failed to parse {action_file}: expected a YAML mapping at the top level")
            return None

        # `inputs:` with no value loads as None; treat it as "no inputs".
        inputs = action_data.get("inputs")
        if inputs is None:
            inputs = {}
        elif not isinstance(inputs, dict):
            print(f"Failed to parse {action_file}: 'inputs' must be a mapping")
            return None

        return {
            "name": action_data.get("name", action_name),
            "description": action_data.get("description") or "",
            "inputs": inputs,
        }

    def detect_validation_type(self, input_name: str, input_data: dict[str, Any]) -> str | None:
        """Detect the validation type from an input's name and description.

        Resolution order: exact-name special cases, then convention patterns
        against the input name, then convention patterns against the
        description.

        Args:
            input_name: Input name as declared in ``action.yml``.
            input_data: The input's definition mapping; a missing or null
                ``description`` is treated as an empty string.

        Returns:
            The validator name, or None when the input is explicitly skipped
            or nothing matches.
        """
        # A null or non-string description would break the string operations
        # below, so normalise it to text first.
        raw_description = (input_data or {}).get("description")
        description = "" if raw_description is None else str(raw_description)

        # Check special cases first - highest priority
        if input_name in self.special_cases:
            return self.special_cases[input_name]

        # Special handling for version fields that might be CalVer:
        # check if description mentions calendar/date/monthly/release.
        # NOTE(review): "version" is currently listed in special_cases, so the
        # lookup above always returns first and this branch cannot run. Confirm
        # which of the two rules is intended before reordering them.
        if input_name == "version" and any(
            word in description.lower() for word in ["calendar", "date", "monthly", "release"]
        ):
            return "calver_version"

        # Apply convention patterns in order (more specific first).
        # Test input name first (highest confidence), then description.
        for validator, pattern in self.conventions.items():
            if pattern.search(input_name):
                return validator  # Direct name match has highest confidence

        # If no name match, try description
        for validator, pattern in self.conventions.items():
            if pattern.search(description):
                return validator  # Description match has lower confidence

        return None  # No validation detected

    def sort_object_by_keys(self, obj: dict[str, Any]) -> dict[str, Any]:
        """Return a copy of ``obj`` with keys in alphabetical order."""
        return {key: obj[key] for key in sorted(obj)}

    def generate_rules_for_action(self, action_name: str) -> dict[str, Any] | None:
        """Generate the validation rules document for a single action.

        Args:
            action_name: Name of the action directory below ``actions_dir``.

        Returns:
            The rules mapping ready to be dumped as YAML, or None when the
            action file could not be parsed.
        """
        action_data = self.parse_action_file(action_name)
        if not action_data:
            return None

        inputs = action_data["inputs"]
        required_inputs = []
        optional_inputs = []
        conventions = {}
        overrides = {}

        # Process each input
        for input_name, raw_input_data in inputs.items():
            # An input declared without a body (`name:`) loads as None.
            input_data = raw_input_data if isinstance(raw_input_data, dict) else {}

            # YAML may carry `required` as a real boolean or as the string "true".
            if input_data.get("required") in [True, "true"]:
                required_inputs.append(input_name)
            else:
                optional_inputs.append(input_name)

            # Detect validation type
            validation_type = self.detect_validation_type(input_name, input_data)
            if validation_type:
                conventions[input_name] = validation_type

        # Handle action-specific overrides using data-driven approach.
        # An override for an input that already has a detected convention is
        # recorded under "overrides"; an override for an existing input with
        # no convention is added to "conventions" instead.
        for input_name, override_value in ACTION_OVERRIDES.get(action_name, {}).items():
            if input_name in conventions:
                overrides[input_name] = override_value
            elif input_name in inputs:
                conventions[input_name] = override_value

        # Calculate statistics
        total_inputs = len(inputs)
        validated_inputs = len(conventions)
        skipped_inputs = sum(1 for v in overrides.values() if v is None)
        coverage = round((validated_inputs / total_inputs) * 100) if total_inputs > 0 else 0

        # Generate rules object with enhanced metadata
        return {
            "schema_version": "1.0",
            "action": action_name,
            "description": action_data["description"],
            "generator_version": "1.0.0",
            "required_inputs": sorted(required_inputs),
            "optional_inputs": sorted(optional_inputs),
            "conventions": self.sort_object_by_keys(conventions),
            "overrides": self.sort_object_by_keys(overrides),
            "statistics": {
                "total_inputs": total_inputs,
                "validated_inputs": validated_inputs,
                "skipped_inputs": skipped_inputs,
                "coverage_percentage": coverage,
            },
            "validation_coverage": coverage,
            "auto_detected": True,
            "manual_review_required": coverage < 80 or validated_inputs == 0,
            "quality_indicators": {
                "has_required_inputs": len(required_inputs) > 0,
                "has_token_validation": "token" in conventions or "github-token" in conventions,
                "has_version_validation": any("version" in v for v in conventions.values() if v),
                "has_file_validation": any(v == "file_path" for v in conventions.values()),
                "has_security_validation": any(
                    v in ["github_token", "security_patterns"] for v in conventions.values()
                ),
            },
        }

    def write_rules_file(self, action_name: str, rules: dict[str, Any]) -> None:
        """Write the rules to ``rules.yml`` in the action folder.

        In dry-run mode the rendered file is printed instead of written.

        Args:
            action_name: Name of the action directory below ``actions_dir``.
            rules: Rules mapping as produced by ``generate_rules_for_action``;
                must contain a ``statistics`` mapping.
        """
        rules_file = self.actions_dir / action_name / "rules.yml"
        generator_version = rules.get("generator_version", "unknown")
        schema_version = rules.get("schema_version", "unknown")
        validation_coverage = rules.get("validation_coverage", 0)
        validated_inputs = rules["statistics"].get("validated_inputs", 0)
        total_inputs = rules["statistics"].get("total_inputs", 0)

        header = f"""---
# Validation rules for {action_name} action
# Generated by update-validators.py v{generator_version} - DO NOT EDIT MANUALLY
# Schema version: {schema_version}
# Coverage: {validation_coverage}% ({validated_inputs}/{total_inputs} inputs)
#
# This file defines validation rules for the {action_name} GitHub Action.
# Rules are automatically applied by validate-inputs action when this
# action is used.
#

"""

        # Use a custom yaml dumper to ensure proper indentation.
        # NOTE(review): this override forwards `indentless` unchanged, so it
        # looks equivalent to the stock SafeDumper - confirm whether forcing
        # indentless=False (indented sequences) was intended.
        class CustomYamlDumper(yaml.SafeDumper):
            def increase_indent(self, flow: bool = False, *, indentless: bool = False) -> None:  # noqa: FBT001, FBT002
                return super().increase_indent(flow, indentless=indentless)

        yaml_content = yaml.dump(
            rules,
            Dumper=CustomYamlDumper,
            indent=2,
            width=120,
            default_flow_style=False,
            allow_unicode=True,
            sort_keys=False,  # Keep the key order built by generate_rules_for_action
        )

        content = header + yaml_content

        if self.dry_run:
            print(f"[DRY RUN] Would write {rules_file}:")
            print(content)
            print("---")
        else:
            with rules_file.open("w", encoding="utf-8") as f:
                f.write(content)
            print(f"✅ Generated {rules_file}")

    def generate_rules(self) -> None:
        """Generate rules for all actions or a specific action.

        Prints progress and a summary. Exits the process with status 1 when
        ``specific_action`` was requested but no such action exists.
        """
        print("🔍 Scanning for GitHub Actions...")

        actions = self.get_action_directories()
        filtered_actions = actions

        if self.specific_action:
            filtered_actions = [name for name in actions if name == self.specific_action]
            if not filtered_actions:
                print(f"❌ Action '{self.specific_action}' not found")
                sys.exit(1)

        print(f"📝 Found {len(actions)} actions, processing {len(filtered_actions)}:")
        for name in filtered_actions:
            print(f" - {name}")
        print()

        processed = 0
        failed = 0

        for action_name in filtered_actions:
            try:
                rules = self.generate_rules_for_action(action_name)
                if rules:
                    self.write_rules_file(action_name, rules)
                    processed += 1
                else:
                    print(f"⚠️ Failed to generate rules for {action_name}")
                    failed += 1
            except Exception as error:
                # Best-effort boundary: keep going with the remaining actions.
                print(f"❌ Error processing {action_name}: {error}")
                failed += 1

        print()
        print("📊 Summary:")
        print(f" - Processed: {processed}")
        print(f" - Failed: {failed}")
        coverage = (
            round((processed / (processed + failed)) * 100) if (processed + failed) > 0 else 0
        )
        print(f" - Coverage: {coverage}%")

        if not self.dry_run and processed > 0:
            print()
            print(
                "✨ Validation rules updated! Run 'git diff */rules.yml' to review changes.",
            )

    def validate_rules_files(self) -> bool:
        """Validate existing ``rules.yml`` files in the action directories.

        A file is valid when it parses as a YAML mapping that contains the
        ``action``, ``required_inputs``, ``optional_inputs`` and
        ``conventions`` keys.

        Returns:
            True when every rules file found is valid, False otherwise.
        """
        print("🔍 Validating existing rules files...")

        # Find all rules.yml files in action directories (sorted so that the
        # report order does not depend on the filesystem).
        rules_files = []
        for action_dir in sorted(self.actions_dir.iterdir()):
            if action_dir.is_dir() and not action_dir.name.startswith("."):
                rules_file = action_dir / "rules.yml"
                if rules_file.exists():
                    rules_files.append(rules_file)

        required = ["action", "required_inputs", "optional_inputs", "conventions"]
        valid = 0
        invalid = 0

        for rules_file in rules_files:
            # Every file is named rules.yml, so include the action directory
            # to make the diagnostics identify which file is at fault.
            label = f"{rules_file.parent.name}/{rules_file.name}"

            try:
                rules = yaml.safe_load(rules_file.read_text(encoding="utf-8"))
            except Exception as error:
                print(f"❌ {label}: {error}")
                invalid += 1
                continue

            # Empty files load as None; scalars/lists cannot hold the fields.
            if not isinstance(rules, dict):
                print(f"❌ {label}: expected a YAML mapping at the top level")
                invalid += 1
                continue

            # Basic validation
            missing = [field for field in required if field not in rules]
            if missing:
                print(f"⚠️ {label}: Missing fields: {', '.join(missing)}")
                invalid += 1
            else:
                valid += 1

        print(f"✅ Validation complete: {valid} valid, {invalid} invalid")
        return invalid == 0


def main() -> None:
    """Parse the command line and run rule generation or validation.

    With ``--validate`` the process exits with status 0 when all rules files
    are valid and 1 otherwise; without it, rules are generated.
    """
    parser = argparse.ArgumentParser(
        description="Automatically generates validation rules for GitHub Actions",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python update-validators.py --dry-run
  python update-validators.py --action csharp-publish
  python update-validators.py --validate
        """,
    )

    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be generated without writing files",
    )
    parser.add_argument("--action", metavar="NAME", help="Generate rules for specific action only")
    parser.add_argument("--validate", action="store_true", help="Validate existing rules files")

    args = parser.parse_args()

    generator = ValidationRuleGenerator(dry_run=args.dry_run, specific_action=args.action)

    if args.validate:
        success = generator.validate_rules_files()
        sys.exit(0 if success else 1)
    else:
        generator.generate_rules()


if __name__ == "__main__":
    main()