Files
actions/validate-inputs/scripts/update-validators.py
Ismo Vuorinen 97238212eb docs: remove common-cache references from documentation and tooling
Remove all remaining references to common-cache from project documentation,
test workflows, and build tooling after action deletion.

Updated:
- CLAUDE.md: Remove from action catalog (28 → 27 actions)
- README.md: Regenerate catalog without common-cache
- SECURITY.md: Update caching optimization notes
- Test workflows: Remove common-cache test references
- spec_helper.sh: Remove common-cache test helpers
- generate_listing.cjs: Remove from category/language mappings
- update-validators.py: Remove custom validator entry
2025-11-20 15:23:19 +02:00

586 lines
25 KiB
Python
Executable File

#!/usr/bin/env python3
"""update-validators.py
Automatically generates validation rules for GitHub Actions
by scanning action.yml files and applying convention-based detection.
Usage:
python update-validators.py [--dry-run] [--action action-name]
"""
from __future__ import annotations
import argparse
import re
import sys
from pathlib import Path
from typing import Any
import yaml # pylint: disable=import-error
class ValidationRuleGenerator:
    """Generate validation rules for GitHub Actions automatically.

    The generator scans every action directory next to ``validate-inputs``,
    reads the ``inputs`` section of each ``action.yml`` and maps every input
    to a validator name using, in order of priority:

    1. exact-name special cases (``self.special_cases``),
    2. regex conventions matched against the input name,
    3. the same regex conventions matched against the input description.

    Action-specific corrections are then applied from ``_ACTION_OVERRIDES``
    and the result is written to ``<action>/rules.yml``.
    """

    # Per-action corrections applied after convention detection.
    # Maps action name -> {input name -> validator name}. A value of None
    # marks the input as deliberately skipped.
    _ACTION_OVERRIDES: dict[str, dict[str, str | None]] = {
        "php-version-detect": {"default-version": "php_version"},
        "python-version-detect": {"default-version": "python_version"},
        "python-version-detect-v2": {"default-version": "python_version"},
        "dotnet-version-detect": {"default-version": "dotnet_version"},
        "go-version-detect": {"default-version": "go_version"},
        "npm-publish": {"package-version": "strict_semantic_version"},
        "docker-build": {
            "cache-mode": "cache_mode",
            "sbom-format": "sbom_format",
        },
        "common-file-check": {
            "file-pattern": "file_path",
        },
        "common-retry": {
            "backoff-strategy": "backoff_strategy",
            "shell": "shell_type",
        },
        "node-setup": {
            "package-manager": "package_manager_enum",
        },
        "docker-publish": {
            "registry": "registry_enum",
            "cache-mode": "cache_mode",
            "platforms": None,  # Skip validation - complex platform format
        },
        "docker-publish-hub": {
            "password": "docker_password",
        },
        "go-lint": {
            "go-version": "go_version",
            "timeout": "timeout_with_unit",
            "only-new-issues": "boolean",
            "enable-linters": "linter_list",
            "disable-linters": "linter_list",
        },
        "prettier-check": {
            "check-only": "boolean",
            "file-pattern": "file_pattern",
            "plugins": "plugin_list",
        },
        "php-laravel-phpunit": {
            "extensions": "php_extensions",
        },
        "codeql-analysis": {
            "language": "codeql_language",
            "queries": "codeql_queries",
            "packs": "codeql_packs",
            "config": "codeql_config",
            "build-mode": "codeql_build_mode",
            "source-root": "file_path",
            "category": "category_format",
            "token": "github_token",
            "ram": "numeric_range_256_32768",
            "threads": "numeric_range_1_128",
            "output": "file_path",
            "skip-queries": "boolean",
        },
        "biome-lint": {
            "mode": "mode_enum",
        },
        "eslint-lint": {
            "mode": "mode_enum",
        },
        "prettier-lint": {
            "mode": "mode_enum",
        },
    }

    def __init__(self, *, dry_run: bool = False, specific_action: str | None = None) -> None:
        """Initialize the validation rule generator.

        Args:
            dry_run: If True, show what would be generated without writing files
            specific_action: If provided, only generate rules for this action
        """
        self.dry_run = dry_run
        self.specific_action = specific_action
        # Three levels up from this script: the directory holding all actions.
        self.actions_dir = Path(__file__).parent.parent.parent.resolve()

        # Convention patterns for automatic detection.
        # Order matters - the first matching pattern wins, so more specific
        # patterns must come before broader ones.
        self.conventions: dict[str, re.Pattern[str]] = {
            # CodeQL-specific patterns (high priority)
            "codeql_language": re.compile(r"\blanguage\b", re.IGNORECASE),
            "codeql_queries": re.compile(r"\bquer(y|ies)\b", re.IGNORECASE),
            "codeql_packs": re.compile(r"\bpacks?\b", re.IGNORECASE),
            "codeql_build_mode": re.compile(r"\bbuild[_-]?mode\b", re.IGNORECASE),
            "codeql_config": re.compile(r"\bconfig\b", re.IGNORECASE),
            "category_format": re.compile(r"\bcategor(y|ies)\b", re.IGNORECASE),
            # GitHub token patterns (high priority)
            "github_token": re.compile(
                r"\b(github[_-]?token|gh[_-]?token|token|auth[_-]?token|api[_-]?key)\b",
                re.IGNORECASE,
            ),
            # CalVer version patterns (high priority - check before semantic)
            "calver_version": re.compile(
                r"\b(release[_-]?tag|release[_-]?version|monthly[_-]?version|date[_-]?version)\b",
                re.IGNORECASE,
            ),
            # Specific version types (high priority)
            "dotnet_version": re.compile(r"\bdotnet[_-]?version\b", re.IGNORECASE),
            "terraform_version": re.compile(r"\bterraform[_-]?version\b", re.IGNORECASE),
            "node_version": re.compile(r"\bnode[_-]?version\b", re.IGNORECASE),
            # Docker-specific patterns (high priority)
            "docker_image_name": re.compile(r"\bimage[_-]?name\b", re.IGNORECASE),
            "docker_tag": re.compile(r"\b(tags?|image[_-]?tags?)\b", re.IGNORECASE),
            "docker_architectures": re.compile(
                r"\b(arch|architecture|platform)s?\b",
                re.IGNORECASE,
            ),
            # Namespace with lookahead (specific pattern)
            "namespace_with_lookahead": re.compile(r"\bnamespace\b", re.IGNORECASE),
            # Numeric ranges (specific ranges)
            "numeric_range_0_16": re.compile(
                r"\b(parallel[_-]?builds?|builds?[_-]?parallel)\b",
                re.IGNORECASE,
            ),
            "numeric_range_1_10": re.compile(
                r"\b(retry|retries|attempt|attempts|max[_-]?retry)\b",
                re.IGNORECASE,
            ),
            "numeric_range_1_128": re.compile(r"\bthreads?\b", re.IGNORECASE),
            "numeric_range_256_32768": re.compile(r"\bram\b", re.IGNORECASE),
            "numeric_range_0_100": re.compile(r"\b(quality|percent|percentage)\b", re.IGNORECASE),
            # File and path patterns
            "file_path": re.compile(
                r"\b(paths?|files?|dir|directory|config|dockerfile"
                r"|ignore[_-]?file|key[_-]?files?)\b",
                re.IGNORECASE,
            ),
            "file_pattern": re.compile(r"\b(file[_-]?pattern|glob[_-]?pattern)\b", re.IGNORECASE),
            "branch_name": re.compile(r"\b(branch|ref|base[_-]?branch)\b", re.IGNORECASE),
            # User and identity patterns
            "email": re.compile(r"\b(email|mail)\b", re.IGNORECASE),
            "username": re.compile(r"\b(user|username|commit[_-]?user)\b", re.IGNORECASE),
            # URL patterns (high priority)
            "url": re.compile(r"\b(url|registry[_-]?url|api[_-]?url|endpoint)\b", re.IGNORECASE),
            # Scope and namespace patterns
            "scope": re.compile(r"\b(scope|namespace)\b", re.IGNORECASE),
            # Security patterns for text content that could contain injection
            "security_patterns": re.compile(
                r"\b(changelog|notes|message|content|description|body|text|comment|summary|release[_-]?notes)\b",
                re.IGNORECASE,
            ),
            # Regex pattern validation (ReDoS detection)
            "regex_pattern": re.compile(
                r"\b(regex|pattern|validation[_-]?regex|regex[_-]?pattern)\b",
                re.IGNORECASE,
            ),
            # Additional validation types
            "report_format": re.compile(r"\b(report[_-]?format|format)\b", re.IGNORECASE),
            "plugin_list": re.compile(r"\b(plugins?|plugin[_-]?list)\b", re.IGNORECASE),
            "prefix": re.compile(r"\b(prefix|tag[_-]?prefix)\b", re.IGNORECASE),
            # Boolean patterns (broad, should be lower priority)
            "boolean": re.compile(
                r"\b(dry-?run|verbose|enable|disable|auto|skip|force|cache|provenance|sbom|scan|sign|fail[_-]?on[_-]?error|nightly)\b",
                re.IGNORECASE,
            ),
            # File extensions pattern
            "file_extensions": re.compile(r"\b(file[_-]?extensions?|extensions?)\b", re.IGNORECASE),
            # Registry pattern
            "registry": re.compile(r"\bregistry\b", re.IGNORECASE),
            # PHP-specific patterns
            "php_extensions": re.compile(r"\b(extensions?|php[_-]?extensions?)\b", re.IGNORECASE),
            "coverage_driver": re.compile(r"\b(coverage|coverage[_-]?driver)\b", re.IGNORECASE),
            # Generic version pattern (lowest priority - catches remaining version fields)
            "semantic_version": re.compile(r"\bversion\b", re.IGNORECASE),
        }

        # Special cases that need manual handling. Looked up by exact input
        # name before any pattern is tried; None means "skip validation".
        self.special_cases: dict[str, str | None] = {
            # CalVer fields that might not be detected
            "release-tag": "calver_version",
            # Flexible version fields (support both CalVer and SemVer)
            "version": "flexible_version",  # For github-release action
            # File paths that might not be detected
            "pre-commit-config": "file_path",
            "config-file": "file_path",
            "ignore-file": "file_path",
            "readme-file": "file_path",
            "working-directory": "file_path",
            # Numeric fields that need positive integer validation
            "days-before-stale": "positive_integer",
            "days-before-close": "positive_integer",
            # Version fields with specific types
            "buildx-version": "semantic_version",
            "buildkit-version": "semantic_version",
            "tflint-version": "terraform_version",
            "default-version": "semantic_version",
            "force-version": "semantic_version",
            "golangci-lint-version": "semantic_version",
            "prettier-version": "semantic_version",
            "eslint-version": "strict_semantic_version",
            "flake8-version": "semantic_version",
            "autopep8-version": "semantic_version",
            "composer-version": "semantic_version",
            # Tokens and passwords
            "dockerhub-password": "github_token",
            "npm_token": "github_token",
            "password": "github_token",
            # Complex fields that should skip validation
            "build-args": None,  # Can be empty
            "context": None,  # Default handled
            "cache-from": None,  # Complex cache syntax
            "cache-export": None,  # Complex cache syntax
            "cache-import": None,  # Complex cache syntax
            "build-contexts": None,  # Complex syntax
            "secrets": None,  # Complex syntax
            "platform-build-args": None,  # JSON format
            "extensions": None,  # PHP extensions list
            "tools": None,  # PHP tools list
            "args": None,  # Composer args
            "stability": None,  # Composer stability
            "registry-url": "url",  # URL format
            "scope": "scope",  # NPM scope
            "plugins": None,  # Prettier plugins
            "file-extensions": "file_extensions",  # File extension list
            "file-pattern": None,  # Glob pattern
            "enable-linters": None,  # Linter list
            "disable-linters": None,  # Linter list
            "success-codes": None,  # Exit code list
            "retry-codes": None,  # Exit code list
            "ignore-paths": None,  # Path patterns
            "key-files": None,  # Cache key files
            "restore-keys": None,  # Cache restore keys
            "env-vars": None,  # Environment variables
            # Action-specific fields that need special handling
            "type": None,  # Cache type enum (npm, composer, go, etc.) - complex enum,
            # skip validation
            "paths": None,  # File paths for caching (comma-separated) - complex format,
            # skip validation
            "command": None,  # Shell command - complex format, skip validation for safety
            "backoff-strategy": None,  # Retry strategy enum - complex enum, skip validation
            "shell": None,  # Shell type enum - simple enum, skip validation
            # Removed image-name and tag - now handled by docker_image_name and docker_tag patterns
            # Numeric inputs with different ranges
            "timeout": "numeric_range_1_3600",  # Timeout should support higher values
            "retry-delay": "numeric_range_1_300",  # Retry delay should support higher values
            "max-warnings": "numeric_range_0_10000",
            # version-file-parser specific fields
            "language": None,  # Simple enum (node, php, python, go, dotnet)
            "tool-versions-key": None,  # Simple string (nodejs, python, php, golang, dotnet)
            "dockerfile-image": None,  # Simple string (node, python, php, golang, dotnet)
            "validation-regex": "regex_pattern",  # Regex pattern - validate for ReDoS
        }

    def get_action_directories(self) -> list[str]:
        """Return the names of all action directories, sorted alphabetically.

        A directory qualifies when it is not hidden, is not ``validate-inputs``
        itself and contains an ``action.yml``. Sorting makes the processing
        order (and therefore the log output) independent of the file system.
        """
        return sorted(
            item.name
            for item in self.actions_dir.iterdir()
            if item.is_dir()
            and not item.name.startswith(".")
            and item.name != "validate-inputs"
            and (item / "action.yml").exists()
        )

    def parse_action_file(self, action_name: str) -> dict[str, Any] | None:
        """Parse an action's ``action.yml`` and extract its metadata.

        Args:
            action_name: Directory name of the action below ``self.actions_dir``

        Returns:
            A dict with ``name``, ``description`` and ``inputs`` (always a
            dict, possibly empty), or None when the file cannot be read or
            does not have the expected structure. Failures are printed.
        """
        action_file = self.actions_dir / action_name / "action.yml"
        try:
            with action_file.open(encoding="utf-8") as f:
                action_data = yaml.safe_load(f.read())
        except Exception as error:  # noqa: BLE001 - best-effort boundary: report and skip
            print(f"Failed to parse {action_file}: {error}")
            return None

        # An empty file loads as None and a scalar/list document is not an
        # action definition either; report both instead of failing on .get().
        if not isinstance(action_data, dict):
            print(f"Failed to parse {action_file}: top-level YAML value is not a mapping")
            return None

        # "inputs:" with no value loads as None - treat it as "no inputs".
        inputs = action_data.get("inputs") or {}
        if not isinstance(inputs, dict):
            print(f"Failed to parse {action_file}: 'inputs' is not a mapping")
            return None

        return {
            "name": action_data.get("name", action_name),
            "description": action_data.get("description", ""),
            "inputs": inputs,
        }

    def detect_validation_type(self, input_name: str, input_data: dict[str, Any]) -> str | None:
        """Detect the validator for an input from its name and description.

        Args:
            input_name: Name of the input as declared in ``action.yml``
            input_data: The input's definition; None or a missing/null
                ``description`` is treated as an empty description

        Returns:
            The validator name, or None when the input should not be validated
            (explicitly skipped special case, or nothing matched).
        """
        description = (input_data or {}).get("description") or ""
        if not isinstance(description, str):
            # YAML may yield numbers/booleans here; patterns need text.
            description = str(description)

        # Check special cases first - highest priority
        if input_name in self.special_cases:
            return self.special_cases[input_name]

        # Special handling for version fields that might be CalVer.
        # NOTE(review): "version" is listed in the default special_cases table,
        # so this branch only runs if that entry is removed at runtime.
        if input_name == "version":
            lowered = description.lower()
            if any(word in lowered for word in ("calendar", "date", "monthly", "release")):
                return "calver_version"

        # Apply convention patterns in order (more specific first).
        # A match on the input name has the highest confidence.
        for validator, pattern in self.conventions.items():
            if pattern.search(input_name):
                return validator

        # No name match - fall back to the description (lower confidence).
        for validator, pattern in self.conventions.items():
            if pattern.search(description):
                return validator

        return None  # No validation detected

    def sort_object_by_keys(self, obj: dict[str, Any]) -> dict[str, Any]:
        """Return a copy of ``obj`` with keys in alphabetical order."""
        return {key: obj[key] for key in sorted(obj)}

    def generate_rules_for_action(self, action_name: str) -> dict[str, Any] | None:
        """Generate the validation rules document for a single action.

        Args:
            action_name: Directory name of the action

        Returns:
            The rules mapping (as written to ``rules.yml``), or None when the
            action file could not be parsed.
        """
        action_data = self.parse_action_file(action_name)
        if not action_data:
            return None

        inputs: dict[str, Any] = action_data.get("inputs") or {}
        required_inputs: list[str] = []
        optional_inputs: list[str] = []
        conventions: dict[str, str | None] = {}
        overrides: dict[str, str | None] = {}

        # Process each input
        for input_name, raw_input in inputs.items():
            # An input declared without a body ("name:") loads as None.
            input_data = raw_input if isinstance(raw_input, dict) else {}

            if input_data.get("required") in (True, "true"):
                required_inputs.append(input_name)
            else:
                optional_inputs.append(input_name)

            # Detect validation type
            validation_type = self.detect_validation_type(input_name, input_data)
            if validation_type:
                conventions[input_name] = validation_type

        # Apply action-specific overrides. An input that already has a detected
        # convention keeps it and the correction is recorded under "overrides";
        # an input without one gets the override value as its convention.
        for input_name, override_value in self._ACTION_OVERRIDES.get(action_name, {}).items():
            if input_name in conventions:
                overrides[input_name] = override_value
            elif input_name in inputs:
                conventions[input_name] = override_value

        # Calculate statistics
        total_inputs = len(inputs)
        validated_inputs = len(conventions)
        skipped_inputs = sum(1 for v in overrides.values() if v is None)
        coverage = round((validated_inputs / total_inputs) * 100) if total_inputs > 0 else 0

        # Generate rules object with enhanced metadata
        return {
            "schema_version": "1.0",
            "action": action_name,
            "description": action_data.get("description", ""),
            "generator_version": "1.0.0",
            "required_inputs": sorted(required_inputs),
            "optional_inputs": sorted(optional_inputs),
            "conventions": self.sort_object_by_keys(conventions),
            "overrides": self.sort_object_by_keys(overrides),
            "statistics": {
                "total_inputs": total_inputs,
                "validated_inputs": validated_inputs,
                "skipped_inputs": skipped_inputs,
                "coverage_percentage": coverage,
            },
            "validation_coverage": coverage,
            "auto_detected": True,
            "manual_review_required": coverage < 80 or validated_inputs == 0,
            "quality_indicators": {
                "has_required_inputs": len(required_inputs) > 0,
                "has_token_validation": "token" in conventions or "github-token" in conventions,
                "has_version_validation": any("version" in v for v in conventions.values() if v),
                "has_file_validation": any(v == "file_path" for v in conventions.values()),
                "has_security_validation": any(
                    v in ["github_token", "security_patterns"] for v in conventions.values()
                ),
            },
        }

    def write_rules_file(self, action_name: str, rules: dict[str, Any]) -> None:
        """Write (or, in dry-run mode, print) the rules file for an action.

        Args:
            action_name: Directory name of the action; the file is written to
                ``<actions_dir>/<action_name>/rules.yml``
            rules: Rules mapping as produced by ``generate_rules_for_action``
        """
        rules_file = self.actions_dir / action_name / "rules.yml"
        generator_version = rules.get("generator_version", "unknown")
        schema_version = rules.get("schema_version", "unknown")
        validation_coverage = rules.get("validation_coverage", 0)
        validated_inputs = rules["statistics"].get("validated_inputs", 0)
        total_inputs = rules["statistics"].get("total_inputs", 0)

        header = f"""---
# Validation rules for {action_name} action
# Generated by update-validators.py v{generator_version} - DO NOT EDIT MANUALLY
# Schema version: {schema_version}
# Coverage: {validation_coverage}% ({validated_inputs}/{total_inputs} inputs)
#
# This file defines validation rules for the {action_name} GitHub Action.
# Rules are automatically applied by validate-inputs action when this
# action is used.
#
"""

        # Use a custom yaml dumper to ensure proper indentation.
        # NOTE(review): both arguments are forwarded unchanged, so confirm
        # whether forcing indentless=False was intended here.
        class CustomYamlDumper(yaml.SafeDumper):
            def increase_indent(self, flow: bool = False, *, indentless: bool = False) -> None:  # noqa: FBT001, FBT002
                return super().increase_indent(flow, indentless=indentless)

        yaml_content = yaml.dump(
            rules,
            Dumper=CustomYamlDumper,
            indent=2,
            width=120,
            default_flow_style=False,
            allow_unicode=True,
            sort_keys=False,  # keep the deliberate key order of the rules mapping
        )
        content = header + yaml_content

        if self.dry_run:
            print(f"[DRY RUN] Would write {rules_file}:")
            print(content)
            print("---")
        else:
            with rules_file.open("w", encoding="utf-8") as f:
                f.write(content)
            print(f"✅ Generated {rules_file}")

    def generate_rules(self) -> None:
        """Generate rules for all actions, or only for ``self.specific_action``.

        Exits the process with status 1 when a specific action was requested
        but does not exist. Per-action failures are reported and counted but
        do not abort the run.
        """
        print("🔍 Scanning for GitHub Actions...")
        actions = self.get_action_directories()
        filtered_actions = actions

        if self.specific_action:
            filtered_actions = [name for name in actions if name == self.specific_action]
            if not filtered_actions:
                print(f"❌ Action '{self.specific_action}' not found")
                sys.exit(1)

        print(f"📝 Found {len(actions)} actions, processing {len(filtered_actions)}:")
        for name in filtered_actions:
            print(f" - {name}")
        print()

        processed = 0
        failed = 0
        for action_name in filtered_actions:
            try:
                rules = self.generate_rules_for_action(action_name)
                if rules:
                    self.write_rules_file(action_name, rules)
                    processed += 1
                else:
                    print(f"⚠️ Failed to generate rules for {action_name}")
                    failed += 1
            except Exception as error:  # noqa: BLE001 - one bad action must not stop the batch
                print(f"❌ Error processing {action_name}: {error}")
                failed += 1

        print()
        print("📊 Summary:")
        print(f" - Processed: {processed}")
        print(f" - Failed: {failed}")
        attempted = processed + failed
        coverage = round((processed / attempted) * 100) if attempted > 0 else 0
        print(f" - Coverage: {coverage}%")

        if not self.dry_run and processed > 0:
            print()
            print(
                "✨ Validation rules updated! Run 'git diff */rules.yml' to review changes.",
            )

    def validate_rules_files(self) -> bool:
        """Check that every existing ``rules.yml`` has the mandatory fields.

        Returns:
            True when all rules files load as a YAML mapping containing
            ``action``, ``required_inputs``, ``optional_inputs`` and
            ``conventions``; False otherwise. Problems are printed with the
            owning action directory so they can be located.
        """
        print("🔍 Validating existing rules files...")

        # Find all rules.yml files in action directories (sorted for stable output)
        rules_files = sorted(
            action_dir / "rules.yml"
            for action_dir in self.actions_dir.iterdir()
            if action_dir.is_dir()
            and not action_dir.name.startswith(".")
            and (action_dir / "rules.yml").exists()
        )

        required = ("action", "required_inputs", "optional_inputs", "conventions")
        valid = 0
        invalid = 0
        for rules_file in rules_files:
            # Every file is called rules.yml, so include the action directory.
            label = f"{rules_file.parent.name}/{rules_file.name}"
            try:
                with rules_file.open(encoding="utf-8") as f:
                    rules = yaml.safe_load(f.read())
            except Exception as error:  # noqa: BLE001 - report and keep validating the rest
                print(f"{label}: {error}")
                invalid += 1
                continue

            # Empty files load as None; scalars would make the membership
            # test below a substring check. Neither is a valid rules file.
            if not isinstance(rules, dict):
                print(f"⚠️ {label}: Expected a YAML mapping at the top level")
                invalid += 1
                continue

            missing = [field for field in required if field not in rules]
            if missing:
                print(f"⚠️ {label}: Missing fields: {', '.join(missing)}")
                invalid += 1
            else:
                valid += 1

        print(f"✅ Validation complete: {valid} valid, {invalid} invalid")
        return invalid == 0
def main() -> None:
    """Parse the command line and run either generation or validation.

    With ``--validate`` the existing rules files are checked and the process
    exits with status 0 on success or 1 on failure. Otherwise rules are
    generated, honouring ``--dry-run`` and ``--action``.
    """
    cli = argparse.ArgumentParser(
        description="Automatically generates validation rules for GitHub Actions",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python update-validators.py --dry-run
python update-validators.py --action csharp-publish
python update-validators.py --validate
""",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be generated without writing files",
    )
    cli.add_argument("--action", metavar="NAME", help="Generate rules for specific action only")
    cli.add_argument("--validate", action="store_true", help="Validate existing rules files")
    options = cli.parse_args()

    rule_generator = ValidationRuleGenerator(
        dry_run=options.dry_run,
        specific_action=options.action,
    )

    # Default mode: generate rules and return normally.
    if not options.validate:
        rule_generator.generate_rules()
        return

    # Validation mode: the exit status reflects the validation result.
    all_valid = rule_generator.validate_rules_files()
    sys.exit(0 if all_valid else 1)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()