Files
Ismo Vuorinen 7061aafd35 chore: add tests, update docs and actions (#299)
* docs: update documentation

* feat: validate-inputs has its own pyproject

* security: mask DOCKERHUB_PASSWORD

* chore: add tokens, checkout, recreate docs, integration tests

* fix: add `statuses: write` permission to pr-lint
2025-10-18 13:09:19 +03:00

361 lines
11 KiB
Python

"""File and path validators."""
from __future__ import annotations
import re
from pathlib import Path
from .base import BaseValidator
class FileValidator(BaseValidator):
"""Validator for file paths, extensions, and related inputs."""
def validate_inputs(self, inputs: dict[str, str]) -> bool:
    """Validate file-related inputs.

    Each input is dispatched on substrings of its name. "extension" is
    tested first because names such as "file-extensions" also contain
    "file" and would otherwise be checked as a file path (and rejected for
    containing commas). Inputs whose name matches none of the keywords are
    ignored. Every matching input is checked; the loop does not stop at the
    first failure.

    Args:
        inputs: Mapping of input name to raw input value

    Returns:
        True if every recognised input is valid, False otherwise
    """
    valid = True
    for input_name, value in inputs.items():
        if "extension" in input_name:
            checker = self.validate_file_extensions
        elif "file" in input_name or "path" in input_name or "directory" in input_name:
            checker = self.validate_file_path
        elif "branch" in input_name:
            checker = self.validate_branch_name
        else:
            continue
        # Always pass the input name so error messages identify the real input
        if not checker(value, input_name):
            valid = False
    return valid
def get_required_inputs(self) -> list[str]:
    """Return the names of the inputs this validator requires.

    Returns:
        An empty list; this validator declares no required inputs
    """
    no_required_inputs: list[str] = []
    return no_required_inputs
def get_validation_rules(self) -> dict:
    """Describe the validation rules applied by this validator.

    Returns:
        Mapping of rule identifier to a short human-readable description
    """
    rules = {}
    rules["file_path"] = "Relative paths only, no path traversal"
    rules["branch_name"] = "Valid git branch name"
    rules["file_extensions"] = "Comma-separated list starting with dots"
    return rules
def validate_path(self, path: str, name: str = "path") -> bool:
    """Validate a general file path that must exist on disk.

    Empty or whitespace-only values and GitHub Actions expressions are
    accepted without touching the filesystem. Any other value is resolved
    strictly; the resolved absolute path is then handed to the base class
    security validation.

    Args:
        path: The file path to validate
        name: The input name for error messages

    Returns:
        True if valid, False otherwise
    """
    if not path or not path.strip():
        return True  # Path is often optional

    # Allow GitHub Actions expressions
    if self.is_github_expression(path):
        return True

    try:
        resolved = Path(path).resolve(strict=True)
    except FileNotFoundError:
        self.add_error(f'Invalid {name}: "{path}". Path does not exist')
        return False
    except (OSError, RuntimeError, ValueError):
        # Strict resolution can also fail with NotADirectoryError or
        # PermissionError (OSError), a symlink loop (RuntimeError on older
        # Pythons) or an embedded NUL byte (ValueError). Report these as a
        # validation error instead of letting the exception escape.
        self.add_error(f'Invalid {name}: "{path}". Path could not be resolved')
        return False

    # resolve() already returns an absolute path
    return self.validate_path_security(str(resolved), name)
def validate_file_path(self, path: str, name: str = "path") -> bool:
    """Check a file path against the security rules and allowed characters.

    Blank values and GitHub Actions expressions are accepted as-is. Other
    values must pass the base class path security validation and consist
    only of letters, digits, dot, underscore, slash, hyphen and whitespace.

    Args:
        path: The file path to validate
        name: The input name for error messages

    Returns:
        True if valid, False otherwise
    """
    # A missing path is usually an optional input; expressions are allowed
    is_blank = not path or not path.strip()
    if is_blank or self.is_github_expression(path):
        return True

    # Base class security validation comes before the character check
    if not self.validate_path_security(path, name):
        return False

    if re.match(r"^[a-zA-Z0-9._/\-\s]+$", path):
        return True

    self.add_error(f'Invalid {name}: "{path}". Contains invalid characters')
    return False
def validate_branch_name(self, branch: str, name: str = "branch") -> bool:
    """Validate git branch name.

    Blank values are accepted. Otherwise the name is rejected if it contains
    a shell injection pattern, a character sequence git forbids in refs
    ("..", "~", "^", ":"), any character outside alphanumerics, slash,
    underscore, dot and hyphen, a forbidden leading/trailing character, or
    consecutive slashes.

    Args:
        branch: The branch name to validate
        name: The input name for error messages

    Returns:
        True if valid, False otherwise
    """
    if not branch or not branch.strip():
        return True  # Branch name is often optional

    # Check for command injection (order decides which pattern is reported)
    for pattern in (";", "&&", "||", "|", "`", "$("):
        if pattern in branch:
            self.add_error(
                f'Invalid {name}: "{branch}". '
                f'Command injection pattern "{pattern}" not allowed',
            )
            return False

    # Check for invalid git characters
    if any(token in branch for token in ("..", "~", "^", ":")):
        self.add_error(
            f'Invalid {name}: "{branch}". Contains invalid git branch characters',
        )
        return False

    # Check for valid characters. fullmatch is required here: with
    # re.match and "$" a trailing newline would slip through ("main\n").
    if not re.fullmatch(r"[a-zA-Z0-9/_.\-]+", branch):
        self.add_error(
            f'Invalid {name}: "{branch}". '
            "Must contain only alphanumeric, slash, underscore, dot, and hyphen",
        )
        return False

    # Check for invalid start/end characters
    if branch.startswith((".", "-", "/")) or branch.endswith((".", "/")):
        self.add_error(
            f'Invalid {name}: "{branch}". Cannot start/end with ".", "-", or "/"',
        )
        return False

    # Check for consecutive slashes
    if "//" in branch:
        self.add_error(f'Invalid {name}: "{branch}". Cannot contain consecutive slashes')
        return False

    return True
def validate_file_extensions(self, value: str, name: str = "file-extensions") -> bool:
    """Check a comma-separated list of file extensions.

    Entries are stripped of surrounding whitespace and empty entries are
    ignored. Validation stops at the first invalid entry.

    Args:
        value: Comma-separated list of file extensions
        name: The input name for error messages

    Returns:
        True if valid, False otherwise
    """
    if not value or not value.strip():
        return True  # File extensions are optional

    for raw_entry in value.split(","):
        ext = raw_entry.strip()
        if ext == "":
            continue  # Skip empty entries

        # A leading dot is mandatory and gets its own error message
        if ext[0] != ".":
            self.add_error(
                f'Invalid file extension: "{ext}" in {name}. Extensions must start with a dot',
            )
            return False

        # Exactly one dot followed by alphanumerics
        if re.match(r"^\.[a-zA-Z0-9]+$", ext) is None:
            self.add_error(
                f'Invalid file extension format: "{ext}" in {name}. '
                "Must be dot followed by alphanumeric characters",
            )
            return False

        # Finally run the shared security pattern check
        if not self.validate_security_patterns(ext, f"{name} extension"):
            return False

    return True
def validate_yaml_file(self, path: str, name: str = "yaml-file") -> bool:
    """Check that a path is a valid file path ending in .yml or .yaml.

    GitHub Actions expressions are accepted without further checks.

    Args:
        path: The YAML file path to validate
        name: The input name for error messages

    Returns:
        True if valid, False otherwise
    """
    # Expressions are allowed as-is
    if self.is_github_expression(path):
        return True

    if not self.validate_file_path(path, name):
        return False

    # The suffix is only enforced when a path was actually supplied
    if not path or path.endswith((".yml", ".yaml")):
        return True

    self.add_error(f'Invalid {name}: "{path}". Must be a .yml or .yaml file')
    return False
def validate_json_file(self, path: str, name: str = "json-file") -> bool:
    """Validate JSON file path.

    GitHub Actions expressions are accepted without further checks, the same
    way validate_yaml_file treats them; otherwise an expression would pass
    the file path validation and then be rejected for lacking a .json suffix.

    Args:
        path: The JSON file path to validate
        name: The input name for error messages

    Returns:
        True if valid, False otherwise
    """
    # Allow GitHub Actions expressions
    if path and self.is_github_expression(path):
        return True

    if not self.validate_file_path(path, name):
        return False

    # The suffix is only enforced when a path was actually supplied
    if path and not path.endswith(".json"):
        self.add_error(f'Invalid {name}: "{path}". Must be a .json file')
        return False

    return True
def validate_config_file(self, path: str, name: str = "config-file") -> bool:
    """Validate configuration file path.

    GitHub Actions expressions are accepted without further checks, the same
    way validate_yaml_file treats them; otherwise an expression would pass
    the file path validation and then be rejected by the extension check.

    Args:
        path: The config file path to validate
        name: The input name for error messages

    Returns:
        True if valid, False otherwise
    """
    # Allow GitHub Actions expressions
    if path and self.is_github_expression(path):
        return True

    if not self.validate_file_path(path, name):
        return False

    # Config files typically have specific extensions. A tuple lets
    # str.endswith test all of them in one call; the order is kept because
    # it is shown verbatim in the error message.
    valid_extensions = (
        ".yml",
        ".yaml",
        ".json",
        ".toml",
        ".ini",
        ".conf",
        ".config",
        ".cfg",
        ".xml",
    )
    if path and not path.endswith(valid_extensions):
        self.add_error(
            f'Invalid {name}: "{path}". '
            f"Expected config file extension: {', '.join(valid_extensions)}",
        )
        return False

    return True
def validate_dockerfile_path(self, path: str, name: str = "dockerfile") -> bool:
    """Validate Dockerfile path.

    Blank values and GitHub Actions expressions are accepted. Without the
    expression check an expression would only pass when its text happened to
    contain "dockerfile". Any other value must be a valid file path whose
    final component contains "dockerfile" (case-insensitive), e.g.
    Dockerfile, Dockerfile.prod or docker/Dockerfile.

    Args:
        path: The Dockerfile path to validate
        name: The input name for error messages

    Returns:
        True if valid, False otherwise
    """
    if not path or not path.strip():
        return True  # Dockerfile path is often optional

    # Allow GitHub Actions expressions
    if self.is_github_expression(path):
        return True

    # First validate general file path security
    if not self.validate_file_path(path, name):
        return False

    # Must contain 'dockerfile' in the basename
    basename = Path(path).name.lower()
    if "dockerfile" not in basename:
        self.add_error(
            f"Invalid {name}: \"{path}\". File name must contain 'Dockerfile' or 'dockerfile'",
        )
        return False

    return True
def validate_executable_file(self, path: str, name: str = "executable") -> bool:
    """Check an executable path and reject obviously dangerous program names.

    Blank values are accepted. Other values must pass the general file path
    validation; the lowercased file name with its last extension removed
    must not be one of the blocked names.

    Args:
        path: The executable file path to validate
        name: The input name for error messages

    Returns:
        True if valid, False otherwise
    """
    if not path or not path.strip():
        return True  # Executable path is often optional

    # General file path security comes first
    if not self.validate_file_path(path, name):
        return False

    blocked_names = (
        "cmd",
        "powershell",
        "bash",
        "sh",
        "rm",
        "del",
        "format",
        "fdisk",
        "shutdown",
        "reboot",
    )

    # Compare case-insensitively and ignore the last extension (bash.exe -> bash)
    lowered_name = Path(path).name.lower()
    name_without_ext = Path(lowered_name).stem
    if name_without_ext not in blocked_names:
        return True

    self.add_error(
        f'Invalid {name}: "{path}". '
        f"Potentially dangerous executable name: {name_without_ext}",
    )
    return False
def validate_required_file(self, path: str, name: str = "file") -> bool:
    """Check a mandatory file path; blank values are an error.

    Args:
        path: The file path to validate
        name: The input name for error messages

    Returns:
        True if valid, False otherwise
    """
    has_value = bool(path) and path.strip() != ""
    if has_value:
        # Non-empty values go through the regular file path validation
        return self.validate_file_path(path, name)

    self.add_error(f"Required {name} path cannot be empty")
    return False