feat: add advanced architecture, documentation, and coverage improvements (#65)

* fix(style): resolve PHPCS line-length warnings in source files

* fix(style): resolve PHPCS line-length warnings in test files

* feat(audit): add structured audit logging with ErrorContext and AuditContext

- ErrorContext: standardized error information with sensitive data sanitization
- AuditContext: structured context for audit entries with operation types
- StructuredAuditLogger: enhanced audit logger wrapper with timing support

* feat(recovery): add recovery mechanism for failed masking operations

- FailureMode enum: FAIL_OPEN, FAIL_CLOSED, FAIL_SAFE modes
- RecoveryStrategy interface and RecoveryResult value object
- RetryStrategy: exponential backoff with configurable attempts
- FallbackMaskStrategy: type-aware fallback values

* feat(strategies): add CallbackMaskingStrategy for custom masking logic

- Wraps custom callbacks as MaskingStrategy implementations
- Factory methods: constant(), hash(), partial() for common use cases
- Supports exact match and prefix match for field paths

* docs: add framework integration guides and examples

- symfony-integration.md: Symfony service configuration and Monolog setup
- psr3-decorator.md: PSR-3 logger decorator pattern implementation
- framework-examples.md: CakePHP, CodeIgniter 4, Laminas, Yii2, PSR-15
- docker-development.md: Docker development environment guide

* chore(docker): add Docker development environment

- Dockerfile: PHP 8.2-cli-alpine with Xdebug for coverage
- docker-compose.yml: development services with volume mounts

* feat(demo): add interactive GDPR pattern tester playground

- PatternTester.php: pattern testing utility with strategy support
- index.php: web API endpoint with JSON response handling
- playground.html: interactive web interface for testing patterns

* docs(todo): update with completed medium priority items

- Mark all PHPCS warnings as fixed (81 → 0)
- Document new Audit and Recovery features
- Update test count to 1,068 tests with 2,953 assertions
- Move remaining items to low priority

* feat: add advanced architecture, documentation, and coverage improvements

- Add architecture improvements:
  - ArrayAccessorInterface and DotArrayAccessor for decoupled array access
  - MaskingOrchestrator for single-responsibility masking coordination
  - GdprProcessorBuilder for fluent configuration
  - MaskingPluginInterface and AbstractMaskingPlugin for plugin architecture
  - PluginAwareProcessor for plugin hook execution
  - AuditLoggerFactory for instance-based audit logger creation

- Add advanced features:
  - SerializedDataProcessor for handling print_r/var_export/serialize output
  - KAnonymizer with GeneralizationStrategy for GDPR k-anonymity
  - RetentionPolicy for configurable data retention periods
  - StreamingProcessor for memory-efficient large log processing

- Add comprehensive documentation:
  - docs/performance-tuning.md - benchmarking, optimization, caching
  - docs/troubleshooting.md - common issues and solutions
  - docs/logging-integrations.md - ELK, Graylog, Datadog, etc.
  - docs/plugin-development.md - complete plugin development guide

- Improve test coverage (84.41% → 85.07%):
  - ConditionalRuleFactoryInstanceTest (100% coverage)
  - GdprProcessorBuilderEdgeCasesTest (100% coverage)
  - StrategyEdgeCasesTest for ReDoS detection and type parsing
  - 78 new tests, 119 new assertions

- Update TODO.md with current statistics:
  - 141 PHP files, 1,346 tests, 85.07% line coverage

* chore: tests, update actions, sonarcloud issues

* chore: rector

* fix: more sonarcloud fixes

* chore: more fixes

* refactor: copilot review fix

* chore: rector
This commit is contained in:
2025-12-22 13:38:18 +02:00
committed by GitHub
parent b1eb567b92
commit 8866daaf33
112 changed files with 15391 additions and 607 deletions

View File

@@ -0,0 +1,48 @@
<?php
declare(strict_types=1);
namespace Ivuorinen\MonologGdprFilter\Anonymization;
/**
 * Represents a generalization strategy for k-anonymity.
 *
 * Pairs a generalizer callback, which turns a raw value into its generalized
 * string form, with a type label identifying the kind of strategy.
 *
 * @api
 */
final class GeneralizationStrategy
{
    /**
     * The generalizer, normalized to a Closure so the property can be typed
     * and made readonly (PHP does not allow `callable` as a property type).
     *
     * @var \Closure(mixed):string
     */
    private readonly \Closure $generalizer;

    /**
     * @param callable(mixed):string $generalizer Function that generalizes a value
     * @param string $type Type identifier for the strategy
     */
    public function __construct(
        callable $generalizer,
        private readonly string $type = 'custom'
    ) {
        $this->generalizer = \Closure::fromCallable($generalizer);
    }

    /**
     * Apply the generalization to a value.
     *
     * @param mixed $value The value to generalize
     * @return string The generalized value
     */
    public function generalize(mixed $value): string
    {
        return ($this->generalizer)($value);
    }

    /**
     * Get the strategy type.
     *
     * @return string The type identifier given at construction ('custom' by default)
     */
    public function getType(): string
    {
        return $this->type;
    }
}

View File

@@ -0,0 +1,212 @@
<?php
declare(strict_types=1);
namespace Ivuorinen\MonologGdprFilter\Anonymization;
/**
 * K-Anonymity implementation for GDPR compliance.
 *
 * K-anonymity is a privacy model ensuring that each record in a dataset
 * is indistinguishable from at least k-1 other records with respect to
 * certain identifying attributes (quasi-identifiers).
 *
 * This class holds one generalization strategy per field name and applies
 * them to records; it does not itself measure or enforce a particular k.
 *
 * Common use cases:
 * - Age generalization (25 -> "20-29")
 * - Location generalization (specific address -> region)
 * - Date generalization (specific date -> month/year)
 *
 * @api
 */
final class KAnonymizer
{
    /**
     * Generalization strategies keyed by the record field they apply to.
     *
     * @var array<string,GeneralizationStrategy>
     */
    private array $strategies = [];

    /**
     * Optional callback invoked as (path, original, generalized) for every changed value.
     *
     * @var callable(string,mixed,mixed):void|null
     */
    private $auditLogger;

    /**
     * @param callable(string,mixed,mixed):void|null $auditLogger Optional audit logger
     */
    public function __construct(?callable $auditLogger = null)
    {
        $this->auditLogger = $auditLogger;
    }

    /**
     * Register a generalization strategy for a field.
     *
     * A strategy already registered for the same field is replaced.
     */
    public function registerStrategy(string $field, GeneralizationStrategy $strategy): self
    {
        $this->strategies[$field] = $strategy;

        return $this;
    }

    /**
     * Register an age generalization strategy.
     *
     * @param int $rangeSize Size of age ranges (e.g., 10 for 20-29, 30-39)
     *
     * @throws \InvalidArgumentException When $rangeSize is smaller than 1
     */
    public function registerAgeStrategy(string $field, int $rangeSize = 10): self
    {
        // Build (and validate) the generalizer first so an invalid size fails at registration.
        $generalizer = self::createRangeGeneralizer($rangeSize);

        return $this->registerStrategy($field, new GeneralizationStrategy($generalizer, 'age'));
    }

    /**
     * Register a date generalization strategy.
     *
     * Values that are not already a DateTimeInterface are cast to string and
     * parsed with DateTimeImmutable when the strategy runs, so an unparsable
     * value makes generalization throw at that point.
     *
     * @param string $precision 'year', 'month', 'quarter'; any other value is treated as 'month'
     */
    public function registerDateStrategy(string $field, string $precision = 'month'): self
    {
        $generalizer = static function (mixed $value) use ($precision): string {
            if (!$value instanceof \DateTimeInterface) {
                $value = new \DateTimeImmutable((string) $value);
            }

            return match ($precision) {
                'year' => $value->format('Y'),
                // Month number 1-12 maps to quarter 1-4.
                'quarter' => $value->format('Y') . '-Q' . (int) ceil((int) $value->format('n') / 3),
                default => $value->format('Y-m'),
            };
        };

        return $this->registerStrategy($field, new GeneralizationStrategy($generalizer, 'date'));
    }

    /**
     * Register a location/ZIP code generalization strategy.
     *
     * Keeps the first $prefixLength characters and replaces each remaining
     * character with '*'. Values no longer than the prefix are returned as-is.
     *
     * @param int $prefixLength Number of characters to keep
     *
     * @throws \InvalidArgumentException When $prefixLength is negative
     */
    public function registerLocationStrategy(string $field, int $prefixLength = 3): self
    {
        if ($prefixLength < 0) {
            throw new \InvalidArgumentException(
                "Prefix length must not be negative, {$prefixLength} given."
            );
        }

        $generalizer = static function (mixed $value) use ($prefixLength): string {
            $text = (string) $value;

            // Split on UTF-8 characters so a multibyte character is never cut in half.
            // preg_split() returns false for invalid UTF-8; fall back to single bytes then.
            $characters = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
            if ($characters === false) {
                $characters = str_split($text);
            }

            $length = count($characters);
            if ($length <= $prefixLength) {
                return $text;
            }

            return implode('', array_slice($characters, 0, $prefixLength))
                . str_repeat('*', $length - $prefixLength);
        };

        return $this->registerStrategy($field, new GeneralizationStrategy($generalizer, 'location'));
    }

    /**
     * Register a numeric range generalization strategy.
     *
     * @param int $rangeSize Size of numeric ranges
     *
     * @throws \InvalidArgumentException When $rangeSize is smaller than 1
     */
    public function registerNumericRangeStrategy(string $field, int $rangeSize = 10): self
    {
        // Build (and validate) the generalizer first so an invalid size fails at registration.
        $generalizer = self::createRangeGeneralizer($rangeSize);

        return $this->registerStrategy($field, new GeneralizationStrategy($generalizer, 'numeric_range'));
    }

    /**
     * Register a custom generalization strategy.
     *
     * @param callable(mixed):string $generalizer
     */
    public function registerCustomStrategy(string $field, callable $generalizer): self
    {
        return $this->registerStrategy($field, new GeneralizationStrategy($generalizer, 'custom'));
    }

    /**
     * Anonymize a single record.
     *
     * Only fields that are present and not null are generalized. The audit
     * logger, when set, is called for each field whose value actually changed.
     *
     * @param array<string,mixed> $record The record to anonymize
     * @return array<string,mixed> The anonymized record
     */
    public function anonymize(array $record): array
    {
        foreach ($this->strategies as $field => $strategy) {
            // isset() is false for both a missing key and a null value.
            if (!isset($record[$field])) {
                continue;
            }

            $original = $record[$field];
            $record[$field] = $strategy->generalize($original);

            if ($this->auditLogger !== null && $record[$field] !== $original) {
                ($this->auditLogger)(
                    "k-anonymity.{$field}",
                    $original,
                    $record[$field]
                );
            }
        }

        return $record;
    }

    /**
     * Anonymize a batch of records.
     *
     * @param list<array<string,mixed>> $records
     * @return list<array<string,mixed>>
     */
    public function anonymizeBatch(array $records): array
    {
        return array_map($this->anonymize(...), $records);
    }

    /**
     * Get registered strategies.
     *
     * @return array<string,GeneralizationStrategy>
     */
    public function getStrategies(): array
    {
        return $this->strategies;
    }

    /**
     * Set the audit logger.
     *
     * @param callable(string,mixed,mixed):void|null $auditLogger Pass null to disable audit logging
     */
    public function setAuditLogger(?callable $auditLogger): void
    {
        $this->auditLogger = $auditLogger;
    }

    /**
     * Create a pre-configured anonymizer for common GDPR scenarios.
     *
     * Registers: 'age' (ranges of 10), 'birth_date' (year), 'created_at' (month),
     * and 'zip_code' / 'postal_code' (first 3 characters kept).
     */
    public static function createGdprDefault(?callable $auditLogger = null): self
    {
        return (new self($auditLogger))
            ->registerAgeStrategy('age')
            ->registerDateStrategy('birth_date', 'year')
            ->registerDateStrategy('created_at', 'month')
            ->registerLocationStrategy('zip_code', 3)
            ->registerLocationStrategy('postal_code', 3);
    }

    /**
     * Build the closure shared by the age and numeric-range strategies.
     *
     * The closure casts the value to int and returns "lower-upper", where
     * lower is the value floored to a multiple of $rangeSize.
     *
     * @return \Closure(mixed):string
     *
     * @throws \InvalidArgumentException When $rangeSize is smaller than 1
     */
    private static function createRangeGeneralizer(int $rangeSize): \Closure
    {
        // Zero would divide by zero at anonymize time; negatives yield inverted ranges.
        if ($rangeSize < 1) {
            throw new \InvalidArgumentException(
                "Range size must be a positive integer, {$rangeSize} given."
            );
        }

        return static function (mixed $value) use ($rangeSize): string {
            $number = (int) $value;
            $lowerBound = (int) floor($number / $rangeSize) * $rangeSize;
            $upperBound = $lowerBound + $rangeSize - 1;

            return "{$lowerBound}-{$upperBound}";
        };
    }
}