Skip to content

Commit

Permalink
misc: replace regex-based type parser with character-based one
Browse files Browse the repository at this point in the history
This commit introduces a complete rewrite of the first layer of the type
parser. The previous implementation used a regex to split a raw type into
tokens, which led to limitations — mostly concerning quoted strings — that
are now fixed.

Examples of previous limitations that are now solved:

```php
// Union of strings containing space chars
(new MapperBuilder())
    ->mapper()
    ->map(
        "'foo bar'|'baz fiz'",
        'baz fiz'
    );

// Shaped array with special chars in the key
(new MapperBuilder())
    ->mapper()
    ->map(
        "array{'some & key': string}",
        ['some & key' => 'value']
    );
```
  • Loading branch information
romm committed Aug 17, 2023
1 parent 1964d41 commit 075d917
Show file tree
Hide file tree
Showing 15 changed files with 726 additions and 500 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
use CuyZ\Valinor\Definition\PropertyDefinition;
use CuyZ\Valinor\Definition\Repository\AttributesRepository;
use CuyZ\Valinor\Definition\Repository\ClassDefinitionRepository;
use CuyZ\Valinor\Type\ClassType;
use CuyZ\Valinor\Type\GenericType;
use CuyZ\Valinor\Type\Parser\Exception\InvalidType;
use CuyZ\Valinor\Type\Parser\Factory\Specifications\AliasSpecification;
Expand All @@ -23,11 +24,11 @@
use CuyZ\Valinor\Type\Parser\Factory\TypeParserFactory;
use CuyZ\Valinor\Type\Parser\TypeParser;
use CuyZ\Valinor\Type\Type;
use CuyZ\Valinor\Type\ClassType;
use CuyZ\Valinor\Type\Types\UnresolvableType;
use CuyZ\Valinor\Utility\Reflection\Reflection;
use ReflectionMethod;
use ReflectionProperty;
use CuyZ\Valinor\Utility\Reflection\DocParser;

use function array_filter;
use function array_keys;
Expand Down Expand Up @@ -156,7 +157,7 @@ private function typeResolver(ClassType $type, string $targetClass): ReflectionT
private function localTypeAliases(ClassType $type): array
{
$reflection = Reflection::class($type->className());
$rawTypes = Reflection::localTypeAliases($reflection);
$rawTypes = DocParser::localTypeAliases($reflection);

$typeParser = $this->typeParser($type);

Expand All @@ -181,7 +182,7 @@ private function localTypeAliases(ClassType $type): array
private function importedTypeAliases(ClassType $type): array
{
$reflection = Reflection::class($type->className());
$importedTypesRaw = Reflection::importedTypeAliases($reflection);
$importedTypesRaw = DocParser::importedTypeAliases($reflection);

$typeParser = $this->typeParser($type);

Expand Down
28 changes: 21 additions & 7 deletions src/Definition/Repository/Reflection/ReflectionTypeResolver.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
use CuyZ\Valinor\Type\Type;
use CuyZ\Valinor\Type\Types\MixedType;
use CuyZ\Valinor\Type\Types\UnresolvableType;
use CuyZ\Valinor\Utility\Reflection\DocParser;
use CuyZ\Valinor\Utility\Reflection\Reflection;
use ReflectionFunctionAbstract;
use ReflectionParameter;
Expand All @@ -23,7 +24,7 @@ public function __construct(
private TypeParser $advancedParser
) {}

public function resolveType(\ReflectionProperty|\ReflectionParameter|\ReflectionFunctionAbstract $reflection): Type
public function resolveType(ReflectionProperty|ReflectionParameter|ReflectionFunctionAbstract $reflection): Type
{
$nativeType = $this->nativeType($reflection);
$typeFromDocBlock = $this->typeFromDocBlock($reflection);
Expand Down Expand Up @@ -51,11 +52,24 @@ public function resolveType(\ReflectionProperty|\ReflectionParameter|\Reflection
return $typeFromDocBlock;
}

private function typeFromDocBlock(\ReflectionProperty|\ReflectionParameter|\ReflectionFunctionAbstract $reflection): ?Type
private function typeFromDocBlock(ReflectionProperty|ReflectionParameter|ReflectionFunctionAbstract $reflection): ?Type
{
$type = $reflection instanceof ReflectionFunctionAbstract
? Reflection::docBlockReturnType($reflection)
: Reflection::docBlockType($reflection);
if ($reflection instanceof ReflectionFunctionAbstract) {
$type = DocParser::functionReturnType($reflection);
} elseif ($reflection instanceof ReflectionProperty) {
$type = DocParser::propertyType($reflection);
} else {
$type = null;

if ($reflection->isPromoted()) {
// @phpstan-ignore-next-line / parameter is promoted so class exists for sure
$type = DocParser::propertyType($reflection->getDeclaringClass()->getProperty($reflection->name));
}

if ($type === null) {
$type = DocParser::parameterType($reflection);
}
}

if ($type === null) {
return null;
Expand All @@ -64,7 +78,7 @@ private function typeFromDocBlock(\ReflectionProperty|\ReflectionParameter|\Refl
return $this->parseType($type, $reflection, $this->advancedParser);
}

private function nativeType(\ReflectionProperty|\ReflectionParameter|\ReflectionFunctionAbstract $reflection): ?Type
private function nativeType(ReflectionProperty|ReflectionParameter|ReflectionFunctionAbstract $reflection): ?Type
{
$reflectionType = $reflection instanceof ReflectionFunctionAbstract
? $reflection->getReturnType()
Expand All @@ -83,7 +97,7 @@ private function nativeType(\ReflectionProperty|\ReflectionParameter|\Reflection
return $this->parseType($type, $reflection, $this->nativeParser);
}

private function parseType(string $raw, \ReflectionProperty|\ReflectionParameter|\ReflectionFunctionAbstract $reflection, TypeParser $parser): Type
private function parseType(string $raw, ReflectionProperty|ReflectionParameter|ReflectionFunctionAbstract $reflection, TypeParser $parser): Type
{
try {
return $parser->parse($raw);
Expand Down
4 changes: 2 additions & 2 deletions src/Type/Parser/Factory/LexingTypeParserFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ private function nativeParser(): TypeParser
{
$lexer = new NativeLexer();
$lexer = new AdvancedClassLexer($lexer, $this, $this->templateParser);
$lexer = new LexingParser($lexer);
$parser = new LexingParser($lexer);

return new CachedParser($lexer);
return new CachedParser($parser);
}
}
3 changes: 2 additions & 1 deletion src/Type/Parser/Lexer/Token/AdvancedClassNameToken.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
use CuyZ\Valinor\Type\Types\ArrayKeyType;
use CuyZ\Valinor\Type\ClassType;
use CuyZ\Valinor\Type\Types\NativeClassType;
use CuyZ\Valinor\Utility\Reflection\DocParser;
use CuyZ\Valinor\Utility\Reflection\Reflection;
use ReflectionClass;

Expand Down Expand Up @@ -182,7 +183,7 @@ private function assignGenerics(string $className, array $templates, array $gene
*/
private function parentType(ReflectionClass $reflection, ReflectionClass $parentReflection, TypeParser $typeParser): NativeClassType
{
$extendedClass = Reflection::extendedClassAnnotation($reflection);
$extendedClass = DocParser::classExtendsTypes($reflection);

if (count($extendedClass) > 1) {
throw new SeveralExtendTagsFound($reflection);
Expand Down
99 changes: 68 additions & 31 deletions src/Type/Parser/LexingParser.php
Original file line number Diff line number Diff line change
@@ -1,26 +1,77 @@
<?php

declare(strict_types=1);

namespace CuyZ\Valinor\Type\Parser;

use CuyZ\Valinor\Type\Parser\Lexer\TokenStream;
use CuyZ\Valinor\Type\Parser\Lexer\TypeLexer;
use CuyZ\Valinor\Type\Type;

use function array_filter;
use function array_map;
use function preg_split;
use function str_contains;
use function array_pop;
use function array_splice;
use function in_array;
use function str_split;
use function str_starts_with;

/** @internal */
final class LexingParser implements TypeParser
class LexingParser implements TypeParser
{
public function __construct(private TypeLexer $lexer) {}

public function parse(string $raw): Type
{
$symbols = $this->splitTokens($raw);
$operators = [' ', '|', '&', '<', '>', '[', ']', '{', '}', ':', '?', ','];

$symbols = [];
$current = null;
$quote = null;

$chars = str_split($raw);

foreach ($chars as $key => $char) {
if ($quote !== null) {
if ($char === $quote) {
if ($current !== null) {
$symbols[] = $current;
$current = null;
}

$symbols[] = $char;
$quote = null;
} else {
$current .= $char;
}
} elseif ($char === '"' || $char === "'") {
if ($current !== null) {

Check warning on line 44 in src/Type/Parser/LexingParser.php

View workflow job for this annotation

GitHub Actions / Mutation tests

Escaped Mutant for Mutator "NotIdentical":

--- Original
+++ New
@@ @@
                 $current .= $char;
             }
         } elseif ($char === '"' || $char === "'") {
-            if ($current !== null) {
+            if ($current === null) {
                 $symbols[] = $current;
                 $current = null;
             }
$symbols[] = $current;
$current = null;
}

$quote = $char;
$symbols[] = $char;
} elseif (in_array($char, $operators, true)) {
if ($char === ':' && ($chars[$key - 1] ?? '') === ':') {
array_pop($symbols);
$symbols[] = '::';
continue;
}

if ($current !== null) {
$symbols[] = $current;
$current = null;
}

$symbols[] = $char;
} else {
$current .= $char;
}
}

if ($current !== null) {
$symbols[] = $current;
}

$symbols = $this->detectAnonymousClass($symbols);

$symbols = array_map('trim', $symbols);
$symbols = array_filter($symbols, static fn ($value) => $value !== '');

Expand All @@ -33,33 +84,19 @@ public function parse(string $raw): Type
}

/**
* @return string[]
* @param list<string> $symbols
* @return list<string>
*/
private function splitTokens(string $raw): array
private function detectAnonymousClass(array $symbols): array
{
if (str_contains($raw, "@anonymous\0")) {
return $this->splitTokensContainingAnonymousClass($raw);
}
foreach ($symbols as $key => $symbol) {
if (! str_starts_with($symbol, "class@anonymous\0")) {
continue;
}

/** @phpstan-ignore-next-line */
return preg_split('/(::|[\s?|&<>,\[\]{}:\'"])/', $raw, -1, PREG_SPLIT_DELIM_CAPTURE);
}
$symbols[$key] = $symbol . $symbols[$key + 1] . $symbols[$key + 2];

/**
* @return string[]
*/
private function splitTokensContainingAnonymousClass(string $raw): array
{
/** @var string[] $splits */
$splits = preg_split('/([a-zA-Z_\x7f-\xff][\\\\\w\x7f-\xff]*+@anonymous\x00.*?\.php(?:0x?|:\d++\$)[\da-fA-F]++)/', $raw, -1, PREG_SPLIT_DELIM_CAPTURE);
$symbols = [];

foreach ($splits as $symbol) {
if (str_contains($symbol, "@anonymous\0")) {
$symbols[] = $symbol;
} else {
$symbols = [...$symbols, ...$this->splitTokens($symbol)];
}
array_splice($symbols, $key + 1, 2);
}

return $symbols;
Expand Down
Loading

0 comments on commit 075d917

Please sign in to comment.