Files
firefly-iii/app/Support/Search/QueryParser/QueryParser.php

178 lines
6.2 KiB
PHP
Raw Normal View History

2025-01-01 04:17:55 +01:00
<?php
declare(strict_types=1);
2025-01-02 22:17:56 +01:00
namespace FireflyIII\Support\Search\QueryParser;
2025-01-01 04:17:55 +01:00
/**
 * Outcome of one node-parsing step.
 *
 * Bundles the node that was built (or null when nothing was built) with a
 * flag telling the caller that the query or subquery being read has ended.
 */
class NodeResult
{
    /** The parsed node, or null when the step produced no node. */
    public readonly ?Node $node;

    /** True when the (sub)query being read ended with this step. */
    public readonly bool $isSubqueryEnd;

    /**
     * @param ?Node $node          the parsed node, or null when there is none
     * @param bool  $isSubqueryEnd whether the (sub)query ended with this step
     */
    public function __construct(?Node $node, bool $isSubqueryEnd)
    {
        $this->node          = $node;
        $this->isSubqueryEnd = $isSubqueryEnd;
    }
}
/**
 * Single-pass parser that turns a search query string into a tree of nodes.
 *
 * The query is read left to right, one byte at a time; the read position only
 * ever moves forward, so there is no backtracking. Recognised syntax:
 *  - `word`          a plain term, emitted as a StringNode
 *  - `"some words"`  a quoted term; characters inside the quotes have no special meaning
 *  - `field:value`   a field search, emitted as a FieldNode; the value may be quoted
 *  - `-term`         a minus at the very start of a term, field search or subquery
 *                    marks it as prohibited
 *  - `( ... )`       a subquery, parsed recursively into a nested NodeGroup
 *
 * Field names and values are trimmed before the nodes are created.
 */
class QueryParser implements QueryParserInterface
{
    /** Query string currently being parsed. */
    private string $query = '';

    /** Byte offset in $query of the next character to read. */
    private int $position = 0;

    /**
     * Parses a complete query string into its top-level node group.
     *
     * The parser state is reset on every call, so a single instance can be
     * used to parse several queries one after another.
     *
     * @param string $query the raw query string
     */
    public function parse(string $query): NodeGroup
    {
        $this->query    = $query;
        $this->position = 0;

        return $this->buildNodeGroup(false);
    }

    /**
     * Collects consecutive nodes into one group until the current (sub)query ends.
     *
     * @param bool $isSubquery whether the group is a parenthesised subquery, in
     *                         which case a closing parenthesis terminates it
     * @param bool $prohibited prohibition flag handed to the resulting NodeGroup
     */
    private function buildNodeGroup(bool $isSubquery, bool $prohibited = false): NodeGroup
    {
        $nodes = [];

        do {
            $nodeResult = $this->buildNextNode($isSubquery);
            if (null === $nodeResult->node) {
                // Nothing was built: the input or the subquery ended without a pending term.
                break;
            }
            $nodes[] = $nodeResult->node;
        } while (!$nodeResult->isSubqueryEnd);

        return new NodeGroup($nodes, $prohibited);
    }

    /**
     * Reads the next node starting at the current position and advances past it.
     *
     * @param bool $isSubquery whether a closing parenthesis ends the current group
     *
     * @return NodeResult the node that was built (null when none) and whether the
     *                    current (sub)query ended while reading it
     */
    private function buildNextNode(bool $isSubquery): NodeResult
    {
        $tokenUnderConstruction = '';
        $inQuotes               = false;
        $fieldName              = '';
        $prohibited             = false;
        // The query does not change while parsing, so measure it only once.
        $length                 = strlen($this->query);

        while ($this->position < $length) {
            $char = $this->query[$this->position];

            // Inside a quoted string every character except the closing quote is ordinary.
            if ($inQuotes) {
                if ('"' !== $char) {
                    $tokenUnderConstruction .= $char;
                    ++$this->position;

                    continue;
                }

                // Closing quote: consume it and emit the quoted term.
                ++$this->position;

                return new NodeResult(
                    $this->createNode($tokenUnderConstruction, $fieldName, $prohibited),
                    false,
                );
            }

            switch ($char) {
                case '-':
                    if ('' === $tokenUnderConstruction && '' === $fieldName) {
                        // A minus at the very start of a term indicates prohibition.
                        $prohibited = true;
                    } else {
                        // Anywhere else it is an ordinary character. This includes the
                        // start of a field value, so "amount:-5" keeps its sign instead
                        // of being turned into a prohibited "amount:5".
                        $tokenUnderConstruction .= $char;
                    }

                    break;

                case '"':
                    if ('' === $tokenUnderConstruction) {
                        // A quote at the beginning of a token (or field value) starts a quoted string.
                        $inQuotes = true;
                    } else {
                        // In any other location it is just a normal character.
                        $tokenUnderConstruction .= $char;
                    }

                    break;

                case '(':
                    if ('' === $tokenUnderConstruction) {
                        // A left parenthesis at the beginning of a token starts a subquery.
                        // Only the prohibition flag is carried over to the group; a field
                        // name collected so far is not applied to it.
                        ++$this->position;

                        return new NodeResult($this->buildNodeGroup(true, $prohibited), false);
                    }

                    // In any other location it is just a normal character.
                    $tokenUnderConstruction .= $char;

                    break;

                case ')':
                    if ($isSubquery) {
                        // A right parenthesis inside a subquery ends the subquery and with
                        // it whatever term is currently being built.
                        ++$this->position;

                        return new NodeResult(
                            $this->createPendingNode($tokenUnderConstruction, $fieldName, $prohibited),
                            true,
                        );
                    }

                    // Outside a subquery it is just a normal character.
                    $tokenUnderConstruction .= $char;

                    break;

                case ':':
                    if ('' !== $tokenUnderConstruction && '' === $fieldName) {
                        // The first colon after some text turns that text into the field
                        // name; what follows becomes the value.
                        $fieldName              = $tokenUnderConstruction;
                        $tokenUnderConstruction = '';
                    } else {
                        // A leading colon, or any colon once the field name is known, is
                        // part of the token. This keeps values such as
                        // "https://example.com" or "12:30" intact instead of overwriting
                        // the field name with part of the value.
                        $tokenUnderConstruction .= $char;
                    }

                    break;

                case ' ':
                    // A space ends a non-empty token; otherwise it is skipped.
                    if ('' !== $tokenUnderConstruction) {
                        ++$this->position;

                        return new NodeResult(
                            $this->createNode($tokenUnderConstruction, $fieldName, $prohibited),
                            false,
                        );
                    }

                    break;

                default:
                    $tokenUnderConstruction .= $char;
            }

            ++$this->position;
        }

        // End of input (also reached by an unterminated quoted string): emit what is pending.
        return new NodeResult(
            $this->createPendingNode($tokenUnderConstruction, $fieldName, $prohibited),
            true,
        );
    }

    /**
     * Builds the node for a term that is cut short by the end of the input or of
     * a subquery. Returns null when neither a token nor a field name was collected.
     */
    private function createPendingNode(string $token, string $fieldName, bool $prohibited): ?Node
    {
        if ('' === $token && '' === $fieldName) {
            return null;
        }

        return $this->createNode($token, $fieldName, $prohibited);
    }

    /**
     * Creates the node for a finished term.
     *
     * @param string $token      the term, or the value of a field search
     * @param string $fieldName  field name; an empty string produces a StringNode
     * @param bool   $prohibited prohibition flag handed to the node
     */
    private function createNode(string $token, string $fieldName, bool $prohibited): Node
    {
        if ('' !== $fieldName) {
            return new FieldNode(trim($fieldName), trim($token), $prohibited);
        }

        return new StringNode(trim($token), $prohibited);
    }
}