Files
firefly-iii/app/Support/Search/QueryParser/QueryParser.php

206 lines
7.5 KiB
PHP
Raw Normal View History

2025-01-01 04:17:55 +01:00
<?php
2025-01-05 07:40:30 +01:00
/*
* QueryParser.php
* Copyright (c) 2025 https://github.com/Sobuno
*
* This file is part of Firefly III (https://github.com/firefly-iii).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see https://www.gnu.org/licenses/.
*/
2025-01-01 04:17:55 +01:00
declare(strict_types=1);
2025-01-02 22:17:56 +01:00
namespace FireflyIII\Support\Search\QueryParser;
2025-01-01 04:17:55 +01:00
2025-01-05 09:18:03 +01:00
use Illuminate\Support\Facades\Log;
2025-01-01 04:17:55 +01:00
/**
 * Represents a result from parsing a query node.
 *
 * Contains the parsed node and a flag indicating if this is the end of the query.
 * Used to handle subquery parsing and termination.
 */
class NodeResult
{
    /**
     * @param null|Node $node          the node that was parsed, or null when no node could be built
     * @param bool      $isSubqueryEnd true when the parser hit the closing parenthesis of a subquery
     *                                 or ran out of input, so the enclosing group must stop collecting nodes
     */
    public function __construct(
        public readonly ?Node $node,
        public readonly bool $isSubqueryEnd,
    ) {}
}
/**
 * Single-pass parser that processes query strings into structured nodes.
 * Scans each character once (O(n)) to build field searches, quoted strings,
 * prohibited terms and nested subqueries without backtracking.
 */
class QueryParser implements QueryParserInterface
{
    /** The query string currently being parsed. */
    private string $query;

    /** Offset of the next unread character in $query. */
    private int $position = 0;

    /**
     * Parses a full query string into a group of nodes.
     *
     * Resets the read position, so the same parser instance can be reused
     * for several queries.
     *
     * @param string $query the raw query text
     *
     * @return NodeGroup the top-level (non-prohibited) group holding every parsed node
     */
    public function parse(string $query): NodeGroup
    {
        Log::debug(sprintf('Parsing query in QueryParser: "%s"', $query));
        $this->query    = $query;
        $this->position = 0;

        return $this->buildNodeGroup(false);
    }

    /**
     * Collects consecutive nodes into a group until the input or the current subquery ends.
     *
     * @param bool $isSubquery whether the group being built is enclosed in parentheses
     * @param bool $prohibited whether the whole group was prefixed with a minus sign
     *
     * @return NodeGroup the collected nodes, flagged with $prohibited
     */
    private function buildNodeGroup(bool $isSubquery, bool $prohibited = false): NodeGroup
    {
        $nodes      = [];
        $nodeResult = $this->buildNextNode($isSubquery);

        while ($nodeResult->node !== null) {
            $nodes[] = $nodeResult->node;
            if ($nodeResult->isSubqueryEnd) {
                // The node just stored was the last one of this group.
                break;
            }
            $nodeResult = $this->buildNextNode($isSubquery);
        }

        return new NodeGroup($nodes, $prohibited);
    }

    /**
     * Reads characters from the current position until one complete node has been built.
     *
     * @param bool $isSubquery whether a closing parenthesis should terminate the current group
     *
     * @return NodeResult the node that was built (null when nothing was left to build) and a flag
     *                    telling the caller whether the group ended with it
     */
    private function buildNextNode(bool $isSubquery): NodeResult
    {
        $tokenUnderConstruction = '';
        $inQuotes               = false;
        $fieldName              = '';
        $prohibited             = false;
        $length                 = strlen($this->query);

        while ($this->position < $length) {
            $char = $this->query[$this->position];

            // If we're in a quoted string, we treat all characters except another quote as ordinary characters
            if ($inQuotes) {
                if ($char !== '"') {
                    $tokenUnderConstruction .= $char;
                    $this->position++;

                    continue;
                }

                // The closing quote ends the node; consume it before returning.
                $this->position++;

                return new NodeResult(
                    $this->createNode($tokenUnderConstruction, $fieldName, $prohibited),
                    false
                );
            }

            switch ($char) {
                case '-':
                    if ($tokenUnderConstruction === '') {
                        // A minus sign at the beginning of a token indicates prohibition
                        Log::debug('Indicate prohibition');
                        $prohibited = true;
                    } else {
                        // In any other location, it's just a normal character
                        $tokenUnderConstruction .= $char;
                    }

                    break;

                case '"':
                    if ($tokenUnderConstruction === '') {
                        // A quote sign at the beginning of a token indicates the start of a quoted string
                        $inQuotes = true;
                    } else {
                        // In any other location, it's just a normal character
                        $tokenUnderConstruction .= $char;
                    }

                    break;

                case '(':
                    if ($tokenUnderConstruction === '') {
                        // A left parenthesis at the beginning of a token indicates the start of a subquery
                        $this->position++;

                        return new NodeResult(
                            $this->buildNodeGroup(true, $prohibited),
                            false
                        );
                    }

                    // In any other location, it's just a normal character
                    $tokenUnderConstruction .= $char;

                    break;

                case ')':
                    // A right parenthesis while in a subquery means the subquery ended,
                    // thus also signaling the end of any node currently being built
                    if ($isSubquery) {
                        $this->position++;

                        return new NodeResult(
                            $tokenUnderConstruction !== ''
                                ? $this->createNode($tokenUnderConstruction, $fieldName, $prohibited)
                                : null,
                            true
                        );
                    }

                    // In any other location, it's just a normal character
                    $tokenUnderConstruction .= $char;

                    break;

                case ':':
                    if ($tokenUnderConstruction !== '') {
                        // If we meet a colon with a left-hand side string, we know we're in a field
                        // and are about to set up the value. The colon itself is only a separator
                        // and must not become part of the value.
                        $fieldName              = $tokenUnderConstruction;
                        $tokenUnderConstruction = '';
                    } else {
                        // In any other location, it's just a normal character
                        $tokenUnderConstruction .= $char;
                    }

                    break;

                case ' ':
                    // A space indicates the end of a token construction if non-empty, otherwise it's just ignored
                    if ($tokenUnderConstruction !== '') {
                        $this->position++;

                        return new NodeResult(
                            $this->createNode($tokenUnderConstruction, $fieldName, $prohibited),
                            false
                        );
                    }

                    break;

                default:
                    $tokenUnderConstruction .= $char;
            }

            $this->position++;
        }

        // Input exhausted: emit whatever was still being built, if anything.
        $finalNode = $tokenUnderConstruction !== '' || $fieldName !== ''
            ? $this->createNode($tokenUnderConstruction, $fieldName, $prohibited)
            : null;

        return new NodeResult($finalNode, true);
    }

    /**
     * Builds the node matching the collected token.
     *
     * @param string $token      the value text that was collected
     * @param string $fieldName  the text found before a colon, or an empty string when there was none
     * @param bool   $prohibited whether the token was prefixed with a minus sign
     *
     * @return Node a FieldNode when a field name is present, otherwise a StringNode; values are trimmed
     */
    private function createNode(string $token, string $fieldName, bool $prohibited): Node
    {
        if ($fieldName !== '') {
            Log::debug(sprintf('Create FieldNode %s:%s (%s)', $fieldName, $token, var_export($prohibited, true)));

            return new FieldNode(trim($fieldName), trim($token), $prohibited);
        }

        Log::debug(sprintf('Create StringNode "%s" (%s)', $token, var_export($prohibited, true)));

        return new StringNode(trim($token), $prohibited);
    }
}