2025-01-01 04:17:55 +01:00
|
|
|
<?php
|
|
|
|
|
2025-01-05 07:40:30 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* QueryParser.php
|
|
|
|
* Copyright (c) 2025 https://github.com/Sobuno
|
|
|
|
*
|
|
|
|
* This file is part of Firefly III (https://github.com/firefly-iii).
|
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU Affero General Public License as
|
|
|
|
* published by the Free Software Foundation, either version 3 of the
|
|
|
|
* License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU Affero General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
|
|
* along with this program. If not, see https://www.gnu.org/licenses/.
|
|
|
|
*/
|
|
|
|
|
2025-01-01 04:17:55 +01:00
|
|
|
declare(strict_types=1);
|
|
|
|
|
2025-01-02 22:17:56 +01:00
|
|
|
namespace FireflyIII\Support\Search\QueryParser;
|
2025-01-01 04:17:55 +01:00
|
|
|
|
2025-01-05 09:18:03 +01:00
|
|
|
use Illuminate\Support\Facades\Log;
|
|
|
|
|
2025-01-01 05:08:01 +01:00
|
|
|
/**
 * Single-pass parser that processes query strings into structured nodes.
 * Scans each character once (O(n)) to build field searches, quoted strings,
 * prohibited terms and nested subqueries without backtracking.
 *
 * Recognised syntax (all delimiters are single ASCII bytes):
 *  - `word`            a plain string term
 *  - `"some words"`    a quoted term, may contain spaces and delimiters
 *  - `field:value`     a field search; only the FIRST colon splits name and value
 *  - `-term`           a prohibited term, field or subquery
 *  - `( ... )`         a nested subquery
 */
class QueryParser implements QueryParserInterface
{
    /** The raw query string currently being parsed. */
    private string $query;

    /** Byte offset into $query of the next character to read. */
    private int $position = 0;

    /**
     * Parse a complete query string into a tree of nodes.
     *
     * Resets the internal cursor, so the same instance can be reused for
     * several consecutive queries.
     *
     * @param string $query the raw search query
     *
     * @return NodeGroup the top-level (non-prohibited) group holding all parsed nodes
     */
    public function parse(string $query): NodeGroup
    {
        Log::debug(sprintf('Parsing query in QueryParser: "%s"', $query));
        $this->query    = $query;
        $this->position = 0;

        return $this->buildNodeGroup(false);
    }

    /**
     * Collect consecutive nodes into a group, starting at the current position.
     *
     * Stops when buildNextNode() yields no node, or right after the node that
     * is flagged as ending the group (closing parenthesis or end of input).
     *
     * @param bool $isSubquery whether this group was opened by a "(" and so ends at the matching ")"
     * @param bool $prohibited whether the group as a whole is negated
     */
    private function buildNodeGroup(bool $isSubquery, bool $prohibited = false): NodeGroup
    {
        $nodes      = [];
        $nodeResult = $this->buildNextNode($isSubquery);

        while (null !== $nodeResult->node) {
            $nodes[] = $nodeResult->node;
            if ($nodeResult->isSubqueryEnd) {
                break;
            }
            $nodeResult = $this->buildNextNode($isSubquery);
        }

        return new NodeGroup($nodes, $prohibited);
    }

    /**
     * Consume characters from the current position until one node is complete.
     *
     * @param bool $isSubquery whether a ")" should close the surrounding group
     *
     * @return NodeResult the node that was built (null when nothing was left to build) and
     *                    a flag that is true when the surrounding group ended with it
     */
    private function buildNextNode(bool $isSubquery): NodeResult
    {
        $tokenUnderConstruction = '';
        $inQuotes               = false;
        $fieldName              = '';
        $prohibited             = false;
        // The query does not change while parsing, so its length is loop-invariant.
        $length                 = strlen($this->query);

        while ($this->position < $length) {
            $char = $this->query[$this->position];

            // If we're in a quoted string, we treat all characters except another quote as ordinary characters
            if ($inQuotes) {
                if ('"' !== $char) {
                    $tokenUnderConstruction .= $char;
                    ++$this->position;

                    continue;
                }

                // The closing quote ends the node; step past it before returning.
                ++$this->position;

                return new NodeResult(
                    $this->createNode($tokenUnderConstruction, $fieldName, $prohibited),
                    false
                );
            }

            switch ($char) {
                case '-':
                    if ('' === $tokenUnderConstruction) {
                        // A minus sign at the beginning of a token indicates prohibition
                        // NOTE(review): this does not look at $fieldName, so "field:-value" also
                        // marks the field node as prohibited. Confirm that this is intended.
                        $prohibited = true;
                    } else {
                        // In any other location, it's just a normal character
                        $tokenUnderConstruction .= $char;
                    }

                    break;

                case '"':
                    if ('' === $tokenUnderConstruction) {
                        // A quote sign at the beginning of a token indicates the start of a quoted string
                        $inQuotes = true;
                    } else {
                        // In any other location, it's just a normal character
                        $tokenUnderConstruction .= $char;
                    }

                    break;

                case '(':
                    if ('' === $tokenUnderConstruction) {
                        // A left parenthesis at the beginning of a token indicates the start of a subquery
                        ++$this->position;

                        return new NodeResult(
                            $this->buildNodeGroup(true, $prohibited),
                            false
                        );
                    }
                    // In any other location, it's just a normal character
                    $tokenUnderConstruction .= $char;

                    break;

                case ')':
                    // A right parenthesis while in a subquery means the subquery ended,
                    // thus also signaling the end of any node currently being built
                    if ($isSubquery) {
                        ++$this->position;

                        return new NodeResult(
                            '' !== $tokenUnderConstruction
                                ? $this->createNode($tokenUnderConstruction, $fieldName, $prohibited)
                                : null,
                            true
                        );
                    }
                    // In any other location, it's just a normal character
                    $tokenUnderConstruction .= $char;

                    break;

                case ':':
                    if ('' === $tokenUnderConstruction || '' !== $fieldName) {
                        // A colon with nothing on its left-hand side, or a colon that appears after the
                        // field name is already known, is just a normal character. The latter keeps
                        // values such as "external_url:https://example.com" intact instead of
                        // replacing the field name with "https".
                        $tokenUnderConstruction .= $char;
                    } else {
                        Log::debug(sprintf('Turns out that "%s" is a field name. Reset the token.', $tokenUnderConstruction));
                        // If we meet a colon with a left-hand side string, we know we're in a field and are about to set up the value
                        $fieldName              = $tokenUnderConstruction;
                        $tokenUnderConstruction = '';
                    }

                    break;

                case ' ':
                    // A space indicates the end of a token construction if non-empty, otherwise it's just ignored
                    if ('' !== $tokenUnderConstruction) {
                        ++$this->position;

                        return new NodeResult(
                            $this->createNode($tokenUnderConstruction, $fieldName, $prohibited),
                            false
                        );
                    }

                    break;

                default:
                    $tokenUnderConstruction .= $char;
            }

            ++$this->position;
        }

        // End of input: flush whatever is pending. A field name without a value
        // (e.g. a trailing "field:") still produces a node.
        $finalNode = '' !== $tokenUnderConstruction || '' !== $fieldName
            ? $this->createNode($tokenUnderConstruction, $fieldName, $prohibited)
            : null;

        return new NodeResult($finalNode, true);
    }

    /**
     * Turn a finished token into a FieldNode (when a field name is present) or a StringNode.
     *
     * For field nodes, leading colons/double quotes and trailing double quotes are
     * stripped from the value. Name and value are whitespace-trimmed in both cases.
     *
     * @param string $token      the collected value text
     * @param string $fieldName  the field name, or an empty string for a plain term
     * @param bool   $prohibited whether the node is negated
     */
    private function createNode(string $token, string $fieldName, bool $prohibited): Node
    {
        if ('' !== $fieldName) {
            Log::debug(sprintf('Create FieldNode %s:%s (%s)', $fieldName, $token, var_export($prohibited, true)));
            $token = ltrim($token, ':"');
            $token = rtrim($token, '"');

            return new FieldNode(trim($fieldName), trim($token), $prohibited);
        }
        Log::debug(sprintf('Create StringNode "%s" (%s)', $token, var_export($prohibited, true)));

        return new StringNode(trim($token), $prohibited);
    }
}
|