Files
firefly-iii/app/Support/Search/QueryParser/QueryParser.php

211 lines
7.6 KiB
PHP
Raw Normal View History

2025-01-01 04:17:55 +01:00
<?php
2025-01-05 07:40:30 +01:00
/*
* QueryParser.php
* Copyright (c) 2025 https://github.com/Sobuno
*
* This file is part of Firefly III (https://github.com/firefly-iii).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see https://www.gnu.org/licenses/.
*/
2025-01-01 04:17:55 +01:00
declare(strict_types=1);
2025-01-02 22:17:56 +01:00
namespace FireflyIII\Support\Search\QueryParser;
2025-01-01 04:17:55 +01:00
2025-01-05 09:18:03 +01:00
use Illuminate\Support\Facades\Log;
/**
 * Single-pass parser that processes query strings into structured nodes.
 * Scans each character once (O(n)) to build field searches, quoted strings,
 * prohibited terms and nested subqueries without backtracking.
 */
class QueryParser implements QueryParserInterface
{
    /** The query string currently being parsed. */
    private string $query;

    /** Byte offset into $query of the next character to read. */
    private int $position = 0;

    /**
     * Parse a search query into a group of nodes.
     *
     * The cursor is reset on every call, so one instance can parse several
     * queries one after another.
     *
     * @param string $query the raw search query
     *
     * @return NodeGroup the top-level (never prohibited) group of parsed nodes
     */
    public function parse(string $query): NodeGroup
    {
        Log::debug(sprintf('Parsing query in QueryParser: "%s"', $query));
        $this->query    = $query;
        $this->position = 0;

        return $this->buildNodeGroup(false);
    }

    /**
     * Collect consecutive nodes into a group, starting at the current position.
     *
     * Collection stops when buildNextNode() yields no node, or right after
     * storing a node that was flagged as the end of the (sub)query.
     *
     * @param bool $isSubquery whether a closing ")" terminates this group
     * @param bool $prohibited whether the whole group was prefixed with "-"
     */
    private function buildNodeGroup(bool $isSubquery, bool $prohibited = false): NodeGroup
    {
        $nodes = [];

        do {
            $nodeResult = $this->buildNextNode($isSubquery);
            if (null === $nodeResult->node) {
                break;
            }
            $nodes[] = $nodeResult->node;
        } while (!$nodeResult->isSubqueryEnd);

        return new NodeGroup($nodes, $prohibited);
    }

    /**
     * Read characters from the current position until one node is complete.
     *
     * The query is walked byte by byte. Every delimiter handled here is a
     * single ASCII byte, so multi-byte UTF-8 sequences are copied into the
     * token unchanged.
     *
     * @param bool $isSubquery whether an unquoted ")" ends the enclosing group
     *
     * @return NodeResult the node that was built (null when there was nothing
     *                    to build) plus a flag that is true when the enclosing
     *                    group ended, either on ")" or at the end of the input
     */
    private function buildNextNode(bool $isSubquery): NodeResult
    {
        $tokenUnderConstruction = '';
        $inQuotes               = false;
        $fieldName              = '';
        $prohibited             = false;
        $length                 = strlen($this->query);

        while ($this->position < $length) {
            $char = $this->query[$this->position];

            // Inside a quoted string everything except the closing quote is an ordinary character.
            if ($inQuotes) {
                ++$this->position;
                if ('"' === $char) {
                    // The closing quote completes the node.
                    return new NodeResult(
                        $this->createNode($tokenUnderConstruction, $fieldName, $prohibited),
                        false
                    );
                }
                $tokenUnderConstruction .= $char;

                continue;
            }

            switch ($char) {
                case '-':
                    if ('' === $tokenUnderConstruction) {
                        // A minus sign at the beginning of a token indicates prohibition.
                        // NOTE(review): this also triggers right after "field:", so a value
                        // starting with "-" is read as a prohibition - confirm this is intended.
                        $prohibited = true;

                        break;
                    }
                    // In any other location, it's just a normal character.
                    $tokenUnderConstruction .= $char;

                    break;

                case '"':
                    if ('' === $tokenUnderConstruction) {
                        // A quote at the beginning of a token starts a quoted string.
                        $inQuotes = true;

                        break;
                    }
                    // In any other location, it's just a normal character.
                    $tokenUnderConstruction .= $char;

                    break;

                case '(':
                    if ('' === $tokenUnderConstruction) {
                        // A left parenthesis at the beginning of a token starts a subquery,
                        // which inherits the prohibition flag seen so far.
                        ++$this->position;

                        return new NodeResult(
                            $this->buildNodeGroup(true, $prohibited),
                            false
                        );
                    }
                    // In any other location, it's just a normal character.
                    $tokenUnderConstruction .= $char;

                    break;

                case ')':
                    if ($isSubquery) {
                        // A right parenthesis inside a subquery ends the subquery and
                        // therefore also the node currently being built (if any).
                        // NOTE(review): "(field:)" yields no node here, while "field:" at
                        // the end of the input does - confirm whether that is intended.
                        ++$this->position;

                        return new NodeResult(
                            '' !== $tokenUnderConstruction
                                ? $this->createNode($tokenUnderConstruction, $fieldName, $prohibited)
                                : null,
                            true
                        );
                    }
                    // Outside a subquery, it's just a normal character.
                    $tokenUnderConstruction .= $char;

                    break;

                case ':':
                    if ('' !== $fieldName || '' === $tokenUnderConstruction) {
                        // Either the field name is already known, so this colon belongs to the
                        // value (e.g. "external_url:https://example.com"), or there is nothing
                        // on the left-hand side to use as a field name. Keep it as a literal.
                        $tokenUnderConstruction .= $char;

                        break;
                    }
                    Log::debug(sprintf('Turns out that "%s" is a field name. Reset the token.', $tokenUnderConstruction));
                    // The first colon with a left-hand side string marks a field; what follows is its value.
                    $fieldName              = $tokenUnderConstruction;
                    $tokenUnderConstruction = '';

                    break;

                case ' ':
                    // A space ends a non-empty token; otherwise it is ignored.
                    if ('' !== $tokenUnderConstruction) {
                        ++$this->position;

                        return new NodeResult(
                            $this->createNode($tokenUnderConstruction, $fieldName, $prohibited),
                            false
                        );
                    }

                    break;

                default:
                    $tokenUnderConstruction .= $char;
            }

            ++$this->position;
        }

        // End of input: emit whatever was collected, including a field name without a value.
        $finalNode = '' !== $tokenUnderConstruction || '' !== $fieldName
            ? $this->createNode($tokenUnderConstruction, $fieldName, $prohibited)
            : null;

        return new NodeResult($finalNode, true);
    }

    /**
     * Turn a finished token into a node.
     *
     * With a field name the result is a FieldNode; leading colons/quotes and
     * trailing quotes are stripped from the value. Without a field name the
     * result is a StringNode. Surrounding whitespace is trimmed in both cases.
     *
     * @param string $token      the collected value or search word
     * @param string $fieldName  the field name, or '' for a plain word
     * @param bool   $prohibited whether the term was prefixed with "-"
     */
    private function createNode(string $token, string $fieldName, bool $prohibited): Node
    {
        if ('' !== $fieldName) {
            Log::debug(sprintf('Create FieldNode %s:%s (%s)', $fieldName, $token, var_export($prohibited, true)));
            $token = ltrim($token, ':"');
            $token = rtrim($token, '"');

            return new FieldNode(trim($fieldName), trim($token), $prohibited);
        }
        Log::debug(sprintf('Create StringNode "%s" (%s)', $token, var_export($prohibited, true)));

        return new StringNode(trim($token), $prohibited);
    }
}