Merge pull request #9598 from Sobuno/NewQueryParserV3

New Query Parser for Search Engine and Rules
This commit is contained in:
James Cole
2025-01-05 07:32:55 +01:00
committed by GitHub
21 changed files with 843 additions and 126 deletions

View File

@@ -39,22 +39,14 @@ use FireflyIII\Repositories\Budget\BudgetRepositoryInterface;
use FireflyIII\Repositories\Category\CategoryRepositoryInterface;
use FireflyIII\Repositories\Tag\TagRepositoryInterface;
use FireflyIII\Repositories\UserGroups\Currency\CurrencyRepositoryInterface;
use FireflyIII\Support\Search\QueryParser\QueryParserInterface;
use FireflyIII\Support\Search\QueryParser\Node;
use FireflyIII\Support\Search\QueryParser\FieldNode;
use FireflyIII\Support\Search\QueryParser\StringNode;
use FireflyIII\Support\Search\QueryParser\NodeGroup;
use FireflyIII\Support\ParseDateString;
use FireflyIII\User;
use Gdbots\QueryParser\Enum\BoolOperator;
use Gdbots\QueryParser\Node\Date;
use Gdbots\QueryParser\Node\Emoji;
use Gdbots\QueryParser\Node\Emoticon;
use Gdbots\QueryParser\Node\Field;
use Gdbots\QueryParser\Node\Hashtag;
use Gdbots\QueryParser\Node\Mention;
use Gdbots\QueryParser\Node\Node;
use Gdbots\QueryParser\Node\Numbr;
use Gdbots\QueryParser\Node\Phrase;
use Gdbots\QueryParser\Node\Subquery;
use Gdbots\QueryParser\Node\Url;
use Gdbots\QueryParser\Node\Word;
use Gdbots\QueryParser\QueryParser;
use Illuminate\Pagination\LengthAwarePaginator;
use Illuminate\Support\Collection;
@@ -131,6 +123,16 @@ class OperatorQuerySearch implements SearchInterface
return implode(' ', $this->words);
}
/**
 * Returns the list of search words stored on this search instance.
 *
 * @return array the contents of $this->words
 */
public function getWords(): array
{
return $this->words;
}
/**
 * Returns the list of words that must be excluded from the search results.
 *
 * @return array the contents of $this->prohibitedWords
 */
public function getExcludedWords(): array
{
return $this->prohibitedWords;
}
/**
* @throws FireflyException
*/
@@ -145,10 +147,11 @@ class OperatorQuerySearch implements SearchInterface
public function parseQuery(string $query): void
{
app('log')->debug(sprintf('Now in parseQuery(%s)', $query));
$parser = new QueryParser();
$parser = app(QueryParserInterface::class);
app('log')->debug(sprintf('Using %s as implementation for QueryParserInterface', get_class($parser)));
try {
$query1 = $parser->parse($query);
$parsedQuery = $parser->parse($query);
} catch (\LogicException|\TypeError $e) {
app('log')->error($e->getMessage());
app('log')->error(sprintf('Could not parse search: "%s".', $query));
@@ -156,10 +159,8 @@ class OperatorQuerySearch implements SearchInterface
throw new FireflyException(sprintf('Invalid search value "%s". See the logs.', e($query)), 0, $e);
}
app('log')->debug(sprintf('Found %d node(s)', count($query1->getNodes())));
foreach ($query1->getNodes() as $searchNode) {
$this->handleSearchNode($searchNode);
}
app('log')->debug(sprintf('Found %d node(s) at top-level', count($parsedQuery->getNodes())));
$this->handleSearchNode($parsedQuery, $parsedQuery->isProhibited(false));
// add missing information
$this->collector->withBillInformation();
@@ -173,81 +174,93 @@ class OperatorQuerySearch implements SearchInterface
*
* @SuppressWarnings("PHPMD.CyclomaticComplexity")
*/
private function handleSearchNode(Node $searchNode): void
private function handleSearchNode(Node $node, $flipProhibitedFlag): void
{
$class = get_class($searchNode);
app('log')->debug(sprintf('Now in handleSearchNode(%s)', $class));
app('log')->debug(sprintf('Now in handleSearchNode(%s)', get_class($node)));
switch (true) {
case $node instanceof StringNode:
$this->handleStringNode($node, $flipProhibitedFlag);
break;
case $node instanceof FieldNode:
$this->handleFieldNode($node, $flipProhibitedFlag);
break;
case $node instanceof NodeGroup:
$this->handleNodeGroup($node, $flipProhibitedFlag);
break;
switch ($class) {
default:
app('log')->error(sprintf('Cannot handle node %s', $class));
app('log')->error(sprintf('Cannot handle node %s', get_class($node)));
throw new FireflyException(sprintf('Firefly III search can\'t handle "%s"-nodes', get_class($node)));
}
}
throw new FireflyException(sprintf('Firefly III search can\'t handle "%s"-nodes', $class));
private function handleNodeGroup(NodeGroup $node, $flipProhibitedFlag): void
{
$prohibited = $node->isProhibited($flipProhibitedFlag);
case Subquery::class:
// loop all notes in subquery:
foreach ($searchNode->getNodes() as $subNode) { // @phpstan-ignore-line PHPStan thinks getNodes() does not exist but it does.
$this->handleSearchNode($subNode); // let's hope it's not too recursive
}
foreach ($node->getNodes() as $subNode) {
$this->handleSearchNode($subNode, $prohibited);
}
}
break;
case Word::class:
case Phrase::class:
case Numbr::class:
case Url::class:
case Date::class:
case Hashtag::class:
case Emoticon::class:
case Emoji::class:
case Mention::class:
$allWords = (string) $searchNode->getValue();
app('log')->debug(sprintf('Add words "%s" to search string, because Node class is "%s"', $allWords, $class));
$this->words[] = $allWords;
break;
private function handleStringNode(StringNode $node, $flipProhibitedFlag): void
{
$string = (string) $node->getValue();
case Field::class:
app('log')->debug(sprintf('Now handle Node class %s', $class));
$prohibited = $node->isProhibited($flipProhibitedFlag);
/** @var Field $searchNode */
// used to search for x:y
$operator = strtolower($searchNode->getValue());
$value = $searchNode->getNode()->getValue();
$prohibited = BoolOperator::PROHIBITED === $searchNode->getBoolOperator();
$context = config(sprintf('search.operators.%s.needs_context', $operator));
if($prohibited) {
app('log')->debug(sprintf('Exclude string "%s" from search string', $string));
$this->prohibitedWords[] = $string;
} else {
app('log')->debug(sprintf('Add string "%s" to search string', $string));
$this->words[] = $string;
}
}
// is an operator that needs no context, and value is false, then prohibited = true.
if ('false' === $value && in_array($operator, $this->validOperators, true) && false === $context && !$prohibited) {
$prohibited = true;
$value = 'true';
}
// if the operator is prohibited, but the value is false, do an uno reverse
if ('false' === $value && $prohibited && in_array($operator, $this->validOperators, true) && false === $context) {
$prohibited = false;
$value = 'true';
}
/**
* @throws FireflyException
*/
private function handleFieldNode(FieldNode $node, $flipProhibitedFlag): void
{
$operator = strtolower($node->getOperator());
$value = $node->getValue();
$prohibited = $node->isProhibited($flipProhibitedFlag);
// must be valid operator:
if (
in_array($operator, $this->validOperators, true)
&& $this->updateCollector($operator, (string) $value, $prohibited)) {
$this->operators->push(
[
'type' => self::getRootOperator($operator),
'value' => (string) $value,
'prohibited' => $prohibited,
]
);
app('log')->debug(sprintf('Added operator type "%s"', $operator));
}
if (!in_array($operator, $this->validOperators, true)) {
app('log')->debug(sprintf('Added INVALID operator type "%s"', $operator));
$this->invalidOperators[] = [
'type' => $operator,
'value' => (string) $value,
];
}
$context = config(sprintf('search.operators.%s.needs_context', $operator));
// is an operator that needs no context, and value is false, then prohibited = true.
if ('false' === $value && in_array($operator, $this->validOperators, true) && false === $context && !$prohibited) {
$prohibited = true;
$value = 'true';
}
// if the operator is prohibited, but the value is false, do an uno reverse
if ('false' === $value && $prohibited && in_array($operator, $this->validOperators, true) && false === $context) {
$prohibited = false;
$value = 'true';
}
// must be valid operator:
if (in_array($operator, $this->validOperators, true)) {
if ($this->updateCollector($operator, (string)$value, $prohibited)) {
$this->operators->push([
'type' => self::getRootOperator($operator),
'value' => (string)$value,
'prohibited' => $prohibited,
]);
app('log')->debug(sprintf('Added operator type "%s"', $operator));
}
} else {
app('log')->debug(sprintf('Added INVALID operator type "%s"', $operator));
$this->invalidOperators[] = [
'type' => $operator,
'value' => (string)$value,
];
}
}
@@ -2766,7 +2779,7 @@ class OperatorQuerySearch implements SearchInterface
public function searchTransactions(): LengthAwarePaginator
{
$this->parseTagInstructions();
if (0 === count($this->getWords()) && 0 === count($this->getOperators())) {
if (0 === count($this->getWords()) && 0 === count($this->getExcludedWords()) && 0 === count($this->getOperators())) {
return new LengthAwarePaginator([], 0, 5, 1);
}
@@ -2818,11 +2831,6 @@ class OperatorQuerySearch implements SearchInterface
}
}
public function getWords(): array
{
return $this->words;
}
public function setDate(Carbon $date): void
{
$this->date = $date;

View File

@@ -0,0 +1,31 @@
<?php
declare(strict_types=1);
namespace FireflyIII\Support\Search\QueryParser;
/**
 * A "field:value" pair found in a search query (e.g. amount:100).
 *
 * The part before the colon is exposed as the operator, the part after it as the value.
 */
class FieldNode extends Node
{
    /**
     * @param string $operator   the field name (left-hand side of the colon)
     * @param string $value      the field value (right-hand side of the colon)
     * @param bool   $prohibited whether this field was negated in the query
     */
    public function __construct(
        private string $operator,
        private string $value,
        bool $prohibited = false
    ) {
        // $prohibited lives on the parent class, so it cannot be promoted here.
        $this->prohibited = $prohibited;
    }

    /**
     * The field name, exactly as passed to the constructor.
     */
    public function getOperator(): string
    {
        return $this->operator;
    }

    /**
     * The field value, exactly as passed to the constructor.
     */
    public function getValue(): string
    {
        return $this->value;
    }
}

View File

@@ -0,0 +1,81 @@
<?php
declare(strict_types=1);
namespace FireflyIII\Support\Search\QueryParser;
use FireflyIII\Exceptions\FireflyException;
use Gdbots\QueryParser\QueryParser as BaseQueryParser;
use Gdbots\QueryParser\Node as GdbotsNode;
use Gdbots\QueryParser\Enum\BoolOperator;
/**
 * Adapter that parses a query with the Gdbots query parser and converts the
 * resulting Gdbots nodes into this namespace's own node classes.
 */
class GdbotsQueryParser implements QueryParserInterface
{
    private BaseQueryParser $parser;

    public function __construct()
    {
        $this->parser = new BaseQueryParser();
    }

    /**
     * Parses the query string and returns the converted top-level nodes as one group.
     *
     * @throws FireflyException when the underlying parser fails, or when it
     *                          produces a node type that cannot be converted
     */
    public function parse(string $query): NodeGroup
    {
        try {
            $result = $this->parser->parse($query);
            $nodes  = array_map(
                fn (GdbotsNode\Node $node) => $this->convertNode($node),
                $result->getNodes()
            );

            return new NodeGroup($nodes);
        } catch (\LogicException|\TypeError $e) {
            // Log first, then rethrow as a FireflyException with the original
            // error attached as "previous" so the cause is not lost.
            app('log')->error($e->getMessage());
            app('log')->error(sprintf('Could not parse search: "%s".', $query));

            throw new FireflyException(sprintf('Invalid search value "%s". See the logs.', e($query)), 0, $e);
        }
    }

    /**
     * Converts a single Gdbots node into the matching local node.
     *
     * Fields become FieldNodes (carrying the PROHIBITED bool operator),
     * subqueries become NodeGroups (converted recursively) and every other
     * supported node type is reduced to a StringNode holding its value.
     *
     * @throws FireflyException when the node type is not supported
     */
    private function convertNode(GdbotsNode\Node $node): Node
    {
        switch (true) {
            case $node instanceof GdbotsNode\Field:
                return new FieldNode(
                    $node->getValue(),
                    (string) $node->getNode()->getValue(),
                    BoolOperator::PROHIBITED === $node->getBoolOperator()
                );

            case $node instanceof GdbotsNode\Subquery:
                return new NodeGroup(
                    array_map(
                        fn (GdbotsNode\Node $subNode) => $this->convertNode($subNode),
                        $node->getNodes()
                    )
                );

            case $node instanceof GdbotsNode\Word:
            case $node instanceof GdbotsNode\Phrase:
            case $node instanceof GdbotsNode\Numbr:
            case $node instanceof GdbotsNode\Date:
            case $node instanceof GdbotsNode\Url:
            case $node instanceof GdbotsNode\Hashtag:
            case $node instanceof GdbotsNode\Mention:
            case $node instanceof GdbotsNode\Emoticon:
            case $node instanceof GdbotsNode\Emoji:
                // The cast keeps the StringNode constructor happy under strict_types
                // for node types whose value is not a string (numbers, dates).
                return new StringNode((string) $node->getValue());

            default:
                throw new FireflyException(
                    sprintf('Unsupported node type: %s', get_class($node))
                );
        }
    }
}

View File

@@ -0,0 +1,31 @@
<?php
declare(strict_types=1);
namespace FireflyIII\Support\Search\QueryParser;
/**
 * Common parent of every node produced by a query parser.
 *
 * Subclasses are expected to assign $prohibited in their constructor.
 */
abstract class Node
{
    protected bool $prohibited;

    /**
     * Reports whether this node is prohibited, taking an enclosing group into account.
     *
     * A node nested in a prohibited NodeGroup has its own status inverted,
     * which is what passing true for $flipFlag achieves.
     *
     * @param bool $flipFlag true to invert the stored status, false to return it unchanged
     *
     * @return bool the stored status, inverted when $flipFlag is true
     */
    public function isProhibited(bool $flipFlag): bool
    {
        // Exclusive-or of two booleans: the result equals the stored status
        // when $flipFlag is false and its negation when $flipFlag is true.
        return $this->prohibited !== $flipFlag;
    }
}

View File

@@ -0,0 +1,34 @@
<?php
declare(strict_types=1);
namespace FireflyIII\Support\Search\QueryParser;
/**
 * An ordered collection of nodes.
 *
 * A NodeGroup placed inside another NodeGroup acts as a subquery.
 */
class NodeGroup extends Node
{
    /**
     * @param Node[] $nodes      the nodes contained in this group
     * @param bool   $prohibited whether the group as a whole was negated
     */
    public function __construct(
        private array $nodes,
        bool $prohibited = false
    ) {
        // $prohibited lives on the parent class, so it cannot be promoted here.
        $this->prohibited = $prohibited;
    }

    /**
     * @return Node[] the nodes exactly as passed to the constructor
     */
    public function getNodes(): array
    {
        return $this->nodes;
    }
}

View File

@@ -0,0 +1,177 @@
<?php
declare(strict_types=1);
namespace FireflyIII\Support\Search\QueryParser;
/**
 * Outcome of one parsing step.
 *
 * Carries the node that was built (or null when there was nothing to build)
 * together with a flag telling the caller that the current (sub)query ended,
 * so it should stop asking for more nodes.
 */
class NodeResult
{
    /** The node that was parsed, or null when no node could be built. */
    public readonly ?Node $node;

    /** True when the end of the current (sub)query was reached. */
    public readonly bool $isSubqueryEnd;

    public function __construct(?Node $node, bool $isSubqueryEnd)
    {
        $this->node          = $node;
        $this->isSubqueryEnd = $isSubqueryEnd;
    }
}
/**
 * Single-pass parser that processes query strings into structured nodes.
 * Scans each character once (O(n)) to build field searches, quoted strings,
 * prohibited terms and nested subqueries without backtracking.
 *
 * Recognised syntax:
 *  - `word`            a plain string
 *  - `"two words"`     a quoted string (may contain any character except `"`)
 *  - `field:value`     a field; only the FIRST colon separates name and value
 *  - `-something`      a leading minus marks the word, field or subquery as prohibited
 *  - `( ... )`         a subquery
 */
class QueryParser implements QueryParserInterface
{
    private string $query;
    private int $position = 0;

    /**
     * Parses the given query from the start and returns its top-level nodes as one group.
     */
    public function parse(string $query): NodeGroup
    {
        $this->query    = $query;
        $this->position = 0;

        return $this->buildNodeGroup(false);
    }

    /**
     * Collects nodes until the current (sub)query ends.
     *
     * @param bool $isSubquery true when called for the inside of a pair of parentheses
     * @param bool $prohibited prohibited status to give the resulting group
     */
    private function buildNodeGroup(bool $isSubquery, bool $prohibited = false): NodeGroup
    {
        $nodes      = [];
        $nodeResult = $this->buildNextNode($isSubquery);

        while ($nodeResult->node !== null) {
            $nodes[] = $nodeResult->node;
            if ($nodeResult->isSubqueryEnd) {
                break;
            }
            $nodeResult = $this->buildNextNode($isSubquery);
        }

        return new NodeGroup($nodes, $prohibited);
    }

    /**
     * Reads characters from the current position until one complete node has been built.
     *
     * @param bool $isSubquery true when a `)` must be treated as the end of the current subquery
     *
     * @return NodeResult the node (null if nothing was left to build) and whether the (sub)query ended
     */
    private function buildNextNode(bool $isSubquery): NodeResult
    {
        $tokenUnderConstruction = '';
        $inQuotes               = false;
        $fieldName              = '';
        $prohibited             = false;
        // The query is not modified while scanning, so its length is computed once.
        // Scanning is byte-wise; every delimiter is a single ASCII byte, so multi-byte
        // characters pass through untouched (assuming the query is UTF-8).
        $length                 = strlen($this->query);

        while ($this->position < $length) {
            $char = $this->query[$this->position];

            // If we're in a quoted string, we treat all characters except another quote as ordinary characters
            if ($inQuotes) {
                if ($char !== '"') {
                    $tokenUnderConstruction .= $char;
                    $this->position++;

                    continue;
                }
                // Closing quote: the node is complete.
                $this->position++;

                return new NodeResult(
                    $this->createNode($tokenUnderConstruction, $fieldName, $prohibited),
                    false
                );
            }

            switch ($char) {
                case '-':
                    if ($tokenUnderConstruction === '' && $fieldName === '') {
                        // A minus sign at the very beginning of a node indicates prohibition
                        $prohibited = true;
                    } else {
                        // Anywhere else it is a normal character. This includes the start of a
                        // field value, so that "amount:-100" keeps its negative value.
                        $tokenUnderConstruction .= $char;
                    }

                    break;

                case '"':
                    if ($tokenUnderConstruction === '') {
                        // A quote sign at the beginning of a token indicates the start of a quoted string
                        $inQuotes = true;
                    } else {
                        // In any other location, it's just a normal character
                        $tokenUnderConstruction .= $char;
                    }

                    break;

                case '(':
                    if ($tokenUnderConstruction === '') {
                        // A left parenthesis at the beginning of a token indicates the start of a subquery
                        $this->position++;

                        return new NodeResult(
                            $this->buildNodeGroup(true, $prohibited),
                            false
                        );
                    }
                    // In any other location, it's just a normal character
                    $tokenUnderConstruction .= $char;

                    break;

                case ')':
                    // A right parenthesis while in a subquery means the subquery ended,
                    // thus also signaling the end of any node currently being built
                    if ($isSubquery) {
                        $this->position++;

                        return new NodeResult(
                            $tokenUnderConstruction !== ''
                                ? $this->createNode($tokenUnderConstruction, $fieldName, $prohibited)
                                : null,
                            true
                        );
                    }
                    // In any other location, it's just a normal character
                    $tokenUnderConstruction .= $char;

                    break;

                case ':':
                    if ($tokenUnderConstruction !== '' && $fieldName === '') {
                        // The first colon with a left-hand side string starts a field: what we
                        // have so far is the field name, and the value comes next
                        $fieldName              = $tokenUnderConstruction;
                        $tokenUnderConstruction = '';
                    } else {
                        // A leading colon, or any colon inside a field value (URLs, times),
                        // is just a normal character
                        $tokenUnderConstruction .= $char;
                    }

                    break;

                case ' ':
                    // A space indicates the end of a token construction if non-empty, otherwise it's just ignored
                    if ($tokenUnderConstruction !== '') {
                        $this->position++;

                        return new NodeResult(
                            $this->createNode($tokenUnderConstruction, $fieldName, $prohibited),
                            false
                        );
                    }

                    break;

                default:
                    $tokenUnderConstruction .= $char;
            }

            $this->position++;
        }

        // End of input: emit whatever is still pending (this also covers an unterminated
        // quoted string and a field name without a value).
        $finalNode = $tokenUnderConstruction !== '' || $fieldName !== ''
            ? $this->createNode($tokenUnderConstruction, $fieldName, $prohibited)
            : null;

        return new NodeResult($finalNode, true);
    }

    /**
     * Builds a FieldNode when a field name was seen, and a StringNode otherwise.
     * Field name and token are trimmed of surrounding whitespace.
     */
    private function createNode(string $token, string $fieldName, bool $prohibited): Node
    {
        if ($fieldName !== '') {
            return new FieldNode(trim($fieldName), trim($token), $prohibited);
        }

        return new StringNode(trim($token), $prohibited);
    }
}

View File

@@ -0,0 +1,13 @@
<?php
declare(strict_types=1);
namespace FireflyIII\Support\Search\QueryParser;
/**
 * Contract for search query parsers: turns a raw query string into a tree of nodes.
 */
interface QueryParserInterface
{
/**
* Parses the given query string.
*
* @param string $query the raw search query
*
* @return NodeGroup the group holding the top-level nodes of the query
*/
public function parse(string $query): NodeGroup;
}

View File

@@ -0,0 +1,24 @@
<?php
declare(strict_types=1);
namespace FireflyIII\Support\Search\QueryParser;
/**
 * A plain string from the search query: either one word without spaces
 * or the contents of a quote-delimited string.
 */
class StringNode extends Node
{
    /**
     * @param string $value      the text of the word or quoted string
     * @param bool   $prohibited whether the string was negated in the query
     */
    public function __construct(
        private string $value,
        bool $prohibited = false
    ) {
        // $prohibited lives on the parent class, so it cannot be promoted here.
        $this->prohibited = $prohibited;
    }

    /**
     * The text exactly as passed to the constructor.
     */
    public function getValue(): string
    {
        return $this->value;
    }
}

View File

@@ -38,8 +38,10 @@ interface SearchInterface
public function getModifiers(): Collection;
public function getOperators(): Collection;
public function getWords(): array;
public function getWordsAsString(): string;
public function getExcludedWords(): array;
public function hasModifiers(): bool;