From 8ec41efb04ee45854d18ffaf765e4a4800dd3879 Mon Sep 17 00:00:00 2001 From: David Steele Date: Fri, 28 Feb 2020 17:41:34 -0500 Subject: [PATCH] Improve poor man's regular expression common prefix generator. The S3 driver depends on being able to generate a common prefix to limit the number of results from list commands, which saves on bandwidth. The prior implementation could be tricked by an expression like ^ABC|^DEF where there is more than one possible prefix. To fix this disallow any prefix when another ^ anchor is found in the expression. [^ and \^ are OK since they are not anchors. Note that this was not an active bug because there are currently no expressions with multiple ^ anchors. --- src/common/regExp.c | 28 ++++++++++++++++++++-------- src/common/regExp.h | 4 ++++ test/src/module/common/regExpTest.c | 6 +++++- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/common/regExp.c b/src/common/regExp.c index 6f76dd092..1fe93e66f 100644 --- a/src/common/regExp.c +++ b/src/common/regExp.c @@ -199,11 +199,7 @@ regExpMatchOne(const String *expression, const String *string) FUNCTION_TEST_RETURN(result); } -/*********************************************************************************************************************************** -Return the constant first part of the regular expression if it has a beginning anchor - -This works by scanning the string until the first special regex character is found so escaped characters will not be included. -***********************************************************************************************************************************/ +/**********************************************************************************************************************************/ String * regExpPrefix(const String *expression) { @@ -216,11 +212,13 @@ regExpPrefix(const String *expression) // Only generate prefix if expression is defined and has a beginning anchor if (expression != NULL && strPtr(expression)[0] == '^') { + const char *expressionZ = strPtr(expression); + size_t expressionSize = strSize(expression); unsigned int expressionIdx = 1; - for (; expressionIdx < strSize(expression); expressionIdx++) + for (; expressionIdx < expressionSize; expressionIdx++) { - char expressionChr = strPtr(expression)[expressionIdx]; + char expressionChr = expressionZ[expressionIdx]; // Search for characters that will end the prefix if (expressionChr == '.' || expressionChr == '^' || expressionChr == '$' || expressionChr == '*' || @@ -234,7 +232,21 @@ regExpPrefix(const String *expression) // Will there be any characters in the prefix? if (expressionIdx > 1) - result = strSubN(expression, 1, expressionIdx - 1); + { + // Search again and make sure there is not another begin anchor. If so we won't be able to use the prefix + unsigned int anchorIdx = expressionIdx; + + for (anchorIdx = 1; anchorIdx < expressionSize; anchorIdx++) + { + // [^ and \^ are not begin anchors + if (expressionZ[anchorIdx] == '^' && expressionZ[anchorIdx - 1] != '[' && expressionZ[anchorIdx - 1] != '\\') + break; + } + + // If another begin anchor was not found then return the prefix + if (anchorIdx == expressionSize) + result = strSubN(expression, 1, expressionIdx - 1); + } } FUNCTION_TEST_RETURN(result); diff --git a/src/common/regExp.h b/src/common/regExp.h index c59b6f2ae..149b1ec3d 100644 --- a/src/common/regExp.h +++ b/src/common/regExp.h @@ -22,6 +22,10 @@ bool regExpMatch(RegExp *this, const String *string); void regExpFree(RegExp *this); bool regExpMatchOne(const String *expression, const String *string); + +// Return the common prefix of a regular expression, if it has one. The common prefix consists of fixed characters that must always +// be found at the beginning of the string to be matched. Escaped characters will not be included in the prefix. If there is no +// usable prefix then NULL is returned. String *regExpPrefix(const String *expression); /*********************************************************************************************************************************** diff --git a/test/src/module/common/regExpTest.c b/test/src/module/common/regExpTest.c index e8f397ab6..e7245b025 100644 --- a/test/src/module/common/regExpTest.c +++ b/test/src/module/common/regExpTest.c @@ -61,7 +61,6 @@ testRun(void) TEST_RESULT_PTR(regExpPrefix(strNew("ABC")), NULL, "expression without begin anchor has no prefix"); TEST_RESULT_PTR(regExpPrefix(strNew("^.")), NULL, "expression with no regular character has no prefix"); - TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC^")), "ABC", "prefix stops at special character"); TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC$")), "ABC", "prefix stops at special character"); TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC*")), "ABC", "prefix stops at special character"); TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC+")), "ABC", "prefix stops at special character"); @@ -74,6 +73,11 @@ testRun(void) TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC|")), "ABC", "prefix stops at special character"); TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC\\")), "ABC", "prefix stops at special character"); + TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC^")), NULL, "no prefix when more than one begin anchor"); + TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC|^DEF")), NULL, "no prefix when more than one begin anchor"); + TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC[^DEF]")), "ABC", "prefix when ^ used for exclusion"); + TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC\\^DEF]")), "ABC", "prefix when ^ is escaped"); + TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABCDEF")), "ABCDEF", "prefix is entire expression"); }