1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2026-05-22 10:15:16 +02:00

Improve poor man's regular expression common prefix generator.

The S3 driver depends on being able to generate a common prefix to limit the number of results from list commands, which saves on bandwidth.

The prior implementation could be tricked by an expression like ^ABC|^DEF, where there is more than one possible prefix.  To fix this, disallow any prefix when another ^ anchor is found in the expression.  The sequences [^ and \^ are OK since in those positions the ^ is not an anchor.

Note that this was not an active bug because there are currently no expressions with multiple ^ anchors.
This commit is contained in:
David Steele
2020-02-28 17:41:34 -05:00
parent 3bbead5480
commit 8ec41efb04
3 changed files with 29 additions and 9 deletions
+19 -7
View File
@@ -199,11 +199,7 @@ regExpMatchOne(const String *expression, const String *string)
FUNCTION_TEST_RETURN(result); FUNCTION_TEST_RETURN(result);
} }
/*********************************************************************************************************************************** /**********************************************************************************************************************************/
Return the constant first part of the regular expression if it has a beginning anchor
This works by scanning the string until the first special regex character is found so escaped characters will not be included.
***********************************************************************************************************************************/
String * String *
regExpPrefix(const String *expression) regExpPrefix(const String *expression)
{ {
@@ -216,11 +212,13 @@ regExpPrefix(const String *expression)
// Only generate prefix if expression is defined and has a beginning anchor // Only generate prefix if expression is defined and has a beginning anchor
if (expression != NULL && strPtr(expression)[0] == '^') if (expression != NULL && strPtr(expression)[0] == '^')
{ {
const char *expressionZ = strPtr(expression);
size_t expressionSize = strSize(expression);
unsigned int expressionIdx = 1; unsigned int expressionIdx = 1;
for (; expressionIdx < strSize(expression); expressionIdx++) for (; expressionIdx < expressionSize; expressionIdx++)
{ {
char expressionChr = strPtr(expression)[expressionIdx]; char expressionChr = expressionZ[expressionIdx];
// Search for characters that will end the prefix // Search for characters that will end the prefix
if (expressionChr == '.' || expressionChr == '^' || expressionChr == '$' || expressionChr == '*' || if (expressionChr == '.' || expressionChr == '^' || expressionChr == '$' || expressionChr == '*' ||
@@ -234,8 +232,22 @@ regExpPrefix(const String *expression)
// Will there be any characters in the prefix? // Will there be any characters in the prefix?
if (expressionIdx > 1) if (expressionIdx > 1)
{
// Search again to make sure there is not another begin anchor. If another one is found we won't be able to use the prefix
unsigned int anchorIdx = expressionIdx;
for (anchorIdx = 1; anchorIdx < expressionSize; anchorIdx++)
{
// [^ and \^ are not begin anchors
if (expressionZ[anchorIdx] == '^' && expressionZ[anchorIdx - 1] != '[' && expressionZ[anchorIdx - 1] != '\\')
break;
}
// If another begin anchor was not found then return the prefix
if (anchorIdx == expressionSize)
result = strSubN(expression, 1, expressionIdx - 1); result = strSubN(expression, 1, expressionIdx - 1);
} }
}
FUNCTION_TEST_RETURN(result); FUNCTION_TEST_RETURN(result);
} }
+4
View File
@@ -22,6 +22,10 @@ bool regExpMatch(RegExp *this, const String *string);
void regExpFree(RegExp *this); void regExpFree(RegExp *this);
bool regExpMatchOne(const String *expression, const String *string); bool regExpMatchOne(const String *expression, const String *string);
// Return the common prefix of a regular expression, if it has one. The common prefix consists of fixed characters that must always
// be found at the beginning of the string to be matched. Escaped characters will not be included in the prefix. If there is no
// usable prefix then NULL is returned.
String *regExpPrefix(const String *expression); String *regExpPrefix(const String *expression);
/*********************************************************************************************************************************** /***********************************************************************************************************************************
+5 -1
View File
@@ -61,7 +61,6 @@ testRun(void)
TEST_RESULT_PTR(regExpPrefix(strNew("ABC")), NULL, "expression without begin anchor has no prefix"); TEST_RESULT_PTR(regExpPrefix(strNew("ABC")), NULL, "expression without begin anchor has no prefix");
TEST_RESULT_PTR(regExpPrefix(strNew("^.")), NULL, "expression with no regular character has no prefix"); TEST_RESULT_PTR(regExpPrefix(strNew("^.")), NULL, "expression with no regular character has no prefix");
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC^")), "ABC", "prefix stops at special character");
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC$")), "ABC", "prefix stops at special character"); TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC$")), "ABC", "prefix stops at special character");
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC*")), "ABC", "prefix stops at special character"); TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC*")), "ABC", "prefix stops at special character");
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC+")), "ABC", "prefix stops at special character"); TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC+")), "ABC", "prefix stops at special character");
@@ -74,6 +73,11 @@ testRun(void)
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC|")), "ABC", "prefix stops at special character"); TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC|")), "ABC", "prefix stops at special character");
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC\\")), "ABC", "prefix stops at special character"); TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC\\")), "ABC", "prefix stops at special character");
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC^")), NULL, "no prefix when more than one begin anchor");
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC|^DEF")), NULL, "no prefix when more than one begin anchor");
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC[^DEF]")), "ABC", "prefix when ^ used for exclusion");
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC\\^DEF]")), "ABC", "prefix when ^ is escaped");
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABCDEF")), "ABCDEF", "prefix is entire expression"); TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABCDEF")), "ABCDEF", "prefix is entire expression");
} }