You've already forked pgbackrest
mirror of
https://github.com/pgbackrest/pgbackrest.git
synced 2026-05-22 10:15:16 +02:00
Improve poor man's regular expression common prefix generator.
The S3 driver depends on being able to generate a common prefix to limit the number of results from list commands, which saves on bandwidth. The prior implementation could be tricked by an expression like ^ABC|^DEF, where there is more than one possible prefix. To fix this, disallow any prefix when another ^ anchor is found in the expression. The sequences [^ and \^ are OK since they are not anchors. Note that this was not an active bug because there are currently no expressions with multiple ^ anchors.
This commit is contained in:
+20
-8
@@ -199,11 +199,7 @@ regExpMatchOne(const String *expression, const String *string)
|
||||
FUNCTION_TEST_RETURN(result);
|
||||
}
|
||||
|
||||
/***********************************************************************************************************************************
|
||||
Return the constant first part of the regular expression if it has a beginning anchor
|
||||
|
||||
This works by scanning the string until the first special regex character is found so escaped characters will not be included.
|
||||
***********************************************************************************************************************************/
|
||||
/**********************************************************************************************************************************/
|
||||
String *
|
||||
regExpPrefix(const String *expression)
|
||||
{
|
||||
@@ -216,11 +212,13 @@ regExpPrefix(const String *expression)
|
||||
// Only generate prefix if expression is defined and has a beginning anchor
|
||||
if (expression != NULL && strPtr(expression)[0] == '^')
|
||||
{
|
||||
const char *expressionZ = strPtr(expression);
|
||||
size_t expressionSize = strSize(expression);
|
||||
unsigned int expressionIdx = 1;
|
||||
|
||||
for (; expressionIdx < strSize(expression); expressionIdx++)
|
||||
for (; expressionIdx < expressionSize; expressionIdx++)
|
||||
{
|
||||
char expressionChr = strPtr(expression)[expressionIdx];
|
||||
char expressionChr = expressionZ[expressionIdx];
|
||||
|
||||
// Search for characters that will end the prefix
|
||||
if (expressionChr == '.' || expressionChr == '^' || expressionChr == '$' || expressionChr == '*' ||
|
||||
@@ -234,7 +232,21 @@ regExpPrefix(const String *expression)
|
||||
|
||||
// Will there be any characters in the prefix?
|
||||
if (expressionIdx > 1)
|
||||
result = strSubN(expression, 1, expressionIdx - 1);
|
||||
{
|
||||
// Search again and make sure there is not another begin anchor. If so we won't be able to use the prefix
|
||||
unsigned int anchorIdx = expressionIdx;
|
||||
|
||||
for (anchorIdx = 1; anchorIdx < expressionSize; anchorIdx++)
|
||||
{
|
||||
// [^ and \^ are not begin anchors
|
||||
if (expressionZ[anchorIdx] == '^' && expressionZ[anchorIdx - 1] != '[' && expressionZ[anchorIdx - 1] != '\\')
|
||||
break;
|
||||
}
|
||||
|
||||
// If another begin anchor was not found then return the prefix
|
||||
if (anchorIdx == expressionSize)
|
||||
result = strSubN(expression, 1, expressionIdx - 1);
|
||||
}
|
||||
}
|
||||
|
||||
FUNCTION_TEST_RETURN(result);
|
||||
|
||||
@@ -22,6 +22,10 @@ bool regExpMatch(RegExp *this, const String *string);
|
||||
void regExpFree(RegExp *this);
|
||||
|
||||
bool regExpMatchOne(const String *expression, const String *string);
|
||||
|
||||
// Return the common prefix of a regular expression, if it has one. The common prefix consists of fixed characters that must always
|
||||
// be found at the beginning of the string to be matched. Escaped characters will not be included in the prefix. If there is no
|
||||
// usable prefix then NULL is returned.
|
||||
String *regExpPrefix(const String *expression);
|
||||
|
||||
/***********************************************************************************************************************************
|
||||
|
||||
@@ -61,7 +61,6 @@ testRun(void)
|
||||
TEST_RESULT_PTR(regExpPrefix(strNew("ABC")), NULL, "expression without begin anchor has no prefix");
|
||||
TEST_RESULT_PTR(regExpPrefix(strNew("^.")), NULL, "expression with no regular character has no prefix");
|
||||
|
||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC^")), "ABC", "prefix stops at special character");
|
||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC$")), "ABC", "prefix stops at special character");
|
||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC*")), "ABC", "prefix stops at special character");
|
||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC+")), "ABC", "prefix stops at special character");
|
||||
@@ -74,6 +73,11 @@ testRun(void)
|
||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC|")), "ABC", "prefix stops at special character");
|
||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC\\")), "ABC", "prefix stops at special character");
|
||||
|
||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC^")), NULL, "no prefix when more than one begin anchor");
|
||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC|^DEF")), NULL, "no prefix when more than one begin anchor");
|
||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC[^DEF]")), "ABC", "prefix when ^ used for exclusion");
|
||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC\\^DEF]")), "ABC", "prefix when ^ is escaped");
|
||||
|
||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABCDEF")), "ABCDEF", "prefix is entire expression");
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user