You've already forked pgbackrest
mirror of
https://github.com/pgbackrest/pgbackrest.git
synced 2026-05-22 10:15:16 +02:00
Improve poor man's regular expression common prefix generator.
The S3 driver depends on being able to generate a common prefix to limit the number of results from list commands, which saves on bandwidth. The prior implementation could be tricked by an expression like ^ABC|^DEF where there is more than one possible prefix. To fix this, disallow any prefix when another ^ anchor is found in the expression. The sequences [^ and \^ are OK since the ^ in them is not an anchor. Note that this was not an active bug because there are currently no expressions with multiple ^ anchors.
This commit is contained in:
+19
-7
@@ -199,11 +199,7 @@ regExpMatchOne(const String *expression, const String *string)
|
|||||||
FUNCTION_TEST_RETURN(result);
|
FUNCTION_TEST_RETURN(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
/***********************************************************************************************************************************
|
/**********************************************************************************************************************************/
|
||||||
Return the constant first part of the regular expression if it has a beginning anchor
|
|
||||||
|
|
||||||
This works by scanning the string until the first special regex character is found so escaped characters will not be included.
|
|
||||||
***********************************************************************************************************************************/
|
|
||||||
String *
|
String *
|
||||||
regExpPrefix(const String *expression)
|
regExpPrefix(const String *expression)
|
||||||
{
|
{
|
||||||
@@ -216,11 +212,13 @@ regExpPrefix(const String *expression)
|
|||||||
// Only generate prefix if expression is defined and has a beginning anchor
|
// Only generate prefix if expression is defined and has a beginning anchor
|
||||||
if (expression != NULL && strPtr(expression)[0] == '^')
|
if (expression != NULL && strPtr(expression)[0] == '^')
|
||||||
{
|
{
|
||||||
|
const char *expressionZ = strPtr(expression);
|
||||||
|
size_t expressionSize = strSize(expression);
|
||||||
unsigned int expressionIdx = 1;
|
unsigned int expressionIdx = 1;
|
||||||
|
|
||||||
for (; expressionIdx < strSize(expression); expressionIdx++)
|
for (; expressionIdx < expressionSize; expressionIdx++)
|
||||||
{
|
{
|
||||||
char expressionChr = strPtr(expression)[expressionIdx];
|
char expressionChr = expressionZ[expressionIdx];
|
||||||
|
|
||||||
// Search for characters that will end the prefix
|
// Search for characters that will end the prefix
|
||||||
if (expressionChr == '.' || expressionChr == '^' || expressionChr == '$' || expressionChr == '*' ||
|
if (expressionChr == '.' || expressionChr == '^' || expressionChr == '$' || expressionChr == '*' ||
|
||||||
@@ -234,8 +232,22 @@ regExpPrefix(const String *expression)
|
|||||||
|
|
||||||
// Will there be any characters in the prefix?
|
// Will there be any characters in the prefix?
|
||||||
if (expressionIdx > 1)
|
if (expressionIdx > 1)
|
||||||
|
{
|
||||||
|
// Search again and make sure there is not another begin anchor. If so we won't be able to use the prefix
|
||||||
|
unsigned int anchorIdx = expressionIdx;
|
||||||
|
|
||||||
|
for (anchorIdx = 1; anchorIdx < expressionSize; anchorIdx++)
|
||||||
|
{
|
||||||
|
// [^ and \^ are not begin anchors
|
||||||
|
if (expressionZ[anchorIdx] == '^' && expressionZ[anchorIdx - 1] != '[' && expressionZ[anchorIdx - 1] != '\\')
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If another begin anchor was not found then return the prefix
|
||||||
|
if (anchorIdx == expressionSize)
|
||||||
result = strSubN(expression, 1, expressionIdx - 1);
|
result = strSubN(expression, 1, expressionIdx - 1);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
FUNCTION_TEST_RETURN(result);
|
FUNCTION_TEST_RETURN(result);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,6 +22,10 @@ bool regExpMatch(RegExp *this, const String *string);
|
|||||||
void regExpFree(RegExp *this);
|
void regExpFree(RegExp *this);
|
||||||
|
|
||||||
bool regExpMatchOne(const String *expression, const String *string);
|
bool regExpMatchOne(const String *expression, const String *string);
|
||||||
|
|
||||||
|
// Return the common prefix of a regular expression, if it has one. The common prefix consists of fixed characters that must always
|
||||||
|
// be found at the beginning of the string to be matched. Escaped characters will not be included in the prefix. If there is no
|
||||||
|
// usable prefix then NULL is returned.
|
||||||
String *regExpPrefix(const String *expression);
|
String *regExpPrefix(const String *expression);
|
||||||
|
|
||||||
/***********************************************************************************************************************************
|
/***********************************************************************************************************************************
|
||||||
|
|||||||
@@ -61,7 +61,6 @@ testRun(void)
|
|||||||
TEST_RESULT_PTR(regExpPrefix(strNew("ABC")), NULL, "expression without begin anchor has no prefix");
|
TEST_RESULT_PTR(regExpPrefix(strNew("ABC")), NULL, "expression without begin anchor has no prefix");
|
||||||
TEST_RESULT_PTR(regExpPrefix(strNew("^.")), NULL, "expression with no regular character has no prefix");
|
TEST_RESULT_PTR(regExpPrefix(strNew("^.")), NULL, "expression with no regular character has no prefix");
|
||||||
|
|
||||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC^")), "ABC", "prefix stops at special character");
|
|
||||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC$")), "ABC", "prefix stops at special character");
|
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC$")), "ABC", "prefix stops at special character");
|
||||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC*")), "ABC", "prefix stops at special character");
|
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC*")), "ABC", "prefix stops at special character");
|
||||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC+")), "ABC", "prefix stops at special character");
|
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC+")), "ABC", "prefix stops at special character");
|
||||||
@@ -74,6 +73,11 @@ testRun(void)
|
|||||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC|")), "ABC", "prefix stops at special character");
|
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC|")), "ABC", "prefix stops at special character");
|
||||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC\\")), "ABC", "prefix stops at special character");
|
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC\\")), "ABC", "prefix stops at special character");
|
||||||
|
|
||||||
|
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC^")), NULL, "no prefix when more than one begin anchor");
|
||||||
|
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC|^DEF")), NULL, "no prefix when more than one begin anchor");
|
||||||
|
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC[^DEF]")), "ABC", "prefix when ^ used for exclusion");
|
||||||
|
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABC\\^DEF]")), "ABC", "prefix when ^ is escaped");
|
||||||
|
|
||||||
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABCDEF")), "ABCDEF", "prefix is entire expression");
|
TEST_RESULT_STR_Z(regExpPrefix(strNew("^ABCDEF")), "ABCDEF", "prefix is entire expression");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user