1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2025-01-18 04:58:51 +02:00

Improve performance of list requests on S3.

Any beginning literal portion of a filter expression is used to generate a search prefix which often helps keep the request small enough to avoid rate limiting.

Suggested by Mihail Shvein.
This commit is contained in:
David Steele 2017-10-20 14:10:16 -04:00
parent f4524aeaa9
commit 1f120f3fce
4 changed files with 47 additions and 2 deletions

View File

@ -31,6 +31,16 @@
</release-item>
</release-bug-list>
<release-feature-list>
<release-item>
<release-item-contributor-list>
<release-item-ideator id="shvein.mihail"/>
</release-item-contributor-list>
<p>Improve performance of list requests on S3. Any beginning literal portion of a filter expression is used to generate a search prefix which often helps keep the request small enough to avoid rate limiting.</p>
</release-item>
</release-feature-list>
<release-refactor-list>
<release-item>
<p>Update C naming conventions.</p>
@ -3508,6 +3518,11 @@
<contributor-id type="github">sharmay</contributor-id>
</contributor>
<contributor id="shvein.mihail">
<contributor-name-display>Mihail Shvein</contributor-name-display>
<contributor-id type="github">M1hacka</contributor-id>
</contributor>
<contributor id="smith.greg">
<contributor-name-display>Greg Smith</contributor-name-display>
<contributor-id type="github">gregscds</contributor-id>

View File

@ -200,4 +200,25 @@ sub stringSplit
push @EXPORT, qw(stringSplit);
####################################################################################################################################
# regexPrefix - return the constant first part of the regex if it has a beginning anchor
#
# This works by scanning the string until the first special regex character is found so escaped characters will not be included.
# Space and dash also end the prefix.
#
# The prefix is only useful if every string matched by the expression starts with it, so two additional rules are applied:
#
# - If the literal run is directly followed by a quantifier that can make the last character optional or repeated (*, ?, or {)
#   then that last character is dropped from the prefix, e.g. ^ab?c gives 'a' since the expression also matches 'ac'.
# - If the rest of the expression contains an alternation outside of any group/character class then no prefix is returned, e.g.
#   ^abc|^def gives undef since the expression also matches strings that do not start with 'abc'.
#
# Parameters:
#     strExpression - regular expression to examine (may be undef)
#
# Returns:
#     The literal prefix, or undef when the expression is undefined, has no beginning anchor, or has no usable literal prefix.
####################################################################################################################################
sub regexPrefix
{
    my $strExpression = shift;
    my $strPrefix;

    # Only generate prefix if expression is defined and has a beginning anchor
    if (defined($strExpression) && $strExpression =~ /^\^/)
    {
        my $strRemainder = substr($strExpression, 1);

        # Take the leading run of characters that have no special meaning
        ($strPrefix) = $strRemainder =~ /^([^\.\^\$\*\+\-\?\(\)\[\]\{\}\\\|\ ]+)/;

        if (defined($strPrefix))
        {
            my $strRest = substr($strRemainder, length($strPrefix));

            # Look for an alternation that is not nested in a group. Escaped characters and character classes are consumed as a
            # single token so a | or parenthesis inside them is not counted.
            my $iDepth = 0;
            my $bAlternation = 0;

            while ($strRest =~ /\G(?:\\.|\[\^?\]?(?:\\.|[^\]\\])*\]|([\(\)\|])|.)/gs)
            {
                my $strToken = $1;

                next if !defined($strToken);

                if ($strToken eq '(')
                {
                    $iDepth++;
                }
                elsif ($strToken eq ')')
                {
                    $iDepth--;
                }
                elsif ($iDepth <= 0)
                {
                    $bAlternation = 1;
                    last;
                }
            }

            if ($bAlternation)
            {
                # The beginning anchor only applies to the first alternative so there is no common prefix
                $strPrefix = undef;
            }
            elsif ($strRest =~ /^[\*\?\{]/)
            {
                # The quantifier applies to the last literal character so it cannot be part of the prefix. + is not included here
                # because it always requires at least one occurrence.
                chop($strPrefix);

                if ($strPrefix eq '')
                {
                    $strPrefix = undef;
                }
            }
        }
    }

    return $strPrefix;
}
push @EXPORT, qw(regexPrefix);
1;

View File

@ -290,7 +290,8 @@ sub list
);
# Get file list
my $rstryFileList = $self->driver()->list($self->pathGet($strPathExp), {bIgnoreMissing => $bIgnoreMissing});
my $rstryFileList = $self->driver()->list(
$self->pathGet($strPathExp), {strExpression => $strExpression, bIgnoreMissing => $bIgnoreMissing});
# Apply expression if defined
if (defined($strExpression))

View File

@ -16,6 +16,7 @@ use File::Basename qw(basename dirname);
use pgBackRest::Common::Exception;
use pgBackRest::Common::Log;
use pgBackRest::Common::String;
use pgBackRest::Common::Xml;
use pgBackRest::Storage::S3::FileRead;
use pgBackRest::Storage::S3::FileWrite;
@ -243,15 +244,22 @@ sub list
(
$strOperation,
$strPath,
$strExpression
) =
logDebugParam
(
__PACKAGE__ . '->list', \@_,
{name => 'strPath', trace => true},
{name => 'strExpression', optional => true, trace => true},
);
# Use the regexp to build a prefix to shorten searches
my $strPrefix = regexPrefix($strExpression);
# Get list using manifest function
my @stryFileList = grep(!/^\.$/i, keys(%{$self->manifest($strPath, {bRecurse => false})}));
my @stryFileList = grep(
!/^\.$/i, keys(%{$self->manifest(
$strPath . (defined($strPrefix) ? "/${strPrefix}" : ''), {bRecurse => false, bPath => !defined($strPrefix)})}));
# Return from function and log return values if any
return logDebugReturn