Mirror of https://github.com/pgbackrest/pgbackrest.git (synced 2025-04-13 11:30:40 +02:00)

Improve GCS multi-part performance.

The prior code avoided uploading a chunk when it was unclear whether the write was complete. This was primarily because the GCS documentation is very vague about how to handle a zero-size chunk.

Now chunks are uploaded as they are available. This should improve performance and also reduces the diff against a future commit that absolutely requires zero-size chunks.
This commit is contained in:
David Steele 2023-09-04 10:39:19 -04:00 committed by GitHub
parent fd9c6b0e9d
commit 6cb9c40fb8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 33 additions and 16 deletions

View File

@ -42,6 +42,17 @@
<p>Multi-stanza check command.</p>
</release-item>
<release-item>
<github-pull-request id="2162"/>
<release-item-contributor-list>
<release-item-contributor id="david.steele"/>
<release-item-reviewer id="reid.thompson"/>
</release-item-contributor-list>
<p>Improve <proper>GCS</proper> multi-part performance.</p>
</release-item>
<release-item>
<release-item-contributor-list>
<release-item-contributor id="david.steele"/>

View File

@ -186,15 +186,18 @@ storageWriteGcsBlockAsync(StorageWriteGcs *this, bool done)
}
// Add data to md5 hash
ioFilterProcessIn(this->md5hash, this->chunkBuffer);
if (!bufEmpty(this->chunkBuffer))
ioFilterProcessIn(this->md5hash, this->chunkBuffer);
// Upload the chunk. If this is the last chunk then add the total bytes in the file to the range rather than the * added to
// prior chunks. This indicates that the resumable upload is complete.
HttpHeader *header = httpHeaderAdd(
// prior chunks. This indicates that the resumable upload is complete. If the last chunk is zero-size, then the byte range
// is * to indicate that there is no more data to upload.
HttpHeader *const header = httpHeaderAdd(
httpHeaderNew(NULL), HTTP_HEADER_CONTENT_RANGE_STR,
strNewFmt(
HTTP_HEADER_CONTENT_RANGE_BYTES " %" PRIu64 "-%" PRIu64 "/%s", this->uploadTotal,
this->uploadTotal + bufUsed(this->chunkBuffer) - 1,
HTTP_HEADER_CONTENT_RANGE_BYTES " %s/%s",
bufUsed(this->chunkBuffer) == 0 ?
"*" : zNewFmt("%" PRIu64 "-%" PRIu64, this->uploadTotal, this->uploadTotal + bufUsed(this->chunkBuffer) - 1),
done ? zNewFmt("%" PRIu64, this->uploadTotal + bufUsed(this->chunkBuffer)) : "*"));
httpQueryAdd(query, GCS_QUERY_UPLOAD_ID_STR, this->uploadId);
@ -240,14 +243,6 @@ storageWriteGcs(THIS_VOID, const Buffer *buffer)
// Continue until the write buffer has been exhausted
do
{
// If the chunk buffer is full then write it. We can't write it at the end of this loop because this might be the end of the
// input and we'd have no way to signal the end of the resumable upload when closing the file if there is no more data.
if (bufRemains(this->chunkBuffer) == 0)
{
storageWriteGcsBlockAsync(this, false);
bufUsedZero(this->chunkBuffer);
}
// Copy as many bytes as possible into the chunk buffer
const size_t bytesNext =
bufRemains(this->chunkBuffer) > bufUsed(buffer) - bytesTotal ?
@ -255,6 +250,14 @@ storageWriteGcs(THIS_VOID, const Buffer *buffer)
bufCatSub(this->chunkBuffer, buffer, bytesTotal, bytesNext);
bytesTotal += bytesNext;
// If the chunk buffer is full then write it. It is possible that this is the last chunk and it would be better to wait, but
// the chances of that are quite small so in general it is better to write now so there is less to write later.
if (bufRemains(this->chunkBuffer) == 0)
{
storageWriteGcsBlockAsync(this, false);
bufUsedZero(this->chunkBuffer);
}
}
while (bytesTotal != bufUsed(buffer));
@ -283,8 +286,6 @@ storageWriteGcsClose(THIS_VOID)
// If a resumable upload was started then finish that way
if (this->uploadId != NULL)
{
ASSERT(!bufEmpty(this->chunkBuffer));
// Write what is left in the chunk buffer
storageWriteGcsBlockAsync(this, true);
storageWriteGcsBlock(this, true);

View File

@ -595,8 +595,13 @@ testRun(void)
testRequestP(
service, HTTP_VERB_PUT, .upload = true, .noAuth = true,
.query = "fields=md5Hash%2Csize&name=file.txt&uploadType=resumable&upload_id=ulid1", .contentRange = "16-31/32",
.query = "name=file.txt&uploadType=resumable&upload_id=ulid1", .contentRange = "16-31/*",
.content = "7890123456789012");
testResponseP(service, .code = 308);
testRequestP(
service, HTTP_VERB_PUT, .upload = true, .noAuth = true,
.query = "fields=md5Hash%2Csize&name=file.txt&uploadType=resumable&upload_id=ulid1", .contentRange = "*/32");
testResponseP(service, .content = "{\"md5Hash\":\"dnF5x6K/8ZZRzpfSlMMM+w==\",\"size\":\"32\"}");
TEST_ASSIGN(write, storageNewWriteP(storage, STRDEF("file.txt")), "new write");