1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2024-12-12 10:04:14 +02:00

Add HttpUrl object.

Parse a URL into component parts.
This commit is contained in:
David Steele 2021-03-01 13:44:47 -05:00 committed by GitHub
parent 1d77db3143
commit e64999db77
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 430 additions and 1 deletions

View File

@ -66,6 +66,14 @@
<p>Partial multi-repository implementation.</p>
</release-item>
<release-item>
<release-item-contributor-list>
<release-item-reviewer id="cynthia.shang"/>
</release-item-contributor-list>
<p>Add <code>HttpUrl</code> object.</p>
</release-item>
</release-development-list>
</release-core-list>
</release>

View File

@ -84,6 +84,7 @@ SRCS = \
common/io/http/request.c \
common/io/http/response.c \
common/io/http/session.c \
common/io/http/url.c \
common/io/io.c \
common/io/read.c \
common/io/session.c \

213
src/common/io/http/url.c Normal file
View File

@ -0,0 +1,213 @@
/***********************************************************************************************************************************
HTTP URL
***********************************************************************************************************************************/
#include "build.auto.h"
#include "common/debug.h"
#include "common/io/http/url.h"
#include "common/memContext.h"
#include "common/type/object.h"
#include "common/type/stringList.h"
#include "common/regExp.h"
/***********************************************************************************************************************************
Regular expression for URLs. This is not intended to be completely comprehensive, e.g. it is still possible to enter bad hostnames.
The goal is to make sure the syntax is correct enough for the rest of the parsing to succeed.
***********************************************************************************************************************************/
// The pattern is anchored at both ends so the whole URL must match. Note that '?' is excluded from both the host and the path
// segments, so URLs carrying a query string are rejected.
STRING_STATIC(
HTTP_URL_REGEXP_STR,
"^(http[s]{0,1}:\\/\\/){0,1}" // Optional protocol, either http:// or https://
"([^\\[\\:\\/?]+|\\[[a-fA-F0-9:]+\\])" // Host name/IPv4 (no '[', ':', '/', '?') or IPv6 (hex digits and colons in brackets)
"(:[1-9][0-9]{0,4}){0,1}" // Optional port, 1-5 digits with no leading zero (so the pattern alone allows up to 99999)
"(\\/[^?\\/]*)*$"); // Optional path, zero or more /segment parts (empty segments are allowed)
/***********************************************************************************************************************************
Object type
***********************************************************************************************************************************/
struct HttpUrl
{
// Publicly accessible members. This must remain the first member because the inline getters in url.h read these values by
// casting an HttpUrl pointer to an HttpUrlPub pointer.
HttpUrlPub pub;
MemContext *memContext; // Mem context created for this object in httpUrlNewParse()
};
// Defines the free function for the object -- presumably httpUrlFree() as declared in url.h (TODO confirm against object.h)
OBJECT_DEFINE_FREE(HTTP_URL);
/***********************************************************************************************************************************
Convert protocol type to a string
***********************************************************************************************************************************/
STRING_STATIC(HTTP_PROTOCOL_HTTP_STR,                               "http");
STRING_STATIC(HTTP_PROTOCOL_HTTPS_STR,                              "https");

// Map a protocol type to its display name. Only http and https have names, any other value (including httpProtocolTypeAny)
// yields NULL so callers must be prepared for a NULL result.
static const String *
httpProtocolTypeStr(HttpProtocolType type)
{
    if (type == httpProtocolTypeHttp)
        return HTTP_PROTOCOL_HTTP_STR;

    if (type == httpProtocolTypeHttps)
        return HTTP_PROTOCOL_HTTPS_STR;

    // No name for this type
    return NULL;
}
/**********************************************************************************************************************************/
// Parse a URL into protocol, host, port, and path.
//
// url        - URL to parse, e.g. https://host:8443/path, host:4443, or http://[2001:db8::1]/path. Must not be NULL.
// param.type - Expected protocol. When the URL contains a protocol it must match this value unless it is httpProtocolTypeAny.
//              When the URL contains no protocol this value is used as the protocol and must not be httpProtocolTypeAny.
//
// Returns a new HttpUrl. When no port is present it defaults to 80 for http and 443 for https. When no path is present it
// defaults to /. IPv6 hosts are stored without the enclosing brackets.
//
// Throws FormatError when the URL does not match the accepted syntax, when the protocol in the URL differs from the expected
// protocol, or when the port is larger than 65535.
HttpUrl *
httpUrlNewParse(const String *const url, HttpUrlNewParseParam param)
{
    FUNCTION_TEST_BEGIN();
        FUNCTION_TEST_PARAM(STRING, url);
        FUNCTION_TEST_PARAM(ENUM, param.type);
    FUNCTION_TEST_END();

    ASSERT(url != NULL);

    HttpUrl *this = NULL;

    MEM_CONTEXT_NEW_BEGIN("HttpUrl")
    {
        // Allocate state and set context. Members not listed are zero-initialized, so pub.type starts as httpProtocolTypeAny.
        this = memNew(sizeof(HttpUrl));

        *this = (HttpUrl)
        {
            .memContext = MEM_CONTEXT_NEW(),
            .pub =
            {
                .url = strDup(url),
            },
        };

        MEM_CONTEXT_TEMP_BEGIN()
        {
            // Check that URL format is one we accept
            if (!regExpMatchOne(HTTP_URL_REGEXP_STR, url))
                THROW_FMT(FormatError, "invalid URL '%s'", strZ(url));

            // Determine whether the first part is protocol or host. Splitting http://host/path on / gives "http:", "", "host",
            // "path" so a protocol always occupies the first two parts.
            StringList *splitUrl = strLstNewSplitZ(url, "/");

            if (strEqZ(strLstGet(splitUrl, 0), "http:"))
                this->pub.type = httpProtocolTypeHttp;
            else if (strEqZ(strLstGet(splitUrl, 0), "https:"))
                this->pub.type = httpProtocolTypeHttps;

            // If no protocol found then the first part is the host
            if (this->pub.type == httpProtocolTypeAny)
            {
                // Protocol must be set explicitly
                ASSERT(param.type != httpProtocolTypeAny);
                this->pub.type = param.type;
            }
            // Else protocol was found
            else
            {
                // Protocol must match expected
                if (param.type != httpProtocolTypeAny && this->pub.type != param.type)
                    THROW_FMT(FormatError, "expected protocol '%s' in URL '%s'", strZ(httpProtocolTypeStr(param.type)), strZ(url));

                // Remove protocol parts from split
                strLstRemoveIdx(splitUrl, 0);
                strLstRemoveIdx(splitUrl, 0);
            }

            // Get host
            const String *host = strLstGet(splitUrl, 0);
            const String *port = NULL;

            // If an IPv6 address
            if (strBeginsWithZ(host, "["))
            {
                // Split closing bracket
                StringList *splitHost = strLstNewSplitZ(host, "]");
                ASSERT(strLstSize(splitHost) == 2);

                // Remove opening bracket
                host = strSub(strLstGet(splitHost, 0), 1);

                // Get port if specified. The remainder after the bracket is :port so skip the leading colon.
                if (strSize(strLstGet(splitHost, 1)) > 0)
                    port = strSub(strLstGet(splitHost, 1), 1);
            }
            // Else IPv4 or host name
            else
            {
                // Split on colon
                StringList *splitHost = strLstNewSplitZ(host, ":");
                ASSERT(strLstSize(splitHost) != 0);

                // First part is the host
                host = strLstGet(splitHost, 0);

                // Second part is the port, if it exists
                if (strLstSize(splitHost) > 1)
                {
                    ASSERT(strLstSize(splitHost) == 2);
                    port = strLstGet(splitHost, 1);
                }
            }

            // Copy host into object context
            MEM_CONTEXT_PRIOR_BEGIN()
            {
                this->pub.host = strDup(host);
            }
            MEM_CONTEXT_PRIOR_END();

            // Get port if specified
            if (port != NULL)
            {
                const unsigned int portValue = cvtZToUInt(strZ(port));

                // The regular expression allows up to five digits, i.e. values up to 99999, but the largest valid TCP port is
                // 65535. Reject anything larger here rather than failing later when a connection is attempted.
                if (portValue > 65535)
                    THROW_FMT(FormatError, "invalid port '%s' in URL '%s'", strZ(port), strZ(url));

                this->pub.port = portValue;
            }
            // Else set default port based on the protocol
            else
            {
                ASSERT(this->pub.type != httpProtocolTypeAny);

                if (this->pub.type == httpProtocolTypeHttp)
                    this->pub.port = 80;
                else
                    this->pub.port = 443;
            }

            // Check for path
            if (strLstSize(splitUrl) > 1)
            {
                // Remove host part so it is easier to construct the path
                strLstRemoveIdx(splitUrl, 0);

                // Construct path and copy into object context
                const String *path = strLstJoin(splitUrl, "/");

                MEM_CONTEXT_PRIOR_BEGIN()
                {
                    this->pub.path = strNewFmt("/%s", strZ(path));
                }
                MEM_CONTEXT_PRIOR_END();
            }
            // Else default path is /
            else
                this->pub.path = FSLASH_STR;
        }
        MEM_CONTEXT_TEMP_END();
    }
    MEM_CONTEXT_NEW_END();

    FUNCTION_TEST_RETURN(this);
}
/**********************************************************************************************************************************/
// Render the URL for logging as {protocol://host:port/path}. The port is always included, even when it is the protocol default.
String *
httpUrlToLog(const HttpUrl *this)
{
    // A host containing a colon is treated as an IPv6 address and is wrapped in brackets again for display
    const char *bracketOpen = "";
    const char *bracketClose = "";

    if (strChr(this->pub.host, ':') != -1)
    {
        bracketOpen = "[";
        bracketClose = "]";
    }

    return strNewFmt(
        "{%s://%s%s%s:%u%s}", strZ(httpProtocolTypeStr(this->pub.type)), bracketOpen, strZ(this->pub.host), bracketClose,
        this->pub.port, strZ(this->pub.path));
}

111
src/common/io/http/url.h Normal file
View File

@ -0,0 +1,111 @@
/***********************************************************************************************************************************
HTTP URL
Parse a URL into component parts.
***********************************************************************************************************************************/
#ifndef COMMON_IO_HTTP_URL_H
#define COMMON_IO_HTTP_URL_H
/***********************************************************************************************************************************
Object type
***********************************************************************************************************************************/
// Type name and function-name prefix for the object -- presumably consumed by the generic object macros, e.g.
// OBJECT_DEFINE_FREE(HTTP_URL) in url.c (TODO confirm against common/type/object.h)
#define HTTP_URL_TYPE HttpUrl
#define HTTP_URL_PREFIX httpUrl
// Opaque object type. The struct itself is defined in url.c, callers access members through the getters below.
typedef struct HttpUrl HttpUrl;
#include "common/type/param.h"
#include "common/type/string.h"
/***********************************************************************************************************************************
HTTP protocol type
***********************************************************************************************************************************/
typedef enum
{
// No specific protocol. Because this is the zero value it is what HttpUrlNewParseParam.type holds when the caller does not set
// it. httpUrlNewParse() never leaves a parsed URL with this type (it asserts that a concrete protocol has been determined).
httpProtocolTypeAny = 0,
httpProtocolTypeHttp = 1, // http, default port 80 when the URL has no port
httpProtocolTypeHttps = 2, // https, default port 443 when the URL has no port
} HttpProtocolType;
/***********************************************************************************************************************************
Constructors
***********************************************************************************************************************************/
// Optional parameters for httpUrlNewParse()
typedef struct HttpUrlNewParseParam
{
VAR_PARAM_HEADER;
HttpProtocolType type; // Expected protocol type (httpProtocolTypeAny if any)
} HttpUrlNewParseParam;
// Wrapper that lets optional parameters be passed by name, e.g. httpUrlNewParseP(url, .type = httpProtocolTypeHttps). Parameters
// that are not named are zero, so an omitted .type is httpProtocolTypeAny.
#define httpUrlNewParseP(url, ...) \
httpUrlNewParse(url, (HttpUrlNewParseParam){VAR_PARAM_INIT, __VA_ARGS__})
// Parse a URL into protocol, host, port, and path. Throws FormatError when the URL syntax is not accepted or when the URL
// contains a protocol that differs from param.type (unless param.type is httpProtocolTypeAny). When the URL contains no protocol
// then param.type is used as the protocol and must not be httpProtocolTypeAny (this is asserted).
HttpUrl *httpUrlNewParse(const String *const url, HttpUrlNewParseParam param);
/***********************************************************************************************************************************
Getters/setters
***********************************************************************************************************************************/
// Members exposed through the inline getters below. The getters cast an HttpUrl pointer to this type, so this struct must be the
// first member of struct HttpUrl.
typedef struct HttpUrlPub
{
const String *url; // Original URL, exactly as passed to httpUrlNewParse()
HttpProtocolType type; // Protocol type, e.g. http
const String *host; // Host name or address (IPv6 addresses are stored without the enclosing brackets)
unsigned int port; // Port from the URL, else 80 for http or 443 for https
const String *path; // Path, always beginning with / (just / when the URL has no path)
} HttpUrlPub;
// Protocol type
__attribute__((always_inline)) static inline HttpProtocolType
httpUrlProtocolType(const HttpUrl *this)
{
ASSERT_INLINE(this != NULL);
return ((const HttpUrlPub *)this)->type;
}
// Host
__attribute__((always_inline)) static inline const String *
httpUrlHost(const HttpUrl *this)
{
ASSERT_INLINE(this != NULL);
return ((const HttpUrlPub *)this)->host;
}
// Path
__attribute__((always_inline)) static inline const String *
httpUrlPath(const HttpUrl *this)
{
ASSERT_INLINE(this != NULL);
return ((const HttpUrlPub *)this)->path;
}
// Port
__attribute__((always_inline)) static inline unsigned int
httpUrlPort(const HttpUrl *this)
{
ASSERT_INLINE(this != NULL);
return ((const HttpUrlPub *)this)->port;
}
// URL (exactly as originally passed)
__attribute__((always_inline)) static inline const String *
httpUrl(const HttpUrl *this)
{
ASSERT_INLINE(this != NULL);
return ((const HttpUrlPub *)this)->url;
}
/***********************************************************************************************************************************
Destructor
***********************************************************************************************************************************/
// Free the object. NOTE(review): the definition appears to be generated by OBJECT_DEFINE_FREE(HTTP_URL) in url.c -- confirm.
void httpUrlFree(HttpUrl *this);
/***********************************************************************************************************************************
Macros for function logging
***********************************************************************************************************************************/
// Format the URL for logging as {protocol://host:port/path}. IPv6 hosts are wrapped in brackets and the port is always shown.
String *httpUrlToLog(const HttpUrl *this);
// Type of an HttpUrl parameter as used by the function logging macros
#define FUNCTION_LOG_HTTP_URL_TYPE \
HttpUrl *
// Formats an HttpUrl parameter into the log buffer using httpUrlToLog()
#define FUNCTION_LOG_HTTP_URL_FORMAT(value, buffer, bufferSize) \
FUNCTION_LOG_STRING_OBJECT_FORMAT(value, httpUrlToLog, buffer, bufferSize)
#endif

View File

@ -304,7 +304,7 @@ unit:
# ----------------------------------------------------------------------------------------------------------------------------
- name: io-http
total: 5
total: 6
coverage:
- common/io/http/client
@ -314,6 +314,7 @@ unit:
- common/io/http/request
- common/io/http/response
- common/io/http/session
- common/io/http/url
# ----------------------------------------------------------------------------------------------------------------------------
- name: exec

View File

@ -173,6 +173,101 @@ testRun(void)
TEST_RESULT_VOID(httpQueryFree(query), "free");
}
// *****************************************************************************************************************************
if (testBegin("HttpUrl"))
{
HttpUrl *url = NULL;
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("invalid url");
TEST_ERROR(httpUrlNewParseP(STRDEF("ftp://" BOGUS_STR)), FormatError, "invalid URL 'ftp://BOGUS'");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("HttpProtocolTypeStr");
TEST_RESULT_STR_Z(httpProtocolTypeStr(httpProtocolTypeHttp), "http", "check http");
TEST_RESULT_STR_Z(httpProtocolTypeStr(httpProtocolTypeAny), NULL, "check any");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("simple http");
TEST_ASSIGN(url, httpUrlNewParseP(STRDEF("http://test"), .type = httpProtocolTypeHttp), "new");
TEST_RESULT_STR_Z(httpUrl(url), "http://test", "check url");
TEST_RESULT_STR_Z(httpUrlHost(url), "test", "check host");
TEST_RESULT_STR_Z(httpUrlPath(url), "/", "check path");
TEST_RESULT_UINT(httpUrlPort(url), 80, "check port");
TEST_RESULT_UINT(httpUrlProtocolType(url), httpProtocolTypeHttp, "check protocol");
TEST_RESULT_STR_Z(httpUrlToLog(url), "{http://test:80/}", "check log");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("host and port");
TEST_ASSIGN(url, httpUrlNewParseP(STRDEF("gcs:4443"), .type = httpProtocolTypeHttps), "new");
TEST_RESULT_STR_Z(httpUrl(url), "gcs:4443", "check url");
TEST_RESULT_STR_Z(httpUrlHost(url), "gcs", "check host");
TEST_RESULT_STR_Z(httpUrlPath(url), "/", "check path");
TEST_RESULT_UINT(httpUrlPort(url), 4443, "check port");
TEST_RESULT_UINT(httpUrlProtocolType(url), httpProtocolTypeHttps, "check protocol");
TEST_RESULT_STR_Z(httpUrlToLog(url), "{https://gcs:4443/}", "check log");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("http but expected https");
TEST_ERROR(
httpUrlNewParseP(STRDEF("http://test"), .type = httpProtocolTypeHttps), FormatError,
"expected protocol 'https' in URL 'http://test'");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("https with port and path");
TEST_ASSIGN(url, httpUrlNewParseP(STRDEF("https://test.com:445/path")), "new");
TEST_RESULT_STR_Z(httpUrl(url), "https://test.com:445/path", "check url");
TEST_RESULT_STR_Z(httpUrlHost(url), "test.com", "check host");
TEST_RESULT_STR_Z(httpUrlPath(url), "/path", "check path");
TEST_RESULT_UINT(httpUrlPort(url), 445, "check port");
TEST_RESULT_UINT(httpUrlProtocolType(url), httpProtocolTypeHttps, "check protocol");
TEST_RESULT_STR_Z(httpUrlToLog(url), "{https://test.com:445/path}", "check log");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("host only");
TEST_ASSIGN(url, httpUrlNewParseP(STRDEF("test.com"), .type = httpProtocolTypeHttps), "new");
TEST_RESULT_STR_Z(httpUrl(url), "test.com", "check url");
TEST_RESULT_STR_Z(httpUrlHost(url), "test.com", "check host");
TEST_RESULT_STR_Z(httpUrlPath(url), "/", "check path");
TEST_RESULT_UINT(httpUrlPort(url), 443, "check port");
TEST_RESULT_UINT(httpUrlProtocolType(url), httpProtocolTypeHttps, "check protocol");
TEST_RESULT_STR_Z(httpUrlToLog(url), "{https://test.com:443/}", "check log");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("IPv6");
TEST_ASSIGN(url, httpUrlNewParseP(STRDEF("http://[2001:db8::ff00:42:8329]:81"), .type = httpProtocolTypeHttp), "new");
TEST_RESULT_STR_Z(httpUrl(url), "http://[2001:db8::ff00:42:8329]:81", "check url");
TEST_RESULT_STR_Z(httpUrlHost(url), "2001:db8::ff00:42:8329", "check host");
TEST_RESULT_STR_Z(httpUrlPath(url), "/", "check path");
TEST_RESULT_UINT(httpUrlPort(url), 81, "check port");
TEST_RESULT_UINT(httpUrlProtocolType(url), httpProtocolTypeHttp, "check protocol");
TEST_RESULT_STR_Z(httpUrlToLog(url), "{http://[2001:db8::ff00:42:8329]:81/}", "check log");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("IPv6 no port");
TEST_ASSIGN(url, httpUrlNewParseP(STRDEF("http://[2001:db8::ff00:42:8329]/url"), .type = httpProtocolTypeHttp), "new");
TEST_RESULT_STR_Z(httpUrl(url), "http://[2001:db8::ff00:42:8329]/url", "check url");
TEST_RESULT_STR_Z(httpUrlHost(url), "2001:db8::ff00:42:8329", "check host");
TEST_RESULT_STR_Z(httpUrlPath(url), "/url", "check path");
TEST_RESULT_UINT(httpUrlPort(url), 80, "check port");
TEST_RESULT_UINT(httpUrlProtocolType(url), httpProtocolTypeHttp, "check protocol");
TEST_RESULT_STR_Z(httpUrlToLog(url), "{http://[2001:db8::ff00:42:8329]:80/url}", "check log");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("free");
TEST_RESULT_VOID(httpUrlFree(url), "free");
}
// *****************************************************************************************************************************
if (testBegin("HttpClient"))
{