From e64999db7744cad55117f582c2cd42fb4b836f6f Mon Sep 17 00:00:00 2001 From: David Steele Date: Mon, 1 Mar 2021 13:44:47 -0500 Subject: [PATCH] Add HttpUrl object. Parse a URL into component parts. --- doc/xml/release.xml | 8 ++ src/Makefile.in | 1 + src/common/io/http/url.c | 213 ++++++++++++++++++++++++++++ src/common/io/http/url.h | 111 +++++++++++++++ test/define.yaml | 3 +- test/src/module/common/ioHttpTest.c | 95 +++++++++++++ 6 files changed, 430 insertions(+), 1 deletion(-) create mode 100644 src/common/io/http/url.c create mode 100644 src/common/io/http/url.h diff --git a/doc/xml/release.xml b/doc/xml/release.xml index 25e8d523c..5a8347e42 100644 --- a/doc/xml/release.xml +++ b/doc/xml/release.xml @@ -66,6 +66,14 @@

Partial multi-repository implementation.

+ + + + + + +

Add HttpUrl object.

+
diff --git a/src/Makefile.in b/src/Makefile.in
index 1145f0063..afa5af5e7 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -84,6 +84,7 @@ SRCS = \
 	common/io/http/request.c \
 	common/io/http/response.c \
 	common/io/http/session.c \
+	common/io/http/url.c \
 	common/io/io.c \
 	common/io/read.c \
 	common/io/session.c \
diff --git a/src/common/io/http/url.c b/src/common/io/http/url.c
new file mode 100644
index 000000000..7e468be5c
--- /dev/null
+++ b/src/common/io/http/url.c
@@ -0,0 +1,222 @@
+/***********************************************************************************************************************************
+HTTP URL
+***********************************************************************************************************************************/
+#include "build.auto.h"
+
+#include "common/debug.h"
+#include "common/io/http/url.h"
+#include "common/memContext.h"
+#include "common/type/object.h"
+#include "common/type/stringList.h"
+#include "common/regExp.h"
+
+/***********************************************************************************************************************************
+Regular expression for URLs. This is not intended to be completely comprehensive, e.g. it is still possible to enter bad hostnames.
+The goal is to make sure the syntax is correct enough for the rest of the parsing to succeed.
+***********************************************************************************************************************************/
+STRING_STATIC(
+    HTTP_URL_REGEXP_STR,
+    "^(http[s]{0,1}:\\/\\/){0,1}"                                   // Optional protocol (http or https)
+    "([^\\[\\:\\/?]+|\\[[a-fA-F0-9:]+\\])"                          // host/ipv4/ipv6
+    "(:[1-9][0-9]{0,4}){0,1}"                                       // Optional port
+    "(\\/[^?\\/]*)*$");                                             // Optional path
+
+/***********************************************************************************************************************************
+Maximum valid port. The URL regular expression allows up to five digits so larger values must be rejected explicitly.
+***********************************************************************************************************************************/
+#define HTTP_URL_PORT_MAX                                           65535
+
+/***********************************************************************************************************************************
+Object type
+***********************************************************************************************************************************/
+struct HttpUrl
+{
+    HttpUrlPub pub;
+    MemContext *memContext;                                         // Mem context
+};
+
+OBJECT_DEFINE_FREE(HTTP_URL);
+
+/***********************************************************************************************************************************
+Convert protocol type to a string
+***********************************************************************************************************************************/
+STRING_STATIC(HTTP_PROTOCOL_HTTP_STR,                               "http");
+STRING_STATIC(HTTP_PROTOCOL_HTTPS_STR,                              "https");
+
+static const String *
+httpProtocolTypeStr(HttpProtocolType type)
+{
+    switch (type)
+    {
+        case httpProtocolTypeHttp:
+            return HTTP_PROTOCOL_HTTP_STR;
+
+        case httpProtocolTypeHttps:
+            return HTTP_PROTOCOL_HTTPS_STR;
+
+        default:
+            return NULL;
+    }
+}
+
+/**********************************************************************************************************************************/
+HttpUrl *
+httpUrlNewParse(const String *const url, HttpUrlNewParseParam param)
+{
+    FUNCTION_TEST_BEGIN();
+        FUNCTION_TEST_PARAM(STRING, url);
+        FUNCTION_TEST_PARAM(ENUM, param.type);
+    FUNCTION_TEST_END();
+
+    ASSERT(url != NULL);
+
+    HttpUrl *this = NULL;
+
+    MEM_CONTEXT_NEW_BEGIN("HttpUrl")
+    {
+        // Allocate state and set context
+        this = memNew(sizeof(HttpUrl));
+
+        *this = (HttpUrl)
+        {
+            .memContext = MEM_CONTEXT_NEW(),
+            .pub =
+            {
+                .url = strDup(url),
+            },
+        };
+
+        MEM_CONTEXT_TEMP_BEGIN()
+        {
+            // Check that URL format is one we accept
+            if (!regExpMatchOne(HTTP_URL_REGEXP_STR, url))
+                THROW_FMT(FormatError, "invalid URL '%s'", strZ(url));
+
+            // Determine whether the first part is protocol or host
+            StringList *splitUrl = strLstNewSplitZ(url, "/");
+
+            if (strEqZ(strLstGet(splitUrl, 0), "http:"))
+                this->pub.type = httpProtocolTypeHttp;
+            else if (strEqZ(strLstGet(splitUrl, 0), "https:"))
+                this->pub.type = httpProtocolTypeHttps;
+
+            // If no protocol found then the first part is the host
+            if (this->pub.type == httpProtocolTypeAny)
+            {
+                // Protocol must be set explicitly
+                ASSERT(param.type != httpProtocolTypeAny);
+
+                this->pub.type = param.type;
+            }
+            // Else protocol was found
+            else
+            {
+                // Protocol must match expected
+                if (param.type != httpProtocolTypeAny && this->pub.type != param.type)
+                    THROW_FMT(FormatError, "expected protocol '%s' in URL '%s'", strZ(httpProtocolTypeStr(param.type)), strZ(url));
+
+                // Remove protocol parts from split
+                strLstRemoveIdx(splitUrl, 0);
+                strLstRemoveIdx(splitUrl, 0);
+            }
+
+            // Get host
+            const String *host = strLstGet(splitUrl, 0);
+            const String *port = NULL;
+
+            // If an IPv6 address
+            if (strBeginsWithZ(host, "["))
+            {
+                // Split closing bracket
+                StringList *splitHost = strLstNewSplitZ(host, "]");
+                ASSERT(strLstSize(splitHost) == 2);
+
+                // Remove opening bracket
+                host = strSub(strLstGet(splitHost, 0), 1);
+
+                // Get port if specified
+                if (strSize(strLstGet(splitHost, 1)) > 0)
+                    port = strSub(strLstGet(splitHost, 1), 1);
+            }
+            // Else IPv4 or host name
+            else
+            {
+                // Split on colon
+                StringList *splitHost = strLstNewSplitZ(host, ":");
+                ASSERT(strLstSize(splitHost) != 0);
+
+                // First part is the host
+                host = strLstGet(splitHost, 0);
+
+                // Second part is the port, if it exists
+                if (strLstSize(splitHost) > 1)
+                {
+                    ASSERT(strLstSize(splitHost) == 2);
+                    port = strLstGet(splitHost, 1);
+                }
+            }
+
+            // Copy host into object context
+            MEM_CONTEXT_PRIOR_BEGIN()
+            {
+                this->pub.host = strDup(host);
+            }
+            MEM_CONTEXT_PRIOR_END();
+
+            // Get port if specified
+            if (port != NULL)
+            {
+                this->pub.port = cvtZToUInt(strZ(port));
+
+                // The regular expression accepts up to five digits so make sure the port is within the valid range
+                if (this->pub.port > HTTP_URL_PORT_MAX)
+                    THROW_FMT(FormatError, "invalid port '%s' in URL '%s'", strZ(port), strZ(url));
+            }
+            // Else set default port based on the protocol
+            else
+            {
+                ASSERT(this->pub.type != httpProtocolTypeAny);
+
+                if (this->pub.type == httpProtocolTypeHttp)
+                    this->pub.port = 80;
+                else
+                    this->pub.port = 443;
+            }
+
+            // Check for path
+            if (strLstSize(splitUrl) > 1)
+            {
+                // Remove host part so it is easier to construct the path
+                strLstRemoveIdx(splitUrl, 0);
+
+                // Construct path and copy into object context
+                const String *path = strLstJoin(splitUrl, "/");
+
+                MEM_CONTEXT_PRIOR_BEGIN()
+                {
+                    this->pub.path = strNewFmt("/%s", strZ(path));
+                }
+                MEM_CONTEXT_PRIOR_END();
+            }
+            // Else default path is /
+            else
+                this->pub.path = FSLASH_STR;
+        }
+        MEM_CONTEXT_TEMP_END();
+    }
+    MEM_CONTEXT_NEW_END();
+
+    FUNCTION_TEST_RETURN(this);
+}
+
+/**********************************************************************************************************************************/
+String *
+httpUrlToLog(const HttpUrl *this)
+{
+    // Is IPv6 address?
+    bool ipv6 = strChr(this->pub.host, ':') != -1;
+
+    return strNewFmt(
+        "{%s://%s%s%s:%u%s}", strZ(httpProtocolTypeStr(this->pub.type)), ipv6 ? "[" : "", strZ(this->pub.host), ipv6 ? "]" : "",
+        this->pub.port, strZ(this->pub.path));
+}
diff --git a/src/common/io/http/url.h b/src/common/io/http/url.h
new file mode 100644
index 000000000..d2deb6833
--- /dev/null
+++ b/src/common/io/http/url.h
@@ -0,0 +1,113 @@
+/***********************************************************************************************************************************
+HTTP URL
+
+Parse a URL into component parts.
+***********************************************************************************************************************************/
+#ifndef COMMON_IO_HTTP_URL_H
+#define COMMON_IO_HTTP_URL_H
+
+/***********************************************************************************************************************************
+Object type
+***********************************************************************************************************************************/
+#define HTTP_URL_TYPE                                               HttpUrl
+#define HTTP_URL_PREFIX                                             httpUrl
+
+typedef struct HttpUrl HttpUrl;
+
+#include "common/type/param.h"
+#include "common/type/string.h"
+
+/***********************************************************************************************************************************
+HTTP protocol type
+***********************************************************************************************************************************/
+typedef enum
+{
+    httpProtocolTypeAny = 0,
+    httpProtocolTypeHttp = 1,
+    httpProtocolTypeHttps = 2,
+} HttpProtocolType;
+
+/***********************************************************************************************************************************
+Constructors
+***********************************************************************************************************************************/
+// Parse a URL into protocol, host, port, and path. If the URL does not include a protocol then param.type must be set. FormatError
+// is thrown when the URL is malformed, the port is greater than 65535, or the protocol in the URL does not match param.type.
+typedef struct HttpUrlNewParseParam
+{
+    VAR_PARAM_HEADER;
+    HttpProtocolType type;                                          // Expected protocol type (httpProtocolTypeAny if any)
+} HttpUrlNewParseParam;
+
+#define httpUrlNewParseP(url, ...)                                                                                                 \
+    httpUrlNewParse(url, (HttpUrlNewParseParam){VAR_PARAM_INIT, __VA_ARGS__})
+
+HttpUrl *httpUrlNewParse(const String *const url, HttpUrlNewParseParam param);
+
+/***********************************************************************************************************************************
+Getters/setters
+***********************************************************************************************************************************/
+typedef struct HttpUrlPub
+{
+    const String *url;                                              // Original URL
+    HttpProtocolType type;                                          // Protocol type, e.g. http
+    const String *host;                                             // Host
+    unsigned int port;                                              // Port
+    const String *path;                                             // Path
+} HttpUrlPub;
+
+// Protocol type
+__attribute__((always_inline)) static inline HttpProtocolType
+httpUrlProtocolType(const HttpUrl *this)
+{
+    ASSERT_INLINE(this != NULL);
+    return ((const HttpUrlPub *)this)->type;
+}
+
+// Host
+__attribute__((always_inline)) static inline const String *
+httpUrlHost(const HttpUrl *this)
+{
+    ASSERT_INLINE(this != NULL);
+    return ((const HttpUrlPub *)this)->host;
+}
+
+// Path
+__attribute__((always_inline)) static inline const String *
+httpUrlPath(const HttpUrl *this)
+{
+    ASSERT_INLINE(this != NULL);
+    return ((const HttpUrlPub *)this)->path;
+}
+
+// Port
+__attribute__((always_inline)) static inline unsigned int
+httpUrlPort(const HttpUrl *this)
+{
+    ASSERT_INLINE(this != NULL);
+    return ((const HttpUrlPub *)this)->port;
+}
+
+// URL (exactly as originally passed)
+__attribute__((always_inline)) static inline const String *
+httpUrl(const HttpUrl *this)
+{
+    ASSERT_INLINE(this != NULL);
+    return ((const HttpUrlPub *)this)->url;
+}
+
+/***********************************************************************************************************************************
+Destructor
+***********************************************************************************************************************************/
+void httpUrlFree(HttpUrl *this);
+
+/***********************************************************************************************************************************
+Macros for function logging
+***********************************************************************************************************************************/
+String *httpUrlToLog(const HttpUrl *this);
+
+#define FUNCTION_LOG_HTTP_URL_TYPE                                                                                                 \
+    HttpUrl *
+#define FUNCTION_LOG_HTTP_URL_FORMAT(value, buffer, bufferSize)                                                                    \
+    FUNCTION_LOG_STRING_OBJECT_FORMAT(value, httpUrlToLog, buffer, bufferSize)
+
+#endif
diff --git a/test/define.yaml b/test/define.yaml
index 86d35ca5e..14b5eb6bc 100644
--- a/test/define.yaml
+++ b/test/define.yaml
@@ -304,7 +304,7 @@ unit:
 
       # ----------------------------------------------------------------------------------------------------------------------------
       - name: io-http
-        total: 5
+        total: 6
 
         coverage:
           - common/io/http/client
@@ -314,6 +314,7 @@ unit:
           - common/io/http/request
           - common/io/http/response
           - common/io/http/session
+          - common/io/http/url
 
       # ----------------------------------------------------------------------------------------------------------------------------
       - name: exec
diff --git a/test/src/module/common/ioHttpTest.c b/test/src/module/common/ioHttpTest.c
index 3d8080606..f0d3d0a0c 100644
--- a/test/src/module/common/ioHttpTest.c
+++ b/test/src/module/common/ioHttpTest.c
@@ -173,6 +173,110 @@ testRun(void)
         TEST_RESULT_VOID(httpQueryFree(query), "free");
     }
 
+    // *****************************************************************************************************************************
+    if (testBegin("HttpUrl"))
+    {
+        HttpUrl *url = NULL;
+
+        // -------------------------------------------------------------------------------------------------------------------------
+        TEST_TITLE("invalid url");
+
+        TEST_ERROR(httpUrlNewParseP(STRDEF("ftp://" BOGUS_STR)), FormatError, "invalid URL 'ftp://BOGUS'");
+
+        // -------------------------------------------------------------------------------------------------------------------------
+        TEST_TITLE("HttpProtocolTypeStr");
+
+        TEST_RESULT_STR_Z(httpProtocolTypeStr(httpProtocolTypeHttp), "http", "check http");
+        TEST_RESULT_STR_Z(httpProtocolTypeStr(httpProtocolTypeAny), NULL, "check any");
+
+        // -------------------------------------------------------------------------------------------------------------------------
+        TEST_TITLE("simple http");
+
+        TEST_ASSIGN(url, httpUrlNewParseP(STRDEF("http://test"), .type = httpProtocolTypeHttp), "new");
+        TEST_RESULT_STR_Z(httpUrl(url), "http://test", "check url");
+        TEST_RESULT_STR_Z(httpUrlHost(url), "test", "check host");
+        TEST_RESULT_STR_Z(httpUrlPath(url), "/", "check path");
+        TEST_RESULT_UINT(httpUrlPort(url), 80, "check port");
+        TEST_RESULT_UINT(httpUrlProtocolType(url), httpProtocolTypeHttp, "check protocol");
+        TEST_RESULT_STR_Z(httpUrlToLog(url), "{http://test:80/}", "check log");
+
+        // -------------------------------------------------------------------------------------------------------------------------
+        TEST_TITLE("host and port");
+
+        TEST_ASSIGN(url, httpUrlNewParseP(STRDEF("gcs:4443"), .type = httpProtocolTypeHttps), "new");
+        TEST_RESULT_STR_Z(httpUrl(url), "gcs:4443", "check url");
+        TEST_RESULT_STR_Z(httpUrlHost(url), "gcs", "check host");
+        TEST_RESULT_STR_Z(httpUrlPath(url), "/", "check path");
+        TEST_RESULT_UINT(httpUrlPort(url), 4443, "check port");
+        TEST_RESULT_UINT(httpUrlProtocolType(url), httpProtocolTypeHttps, "check protocol");
+        TEST_RESULT_STR_Z(httpUrlToLog(url), "{https://gcs:4443/}", "check log");
+
+        // -------------------------------------------------------------------------------------------------------------------------
+        TEST_TITLE("port range");
+
+        TEST_ASSIGN(url, httpUrlNewParseP(STRDEF("test.com:65535"), .type = httpProtocolTypeHttps), "max port");
+        TEST_RESULT_UINT(httpUrlPort(url), 65535, "check port");
+        TEST_ERROR(
+            httpUrlNewParseP(STRDEF("test.com:65536"), .type = httpProtocolTypeHttps), FormatError,
+            "invalid port '65536' in URL 'test.com:65536'");
+
+        // -------------------------------------------------------------------------------------------------------------------------
+        TEST_TITLE("http but expected https");
+
+        TEST_ERROR(
+            httpUrlNewParseP(STRDEF("http://test"), .type = httpProtocolTypeHttps), FormatError,
+            "expected protocol 'https' in URL 'http://test'");
+
+        // -------------------------------------------------------------------------------------------------------------------------
+        TEST_TITLE("https with port and path");
+
+        TEST_ASSIGN(url, httpUrlNewParseP(STRDEF("https://test.com:445/path")), "new");
+        TEST_RESULT_STR_Z(httpUrl(url), "https://test.com:445/path", "check url");
+        TEST_RESULT_STR_Z(httpUrlHost(url), "test.com", "check host");
+        TEST_RESULT_STR_Z(httpUrlPath(url), "/path", "check path");
+        TEST_RESULT_UINT(httpUrlPort(url), 445, "check port");
+        TEST_RESULT_UINT(httpUrlProtocolType(url), httpProtocolTypeHttps, "check protocol");
+        TEST_RESULT_STR_Z(httpUrlToLog(url), "{https://test.com:445/path}", "check log");
+
+        // -------------------------------------------------------------------------------------------------------------------------
+        TEST_TITLE("host only");
+
+        TEST_ASSIGN(url, httpUrlNewParseP(STRDEF("test.com"), .type = httpProtocolTypeHttps), "new");
+        TEST_RESULT_STR_Z(httpUrl(url), "test.com", "check url");
+        TEST_RESULT_STR_Z(httpUrlHost(url), "test.com", "check host");
+        TEST_RESULT_STR_Z(httpUrlPath(url), "/", "check path");
+        TEST_RESULT_UINT(httpUrlPort(url), 443, "check port");
+        TEST_RESULT_UINT(httpUrlProtocolType(url), httpProtocolTypeHttps, "check protocol");
+        TEST_RESULT_STR_Z(httpUrlToLog(url), "{https://test.com:443/}", "check log");
+
+        // -------------------------------------------------------------------------------------------------------------------------
+        TEST_TITLE("IPv6");
+
+        TEST_ASSIGN(url, httpUrlNewParseP(STRDEF("http://[2001:db8::ff00:42:8329]:81"), .type = httpProtocolTypeHttp), "new");
+        TEST_RESULT_STR_Z(httpUrl(url), "http://[2001:db8::ff00:42:8329]:81", "check url");
+        TEST_RESULT_STR_Z(httpUrlHost(url), "2001:db8::ff00:42:8329", "check host");
+        TEST_RESULT_STR_Z(httpUrlPath(url), "/", "check path");
+        TEST_RESULT_UINT(httpUrlPort(url), 81, "check port");
+        TEST_RESULT_UINT(httpUrlProtocolType(url), httpProtocolTypeHttp, "check protocol");
+        TEST_RESULT_STR_Z(httpUrlToLog(url), "{http://[2001:db8::ff00:42:8329]:81/}", "check log");
+
+        // -------------------------------------------------------------------------------------------------------------------------
+        TEST_TITLE("IPv6 no port");
+
+        TEST_ASSIGN(url, httpUrlNewParseP(STRDEF("http://[2001:db8::ff00:42:8329]/url"), .type = httpProtocolTypeHttp), "new");
+        TEST_RESULT_STR_Z(httpUrl(url), "http://[2001:db8::ff00:42:8329]/url", "check url");
+        TEST_RESULT_STR_Z(httpUrlHost(url), "2001:db8::ff00:42:8329", "check host");
+        TEST_RESULT_STR_Z(httpUrlPath(url), "/url", "check path");
+        TEST_RESULT_UINT(httpUrlPort(url), 80, "check port");
+        TEST_RESULT_UINT(httpUrlProtocolType(url), httpProtocolTypeHttp, "check protocol");
+        TEST_RESULT_STR_Z(httpUrlToLog(url), "{http://[2001:db8::ff00:42:8329]:80/url}", "check log");
+
+        // -------------------------------------------------------------------------------------------------------------------------
+        TEST_TITLE("free");
+
+        TEST_RESULT_VOID(httpUrlFree(url), "free");
+    }
+
     // *****************************************************************************************************************************
     if (testBegin("HttpClient"))
     {