1
0
mirror of https://github.com/httpie/cli.git synced 2025-06-19 00:27:52 +02:00

Improve handling of prettified responses without correct content-type encoding (#1110)

* Improve handling of responses without correct content-type charset

* [skip ci] Minor tweaks in tests

* [skip ci] Add documentation

Co-authored-by: claudiatd <claudiatd@gmail.com>

* Improve unknown encoding test

[skip ci]

* Review mime and options retrieval

* Add full content-type example in help output

* Simplify decoder

* [skip ci] s/charset/encoding/

* Tweaks

* [skip ci] Fix type annotation

* [skip ci] s/charset/encoding/

* Tweaks

* Fix type annotation

* Improvement

* Introduce `codec.encode()`

* [skip ci] Tweak changelog

Co-authored-by: claudiatd <claudiatd@gmail.com>
This commit is contained in:
Mickaël Schoentgen
2021-09-29 20:22:19 +02:00
committed by GitHub
parent b50f9aa7e7
commit 71adcd97d0
10 changed files with 184 additions and 26 deletions

View File

@ -8,6 +8,7 @@ This project adheres to [Semantic Versioning](https://semver.org/).
- Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130)) - Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130))
- Added `--format-options=response.as:CONTENT_TYPE` to allow overriding the response `Content-Type`. ([#1134](https://github.com/httpie/httpie/issues/1134)) - Added `--format-options=response.as:CONTENT_TYPE` to allow overriding the response `Content-Type`. ([#1134](https://github.com/httpie/httpie/issues/1134))
- Added `--response-as` shortcut for setting the response `Content-Type`-related `--format-options`. ([#1134](https://github.com/httpie/httpie/issues/1134)) - Added `--response-as` shortcut for setting the response `Content-Type`-related `--format-options`. ([#1134](https://github.com/httpie/httpie/issues/1134))
- Improved handling of prettified responses without correct `Content-Type` encoding. ([#1110](https://github.com/httpie/httpie/issues/1110))
- Installed plugins are now listed in `--debug` output. ([#1165](https://github.com/httpie/httpie/issues/1165)) - Installed plugins are now listed in `--debug` output. ([#1165](https://github.com/httpie/httpie/issues/1165))
- Fixed duplicate keys preservation of JSON data. ([#1163](https://github.com/httpie/httpie/issues/1163)) - Fixed duplicate keys preservation of JSON data. ([#1163](https://github.com/httpie/httpie/issues/1163))

View File

@ -1249,6 +1249,18 @@ For example, the following request will force the response to be treated as XML:
``` ```
You will nearly instantly see something like this: You will nearly instantly see something like this:
```http
HTTP/1.1 200 OK
Content-Type: application/octet-stream
```
### Redirected output
HTTPie uses a different set of defaults for redirected output than for [terminal output](#terminal-output).
The differences being:
- Formatting and colors aren’t applied (unless `--pretty` is specified). - Formatting and colors aren’t applied (unless `--pretty` is specified).
- Only the response body is printed (unless one of the [output options](#output-options) is set). - Only the response body is printed (unless one of the [output options](#output-options) is set).
- Also, binary data isn’t suppressed. - Also, binary data isn’t suppressed.

View File

@ -316,6 +316,8 @@ output_processing.add_argument(
Override the response Content-Type for formatting purposes, e.g.: Override the response Content-Type for formatting purposes, e.g.:
--response-as=application/xml --response-as=application/xml
--response-as=charset=utf-8
--response-as='application/xml; charset=utf-8'
It is a shortcut for: It is a shortcut for:

37
httpie/codec.py Normal file
View File

@ -0,0 +1,37 @@
from typing import Union
from charset_normalizer import from_bytes
from .constants import UTF8
Bytes = Union[bytearray, bytes]
def detect_encoding(content: Bytes) -> str:
    """Guess the text encoding of `content`.

    The bytes are handed to `from_bytes()` and its best match is used.
    When there is no usable match, `UTF8` is returned instead.

    """
    best_guess = from_bytes(bytes(content)).best()
    if not best_guess:
        return UTF8
    return best_guess.encoding
def decode(content: Bytes, encoding: str) -> str:
    """Turn `content` into text using `encoding`.

    A falsy `encoding` means "unknown": it is then guessed from `content`
    through `detect_encoding()`.
    Bytes that cannot be decoded are replaced instead of raising.

    """
    chosen_encoding = encoding or detect_encoding(content)
    return content.decode(chosen_encoding, errors='replace')
def encode(content: str, encoding: str) -> bytes:
    """Serialize `content` to bytes with `encoding`.

    Characters that `encoding` cannot represent are replaced instead of
    raising.

    """
    encoded = content.encode(encoding, errors='replace')
    return encoded

View File

@ -30,11 +30,6 @@ class HTTPMessage(metaclass=ABCMeta):
def encoding(self) -> Optional[str]: def encoding(self) -> Optional[str]:
"""Return a `str` with the message's encoding, if known.""" """Return a `str` with the message's encoding, if known."""
@property
def body(self) -> bytes:
"""Return a `bytes` with the message's body."""
raise NotImplementedError()
@property @property
def content_type(self) -> str: def content_type(self) -> str:
"""Return the message content type.""" """Return the message content type."""
@ -86,12 +81,6 @@ class HTTPResponse(HTTPMessage):
def encoding(self): def encoding(self):
return self._orig.encoding or UTF8 return self._orig.encoding or UTF8
@property
def body(self):
# Only now the response body is fetched.
# Shouldn't be touched unless the body is actually needed.
return self._orig.content
class HTTPRequest(HTTPMessage): class HTTPRequest(HTTPMessage):
"""A :class:`requests.models.Request` wrapper.""" """A :class:`requests.models.Request` wrapper."""

View File

@ -25,7 +25,7 @@ def pretty_xml(document: 'Document',
} }
if standalone is not None and sys.version_info >= (3, 9): if standalone is not None and sys.version_info >= (3, 9):
kwargs['standalone'] = standalone kwargs['standalone'] = standalone
body = document.toprettyxml(**kwargs).decode() body = document.toprettyxml(**kwargs).decode(kwargs['encoding'])
# Remove blank lines automatically added by `toprettyxml()`. # Remove blank lines automatically added by `toprettyxml()`.
return '\n'.join(line for line in body.splitlines() if line.strip()) return '\n'.join(line for line in body.splitlines() if line.strip())

View File

@ -1,7 +1,8 @@
from abc import ABCMeta, abstractmethod from abc import ABCMeta, abstractmethod
from itertools import chain from itertools import chain
from typing import Callable, Iterable, Union from typing import Any, Callable, Dict, Iterable, Tuple, Union
from .. import codec
from ..cli.constants import EMPTY_FORMAT_OPTION from ..cli.constants import EMPTY_FORMAT_OPTION
from ..context import Environment from ..context import Environment
from ..constants import UTF8 from ..constants import UTF8
@ -114,8 +115,8 @@ class EncodedStream(BaseStream):
for line, lf in self.msg.iter_lines(self.CHUNK_SIZE): for line, lf in self.msg.iter_lines(self.CHUNK_SIZE):
if b'\0' in line: if b'\0' in line:
raise BinarySuppressedError() raise BinarySuppressedError()
yield line.decode(self.msg.encoding) \ line = codec.decode(line, self.msg.encoding)
.encode(self.output_encoding, 'replace') + lf yield codec.encode(line, self.output_encoding) + lf
class PrettyStream(EncodedStream): class PrettyStream(EncodedStream):
@ -137,15 +138,23 @@ class PrettyStream(EncodedStream):
super().__init__(**kwargs) super().__init__(**kwargs)
self.formatting = formatting self.formatting = formatting
self.conversion = conversion self.conversion = conversion
self.mime = self.get_mime() self.mime, mime_options = self._get_mime_and_options()
self.encoding = mime_options.get('charset') or ''
def get_mime(self) -> str: def _get_mime_and_options(self) -> Tuple[str, Dict[str, Any]]:
mime = parse_header_content_type(self.msg.content_type)[0] # Defaults from the `Content-Type` header.
if isinstance(self.msg, HTTPResponse): mime, options = parse_header_content_type(self.msg.content_type)
if not isinstance(self.msg, HTTPResponse):
return mime, options
# Override from the `--response-as` option.
forced_content_type = self.formatting.options['response']['as'] forced_content_type = self.formatting.options['response']['as']
if forced_content_type != EMPTY_FORMAT_OPTION: if forced_content_type == EMPTY_FORMAT_OPTION:
mime = parse_header_content_type(forced_content_type)[0] or mime return mime, options
return mime
forced_mime, forced_options = parse_header_content_type(forced_content_type)
return (forced_mime or mime, forced_options or options)
def get_headers(self) -> bytes: def get_headers(self) -> bytes:
return self.formatting.format_headers( return self.formatting.format_headers(
@ -176,9 +185,9 @@ class PrettyStream(EncodedStream):
if not isinstance(chunk, str): if not isinstance(chunk, str):
# Text when a converter has been used, # Text when a converter has been used,
# otherwise it will always be bytes. # otherwise it will always be bytes.
chunk = chunk.decode(self.msg.encoding, 'replace') chunk = codec.decode(chunk, self.encoding)
chunk = self.formatting.format_body(content=chunk, mime=self.mime) chunk = self.formatting.format_body(content=chunk, mime=self.mime)
return chunk.encode(self.output_encoding, 'replace') return codec.encode(chunk, self.output_encoding)
class BufferedPrettyStream(PrettyStream): class BufferedPrettyStream(PrettyStream):

View File

@ -25,6 +25,7 @@ dev_require = [
'wheel', 'wheel',
] ]
install_requires = [ install_requires = [
'charset_normalizer>=2.0.0',
'defusedxml>=0.6.0', 'defusedxml>=0.6.0',
'requests[socks]>=2.22.0', 'requests[socks]>=2.22.0',
'Pygments>=2.5.2', 'Pygments>=2.5.2',

View File

@ -39,3 +39,10 @@ def test_max_headers_limit(httpbin_both):
def test_max_headers_no_limit(httpbin_both): def test_max_headers_no_limit(httpbin_both):
assert HTTP_OK in http('--max-headers=0', httpbin_both + '/get') assert HTTP_OK in http('--max-headers=0', httpbin_both + '/get')
def test_charset_argument_unknown_encoding(httpbin_both):
    """Pass an unknown charset via `--response-as` and expect a `LookupError`."""
    url = httpbin_both + '/get'
    with raises(LookupError) as excinfo:
        http('--response-as', 'charset=foobar', 'GET', url)
    assert 'unknown encoding: foobar' in str(excinfo.value)

View File

@ -2,9 +2,17 @@
Various unicode handling related tests. Various unicode handling related tests.
""" """
from .utils import http, HTTP_OK import pytest
import responses
from httpie.cli.constants import PRETTY_MAP
from httpie.constants import UTF8
from .utils import http, HTTP_OK, URL_EXAMPLE
from .fixtures import UNICODE from .fixtures import UNICODE
ENCODINGS = [UTF8, 'windows-1250']
def test_unicode_headers(httpbin): def test_unicode_headers(httpbin):
# httpbin doesn't interpret UTF-8 headers
@ -109,3 +117,95 @@ def test_unicode_digest_auth(httpbin):
http('--auth-type=digest', http('--auth-type=digest',
'--auth', f'test:{UNICODE}', '--auth', f'test:{UNICODE}',
f'{httpbin.url}/digest-auth/auth/test/{UNICODE}') f'{httpbin.url}/digest-auth/auth/test/{UNICODE}')
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
def test_GET_encoding_detection_from_content_type_header(encoding):
    """Mock an XML response whose `Content-Type` carries the charset.

    The non-ASCII text must show up intact in the output.
    """
    payload = '<?xml version="1.0"?>\n<c>Financiën</c>'.encode(encoding)
    responses.add(
        responses.GET,
        URL_EXAMPLE,
        body=payload,
        content_type=f'text/xml; charset={encoding.upper()}',
    )
    output = http('GET', URL_EXAMPLE)
    assert 'Financiën' in output
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
def test_GET_encoding_detection_from_content(encoding):
    """Mock an XML response with no charset in its `Content-Type`.

    The encoding is only named in the XML declaration of the body; the
    non-ASCII text must still show up intact in the output.
    """
    document = f'<?xml version="1.0" encoding="{encoding.upper()}"?>\n<c>Financiën</c>'
    responses.add(
        responses.GET,
        URL_EXAMPLE,
        body=document.encode(encoding),
        content_type='text/xml',
    )
    output = http('GET', URL_EXAMPLE)
    assert 'Financiën' in output
@responses.activate
def test_GET_encoding_provided_by_format_options():
    """Serve a johab-encoded body as plain `text/plain`.

    The charset is supplied through `--format-options response.as:...`.
    """
    payload = '▒▒▒'.encode('johab')
    responses.add(
        responses.GET,
        URL_EXAMPLE,
        body=payload,
        content_type='text/plain',
    )
    output = http(
        '--format-options', 'response.as:text/plain; charset=johab',
        'GET', URL_EXAMPLE,
    )
    assert '▒▒▒' in output
@responses.activate
def test_GET_encoding_provided_by_shortcut_option():
    """Serve a johab-encoded body as plain `text/plain`.

    The charset is supplied through the `--response-as` shortcut.
    """
    payload = '▒▒▒'.encode('johab')
    responses.add(
        responses.GET,
        URL_EXAMPLE,
        body=payload,
        content_type='text/plain',
    )
    output = http(
        '--response-as', 'text/plain; charset=johab',
        'GET', URL_EXAMPLE,
    )
    assert '▒▒▒' in output
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
def test_GET_encoding_provided_by_empty_shortcut_option_should_use_content_detection(encoding):
    """Pass an empty `--response-as` value against a charset-less XML response.

    The non-ASCII text must still show up intact in the output.
    """
    document = f'<?xml version="1.0" encoding="{encoding.upper()}"?>\n<c>Financiën</c>'
    responses.add(
        responses.GET,
        URL_EXAMPLE,
        body=document.encode(encoding),
        content_type='text/xml',
    )
    output = http('--response-as', '', 'GET', URL_EXAMPLE)
    assert 'Financiën' in output
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
def test_POST_encoding_detection_from_content_type_header(encoding):
    """Mock a POST response whose `Content-Type` carries the charset.

    The non-ASCII sentence must show up intact in the output.
    """
    payload = 'Všichni lidé jsou si rovni.'.encode(encoding)
    responses.add(
        responses.POST,
        URL_EXAMPLE,
        body=payload,
        content_type=f'text/plain; charset={encoding.upper()}',
    )
    output = http('--form', 'POST', URL_EXAMPLE)
    assert 'Všichni lidé jsou si rovni.' in output
@pytest.mark.parametrize('encoding', ENCODINGS)
@responses.activate
def test_POST_encoding_detection_from_content(encoding):
    """Mock a POST response with no charset in its `Content-Type`.

    The non-ASCII sentence must still show up intact in the output.
    """
    payload = 'Všichni lidé jsou si rovni.'.encode(encoding)
    responses.add(
        responses.POST,
        URL_EXAMPLE,
        body=payload,
        content_type='text/plain',
    )
    output = http('--form', 'POST', URL_EXAMPLE)
    assert 'Všichni lidé jsou si rovni.' in output
@pytest.mark.parametrize('encoding', ENCODINGS)
@pytest.mark.parametrize('pretty', PRETTY_MAP.keys())
@responses.activate
def test_stream_encoding_detection_from_content_type_header(encoding, pretty):
    """Run with `--stream` against a streamed XML response, for each `--pretty` value.

    The mocked `Content-Type` carries the charset; the non-ASCII text must
    show up intact in the output.
    """
    payload = '<?xml version="1.0"?>\n<c>Financiën</c>'.encode(encoding)
    responses.add(
        responses.GET,
        URL_EXAMPLE,
        body=payload,
        stream=True,
        content_type=f'text/xml; charset={encoding.upper()}',
    )
    pretty_flag = '--pretty=' + pretty
    output = http(pretty_flag, '--stream', 'GET', URL_EXAMPLE)
    assert 'Financiën' in output