You've already forked httpie-cli
							
							
				mirror of
				https://github.com/httpie/cli.git
				synced 2025-10-30 23:47:52 +02:00 
			
		
		
		
	Improve handling of prettified responses without correct content-type encoding (#1110)
* Improve handling of responses without correct content-type charset * [skip ci] Minor tweaks in tests * [skip ci] Add documentation Co-authored-by: claudiatd <claudiatd@gmail.com> * Improve unknown encoding test [skip ci] * Review mime and options retrieval * Add full content-type example in help output * Simplify decoder * [skip ci] s/charset/encoding/ * Tweaks * [skip ci] Fix type annotation * [skip ci] s/charset/encoding/ * Tweaks * Fix type annotation * Improvement * Introduce `codec.encode()` * [skip ci] Tweak changelog Co-authored-by: claudiatd <claudiatd@gmail.com>
This commit is contained in:
		
				
					committed by
					
						 GitHub
						GitHub
					
				
			
			
				
	
			
			
			
						parent
						
							b50f9aa7e7
						
					
				
				
					commit
					71adcd97d0
				
			| @@ -8,6 +8,7 @@ This project adheres to [Semantic Versioning](https://semver.org/). | |||||||
| - Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130)) | - Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130)) | ||||||
| - Added `--format-options=response.as:CONTENT_TYPE` to allow overriding the response `Content-Type`. ([#1134](https://github.com/httpie/httpie/issues/1134)) | - Added `--format-options=response.as:CONTENT_TYPE` to allow overriding the response `Content-Type`. ([#1134](https://github.com/httpie/httpie/issues/1134)) | ||||||
| - Added `--response-as` shortcut for setting the response `Content-Type`-related `--format-options`. ([#1134](https://github.com/httpie/httpie/issues/1134)) | - Added `--response-as` shortcut for setting the response `Content-Type`-related `--format-options`. ([#1134](https://github.com/httpie/httpie/issues/1134)) | ||||||
|  | - Improved handling of prettified responses without correct `Content-Type` encoding. ([#1110](https://github.com/httpie/httpie/issues/1110)) | ||||||
| - Installed plugins are now listed in `--debug` output. ([#1165](https://github.com/httpie/httpie/issues/1165)) | - Installed plugins are now listed in `--debug` output. ([#1165](https://github.com/httpie/httpie/issues/1165)) | ||||||
| - Fixed duplicate keys preservation of JSON data. ([#1163](https://github.com/httpie/httpie/issues/1163)) | - Fixed duplicate keys preservation of JSON data. ([#1163](https://github.com/httpie/httpie/issues/1163)) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1249,6 +1249,18 @@ For example, the following request will force the response to be treated as XML: | |||||||
| ``` | ``` | ||||||
|  |  | ||||||
| You will nearly instantly see something like this: | You will nearly instantly see something like this: | ||||||
|  |  | ||||||
|  | ```http | ||||||
|  | HTTP/1.1 200 OK | ||||||
|  | Content-Type: application/octet-stream | ||||||
|  |  | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | ### Redirected output | ||||||
|  |  | ||||||
|  | HTTPie uses a different set of defaults for redirected output than for [terminal output](#terminal-output). | ||||||
|  | The differences are: | ||||||
|  |  | ||||||
| - Formatting and colors aren’t applied (unless `--pretty` is specified). | - Formatting and colors aren’t applied (unless `--pretty` is specified). | ||||||
| - Only the response body is printed (unless one of the [output options](#output-options) is set). | - Only the response body is printed (unless one of the [output options](#output-options) is set). | ||||||
| - Also, binary data isn’t suppressed. | - Also, binary data isn’t suppressed. | ||||||
|   | |||||||
| @@ -316,6 +316,8 @@ output_processing.add_argument( | |||||||
|     Override the response Content-Type for formatting purposes, e.g.: |     Override the response Content-Type for formatting purposes, e.g.: | ||||||
|  |  | ||||||
|         --response-as=application/xml |         --response-as=application/xml | ||||||
|  |         --response-as=charset=utf-8 | ||||||
|  |         --response-as='application/xml; charset=utf-8' | ||||||
|  |  | ||||||
|     It is a shortcut for: |     It is a shortcut for: | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										37
									
								
								httpie/codec.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								httpie/codec.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | |||||||
|  | from typing import Union | ||||||
|  |  | ||||||
|  | from charset_normalizer import from_bytes | ||||||
|  |  | ||||||
|  | from .constants import UTF8 | ||||||
|  |  | ||||||
|  | Bytes = Union[bytearray, bytes] | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def detect_encoding(content: Bytes) -> str: | ||||||
|  |     """Detect the `content` encoding. | ||||||
|  |     Fall back to UTF-8 when no suitable encoding is found. | ||||||
|  |  | ||||||
|  |     """ | ||||||
|  |     match = from_bytes(bytes(content)).best() | ||||||
|  |     return match.encoding if match else UTF8 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def decode(content: Bytes, encoding: str) -> str: | ||||||
|  |     """Decode `content` using the given `encoding`. | ||||||
|  |     If no `encoding` is provided, the best effort is to guess it from `content`. | ||||||
|  |  | ||||||
|  |     Unicode errors are replaced. | ||||||
|  |  | ||||||
|  |     """ | ||||||
|  |     if not encoding: | ||||||
|  |         encoding = detect_encoding(content) | ||||||
|  |     return content.decode(encoding, 'replace') | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def encode(content: str, encoding: str) -> bytes: | ||||||
|  |     """Encode `content` using the given `encoding`. | ||||||
|  |  | ||||||
|  |     Unicode errors are replaced. | ||||||
|  |  | ||||||
|  |     """ | ||||||
|  |     return content.encode(encoding, 'replace') | ||||||
| @@ -30,11 +30,6 @@ class HTTPMessage(metaclass=ABCMeta): | |||||||
|     def encoding(self) -> Optional[str]: |     def encoding(self) -> Optional[str]: | ||||||
|         """Return a `str` with the message's encoding, if known.""" |         """Return a `str` with the message's encoding, if known.""" | ||||||
|  |  | ||||||
|     @property |  | ||||||
|     def body(self) -> bytes: |  | ||||||
|         """Return a `bytes` with the message's body.""" |  | ||||||
|         raise NotImplementedError() |  | ||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def content_type(self) -> str: |     def content_type(self) -> str: | ||||||
|         """Return the message content type.""" |         """Return the message content type.""" | ||||||
| @@ -86,12 +81,6 @@ class HTTPResponse(HTTPMessage): | |||||||
|     def encoding(self): |     def encoding(self): | ||||||
|         return self._orig.encoding or UTF8 |         return self._orig.encoding or UTF8 | ||||||
|  |  | ||||||
|     @property |  | ||||||
|     def body(self): |  | ||||||
|         # Only now the response body is fetched. |  | ||||||
|         # Shouldn't be touched unless the body is actually needed. |  | ||||||
|         return self._orig.content |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class HTTPRequest(HTTPMessage): | class HTTPRequest(HTTPMessage): | ||||||
|     """A :class:`requests.models.Request` wrapper.""" |     """A :class:`requests.models.Request` wrapper.""" | ||||||
|   | |||||||
| @@ -25,7 +25,7 @@ def pretty_xml(document: 'Document', | |||||||
|     } |     } | ||||||
|     if standalone is not None and sys.version_info >= (3, 9): |     if standalone is not None and sys.version_info >= (3, 9): | ||||||
|         kwargs['standalone'] = standalone |         kwargs['standalone'] = standalone | ||||||
|     body = document.toprettyxml(**kwargs).decode() |     body = document.toprettyxml(**kwargs).decode(kwargs['encoding']) | ||||||
|  |  | ||||||
|     # Remove blank lines automatically added by `toprettyxml()`. |     # Remove blank lines automatically added by `toprettyxml()`. | ||||||
|     return '\n'.join(line for line in body.splitlines() if line.strip()) |     return '\n'.join(line for line in body.splitlines() if line.strip()) | ||||||
|   | |||||||
| @@ -1,7 +1,8 @@ | |||||||
| from abc import ABCMeta, abstractmethod | from abc import ABCMeta, abstractmethod | ||||||
| from itertools import chain | from itertools import chain | ||||||
| from typing import Callable, Iterable, Union | from typing import Any, Callable, Dict, Iterable, Tuple, Union | ||||||
|  |  | ||||||
|  | from .. import codec | ||||||
| from ..cli.constants import EMPTY_FORMAT_OPTION | from ..cli.constants import EMPTY_FORMAT_OPTION | ||||||
| from ..context import Environment | from ..context import Environment | ||||||
| from ..constants import UTF8 | from ..constants import UTF8 | ||||||
| @@ -114,8 +115,8 @@ class EncodedStream(BaseStream): | |||||||
|         for line, lf in self.msg.iter_lines(self.CHUNK_SIZE): |         for line, lf in self.msg.iter_lines(self.CHUNK_SIZE): | ||||||
|             if b'\0' in line: |             if b'\0' in line: | ||||||
|                 raise BinarySuppressedError() |                 raise BinarySuppressedError() | ||||||
|             yield line.decode(self.msg.encoding) \ |             line = codec.decode(line, self.msg.encoding) | ||||||
|                       .encode(self.output_encoding, 'replace') + lf |             yield codec.encode(line, self.output_encoding) + lf | ||||||
|  |  | ||||||
|  |  | ||||||
| class PrettyStream(EncodedStream): | class PrettyStream(EncodedStream): | ||||||
| @@ -137,15 +138,23 @@ class PrettyStream(EncodedStream): | |||||||
|         super().__init__(**kwargs) |         super().__init__(**kwargs) | ||||||
|         self.formatting = formatting |         self.formatting = formatting | ||||||
|         self.conversion = conversion |         self.conversion = conversion | ||||||
|         self.mime = self.get_mime() |         self.mime, mime_options = self._get_mime_and_options() | ||||||
|  |         self.encoding = mime_options.get('charset') or '' | ||||||
|  |  | ||||||
|     def get_mime(self) -> str: |     def _get_mime_and_options(self) -> Tuple[str, Dict[str, Any]]: | ||||||
|         mime = parse_header_content_type(self.msg.content_type)[0] |         # Defaults from the `Content-Type` header. | ||||||
|         if isinstance(self.msg, HTTPResponse): |         mime, options = parse_header_content_type(self.msg.content_type) | ||||||
|             forced_content_type = self.formatting.options['response']['as'] |  | ||||||
|             if forced_content_type != EMPTY_FORMAT_OPTION: |         if not isinstance(self.msg, HTTPResponse): | ||||||
|                 mime = parse_header_content_type(forced_content_type)[0] or mime |             return mime, options | ||||||
|         return mime |  | ||||||
|  |         # Override from the `--response-as` option. | ||||||
|  |         forced_content_type = self.formatting.options['response']['as'] | ||||||
|  |         if forced_content_type == EMPTY_FORMAT_OPTION: | ||||||
|  |             return mime, options | ||||||
|  |  | ||||||
|  |         forced_mime, forced_options = parse_header_content_type(forced_content_type) | ||||||
|  |         return (forced_mime or mime, forced_options or options) | ||||||
|  |  | ||||||
|     def get_headers(self) -> bytes: |     def get_headers(self) -> bytes: | ||||||
|         return self.formatting.format_headers( |         return self.formatting.format_headers( | ||||||
| @@ -176,9 +185,9 @@ class PrettyStream(EncodedStream): | |||||||
|         if not isinstance(chunk, str): |         if not isinstance(chunk, str): | ||||||
|             # Text when a converter has been used, |             # Text when a converter has been used, | ||||||
|             # otherwise it will always be bytes. |             # otherwise it will always be bytes. | ||||||
|             chunk = chunk.decode(self.msg.encoding, 'replace') |             chunk = codec.decode(chunk, self.encoding) | ||||||
|         chunk = self.formatting.format_body(content=chunk, mime=self.mime) |         chunk = self.formatting.format_body(content=chunk, mime=self.mime) | ||||||
|         return chunk.encode(self.output_encoding, 'replace') |         return codec.encode(chunk, self.output_encoding) | ||||||
|  |  | ||||||
|  |  | ||||||
| class BufferedPrettyStream(PrettyStream): | class BufferedPrettyStream(PrettyStream): | ||||||
|   | |||||||
							
								
								
									
										1
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								setup.py
									
									
									
									
									
								
							| @@ -25,6 +25,7 @@ dev_require = [ | |||||||
|     'wheel', |     'wheel', | ||||||
| ] | ] | ||||||
| install_requires = [ | install_requires = [ | ||||||
|  |     'charset_normalizer>=2.0.0', | ||||||
|     'defusedxml>=0.6.0', |     'defusedxml>=0.6.0', | ||||||
|     'requests[socks]>=2.22.0', |     'requests[socks]>=2.22.0', | ||||||
|     'Pygments>=2.5.2', |     'Pygments>=2.5.2', | ||||||
|   | |||||||
| @@ -39,3 +39,10 @@ def test_max_headers_limit(httpbin_both): | |||||||
|  |  | ||||||
| def test_max_headers_no_limit(httpbin_both): | def test_max_headers_no_limit(httpbin_both): | ||||||
|     assert HTTP_OK in http('--max-headers=0', httpbin_both + '/get') |     assert HTTP_OK in http('--max-headers=0', httpbin_both + '/get') | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def test_charset_argument_unknown_encoding(httpbin_both): | ||||||
|  |     with raises(LookupError) as e: | ||||||
|  |         http('--response-as', 'charset=foobar', | ||||||
|  |              'GET', httpbin_both + '/get') | ||||||
|  |     assert 'unknown encoding: foobar' in str(e.value) | ||||||
|   | |||||||
| @@ -2,9 +2,17 @@ | |||||||
| Various unicode handling related tests. | Various unicode handling related tests. | ||||||
|  |  | ||||||
| """ | """ | ||||||
| from .utils import http, HTTP_OK | import pytest | ||||||
|  | import responses | ||||||
|  |  | ||||||
|  | from httpie.cli.constants import PRETTY_MAP | ||||||
|  | from httpie.constants import UTF8 | ||||||
|  |  | ||||||
|  | from .utils import http, HTTP_OK, URL_EXAMPLE | ||||||
| from .fixtures import UNICODE | from .fixtures import UNICODE | ||||||
|  |  | ||||||
|  | ENCODINGS = [UTF8, 'windows-1250'] | ||||||
|  |  | ||||||
|  |  | ||||||
| def test_unicode_headers(httpbin): | def test_unicode_headers(httpbin): | ||||||
|     # httpbin doesn't interpret UTF-8 headers |     # httpbin doesn't interpret UTF-8 headers | ||||||
| @@ -109,3 +117,95 @@ def test_unicode_digest_auth(httpbin): | |||||||
|     http('--auth-type=digest', |     http('--auth-type=digest', | ||||||
|          '--auth', f'test:{UNICODE}', |          '--auth', f'test:{UNICODE}', | ||||||
|          f'{httpbin.url}/digest-auth/auth/test/{UNICODE}') |          f'{httpbin.url}/digest-auth/auth/test/{UNICODE}') | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @pytest.mark.parametrize('encoding', ENCODINGS) | ||||||
|  | @responses.activate | ||||||
|  | def test_GET_encoding_detection_from_content_type_header(encoding): | ||||||
|  |     responses.add(responses.GET, | ||||||
|  |                   URL_EXAMPLE, | ||||||
|  |                   body='<?xml version="1.0"?>\n<c>Financiën</c>'.encode(encoding), | ||||||
|  |                   content_type=f'text/xml; charset={encoding.upper()}') | ||||||
|  |     r = http('GET', URL_EXAMPLE) | ||||||
|  |     assert 'Financiën' in r | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @pytest.mark.parametrize('encoding', ENCODINGS) | ||||||
|  | @responses.activate | ||||||
|  | def test_GET_encoding_detection_from_content(encoding): | ||||||
|  |     body = f'<?xml version="1.0" encoding="{encoding.upper()}"?>\n<c>Financiën</c>' | ||||||
|  |     responses.add(responses.GET, | ||||||
|  |                   URL_EXAMPLE, | ||||||
|  |                   body=body.encode(encoding), | ||||||
|  |                   content_type='text/xml') | ||||||
|  |     r = http('GET', URL_EXAMPLE) | ||||||
|  |     assert 'Financiën' in r | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @responses.activate | ||||||
|  | def test_GET_encoding_provided_by_format_options(): | ||||||
|  |     responses.add(responses.GET, | ||||||
|  |                   URL_EXAMPLE, | ||||||
|  |                   body='▒▒▒'.encode('johab'), | ||||||
|  |                   content_type='text/plain') | ||||||
|  |     r = http('--format-options', 'response.as:text/plain; charset=johab', | ||||||
|  |              'GET', URL_EXAMPLE) | ||||||
|  |     assert '▒▒▒' in r | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @responses.activate | ||||||
|  | def test_GET_encoding_provided_by_shortcut_option(): | ||||||
|  |     responses.add(responses.GET, | ||||||
|  |                   URL_EXAMPLE, | ||||||
|  |                   body='▒▒▒'.encode('johab'), | ||||||
|  |                   content_type='text/plain') | ||||||
|  |     r = http('--response-as', 'text/plain; charset=johab', | ||||||
|  |              'GET', URL_EXAMPLE) | ||||||
|  |     assert '▒▒▒' in r | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @pytest.mark.parametrize('encoding', ENCODINGS) | ||||||
|  | @responses.activate | ||||||
|  | def test_GET_encoding_provided_by_empty_shortcut_option_should_use_content_detection(encoding): | ||||||
|  |     body = f'<?xml version="1.0" encoding="{encoding.upper()}"?>\n<c>Financiën</c>' | ||||||
|  |     responses.add(responses.GET, | ||||||
|  |                   URL_EXAMPLE, | ||||||
|  |                   body=body.encode(encoding), | ||||||
|  |                   content_type='text/xml') | ||||||
|  |     r = http('--response-as', '', 'GET', URL_EXAMPLE) | ||||||
|  |     assert 'Financiën' in r | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @pytest.mark.parametrize('encoding', ENCODINGS) | ||||||
|  | @responses.activate | ||||||
|  | def test_POST_encoding_detection_from_content_type_header(encoding): | ||||||
|  |     responses.add(responses.POST, | ||||||
|  |                   URL_EXAMPLE, | ||||||
|  |                   body='Všichni lidé jsou si rovni.'.encode(encoding), | ||||||
|  |                   content_type=f'text/plain; charset={encoding.upper()}') | ||||||
|  |     r = http('--form', 'POST', URL_EXAMPLE) | ||||||
|  |     assert 'Všichni lidé jsou si rovni.' in r | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @pytest.mark.parametrize('encoding', ENCODINGS) | ||||||
|  | @responses.activate | ||||||
|  | def test_POST_encoding_detection_from_content(encoding): | ||||||
|  |     responses.add(responses.POST, | ||||||
|  |                   URL_EXAMPLE, | ||||||
|  |                   body='Všichni lidé jsou si rovni.'.encode(encoding), | ||||||
|  |                   content_type='text/plain') | ||||||
|  |     r = http('--form', 'POST', URL_EXAMPLE) | ||||||
|  |     assert 'Všichni lidé jsou si rovni.' in r | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @pytest.mark.parametrize('encoding', ENCODINGS) | ||||||
|  | @pytest.mark.parametrize('pretty', PRETTY_MAP.keys()) | ||||||
|  | @responses.activate | ||||||
|  | def test_stream_encoding_detection_from_content_type_header(encoding, pretty): | ||||||
|  |     responses.add(responses.GET, | ||||||
|  |                   URL_EXAMPLE, | ||||||
|  |                   body='<?xml version="1.0"?>\n<c>Financiën</c>'.encode(encoding), | ||||||
|  |                   stream=True, | ||||||
|  |                   content_type=f'text/xml; charset={encoding.upper()}') | ||||||
|  |     r = http('--pretty=' + pretty, '--stream', 'GET', URL_EXAMPLE) | ||||||
|  |     assert 'Financiën' in r | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user