mirror of
https://github.com/httpie/cli.git
synced 2025-02-21 19:06:29 +02:00
* Fix encoding error with non-prettified encoded responses Removed `--format-option response.as` and promote `--response-as`: using the format option would be misleading as it is now also used by non-prettified responses. * Encoding refactoring * split --response-as into --response-mime and --response-charset * add support for Content-Type charset for requests printed to terminal * add support for charset detection for requests printed to terminal without a Content-Type charset * etc. * `test_unicode.py` → `test_encoding.py` * Drop sequence length check * Clean-up tests * [skip ci] Tweaks * Use the compatible release clause for `charset_normalizer` requirement Cf. https://www.python.org/dev/peps/pep-0440/#version-specifiers * Clean-up * Partially revert d52a4833e461e1b16b7961a112ea5c53e93cd643 * Changelog * Tweak tests * [skip ci] Better test name * Cleanup tests and add request body charset detection * More test suite cleanups * Cleanup * Fix code style in test * Improve detect_encoding() docstring * Uniformize pytest.mark.parametrize() calls * [skip ci] Comment out TODOs (will be tackled in a specific PR) Co-authored-by: Jakub Roztocil <jakub@roztocil.co>
51 lines
1.3 KiB
Python
51 lines
1.3 KiB
Python
from typing import Union
|
|
|
|
from charset_normalizer import from_bytes
|
|
from charset_normalizer.constant import TOO_SMALL_SEQUENCE
|
|
|
|
# Encoding name that `detect_encoding()` falls back to when the content is
# too short to analyse or when detection yields no match.
UTF8 = 'utf-8'
|
|
|
|
# Type alias for raw binary content accepted by the helpers in this module:
# either immutable `bytes` or a mutable `bytearray`.
ContentBytes = Union[bytearray, bytes]
|
|
|
|
|
|
def detect_encoding(content: ContentBytes) -> str:
    """Guess the character encoding of `content`.

    Content that is not longer than `TOO_SMALL_SEQUENCE` is never analysed
    and UTF-8 is reported for it: on such short text the detection can
    return a random encoding, leading to confusing results with
    `charset_normalizer` versions < 2.0.5. UTF-8 is also reported when
    the detection does not produce a match.

    >>> too_short = ']"foo"'
    >>> detected = from_bytes(too_short.encode()).best().encoding
    >>> detected
    'ascii'
    >>> too_short.encode().decode(detected)
    ']"foo"'
    """
    # Guard clause: too little data to trust the detector.
    if len(content) <= TOO_SMALL_SEQUENCE:
        return UTF8

    # The content is converted to `bytes` before it is handed to the detector.
    best_guess = from_bytes(bytes(content)).best()
    if not best_guess:
        return UTF8
    return best_guess.encoding
|
|
|
|
|
|
def smart_decode(content: ContentBytes, encoding: str) -> str:
    """Return `content` decoded to text.

    A falsy `encoding` (e.g. an empty string) means the encoding is not
    known; in that case it is guessed from `content` with
    `detect_encoding()`.

    Undecodable bytes are replaced instead of raising a Unicode error.

    """
    # Prefer the caller-supplied encoding; only run detection as a fallback.
    charset = encoding or detect_encoding(content)
    return content.decode(charset, errors='replace')
|
|
|
|
|
|
def smart_encode(content: str, encoding: str) -> bytes:
    """Return `content` encoded to bytes with the given `encoding`.

    Characters that cannot be represented in `encoding` are replaced
    instead of raising a Unicode error.

    """
    encoded = content.encode(encoding, errors='replace')
    return encoded
|