1
0
mirror of https://github.com/httpie/cli.git synced 2026-04-24 19:53:55 +02:00

Optimize encoding detection (#1243)

* Optimize encoding detection

* Use a threshold-based system
This commit is contained in:
Batuhan Taskaya
2021-12-23 22:05:58 +03:00
committed by GitHub
parent 5a83a9ebc4
commit e09401b81a
6 changed files with 71 additions and 10 deletions
+3 -3
View File
@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Tuple
from charset_normalizer import from_bytes
from charset_normalizer.constant import TOO_SMALL_SEQUENCE
@@ -29,7 +29,7 @@ def detect_encoding(content: ContentBytes) -> str:
return encoding
-def smart_decode(content: ContentBytes, encoding: str) -> str:
+def smart_decode(content: ContentBytes, encoding: str) -> Tuple[str, str]:
"""Decode `content` using the given `encoding`.
If no `encoding` is provided, the best effort is to guess it from `content`.
@@ -38,7 +38,7 @@ def smart_decode(content: ContentBytes, encoding: str) -> str:
"""
if not encoding:
encoding = detect_encoding(content)
-return content.decode(encoding, 'replace')
+return content.decode(encoding, 'replace'), encoding
def smart_encode(content: str, encoding: str) -> bytes: