Commit fd09164d authored by Nsukami Patrick
Browse files

[mypy] Add typing

parent 9682dfd86437
Pipeline #10785 passed with stages
in 2 minutes and 10 seconds
...@@ -53,7 +53,7 @@ mypy: ...@@ -53,7 +53,7 @@ mypy:
variables: variables:
- $TRIGGERED_FROM_OTHER_PROJECT - $TRIGGERED_FROM_OTHER_PROJECT
stage: tests stage: tests
script: python3 -c 'print("skipped")' # tox -e mypy script: tox -e mypy
trigger-cubicweb-pipeline: trigger-cubicweb-pipeline:
stage: after-tests stage: after-tests
......
...@@ -2,3 +2,5 @@ syntax: glob ...@@ -2,3 +2,5 @@ syntax: glob
*.pyc *.pyc
__pycache__ __pycache__
.tox .tox
.mypy_cache
*.egg-info
include ChangeLog include ChangeLog
include COPYING include COPYING
include COPYING.LESSER include COPYING.LESSER
recursive-include test *
include __pkginfo__.py include __pkginfo__.py
include doc/makefile include doc/makefile
include announce.txt include announce.txt
include tox.ini include tox.ini
include mypy.ini
recursive-include logilab *.py
recursive-include test *
exclude .gitlab-ci.yml exclude .gitlab-ci.yml
prune debian prune debian
...@@ -39,6 +39,10 @@ install_requires = [ ...@@ -39,6 +39,10 @@ install_requires = [
"html2text", "html2text",
] ]
tests_require = [
"pdflatex",
]
classifiers = [ classifiers = [
"Programming Language :: Python", "Programming Language :: Python",
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",
......
...@@ -26,18 +26,16 @@ ...@@ -26,18 +26,16 @@
`Lesser General Public License version 2` `Lesser General Public License version 2`
""" """
__docformat__ = "restructuredtext en" from _io import BytesIO
import locale import locale
import mimetypes import mimetypes
import re import re
try:
maketrans = bytes.maketrans
except AttributeError:
from string import maketrans
import codecs import codecs
from io import BytesIO from io import BytesIO # noqa: F811
from typing import Optional, Callable, Any, Tuple
from types import ModuleType
from logilab.common import deprecation from logilab.common import deprecation
...@@ -45,13 +43,18 @@ from html.entities import name2codepoint ...@@ -45,13 +43,18 @@ from html.entities import name2codepoint
import pkg_resources import pkg_resources
__version__ = pkg_resources.get_distribution("logilab-mtconverter").version maketrans: Callable[[bytes, bytes], bytes] = bytes.maketrans
__version__: str = pkg_resources.get_distribution("logilab-mtconverter").version
__docformat__: str = "restructuredtext en"
try: try:
import chardet import chardet
except ImportError: except ImportError:
# chardet unvailable # chardet unvailable
chardet = None # Name 'chardet' already defined (by an import)
chardet: Optional[ModuleType] = None # type: ignore[no-redef]
try: try:
DEFAULT_ENCODING = locale.getpreferredencoding() DEFAULT_ENCODING = locale.getpreferredencoding()
...@@ -74,7 +77,7 @@ CHARDET_MIN_SIZE = 20 ...@@ -74,7 +77,7 @@ CHARDET_MIN_SIZE = 20
CHARDET_CONFIDENCE_THRESHOLD = 0.75 CHARDET_CONFIDENCE_THRESHOLD = 0.75
def need_guess(mimetype, encoding): def need_guess(mimetype: str, encoding: str) -> bool:
"""return True if we can complete given mimetype / encoding information""" """return True if we can complete given mimetype / encoding information"""
if not mimetype: if not mimetype:
return True return True
...@@ -83,11 +86,11 @@ def need_guess(mimetype, encoding): ...@@ -83,11 +86,11 @@ def need_guess(mimetype, encoding):
return False return False
def is_text_mimetype(mimetype): def is_text_mimetype(mimetype: str) -> bool:
return mimetype.startswith("text/") or mimetype in TEXT_MIMETYPES return mimetype.startswith("text/") or mimetype in TEXT_MIMETYPES
def guess_encoding(buffer, fallbackencoding=None): def guess_encoding(buffer: BytesIO, fallbackencoding: Optional[Any] = None) -> str:
"""try to guess encoding from a buffer""" """try to guess encoding from a buffer"""
if hasattr(buffer, "getvalue"): # may be a StringIO if hasattr(buffer, "getvalue"): # may be a StringIO
buffer = buffer.getvalue() buffer = buffer.getvalue()
...@@ -113,13 +116,13 @@ def guess_encoding(buffer, fallbackencoding=None): ...@@ -113,13 +116,13 @@ def guess_encoding(buffer, fallbackencoding=None):
def guess_mimetype_and_encoding( def guess_mimetype_and_encoding(
format=None, format: Optional[str] = None,
encoding=None, encoding: Optional[Any] = None,
data=None, data: Optional[bytes] = None,
filename=None, filename: Optional[str] = None,
fallbackencoding=None, fallbackencoding: Optional[Any] = None,
fallbackmimetype=u"application/octet-stream", fallbackmimetype: str = u"application/octet-stream",
): ) -> Tuple[Optional[str], Optional[str]]:
if format and format.split("/")[-1] in BINARY_ENCODINGS: if format and format.split("/")[-1] in BINARY_ENCODINGS:
format = None # try to do better format = None # try to do better
if filename and not format: if filename and not format:
...@@ -137,19 +140,19 @@ def guess_mimetype_and_encoding( ...@@ -137,19 +140,19 @@ def guess_mimetype_and_encoding(
CONTROL_CHARS = [bytes((ci,)) for ci in range(32)] CONTROL_CHARS = [bytes((ci,)) for ci in range(32)]
TR_CONTROL_CHARS = [" "] * len(CONTROL_CHARS) _TR_CONTROL_CHARS = [" "] * len(CONTROL_CHARS)
for c in ("\n", "\r", "\t"): for c in ("\n", "\r", "\t"):
TR_CONTROL_CHARS[ord(c)] = c _TR_CONTROL_CHARS[ord(c)] = c
TR_CONTROL_CHARS[ord("\f")] = "\n" _TR_CONTROL_CHARS[ord("\f")] = "\n"
TR_CONTROL_CHARS[ord("\v")] = "\n" _TR_CONTROL_CHARS[ord("\v")] = "\n"
TR_CONTROL_CHARS = [c.encode("ascii") for c in TR_CONTROL_CHARS] TR_CONTROL_CHARS = [c.encode("ascii") for c in _TR_CONTROL_CHARS]
ESC_CAR_TABLE = maketrans( ESC_CAR_TABLE = maketrans(
"".encode("ascii").join(CONTROL_CHARS), "".encode("ascii").join(TR_CONTROL_CHARS) "".encode("ascii").join(CONTROL_CHARS), "".encode("ascii").join(TR_CONTROL_CHARS)
) )
ESC_UCAR_TABLE = ESC_CAR_TABLE.decode("latin1") ESC_UCAR_TABLE = ESC_CAR_TABLE.decode("latin1")
def xml_escape(data): def xml_escape(data: str) -> str:
"""escapes XML forbidden characters in attributes and PCDATA""" """escapes XML forbidden characters in attributes and PCDATA"""
if isinstance(data, str): if isinstance(data, str):
data = data.translate(ESC_UCAR_TABLE) data = data.translate(ESC_UCAR_TABLE)
...@@ -167,7 +170,7 @@ def xml_escape(data): ...@@ -167,7 +170,7 @@ def xml_escape(data):
html_escape = deprecation.renamed("html_escape", xml_escape) html_escape = deprecation.renamed("html_escape", xml_escape)
def html_unescape(data): def html_unescape(data: str) -> str:
"""unescapes XML/HTML entities""" """unescapes XML/HTML entities"""
for entityname, codepoint in name2codepoint.items(): for entityname, codepoint in name2codepoint.items():
data = data.replace("&%s;" % entityname, chr(codepoint)) data = data.replace("&%s;" % entityname, chr(codepoint))
...@@ -179,7 +182,9 @@ class TransformData: ...@@ -179,7 +182,9 @@ class TransformData:
type and encoding in case it applies type and encoding in case it applies
""" """
def __init__(self, data, mimetype, encoding=None, **kwargs): def __init__(
self, data: str, mimetype: str, encoding: Optional[str] = None, **kwargs: Any
) -> None:
self.__dict__.update(kwargs) self.__dict__.update(kwargs)
self.data = data self.data = data
self.mimetype = mimetype self.mimetype = mimetype
...@@ -187,11 +192,11 @@ class TransformData: ...@@ -187,11 +192,11 @@ class TransformData:
if not self.is_binary() and not encoding and not isinstance(self.data, str): if not self.is_binary() and not encoding and not isinstance(self.data, str):
self.encoding = guess_encoding(data) self.encoding = guess_encoding(data)
def get(self, attr, default=None): def get(self, attr: str, default: Optional[Any] = None) -> Optional[Any]:
"""get an optional data attribute""" """get an optional data attribute"""
return getattr(self, attr, default) return getattr(self, attr, default)
def decode(self, force=False): def decode(self, force: bool = False) -> str:
"""return the data as an unicode string""" """return the data as an unicode string"""
if isinstance(self.data, str): if isinstance(self.data, str):
return self.data return self.data
...@@ -209,7 +214,7 @@ class TransformData: ...@@ -209,7 +214,7 @@ class TransformData:
encoding = guess_encoding(self.data) encoding = guess_encoding(self.data)
return self.data.decode(encoding, UNICODE_POLICY) return self.data.decode(encoding, UNICODE_POLICY)
def encode(self, encoding=None): def encode(self, encoding: Optional[Any] = None) -> bytes:
"""return the data as an encoded string""" """return the data as an encoded string"""
if (encoding is None or self.encoding == encoding) and isinstance( if (encoding is None or self.encoding == encoding) and isinstance(
self.data, bytes self.data, bytes
...@@ -218,14 +223,14 @@ class TransformData: ...@@ -218,14 +223,14 @@ class TransformData:
encoding = encoding or self.encoding or "utf8" encoding = encoding or self.encoding or "utf8"
return self.decode().encode(encoding) return self.decode().encode(encoding)
def is_binary(self): def is_binary(self) -> bool:
return not is_text_mimetype(self.mimetype) or self.encoding in BINARY_ENCODINGS return not is_text_mimetype(self.mimetype) or self.encoding in BINARY_ENCODINGS
def check_encoding(self): def check_encoding(self) -> None:
if is_text_mimetype(self.mimetype) and self.is_binary(): if is_text_mimetype(self.mimetype) and self.is_binary():
raise TransformError() raise TransformError()
def binary_decode(self): def binary_decode(self): # type: ignore[no-untyped-def] # FIXME: is self.data a str or a byte?
if self.encoding == "gzip": if self.encoding == "gzip":
import gzip import gzip
...@@ -259,7 +264,9 @@ class TransformError(MtConverterError): ...@@ -259,7 +264,9 @@ class TransformError(MtConverterError):
""" """
def register_pil_transforms(engine, verb=True): def register_pil_transforms(
engine: Any, verb: bool = True
) -> bool: # FIXME: engine: TransformEngine
try: try:
from logilab.mtconverter.transforms import piltransforms from logilab.mtconverter.transforms import piltransforms
except ImportError: except ImportError:
...@@ -273,7 +280,9 @@ def register_pil_transforms(engine, verb=True): ...@@ -273,7 +280,9 @@ def register_pil_transforms(engine, verb=True):
return True return True
def register_pygments_transforms(engine, verb=True): def register_pygments_transforms(
engine: Any, verb: bool = True
) -> bool: # FIXME: engine: TransformEngine
try: try:
from logilab.mtconverter.transforms import pygmentstransforms from logilab.mtconverter.transforms import pygmentstransforms
except ImportError: except ImportError:
...@@ -287,7 +296,9 @@ def register_pygments_transforms(engine, verb=True): ...@@ -287,7 +296,9 @@ def register_pygments_transforms(engine, verb=True):
return True return True
def register_base_transforms(engine, verb=True): def register_base_transforms(
engine: Any, verb: bool = True
) -> bool: # FIXME: engine: TransformEngine
from logilab.mtconverter.transforms import ( from logilab.mtconverter.transforms import (
cmdtransforms, cmdtransforms,
text_to_text, text_to_text,
......
...@@ -17,11 +17,13 @@ ...@@ -17,11 +17,13 @@
# with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>. # with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>.
"""the transformation engine""" """the transformation engine"""
from logilab.mtconverter import TransformError from logilab.mtconverter import TransformData, TransformError
from logilab.mtconverter.transform import TransformsChain from logilab.mtconverter.transform import Transform, TransformsChain
from typing import Tuple, Dict, List, Optional
def split_mimetype(mimetype):
def split_mimetype(mimetype: str) -> Tuple[str, str]:
try: try:
main, sub = mimetype.split("/") main, sub = mimetype.split("/")
except ValueError: except ValueError:
...@@ -31,39 +33,32 @@ def split_mimetype(mimetype): ...@@ -31,39 +33,32 @@ def split_mimetype(mimetype):
return main, sub return main, sub
class TransformEngine(object): class TransformEngine:
"""mimetype oriented conversions engine""" """mimetype oriented conversions engine"""
def __init__(self): def __init__(self) -> None:
self._mtmap = {} self._mtmap: Dict[str, dict] = {}
self._mtmainmap = {} self._mtmainmap: Dict[str, dict] = {}
self.transforms = {} self.transforms: Dict[str, Transform] = {}
def add_transform(self, transform): def add_transform(self, transform: Transform) -> None:
"""register a new transform""" """register a new transform"""
self._map_transform(transform) self._map_transform(transform)
def remove_transform(self, name, *inputs): def remove_transform(self, name: str, *inputs: str) -> None:
""" unregister a transform """ unregister a transform
name is the name of a registered transform name is the name of a registered transform
""" """
self._unmap_transform(self.transforms[name], *inputs) self._unmap_transform(self.transforms[name], *inputs)
def has_input(self, mimetype): def has_input(self, mimetype: str) -> bool:
"""return True if the engine has a transformation taking the given """return True if the engine has a transformation taking the given
mimetype as input mimetype as input
""" """
if mimetype in self._mtmap: return mimetype in self._mtmap or split_mimetype(mimetype)[0] in self._mtmainmap
return True
if split_mimetype(mimetype)[0] in self._mtmainmap:
return True
return False
def convert(self, trdata, targetmimetype): def convert(self, trdata: TransformData, targetmimetype: str) -> TransformData:
"""convert the given data structure into the given mime type """convert the given data structure into the given mime type
:param trdata: `TransformData`
:rtype: `TransformData`
""" """
trdata.check_encoding() trdata.check_encoding()
# get a path to output mime type # get a path to output mime type
...@@ -84,7 +79,7 @@ class TransformEngine(object): ...@@ -84,7 +79,7 @@ class TransformEngine(object):
transform = path[0] transform = path[0]
return transform.convert(trdata) return transform.convert(trdata)
def _map_transform(self, transform): def _map_transform(self, transform: Transform) -> None:
"""map transform to internal structures""" """map transform to internal structures"""
if not (transform.inputs and transform.output): if not (transform.inputs and transform.output):
raise TransformError("transform is missing input or output") raise TransformError("transform is missing input or output")
...@@ -104,12 +99,13 @@ class TransformEngine(object): ...@@ -104,12 +99,13 @@ class TransformEngine(object):
inmap[transform.output].append(transform) inmap[transform.output].append(transform)
except KeyError: except KeyError:
inmap[transform.output] = [transform] inmap[transform.output] = [transform]
assert transform.name is not None
self.transforms[transform.name] = transform self.transforms[transform.name] = transform
def _unmap_transform(self, transform, *inputs): def _unmap_transform(self, transform: Transform, *inputs: str) -> None:
"""unmap transform from internal structures""" """unmap transform from internal structures"""
if not inputs: if not inputs:
inputs = transform.inputs inputs = transform.inputs # type: ignore[assignment]
for mt in inputs: for mt in inputs:
main, sub = split_mimetype(mt) main, sub = split_mimetype(mt)
if sub == "*": if sub == "*":
...@@ -117,9 +113,12 @@ class TransformEngine(object): ...@@ -117,9 +113,12 @@ class TransformEngine(object):
else: else:
inmap = self._mtmap[mt] inmap = self._mtmap[mt]
inmap[transform.output].remove(transform) inmap[transform.output].remove(transform)
assert transform.name is not None
del self.transforms[transform.name] del self.transforms[transform.name]
def find_path(self, orig, target, required_transforms=()): def find_path(
self, orig: str, target: str, required_transforms: List[str] = []
) -> Optional[TransformsChain]:
"""return the shortest path for transformation from orig mimetype to """return the shortest path for transformation from orig mimetype to
target mimetype target mimetype
""" """
...@@ -136,11 +135,15 @@ class TransformEngine(object): ...@@ -136,11 +135,15 @@ class TransformEngine(object):
shortest = len(path) shortest = len(path)
return winner return winner
def _get_paths(self, orig, target, requirements, path=None, result=None): def _get_paths( # type: ignore[no-untyped-def]
self, orig: str, target: str, requirements: List[str], path=None, result=None
) -> List[TransformsChain]:
"""return a all path for transformation from orig mimetype to """return a all path for transformation from orig mimetype to
target mimetype target mimetype
""" """
if path is None: if (
path is None
): # FIXME: why not empty list as default value for path & result?
result = [] result = []
path = [] path = []
requirements = list(requirements) requirements = list(requirements)
...@@ -157,7 +160,14 @@ class TransformEngine(object): ...@@ -157,7 +160,14 @@ class TransformEngine(object):
# we are done # we are done
return result return result
def _search_outputs(self, outputs, target, requirements, path, result): def _search_outputs(
self,
outputs: Dict[str, List[Transform]],
target: str,
requirements: List[str],
path: List[Optional[Transform]],
result: List[List[Optional[Transform]]],
) -> None:
path.append(None) path.append(None)
for outputmimetype, transforms in outputs.items(): for outputmimetype, transforms in outputs.items():
for transform in transforms: for transform in transforms:
...@@ -172,9 +182,11 @@ class TransformEngine(object): ...@@ -172,9 +182,11 @@ class TransformEngine(object):
path[-1] = transform path[-1] = transform
if outputmimetype == target: if outputmimetype == target:
if not requirements: if not requirements:
assert result is not None
result.append(path[:]) result.append(path[:])
else: else:
self._get_paths(outputmimetype, target, requirements, path, result) self._get_paths(outputmimetype, target, requirements, path, result)
if required: if required:
assert name is not None
requirements.append(name) requirements.append(name)
path.pop() path.pop()
...@@ -17,7 +17,12 @@ ...@@ -17,7 +17,12 @@
# with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>. # with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>.
"""base transformation objects""" """base transformation objects"""
__docformat__ = "restructuredtext en" from typing import Sequence, Optional
from typing import Any
from logilab.mtconverter import TransformData
from typing import Tuple
__docformat__: str = "restructuredtext en"
class Transform: class Transform:
...@@ -25,18 +30,18 @@ class Transform: ...@@ -25,18 +30,18 @@ class Transform:
into another MIME type into another MIME type
""" """
name = None name: Optional[str] = None
inputs = () inputs: Sequence[str] = []
output = None output: Optional[str] = None
input_encoding = None input_encoding: Optional[str] = None
output_encoding = None output_encoding: Optional[str] = None
def __init__(self, **kwargs): def __init__(self, **kwargs: Any) -> None:
self.__dict__.update(kwargs) self.__dict__.update(kwargs)
if not getattr(self, "name", None): if not getattr(self, "name", None):
self.name = self.__class__.__name__