Commit fd09164d authored by Nsukami Patrick
Browse files

[mypy] Add typing

parent 9682dfd86437
Pipeline #10785 passed with stages
in 2 minutes and 10 seconds
......@@ -53,7 +53,7 @@ mypy:
variables:
- $TRIGGERED_FROM_OTHER_PROJECT
stage: tests
script: python3 -c 'print("skipped")' # tox -e mypy
script: tox -e mypy
trigger-cubicweb-pipeline:
stage: after-tests
......
......@@ -2,3 +2,5 @@ syntax: glob
*.pyc
__pycache__
.tox
.mypy_cache
*.egg-info
include ChangeLog
include COPYING
include COPYING.LESSER
recursive-include test *
include __pkginfo__.py
include doc/makefile
include announce.txt
include tox.ini
include mypy.ini
recursive-include logilab *.py
recursive-include test *
exclude .gitlab-ci.yml
prune debian
......@@ -39,6 +39,10 @@ install_requires = [
"html2text",
]
tests_require = [
"pdflatex",
]
classifiers = [
"Programming Language :: Python",
"Programming Language :: Python :: 3",
......
......@@ -26,18 +26,16 @@
`Lesser General Public License version 2`
"""
__docformat__ = "restructuredtext en"
from _io import BytesIO
import locale
import mimetypes
import re
try:
maketrans = bytes.maketrans
except AttributeError:
from string import maketrans
import codecs
from io import BytesIO
from io import BytesIO # noqa: F811
from typing import Optional, Callable, Any, Tuple
from types import ModuleType
from logilab.common import deprecation
......@@ -45,13 +43,18 @@ from html.entities import name2codepoint
import pkg_resources
__version__ = pkg_resources.get_distribution("logilab-mtconverter").version
maketrans: Callable[[bytes, bytes], bytes] = bytes.maketrans
__version__: str = pkg_resources.get_distribution("logilab-mtconverter").version
__docformat__: str = "restructuredtext en"
try:
import chardet
except ImportError:
# chardet unavailable
chardet = None
# Name 'chardet' already defined (by an import)
chardet: Optional[ModuleType] = None # type: ignore[no-redef]
try:
DEFAULT_ENCODING = locale.getpreferredencoding()
......@@ -74,7 +77,7 @@ CHARDET_MIN_SIZE = 20
CHARDET_CONFIDENCE_THRESHOLD = 0.75
def need_guess(mimetype, encoding):
def need_guess(mimetype: str, encoding: str) -> bool:
"""return True if we can complete given mimetype / encoding information"""
if not mimetype:
return True
......@@ -83,11 +86,11 @@ def need_guess(mimetype, encoding):
return False
def is_text_mimetype(mimetype):
def is_text_mimetype(mimetype: str) -> bool:
return mimetype.startswith("text/") or mimetype in TEXT_MIMETYPES
def guess_encoding(buffer, fallbackencoding=None):
def guess_encoding(buffer: BytesIO, fallbackencoding: Optional[Any] = None) -> str:
"""try to guess encoding from a buffer"""
if hasattr(buffer, "getvalue"): # may be a StringIO
buffer = buffer.getvalue()
......@@ -113,13 +116,13 @@ def guess_encoding(buffer, fallbackencoding=None):
def guess_mimetype_and_encoding(
format=None,
encoding=None,
data=None,
filename=None,
fallbackencoding=None,
fallbackmimetype=u"application/octet-stream",
):
format: Optional[str] = None,
encoding: Optional[Any] = None,
data: Optional[bytes] = None,
filename: Optional[str] = None,
fallbackencoding: Optional[Any] = None,
fallbackmimetype: str = u"application/octet-stream",
) -> Tuple[Optional[str], Optional[str]]:
if format and format.split("/")[-1] in BINARY_ENCODINGS:
format = None # try to do better
if filename and not format:
......@@ -137,19 +140,19 @@ def guess_mimetype_and_encoding(
CONTROL_CHARS = [bytes((ci,)) for ci in range(32)]
TR_CONTROL_CHARS = [" "] * len(CONTROL_CHARS)
_TR_CONTROL_CHARS = [" "] * len(CONTROL_CHARS)
for c in ("\n", "\r", "\t"):
TR_CONTROL_CHARS[ord(c)] = c
TR_CONTROL_CHARS[ord("\f")] = "\n"
TR_CONTROL_CHARS[ord("\v")] = "\n"
TR_CONTROL_CHARS = [c.encode("ascii") for c in TR_CONTROL_CHARS]
_TR_CONTROL_CHARS[ord(c)] = c
_TR_CONTROL_CHARS[ord("\f")] = "\n"
_TR_CONTROL_CHARS[ord("\v")] = "\n"
TR_CONTROL_CHARS = [c.encode("ascii") for c in _TR_CONTROL_CHARS]
ESC_CAR_TABLE = maketrans(
"".encode("ascii").join(CONTROL_CHARS), "".encode("ascii").join(TR_CONTROL_CHARS)
)
ESC_UCAR_TABLE = ESC_CAR_TABLE.decode("latin1")
def xml_escape(data):
def xml_escape(data: str) -> str:
"""escapes XML forbidden characters in attributes and PCDATA"""
if isinstance(data, str):
data = data.translate(ESC_UCAR_TABLE)
......@@ -167,7 +170,7 @@ def xml_escape(data):
html_escape = deprecation.renamed("html_escape", xml_escape)
def html_unescape(data):
def html_unescape(data: str) -> str:
"""unescapes XML/HTML entities"""
for entityname, codepoint in name2codepoint.items():
data = data.replace("&%s;" % entityname, chr(codepoint))
......@@ -179,7 +182,9 @@ class TransformData:
type and encoding in case it applies
"""
def __init__(self, data, mimetype, encoding=None, **kwargs):
def __init__(
self, data: str, mimetype: str, encoding: Optional[str] = None, **kwargs: Any
) -> None:
self.__dict__.update(kwargs)
self.data = data
self.mimetype = mimetype
......@@ -187,11 +192,11 @@ class TransformData:
if not self.is_binary() and not encoding and not isinstance(self.data, str):
self.encoding = guess_encoding(data)
def get(self, attr, default=None):
def get(self, attr: str, default: Optional[Any] = None) -> Optional[Any]:
"""get an optional data attribute"""
return getattr(self, attr, default)
def decode(self, force=False):
def decode(self, force: bool = False) -> str:
"""return the data as an unicode string"""
if isinstance(self.data, str):
return self.data
......@@ -209,7 +214,7 @@ class TransformData:
encoding = guess_encoding(self.data)
return self.data.decode(encoding, UNICODE_POLICY)
def encode(self, encoding=None):
def encode(self, encoding: Optional[Any] = None) -> bytes:
"""return the data as an encoded string"""
if (encoding is None or self.encoding == encoding) and isinstance(
self.data, bytes
......@@ -218,14 +223,14 @@ class TransformData:
encoding = encoding or self.encoding or "utf8"
return self.decode().encode(encoding)
def is_binary(self):
def is_binary(self) -> bool:
return not is_text_mimetype(self.mimetype) or self.encoding in BINARY_ENCODINGS
def check_encoding(self):
def check_encoding(self) -> None:
if is_text_mimetype(self.mimetype) and self.is_binary():
raise TransformError()
def binary_decode(self):
def binary_decode(self): # type: ignore[no-untyped-def] # FIXME: is self.data a str or a byte?
if self.encoding == "gzip":
import gzip
......@@ -259,7 +264,9 @@ class TransformError(MtConverterError):
"""
def register_pil_transforms(engine, verb=True):
def register_pil_transforms(
engine: Any, verb: bool = True
) -> bool: # FIXME: engine: TransformEngine
try:
from logilab.mtconverter.transforms import piltransforms
except ImportError:
......@@ -273,7 +280,9 @@ def register_pil_transforms(engine, verb=True):
return True
def register_pygments_transforms(engine, verb=True):
def register_pygments_transforms(
engine: Any, verb: bool = True
) -> bool: # FIXME: engine: TransformEngine
try:
from logilab.mtconverter.transforms import pygmentstransforms
except ImportError:
......@@ -287,7 +296,9 @@ def register_pygments_transforms(engine, verb=True):
return True
def register_base_transforms(engine, verb=True):
def register_base_transforms(
engine: Any, verb: bool = True
) -> bool: # FIXME: engine: TransformEngine
from logilab.mtconverter.transforms import (
cmdtransforms,
text_to_text,
......
......@@ -17,11 +17,13 @@
# with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>.
"""the transformation engine"""
from logilab.mtconverter import TransformError
from logilab.mtconverter.transform import TransformsChain
from logilab.mtconverter import TransformData, TransformError
from logilab.mtconverter.transform import Transform, TransformsChain
from typing import Tuple, Dict, List, Optional
def split_mimetype(mimetype):
def split_mimetype(mimetype: str) -> Tuple[str, str]:
try:
main, sub = mimetype.split("/")
except ValueError:
......@@ -31,39 +33,32 @@ def split_mimetype(mimetype):
return main, sub
class TransformEngine(object):
class TransformEngine:
"""mimetype oriented conversions engine"""
def __init__(self):
self._mtmap = {}
self._mtmainmap = {}
self.transforms = {}
def __init__(self) -> None:
self._mtmap: Dict[str, dict] = {}
self._mtmainmap: Dict[str, dict] = {}
self.transforms: Dict[str, Transform] = {}
def add_transform(self, transform):
def add_transform(self, transform: Transform) -> None:
"""register a new transform"""
self._map_transform(transform)
def remove_transform(self, name, *inputs):
def remove_transform(self, name: str, *inputs: str) -> None:
""" unregister a transform
name is the name of a registered transform
"""
self._unmap_transform(self.transforms[name], *inputs)
def has_input(self, mimetype):
def has_input(self, mimetype: str) -> bool:
"""return True if the engine has a transformation taking the given
mimetype as input
"""
if mimetype in self._mtmap:
return True
if split_mimetype(mimetype)[0] in self._mtmainmap:
return True
return False
return mimetype in self._mtmap or split_mimetype(mimetype)[0] in self._mtmainmap
def convert(self, trdata, targetmimetype):
def convert(self, trdata: TransformData, targetmimetype: str) -> TransformData:
"""convert the given data structure into the given mime type
:param trdata: `TransformData`
:rtype: `TransformData`
"""
trdata.check_encoding()
# get a path to output mime type
......@@ -84,7 +79,7 @@ class TransformEngine(object):
transform = path[0]
return transform.convert(trdata)
def _map_transform(self, transform):
def _map_transform(self, transform: Transform) -> None:
"""map transform to internal structures"""
if not (transform.inputs and transform.output):
raise TransformError("transform is missing input or output")
......@@ -104,12 +99,13 @@ class TransformEngine(object):
inmap[transform.output].append(transform)
except KeyError:
inmap[transform.output] = [transform]
assert transform.name is not None
self.transforms[transform.name] = transform
def _unmap_transform(self, transform, *inputs):
def _unmap_transform(self, transform: Transform, *inputs: str) -> None:
"""unmap transform from internal structures"""
if not inputs:
inputs = transform.inputs
inputs = transform.inputs # type: ignore[assignment]
for mt in inputs:
main, sub = split_mimetype(mt)
if sub == "*":
......@@ -117,9 +113,12 @@ class TransformEngine(object):
else:
inmap = self._mtmap[mt]
inmap[transform.output].remove(transform)
assert transform.name is not None
del self.transforms[transform.name]
def find_path(self, orig, target, required_transforms=()):
def find_path(
self, orig: str, target: str, required_transforms: List[str] = []
) -> Optional[TransformsChain]:
"""return the shortest path for transformation from orig mimetype to
target mimetype
"""
......@@ -136,11 +135,15 @@ class TransformEngine(object):
shortest = len(path)
return winner
def _get_paths(self, orig, target, requirements, path=None, result=None):
def _get_paths( # type: ignore[no-untyped-def]
self, orig: str, target: str, requirements: List[str], path=None, result=None
) -> List[TransformsChain]:
"""return all paths for transformation from orig mimetype to
target mimetype
"""
if path is None:
if (
path is None
): # FIXME: why not empty list as default value for path & result?
result = []
path = []
requirements = list(requirements)
......@@ -157,7 +160,14 @@ class TransformEngine(object):
# we are done
return result
def _search_outputs(self, outputs, target, requirements, path, result):
def _search_outputs(
self,
outputs: Dict[str, List[Transform]],
target: str,
requirements: List[str],
path: List[Optional[Transform]],
result: List[List[Optional[Transform]]],
) -> None:
path.append(None)
for outputmimetype, transforms in outputs.items():
for transform in transforms:
......@@ -172,9 +182,11 @@ class TransformEngine(object):
path[-1] = transform
if outputmimetype == target:
if not requirements:
assert result is not None
result.append(path[:])
else:
self._get_paths(outputmimetype, target, requirements, path, result)
if required:
assert name is not None
requirements.append(name)
path.pop()
......@@ -17,7 +17,12 @@
# with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>.
"""base transformation objects"""
__docformat__ = "restructuredtext en"
from typing import Sequence, Optional
from typing import Any
from logilab.mtconverter import TransformData
from typing import Tuple
__docformat__: str = "restructuredtext en"
class Transform:
......@@ -25,18 +30,18 @@ class Transform:
into another MIME type
"""
name = None
inputs = ()
output = None
input_encoding = None
output_encoding = None
name: Optional[str] = None
inputs: Sequence[str] = []
output: Optional[str] = None
input_encoding: Optional[str] = None
output_encoding: Optional[str] = None
def __init__(self, **kwargs):
def __init__(self, **kwargs: Any) -> None:
self.__dict__.update(kwargs)
if not getattr(self, "name", None):
self.name = self.__class__.__name__
self.name: str = self.__class__.__name__
def convert(self, trdata):
def convert(self, trdata: TransformData) -> TransformData:
"""convert the given data structure into transform output's mime type
:param trdata: `TransformData`
......@@ -44,56 +49,57 @@ class Transform:
"""
# this is not true when the transform accepts wildcards
# assert trdata.mimetype in self.inputs
assert self.output is not None
trdata.data = self._convert(trdata)
trdata.mimetype = self.output
if self.output_encoding:
trdata.encoding = self.output_encoding
return trdata
def _convert(self, trdata):
def _convert(self, trdata: TransformData) -> NotImplemented:
raise NotImplementedError
class TransformsChain(list):
"""A chain of transforms used to transform data"""
inputs = ("application/octet-stream",)
output = "application/octet-stream"
name = None
inputs: Tuple[str, ...] = ("application/octet-stream",)
output: str = "application/octet-stream"
name: Optional[str] = None
def __init__(self, name=None, *args):
def __init__(self, name: str = None, *args: Any) -> None:
list.__init__(self, *args)
if name is not None:
self.name = name
if args:
self._update()
def convert(self, trdata):
def convert(self, trdata: TransformData) -> TransformData:
for transform in self:
trdata = transform.convert(trdata)
return trdata
def __setitem__(self, key, value):
def __setitem__(self, key, value) -> None: # type: ignore[no-untyped-def] # use Protocol?
list.__setitem__(self, key, value)
self._update()
def append(self, value):
def append(self, value) -> None: # type: ignore[no-untyped-def] # FIXME: is value a Transform?
list.append(self, value)
self._update()
def insert(self, *args):
def insert(self, *args) -> None: # type: ignore[no-untyped-def] # FIXME: function still used?
list.insert(*args)
self._update()
def remove(self, *args):
def remove(self, *args) -> None: # type: ignore[no-untyped-def] # FIXME: function still used?
list.remove(*args)
self._update()
def pop(self, *args):
def pop(self, *args) -> None: # type: ignore[no-untyped-def] # FIXME: function still used?
list.pop(*args)
self._update()
def _update(self):
def _update(self) -> None:
self.inputs = self[0].inputs
self.output = self[-1].output
for i in range(len(self)):
......
......@@ -30,31 +30,35 @@
"""some basic transformations (pure python)
"""
__docformat__ = "restructuredtext en"
import re
from typing import Tuple
from logilab.mtconverter import xml_escape
from logilab.mtconverter.transform import Transform
from logilab.mtconverter import TransformData
__docformat__: str = "restructuredtext en"
class IdentityTransform(Transform):
"""identity transform: leave the content unchanged"""
def _convert(self, trdata):
def _convert(self, trdata: TransformData) -> str:
return trdata.data
class text_to_text(IdentityTransform):
inputs = ("text/*",)
output = "text/plain"
inputs: Tuple[str, ...] = ("text/*",)
output: str = "text/plain"
class rest_to_text(Transform):
inputs = ("text/rest", "text/x-rst")
output = "text/plain"
inputs: Tuple[str, ...] = ("text/rest", "text/x-rst")
output: str = "text/plain"
def _convert(self, trdata):
def _convert(self, trdata: TransformData) -> str:
res = []
for line in trdata.data.splitlines():
sline = line.lstrip()
......@@ -68,18 +72,18 @@ _TAG_PROG = re.compile(r"</?.*?>", re.U)
class xml_to_text(Transform):
inputs = ("application/xml",)
output = "text/plain"
inputs: Tuple[str, ...] = ("application/xml",)
output: str = "text/plain"
def _convert(self, trdata):
def _convert(self, trdata: TransformData) -> str:
return _TAG_PROG.sub(" ", trdata.data)
class text_to_html(Transform):
inputs = ("text/plain",)
output = "text/html"
inputs: Tuple[str, ...] = ("text/plain",)
output: str = "text/html"
def _convert(self, trdata):
def _convert(self, trdata: TransformData) -> str:
res = ["<p>"]
for line in trdata.data.splitlines():
line = line.strip()
......@@ -96,10 +100,10 @@ class text_to_html_pre(Transform):
"""variant for text 2 html transformation : simply wrap text into pre tags
"""
inputs = ("text/plain",)
output = "text/html"
inputs: Tuple[str, ...] = ("text/plain",)
output: str = "text/html"
def _convert(self, trdata):
def _convert(self, trdata: TransformData) -> str:
res = ["<pre>"]
res.append(xml_escape(trdata.data))
res.append("</pre>")
......@@ -107,8 +111,8 @@ class text_to_html_pre(Transform):
class xlog_to_html(Transform):
inputs = ("text/x-log",)
output = "text/html"
inputs: Tuple[str, ...] = ("text/x-log",)
output: str = "text/html"
def _convert(self, trdata):