Source code for domdf_python_tools.utils

#!/usr/bin/env python
# cython: language_level=3
#
#  utils.py
"""
General utility functions.

.. versionchanged:: 1.0.0

	* Removed ``tuple2str`` and ``list2string``.
	  Use :func:`domdf_python_tools.utils.list2str` instead.
	* Removed ``as_text`` and ``word_join``.
	  Import from :mod:`domdf_python_tools.words` instead.
	* Removed ``splitLen``.
	  Use :func:`domdf_python_tools.iterative.split_len` instead.

.. versionchanged:: 2.0.0

	:func:`~domdf_python_tools.iterative.chunks`,
	:func:`~domdf_python_tools.iterative.permutations`,
	:func:`~domdf_python_tools.iterative.split_len`,
	:func:`~domdf_python_tools.iterative.Len`, and
	:func:`~domdf_python_tools.iterative.double_chain`
	moved to :func:`domdf_python_tools.iterative`.

.. versionchanged:: 2.3.0

	Removed :func:`domdf_python_tools.utils.deprecated`.
	Use the new `deprecation-alias <https://pypi.org/project/deprecation-alias/>`_ package instead.

"""
#
#  Copyright © 2018-2022 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  in the Software without restriction, including without limitation the rights
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in all
#  copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
#  OR OTHER DEALINGS IN THE SOFTWARE.
#
#  as_text from https://stackoverflow.com/a/40935194
# 		Copyright © 2016 User3759685
# 		Available under the MIT License
#
#  strtobool based on the "distutils" module from CPython.
#  Some docstrings based on the Python documentation.
#  Licensed under the Python Software Foundation License Version 2.
#  Copyright © 2001-2020 Python Software Foundation. All rights reserved.
#  Copyright © 2000 BeOpen.com. All rights reserved.
#  Copyright © 1995-2000 Corporation for National Research Initiatives. All rights reserved.
#  Copyright © 1991-1995 Stichting Mathematisch Centrum. All rights reserved.
#

# stdlib
import contextlib
import inspect
import json
import re
import sys
from io import StringIO
from math import log10
from pprint import pformat
from types import MethodType
from typing import (
		IO,
		TYPE_CHECKING,
		Any,
		Callable,
		Dict,
		Iterable,
		Iterator,
		List,
		Optional,
		Pattern,
		Set,
		Tuple,
		TypeVar,
		Union,
		overload
		)

# this package
import domdf_python_tools.words
from domdf_python_tools.typing import HasHead, String, SupportsLessThan

# pandas is only imported for static type checkers, or when the package-level
# ``domdf_python_tools.__docs`` flag is truthy; it is never imported otherwise.
if TYPE_CHECKING or domdf_python_tools.__docs:  # pragma: no cover
	# 3rd party
	from pandas import DataFrame, Series

	# Report both classes as living in the top-level "pandas" module.
	# NOTE(review): presumably so documentation cross-references resolve
	# to the public names -- confirm against the docs build.
	Series.__module__ = "pandas"
	DataFrame.__module__ = "pandas"

# Unconstrained type variable for generic signatures in this module.
_T = TypeVar("_T")

# Type variable restricted to types satisfying ``SupportsLessThan``
# (imported from ``domdf_python_tools.typing``).
SupportsLessThanT = TypeVar("SupportsLessThanT", bound=SupportsLessThan)

# Names exported by ``from domdf_python_tools.utils import *``.
__all__ = [
		"pyversion",
		"SPACE_PLACEHOLDER",
		"cmp",
		"list2str",
		"printr",
		"printt",
		"stderr_writer",
		"printe",
		"str2tuple",
		"strtobool",
		"enquote_value",
		"posargs2kwargs",
		"convert_indents",
		"etc",
		"head",
		"magnitude",
		"trim_precision",
		"double_repr_string",
		"redirect_output",
		"divide",
		"redivide",
		"unique_sorted",
		"replace_nonprinting",
		]

#: The current major python version.
pyversion: int = int(sys.version_info.major)  # taken from ``sys.version_info.major``

#: The ``␣`` character.
SPACE_PLACEHOLDER: str = '␣'


def cmp(x, y) -> int:
	"""
	Three-way comparison of two objects, in the manner of Python 2's ``cmp``.

	:param x: The left-hand operand.
	:param y: The right-hand operand.

	:return: A negative integer if ``x < y``, a strictly positive integer if ``x > y``,
		and zero when neither holds (normally because ``x == y``).
	"""

	is_greater = x > y
	is_less = x < y

	# bool arithmetic: True - False == 1, False - True == -1, otherwise 0
	return int(is_greater - is_less)
def list2str(the_list: Iterable[Any], sep: str = ',') -> str:
	"""
	Join the :class:`str` form of every item of an iterable into a single string.

	:param the_list: The iterable whose items are joined.
	:param sep: The separator placed between items.

	:return: The items of ``the_list``, each converted with :class:`str`, separated by ``sep``.
	"""

	return sep.join(map(str, the_list))
def printr(
		obj: Any,
		*values: object,
		sep: Optional[str] = ' ',
		end: Optional[str] = '\n',
		file: Optional[IO] = None,
		flush: bool = False,
		) -> None:
	r"""
	Print the :func:`repr` of ``obj``, followed by any further values.

	All keyword arguments are passed straight through to :func:`print`.

	:param obj: The object whose :func:`repr` is printed.
	:param \*values: Additional values to print. These are printed verbatim.
	:param sep: The separator between values.
	:param end: The final value to print.
		Setting to ``''`` will leave the insertion point at the end of the printed text.
	:param file: The file to write to.
		If not present or :py:obj:`None`, :py:obj:`sys.stdout` will be used.
	:no-default file:
	:param flush: If :py:obj:`True` the stream is forcibly flushed after printing.
	"""

	representation = repr(obj)
	print(representation, *values, sep=sep, end=end, file=file, flush=flush)
def printt(
		obj: Any,
		*values: object,
		sep: Optional[str] = ' ',
		end: Optional[str] = '\n',
		file: Optional[IO] = None,
		flush: bool = False,
		) -> None:
	r"""
	Print the type of ``obj``, followed by any further values.

	All keyword arguments are passed straight through to :func:`print`.

	:param obj: The object whose type is printed.
	:param \*values: Additional values to print. These are printed verbatim.
	:param sep: The separator between values.
	:param end: The final value to print.
		Setting to ``''`` will leave the insertion point at the end of the printed text.
	:param file: The file to write to.
		If not present or :py:obj:`None`, :py:obj:`sys.stdout` will be used.
	:no-default file:
	:param flush: If :py:obj:`True` the stream is forcibly flushed after printing.
	"""

	obj_type = type(obj)
	print(obj_type, *values, sep=sep, end=end, file=file, flush=flush)
def stderr_writer(
		*values: object,
		sep: Optional[str] = ' ',
		end: Optional[str] = '\n',
		) -> None:
	r"""
	Write ``*values`` to :py:obj:`sys.stderr`, separated by ``sep`` and terminated with ``end``.

	:py:obj:`sys.stdout` is flushed first, and :py:obj:`sys.stderr` is flushed afterwards.

	If no values are given only ``end`` is written.

	:param \*values: The values to print.
	:param sep: The separator between values.
	:param end: The final value to print.
		Setting to ``''`` will leave the insertion point at the end of the printed text.

	:rtype:

	.. versionchanged:: 3.0.0

		The only permitted keyword arguments are ``sep`` and ``end``.
		Previous versions allowed other keywords arguments supported by :func:`print` but they had no effect.
	"""

	# Flush pending stdout output before anything is written to stderr.
	sys.stdout.flush()

	error_stream = sys.stderr
	print(*values, sep=sep, end=end, file=error_stream, flush=True)
	error_stream.flush()
#: Alias of :func:`~.stderr_writer` printe = stderr_writer
def str2tuple(input_string: str, sep: str = ',') -> Tuple[int, ...]:
	"""
	Parse a delimited string of integers into a tuple of :class:`int`.

	.. latex:vspace:: -10px
	.. important:: Every field of the input string must be parseable by :class:`int`.

	.. TODO:: Allow custom types, not just :class:`int` (making :class:`int` the default)

	.. latex:vspace:: -20px

	:param input_string: The string to be converted into a tuple.
	:param sep: The separator between the integers in the string.

	:raises: :py:exc:`ValueError` (from :class:`int`) if a field is not a valid integer.
	"""

	fields = input_string.split(sep)
	return tuple(map(int, fields))
def strtobool(val: Union[str, int]) -> bool:
	"""
	Interpret a string (or integer) as a boolean.

	Integers, including booleans, are converted with :class:`bool`.
	Strings are compared case-insensitively:

	* ``'y'``, ``'yes'``, ``'t'``, ``'true'``, ``'on'`` and ``'1'`` give :py:obj:`True`.
	* ``'n'``, ``'no'``, ``'f'``, ``'false'``, ``'off'`` and ``'0'`` give :py:obj:`False`.

	:param val: The value to interpret.

	:raises: :py:exc:`ValueError` if ``val`` is a string not listed above.
	"""

	if isinstance(val, int):
		return bool(val)

	lowered = val.lower()

	if lowered in {'y', "yes", 't', "true", "on", '1'}:
		return True

	if lowered in {'n', "no", 'f', "false", "off", '0'}:
		return False

	# The message reports the lower-cased form of the input.
	raise ValueError(f"invalid truth value {lowered!r}")
def enquote_value(value: Any) -> Union[str, bool, float]:
	"""
	Adds single quotes (``'``) to the given value, suitable for use in a templating system such as Jinja2.

	:class:`Floats <float>`, :class:`integers <int>`, :class:`booleans <bool>`, :py:obj:`None`,
	and the strings ``'True'``, ``'False'`` and ``'None'`` are returned as-is.

	Other strings are returned as their :func:`repr`. Any other object, including
	unhashable ones such as lists and dictionaries, is formatted with :class:`str`
	and wrapped in single quotes.

	:param value: The value to enquote

	:return: ``value`` itself for the pass-through cases above, otherwise the quoted string.
	"""

	try:
		# Set membership hashes ``value``, which raises TypeError for unhashable
		# objects; those can never be one of the literals below.
		is_literal = value in {"True", "False", "None", True, False, None}
	except TypeError:
		is_literal = False

	if is_literal or isinstance(value, (int, float)):
		return value

	if isinstance(value, str):
		return repr(value)

	return f"'{value}'"
def posargs2kwargs(
		args: Iterable[Any],
		posarg_names: Union[Iterable[str], Callable],
		kwargs: Optional[Dict[str, Any]] = None,
		) -> Dict[str, Any]:
	"""
	Convert the positional args in ``args`` to kwargs, based on the relative order of ``args`` and ``posarg_names``.

	.. important:: Python 3.8's Positional-Only Parameters (:pep:`570`) are not supported.

	.. versionadded:: 0.4.10

	:param args: List of positional arguments provided to a function.
		Any iterable is accepted, including one-shot iterators.
	:param posarg_names: Either a list of positional argument names for the function, or the function object.
	:param kwargs: Optional mapping of keyword argument names to values.
		The arguments will be added to this dictionary if provided.
	:default kwargs: ``{}``

	:return: Dictionary mapping argument names to values.

	:raises: :py:exc:`TypeError` if a positional argument's name is already present in ``kwargs``.
		``kwargs`` is left unmodified in that case.

	.. versionchanged:: 2.8.0

		The "self" argument for bound methods is ignored.
		For unbound methods (which are just functions) the behaviour is unchanged.
	"""

	if kwargs is None:
		kwargs = {}

	self_arg = None
	method_name = None
	names: Iterable[str]

	if isinstance(posarg_names, MethodType):
		# Remember the name now; ``posarg_names`` is replaced by a plain list of names below.
		method_name = posarg_names.__name__
		all_names = inspect.getfullargspec(posarg_names).args
		# The first named parameter of a bound method is its "self" argument.
		# A method taking only ``*args`` has no named parameters at all.
		if all_names:
			self_arg = all_names[0]
		names = all_names[1:]
	elif callable(posarg_names):
		names = inspect.getfullargspec(posarg_names).args
	else:
		names = posarg_names

	# Pair names with values exactly once, so that one-shot iterators
	# are not exhausted by the duplicate check before the update.
	pairs = list(zip(names, args))

	for name, _ in pairs:
		if name in kwargs:
			if method_name is not None:
				raise TypeError(f"{method_name}(): got multiple values for argument '{name}'")
			else:
				raise TypeError(f"got multiple values for argument '{name}'")

	kwargs.update(pairs)

	if self_arg is not None and self_arg in kwargs:
		del kwargs[self_arg]

	# TODO: positional only arguments

	return kwargs
def convert_indents(text: str, tab_width: int = 4, from_: str = '\t', to: str = ' ') -> str:
	r"""
	Convert indentation at the start of lines in ``text`` from tabs to spaces.

	Each leading occurrence of ``from_`` on a line is replaced with ``to`` repeated ``tab_width`` times.
	The text is split with :meth:`str.splitlines` and re-joined with ``'\n'``,
	so a trailing newline is not preserved.

	If ``from_`` is an empty string there is no indentation to convert,
	and the lines are re-joined without modification.

	:param text: The text to convert indents in.
	:param tab_width: The number of spaces per tab.
	:param from\_: The indent to convert from.
	:param to: The indent to convert to.
	"""

	replacement = to * tab_width
	from_size = len(from_)
	output = []

	for line in text.splitlines():
		indent_count = 0

		# An empty ``from_`` is a prefix of every string, which would never terminate.
		if from_size:
			# Scan forward by offset rather than re-slicing the line on each iteration.
			while line.startswith(from_, indent_count * from_size):
				indent_count += 1

		output.append(f"{replacement * indent_count}{line[indent_count * from_size:]}")

	return '\n'.join(output)
class _Etcetera(str):
	"""
	A :class:`str` subclass whose value is always ``'...'``
	and whose :func:`repr` shows that value without quotes.
	"""

	__slots__ = ()

	def __new__(cls):
		# The value is fixed; the constructor takes no arguments.
		return super().__new__(cls, "...")

	def __repr__(self) -> str:
		# Unlike a plain string, the repr carries no surrounding quotes.
		return str(self)


etc = _Etcetera()
"""
Object that provides an ellipsis string

.. versionadded:: 0.8.0
"""
def magnitude(x: float) -> int:
	"""
	Returns the magnitude of the given value.

	The magnitude is the base-10 logarithm of the absolute value of ``x``,
	truncated towards zero by :class:`int`.

	* For negative numbers the magnitude of the absolute value is returned.
	* For zero, ``0`` is returned.

	.. versionadded:: 2.0.0

	:param x: Numerical value to find the magnitude of.
	"""

	# Anything that is neither above nor below zero has magnitude 0.
	if x > 0.0 or x < 0.0:
		return int(log10(abs(x)))

	return 0
def trim_precision(value: float, precision: int = 4) -> float:
	"""
	Round a floating point value to a fixed number of decimal places.

	The value is formatted with ``precision`` decimal places and parsed back into a :class:`float`.

	For example, if you have the value `170.10000000000002` but really only care about it being ``≈ 170.1``:

	.. code-block:: python

		>>> trim_precision(170.10000000000002, 2)
		170.1
		>>> type(trim_precision(170.10000000000002, 2))
		<class 'float'>

	.. versionadded:: 2.0.0

	:param value: The value to trim.
	:param precision: The number of decimal places to leave in the output.
	"""

	formatted = f"{value:0.{precision}f}"
	return float(formatted)
def double_repr_string(string: str) -> str:
	"""
	Like :func:`repr(str) <repr>`, but tries to use double quotes instead.

	The plain :func:`repr` is used only when the string contains a double quote
	and no single quote; every other string is encoded with :func:`json.dumps`
	(with ``ensure_ascii=False``), which always delimits with double quotes.

	.. versionadded:: 2.5.0

	:param string: The string to represent.
	"""

	has_double = '"' in string
	has_single = "'" in string

	# Double quotes are preferred unless they alone would need escaping.
	if has_single or not has_double:
		return json.dumps(string, ensure_ascii=False)

	return repr(string)
@contextlib.contextmanager
def redirect_output(combine: bool = False) -> Iterator[Tuple[StringIO, StringIO]]:
	"""
	Context manager which captures stdout and stderr in :class:`io.StringIO` objects.

	The two buffers are given (as a :class:`tuple`, stdout first)
	to the target of the :keyword:`as` expression.

	Example:

	.. code-block:: python

		with redirect_output() as (stdout, stderr):
			...

	.. versionadded:: 2.6.0

	:param combine: If :py:obj:`True` ``stderr`` is combined with ``stdout``;
		both elements of the tuple are then the same object.
	"""

	stdout = StringIO()
	stderr = stdout if combine else StringIO()

	with contextlib.redirect_stdout(stdout):
		with contextlib.redirect_stderr(stderr):
			yield stdout, stderr
def divide(string: str, sep: str) -> Tuple[str, str]:
	"""
	Split a string in two at the first occurrence of ``sep``.

	.. versionadded:: 2.7.0

	:param string: The string to divide.
	:param sep: The separator to split at.

	:return: The text before the first ``sep`` and the text after it.

	:raises: :py:exc:`ValueError` if ``sep`` does not occur in ``string``.
	"""

	if sep not in string:
		raise ValueError(f"{sep!r} not in {string!r}")

	before, _, after = string.partition(sep)
	return before, after
def redivide(string: str, pat: Union[str, Pattern]) -> Tuple[str, str]:
	"""
	Divide a string into two parts, splitting on the given regular expression.

	The string is divided around the first match of ``pat``. The matched text itself
	is discarded, whether or not the pattern contains capturing groups,
	so exactly two parts are always returned.

	.. versionadded:: 2.7.0

	:param string: The string to divide.
	:param pat: The regular expression to split at, either as a string or a compiled pattern.

	:return: The text before the first match and the text after it.

	:raises: :py:exc:`ValueError` if ``pat`` has no matches in ``string``.

	:rtype:

	.. latex:clearpage::
	"""

	if isinstance(pat, str):
		pat = re.compile(pat)

	match = pat.search(string)

	if match is None:
		raise ValueError(f"{pat!r} has no matches in {string!r}")

	# Slice around the match rather than using ``Pattern.split``, which would
	# insert the text of any capturing groups into the result.
	return string[:match.start()], string[match.end():]
# Typing overload: without a ``key`` the elements themselves are compared,
# so their type is bound to ``SupportsLessThan``.
@overload
def unique_sorted(
		elements: Iterable[SupportsLessThanT],
		*,
		key: None = ...,
		reverse: bool = ...,
		) -> List[SupportsLessThanT]: ...


# Typing overload: with a ``key`` the elements may be of any type,
# and it is the key function's return value that is typed as ``SupportsLessThan``.
@overload
def unique_sorted(
		elements: Iterable[_T],
		*,
		key: Callable[[_T], SupportsLessThan],
		reverse: bool = ...,
		) -> List[_T]: ...
def unique_sorted(
		elements: Iterable,
		*,
		key: Optional[Callable] = None,
		reverse: bool = False,
		) -> List:
	"""
	Returns a sorted list of the unique items in ``elements``.

	Duplicates are removed by building a :class:`set`, so the items must be hashable.

	.. versionadded:: 3.0.0

	:param elements: The items to de-duplicate and sort.
	:param key: A function of one argument used to extract a comparison key from each item when sorting.
		For example, :meth:`key=str.lower <str.lower>`.
		The default value is :py:obj:`None`, which will compare the elements directly.
	:param reverse: If :py:obj:`True` the list elements are sorted as if each comparison were reversed.

	.. seealso:: :class:`set` and :func:`sorted`
	"""

	unique_items = list(set(elements))
	unique_items.sort(key=key, reverse=reverse)
	return unique_items
def replace_nonprinting(string: str, exclude: Optional[Set[int]] = None) -> str:
	"""
	Replace nonprinting (control) characters in ``string`` with a printable notation.

	* Codepoints 0 to 31 become ``^`` followed by the character 64 codepoints higher
	  (e.g. a tab becomes ``^I``).
	* Codepoint 127 becomes ``^?``.
	* Codepoints 128 to 255 become ``M+`` followed by the character 64 codepoints lower.

	.. versionadded:: 3.3.0

	:param string: The string to replace characters in.
	:param exclude: A set of codepoints to exclude. These are left untouched.

	:rtype:

	.. seealso:: :wikipedia:`C0 and C1 control codes` on Wikipedia
	"""

	# https://stackoverflow.com/a/44952259

	skipped = set() if exclude is None else exclude

	# Maps codepoints to replacement strings, in the form expected by ``str.translate``.
	table = {cp: f"^{chr(64 + cp)}" for cp in range(32) if cp not in skipped}

	if 127 not in skipped:
		table[127] = "^?"

	table.update({cp: f"M+{chr(cp-64)}" for cp in range(128, 256) if cp not in skipped})

	return string.translate(table)