Source code for cardinal_pythonlib.rnc_text

#!/usr/bin/env python
# cardinal_pythonlib/rnc_text.py

"""
===============================================================================

    Original code copyright (C) 2009-2022 Rudolf Cardinal (rudolf@pobox.com).

    This file is part of cardinal_pythonlib.

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        https://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.

===============================================================================

**Low-quality functions relating to textfile results storage/analysis.**

"""

import csv
import datetime
import logging
from typing import Any, Dict, Iterable, List, Optional, Sequence, TextIO, Tuple

from cardinal_pythonlib.logs import BraceStyleAdapter

log = BraceStyleAdapter(logging.getLogger(__name__))


def produce_csv_output(
    filehandle: TextIO, fields: Sequence[str], values: Iterable[str]
) -> None:
    """
    Write a header line followed by data lines, via :func:`output_csv`.

    - Deliberately avoids ``csv.writer``.
    - POOR; DEPRECATED.

    Args:
        filehandle: file to write to
        fields: field names; written first, as the header line
        values: iterable of rows; each element is handed in turn to
            :func:`output_csv` as the values for one line
    """
    # Header first...
    output_csv(filehandle, fields)
    # ... then one line per data row.
    for data_row in values:
        output_csv(filehandle, data_row)
def output_csv(filehandle: TextIO, values: Iterable[str]) -> None:
    """
    Write one comma-separated line, terminated by a newline.

    POOR; performs no quoting or escaping, so values containing commas,
    quotes or newlines will corrupt the output. DEPRECATED.

    Args:
        filehandle: file to write to
        values: string values making up the line
    """
    filehandle.write(",".join(values) + "\n")
def get_what_follows_raw(
    s: str, prefix: str, onlyatstart: bool = True, stripwhitespace: bool = True
) -> Tuple[bool, str]:
    """
    Return whatever comes after the first occurrence of ``prefix`` in ``s``.

    Args:
        s: string to analyse
        prefix: prefix to find
        onlyatstart: if true, the prefix only counts when it is at the very
            start of ``s``; otherwise its first occurrence anywhere counts
        stripwhitespace: strip leading/trailing whitespace from the result

    Returns:
        tuple: ``(found, result)``; ``(False, "")`` when there is no
        acceptable match
    """
    position = s.find(prefix)
    if position == -1:
        # Prefix is absent altogether.
        return False, ""
    if onlyatstart and position != 0:
        # Prefix is present, but not where the caller requires it.
        return False, ""
    remainder = s[position + len(prefix) :]
    if stripwhitespace:
        remainder = remainder.strip()
    return True, remainder
def get_what_follows(
    strings: Sequence[str],
    prefix: str,
    onlyatstart: bool = True,
    stripwhitespace: bool = True,
    precedingline: str = "",
) -> str:
    """
    Search ``strings`` for the first line containing ``prefix`` (as judged by
    :func:`get_what_follows_raw`) and return the part after ``prefix``.
    Optionally, only consider lines whose immediately preceding line starts
    with ``precedingline``.

    Args:
        strings: strings to analyse
        prefix: prefix to find
        onlyatstart: only accept the prefix if it is right at the start of
            the line
        stripwhitespace: remove whitespace from the result
        precedingline: if truthy, require that the preceding line begin with
            this text

    Returns:
        the line fragment, or ``""`` if nothing matched
    """
    if precedingline:
        # Candidates are lines 1..n-1 whose predecessor starts with the
        # required text; the first line has no predecessor and never counts.
        candidates = (
            strings[i]
            for i in range(1, len(strings))
            if strings[i - 1].startswith(precedingline)
        )
    else:
        candidates = iter(strings)

    for line in candidates:
        found, fragment = get_what_follows_raw(
            line, prefix, onlyatstart, stripwhitespace
        )
        if found:
            return fragment
    return ""
def get_string(
    strings: Sequence[str],
    prefix: str,
    ignoreleadingcolon: bool = False,
    precedingline: str = "",
) -> Optional[str]:
    """
    Look up a string via :func:`get_what_follows`, mapping "nothing there" to
    ``None``.

    Args:
        strings: see :func:`get_what_follows`
        prefix: see :func:`get_what_follows`
        ignoreleadingcolon: if ``True``, and the result contains a colon,
            keep only what follows its first colon (whitespace-stripped)
        precedingline: see :func:`get_what_follows`

    Returns:
        the line fragment, or ``None`` if it is empty
    """
    fragment = get_what_follows(strings, prefix, precedingline=precedingline)
    if ignoreleadingcolon:
        _, colon, after_colon = fragment.partition(":")
        if colon:  # a colon was present
            fragment = after_colon.strip()
    return fragment or None
def get_string_relative(
    strings: Sequence[str],
    prefix1: str,
    delta: int,
    prefix2: str,
    ignoreleadingcolon: bool = False,
    stripwhitespace: bool = True,
) -> Optional[str]:
    """
    Find an "anchor" line beginning with ``prefix1``, look ``delta`` lines
    away from it, and if that line begins with ``prefix2``, return what
    follows ``prefix2`` on it.

    Anchors whose target line is out of range, or does not begin with
    ``prefix2``, are skipped and the search continues with later anchors.

    Args:
        strings: lines to search
        prefix1: text that the anchor line must begin with
        delta: offset (in lines; may be negative) from anchor to target line
        prefix2: text that the target line must begin with
        ignoreleadingcolon: restrict the result to the part after its first
            colon (that part is always whitespace-stripped)?
        stripwhitespace: strip whitespace from the start/end of the result?

    Returns:
        the line fragment, or ``None`` if nothing matched or the fragment is
        empty
    """
    n_lines = len(strings)
    for anchor_index, anchor_line in enumerate(strings):
        if not anchor_line.startswith(prefix1):
            continue
        target_index = anchor_index + delta
        if not 0 <= target_index < n_lines:
            continue  # target would fall outside the list
        target_line = strings[target_index]
        if not target_line.startswith(prefix2):
            continue
        fragment = target_line[len(prefix2) :]
        if stripwhitespace:
            fragment = fragment.strip()
        if ignoreleadingcolon:
            _, colon, after_colon = fragment.partition(":")
            if colon:
                fragment = after_colon.strip()
        return fragment or None
    return None
def get_int(
    strings: Sequence[str],
    prefix: str,
    ignoreleadingcolon: bool = False,
    precedingline: str = "",
) -> Optional[int]:
    """
    Fetches an integer parameter: locates the text with :func:`get_string`
    and converts it with :func:`get_int_raw`.
    """
    text = get_string(
        strings,
        prefix,
        ignoreleadingcolon=ignoreleadingcolon,
        precedingline=precedingline,
    )
    return get_int_raw(text)
def get_float(
    strings: Sequence[str],
    prefix: str,
    ignoreleadingcolon: bool = False,
    precedingline: str = "",
) -> Optional[float]:
    """
    Fetches a float parameter: locates the text with :func:`get_string` and
    converts it with :func:`get_float_raw`.
    """
    text = get_string(
        strings,
        prefix,
        ignoreleadingcolon=ignoreleadingcolon,
        precedingline=precedingline,
    )
    return get_float_raw(text)
def get_int_raw(s: str) -> Optional[int]:
    """
    Converts its input to an int, passing ``None`` through unchanged.

    Args:
        s: string (or ``None``)

    Returns:
        ``int(s)``, or ``None`` if ``s`` is ``None``

    Raises:
        ValueError: if it's a bad string
    """
    return None if s is None else int(s)
def get_bool_raw(s: str) -> Optional[bool]:
    """
    Maps ``"Y"``/``"y"`` to ``True`` and ``"N"``/``"n"`` to ``False``.
    Anything else gives ``None``.
    """
    if s in ("Y", "y"):
        return True
    if s in ("N", "n"):
        return False
    return None
def get_float_raw(s: str) -> Optional[float]:
    """
    Converts its input to a float, passing ``None`` through unchanged.

    Args:
        s: string (or ``None``)

    Returns:
        ``float(s)``, or ``None`` if ``s`` is ``None``

    Raises:
        ValueError: if it's a bad string
    """
    return None if s is None else float(s)
def get_bool(
    strings: Sequence[str],
    prefix: str,
    ignoreleadingcolon: bool = False,
    precedingline: str = "",
) -> Optional[bool]:
    """
    Fetches a boolean parameter: locates the text with :func:`get_string` and
    converts it with :func:`get_bool_raw`.
    """
    text = get_string(
        strings,
        prefix,
        ignoreleadingcolon=ignoreleadingcolon,
        precedingline=precedingline,
    )
    return get_bool_raw(text)
def get_bool_relative(
    strings: Sequence[str],
    prefix1: str,
    delta: int,
    prefix2: str,
    ignoreleadingcolon: bool = False,
) -> Optional[bool]:
    """
    Fetches a boolean parameter: locates the text with
    :func:`get_string_relative` and converts it with :func:`get_bool_raw`.
    """
    text = get_string_relative(
        strings,
        prefix1,
        delta,
        prefix2,
        ignoreleadingcolon=ignoreleadingcolon,
    )
    return get_bool_raw(text)
def get_float_relative(
    strings: Sequence[str],
    prefix1: str,
    delta: int,
    prefix2: str,
    ignoreleadingcolon: bool = False,
) -> Optional[float]:
    """
    Fetches a float parameter: locates the text with
    :func:`get_string_relative` and converts it with :func:`get_float_raw`.
    """
    text = get_string_relative(
        strings,
        prefix1,
        delta,
        prefix2,
        ignoreleadingcolon=ignoreleadingcolon,
    )
    return get_float_raw(text)
def get_int_relative(
    strings: Sequence[str],
    prefix1: str,
    delta: int,
    prefix2: str,
    ignoreleadingcolon: bool = False,
) -> Optional[int]:
    """
    Fetches an int parameter: locates the text with
    :func:`get_string_relative` and converts it with :func:`get_int_raw`.
    """
    text = get_string_relative(
        strings,
        prefix1,
        delta,
        prefix2,
        ignoreleadingcolon=ignoreleadingcolon,
    )
    return get_int_raw(text)
def get_datetime(
    strings: Sequence[str],
    prefix: str,
    datetime_format_string: str,
    ignoreleadingcolon: bool = False,
    precedingline: str = "",
) -> Optional[datetime.datetime]:
    """
    Fetches a ``datetime.datetime`` parameter via :func:`get_string`.

    Args:
        strings: see :func:`get_string`
        prefix: see :func:`get_string`
        datetime_format_string: format passed to
            :func:`datetime.datetime.strptime`; a typical one is
            ``"%d-%b-%Y (%H:%M:%S)"``. For the full syntax, see
            http://docs.python.org/library/datetime.html
        ignoreleadingcolon: see :func:`get_string`
        precedingline: see :func:`get_string`

    Returns:
        the parsed datetime, or ``None`` if no (non-empty) string was found

    Raises:
        ValueError: if the string found does not match
            ``datetime_format_string``
    """
    text = get_string(
        strings,
        prefix,
        ignoreleadingcolon=ignoreleadingcolon,
        precedingline=precedingline,
    )
    if not text:
        # get_string() signals "nothing found" with None (never with ""), so
        # a len() check here would raise TypeError rather than return None.
        return None
    return datetime.datetime.strptime(text, datetime_format_string)
def find_line_beginning(
    strings: Sequence[str], linestart: Optional[str]
) -> int:
    """
    Returns the index of the first line in ``strings`` that begins with
    ``linestart``, or ``-1`` if there is none.

    If ``linestart is None``, the first empty line (as judged by
    :func:`is_empty_string`) is sought instead.
    """
    if linestart is None:
        # Special case: look for a blank line.
        hits = (
            index
            for index, line in enumerate(strings)
            if is_empty_string(line)
        )
    else:
        hits = (
            index
            for index, line in enumerate(strings)
            if line.startswith(linestart)
        )
    return next(hits, -1)
def find_line_containing(strings: Sequence[str], contents: str) -> int:
    """
    Returns the index of the first line in ``strings`` that contains
    ``contents`` anywhere within it, or ``-1`` if there is none.
    """
    hits = (index for index, line in enumerate(strings) if contents in line)
    return next(hits, -1)
def get_lines_from_to(
    strings: List[str],
    firstlinestart: str,
    list_of_lastline_starts: Iterable[Optional[str]],
) -> List[str]:
    """
    Extracts a run of lines from ``strings``.

    The run starts AT the first line beginning with ``firstlinestart``
    (inclusive) and stops BEFORE the earliest line, from that point on, that
    begins with any of ``list_of_lastline_starts`` (exclusive). If none of
    the terminators is found, the run extends to the end of the list.

    To search to the end of the list, use ``list_of_lastline_starts = []``.

    To search to a blank line, use ``list_of_lastline_starts = [None]``

    Returns:
        the selected lines; ``[]`` if the starting line is not found
    """
    first_index = find_line_beginning(strings, firstlinestart)
    if first_index == -1:
        return []

    # All terminator searches are relative to the starting line.
    remainder = strings[first_index:]
    offsets = [
        find_line_beginning(remainder, terminator)
        for terminator in list_of_lastline_starts
    ]
    found_offsets = [offset for offset in offsets if offset != -1]
    if not found_offsets:
        return remainder  # no terminator found: run to the end
    return remainder[: min(found_offsets)]
def is_empty_string(s: str) -> bool:
    """
    Is the string empty once leading/trailing whitespace is removed?
    """
    return not s.strip()
def csv_to_list_of_fields(
    lines: List[str], csvheader: str, quotechar: str = '"'
) -> List[List[str]]:
    """
    Pulls a CSV block out of a longer list of text lines. The block starts
    at a line beginning with the given header and is terminated by a blank
    line (or the end of the list). The header line itself is discarded.

    Used for processing e.g. MonkeyCantab rescue text output.

    Args:
        lines: text lines containing the CSV block
        csvheader: CSV header line
        quotechar: ``quotechar`` parameter passed to :func:`csv.reader`

    Returns:
        list (by row) of lists (by value); see example

    Test code:

    .. code-block:: python

        import logging
        from cardinal_pythonlib.rnc_text import *
        logging.basicConfig(level=logging.DEBUG)

        myheader = "field1,field2,field3"
        mycsvlines = [
            "irrelevant line",
            myheader,  # header: START
            "row1value1,row1value2,row1value3",
            "row2value1,row2value2,row2value3",
            "",  # terminating blank line: END
            "other irrelevant line",
        ]
        csv_to_list_of_fields(mycsvlines, myheader)
        # [['row1value1', 'row1value2', 'row1value3'],
        #  ['row2value1', 'row2value2', 'row2value3']]

    """
    # [None] as the terminator list means "stop at the first blank line".
    block = get_lines_from_to(lines, csvheader, [None])
    data_lines = block[1:]  # drop the CSV header
    return list(csv.reader(data_lines, quotechar=quotechar))
def csv_to_list_of_dicts(
    lines: List[str], csvheader: str, quotechar: str = '"'
) -> List[Dict[str, str]]:
    """
    Pulls a CSV block out of a longer list of text lines and returns one
    dictionary per data row. The block starts at a line beginning with the
    given header and is terminated by a blank line (or the end of the list).

    Field names are obtained by splitting ``csvheader`` on commas.

    Args:
        lines: text lines containing the CSV block
        csvheader: CSV header line
        quotechar: ``quotechar`` parameter passed to :func:`csv.reader`

    Returns:
        list of dictionaries mapping fieldnames (from the header) to values

    Raises:
        IndexError: if a data row has fewer values than the header has fields
    """
    # [None] as the terminator list means "stop at the first blank line".
    block = get_lines_from_to(lines, csvheader, [None])
    data_lines = block[1:]  # drop the CSV header
    fieldnames = csvheader.split(",")
    return [
        # Index by header position, so short rows fail loudly; surplus
        # values beyond the header are ignored.
        {name: values[position] for position, name in enumerate(fieldnames)}
        for values in csv.reader(data_lines, quotechar=quotechar)
    ]
def dictlist_convert_to_string(dict_list: Iterable[Dict], key: str) -> None:
    """
    Process an iterable of dictionaries. For each dictionary ``d``, replace
    (in place) ``d[key]`` with ``str(d[key])``, except that a blank result
    is stored as ``None``.
    """
    for record in dict_list:
        text = str(record[key])
        record[key] = text or None
def dictlist_convert_to_datetime(
    dict_list: Iterable[Dict], key: str, datetime_format_string: str
) -> None:
    """
    Process an iterable of dictionaries. For each dictionary ``d``, replace
    (in place) ``d[key]`` with the ``datetime.datetime`` obtained by parsing
    it with :func:`datetime.datetime.strptime`, using
    ``datetime_format_string`` as the format.
    """
    for record in dict_list:
        raw_value = record[key]
        record[key] = datetime.datetime.strptime(
            raw_value, datetime_format_string
        )
def dictlist_convert_to_int(dict_list: Iterable[Dict], key: str) -> None:
    """
    Process an iterable of dictionaries. For each dictionary ``d``, convert
    (in place) ``d[key]`` to an integer. If that fails, convert it to
    ``None``.

    Args:
        dict_list: dictionaries to modify in place
        key: key whose value is to be converted

    Raises:
        KeyError: if a dictionary lacks ``key``
    """
    for d in dict_list:
        try:
            d[key] = int(d[key])
        except (TypeError, ValueError):
            # ValueError: unparseable string. TypeError: non-numeric,
            # non-string value such as None (which is what
            # dictlist_convert_to_string() stores for blank values).
            d[key] = None
def dictlist_convert_to_float(dict_list: Iterable[Dict], key: str) -> None:
    """
    Process an iterable of dictionaries. For each dictionary ``d``, convert
    (in place) ``d[key]`` to a float. If that fails, convert it to ``None``.

    Args:
        dict_list: dictionaries to modify in place
        key: key whose value is to be converted

    Raises:
        KeyError: if a dictionary lacks ``key``
    """
    for d in dict_list:
        try:
            d[key] = float(d[key])
        except (TypeError, ValueError):
            # ValueError: unparseable string. TypeError: non-numeric,
            # non-string value such as None (which is what
            # dictlist_convert_to_string() stores for blank values).
            d[key] = None
def dictlist_convert_to_bool(dict_list: Iterable[Dict], key: str) -> None:
    """
    Process an iterable of dictionaries. For each dictionary ``d``, replace
    (in place) ``d[key]`` with a Boolean-like integer flag: ``1`` if the
    value equals ``"Y"``, and ``0`` for anything else.

    Note that the stored values are the integers 1/0 (not ``True``/``False``)
    and that no value is ever converted to ``None``.
    """
    for record in dict_list:
        if record[key] == "Y":
            record[key] = 1
        else:
            record[key] = 0
def dictlist_replace(dict_list: Iterable[Dict], key: str, value: Any) -> None:
    """
    Process an iterable of dictionaries. For each dictionary ``d``, set
    (in place) ``d[key]`` to ``value``, creating the entry if it is absent.
    """
    for record in dict_list:
        record[key] = value
def dictlist_wipe_key(dict_list: Iterable[Dict], key: str) -> None:
    """
    Process an iterable of dictionaries. For each dictionary ``d``, remove
    (in place) the entry ``d[key]`` if present; dictionaries without that
    key are left untouched.
    """
    for record in dict_list:
        if key in record:
            del record[key]