#!/usr/bin/env python
# cardinal_pythonlib/tsv.py
"""
===============================================================================
Original code copyright (C) 2009-2022 Rudolf Cardinal (rudolf@pobox.com).
This file is part of cardinal_pythonlib.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
===============================================================================
**Trivial functions to make tab-separated value (TSV) files.**
"""
from typing import Any, Dict, List
from cardinal_pythonlib.lists import chunks
from cardinal_pythonlib.logs import get_brace_style_log_with_null_handler
from cardinal_pythonlib.text import unescape_tabs_newlines
log = get_brace_style_log_with_null_handler(__name__)
[docs]def tsv_escape(x: Any) -> str:
"""
Escape data for tab-separated value (TSV) format.
"""
if x is None:
return ""
x = str(x)
return x.replace("\t", "\\t").replace("\n", "\\n")
[docs]def make_tsv_row(values: List[Any]) -> str:
"""
From a list of values, make a TSV line.
"""
return "\t".join([tsv_escape(x) for x in values]) + "\n"
[docs]def dictlist_to_tsv(dictlist: List[Dict[str, Any]]) -> str:
"""
From a consistent list of dictionaries mapping fieldnames to values,
make a TSV file.
"""
if not dictlist:
return ""
fieldnames = dictlist[0].keys()
tsv = "\t".join([tsv_escape(f) for f in fieldnames]) + "\n"
for d in dictlist:
tsv += "\t".join([tsv_escape(v) for v in d.values()]) + "\n"
return tsv
[docs]def tsv_pairs_to_dict(line: str, key_lower: bool = True) -> Dict[str, str]:
r"""
Converts a TSV line into sequential key/value pairs as a dictionary.
For example,
.. code-block:: none
field1\tvalue1\tfield2\tvalue2
becomes
.. code-block:: none
{"field1": "value1", "field2": "value2"}
Args:
line: the line
key_lower: should the keys be forced to lower case?
Sometimes we get lines that end in a tab. This is valid. Check with these:
.. code-block:: python
import logging
from cardinal_pythonlib.tsv import tsv_pairs_to_dict
logging.basicConfig(level=logging.DEBUG)
print(tsv_pairs_to_dict("a\t1\tb\t2\tc\t3")) # OK
print(tsv_pairs_to_dict("a\t1\tb\t2\tc\t")) # OK
print(tsv_pairs_to_dict("a\t1\tb\t2\tc")) # not OK; orphan 'c'
print(tsv_pairs_to_dict("a\t1\tb\t2\tc\t\n")) # OK
Beware using :func:`rstrip` prior to a call to this function, because that
will also strip trailing tabs.
"""
items = line.split("\t")
d = {} # type: Dict[str, str]
for chunk in chunks(items, 2):
if len(chunk) < 2:
log.warning("Bad chunk, not of length 2: {!r}", chunk)
continue
key = chunk[0]
value = unescape_tabs_newlines(chunk[1])
if key_lower:
key = key.lower()
d[key] = value
return d