#!/usr/bin/env python
# cardinal_pythonlib/spreadsheets.py
"""
===============================================================================
Original code copyright (C) 2009-2022 Rudolf Cardinal (rudolf@pobox.com).
This file is part of cardinal_pythonlib.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
===============================================================================
**Manipulate spreadsheets.**
Note:
- openpyxl is dreadfully slow. Its results are picklable, but not sensibly so
(e.g. generating a >500Mb picklefile from a 12Mb spreadsheet.
- xlrd is much faster, but we can't pickle its results.
"""
import datetime
import decimal
from decimal import Decimal
import logging
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
from cardinal_pythonlib.datetimefunc import (
coerce_to_pendulum,
pendulum_to_datetime_stripping_tz,
)
from cardinal_pythonlib.progress import ActivityCounter
from cardinal_pythonlib.reprfunc import simple_repr
try:
# noinspection PyPackageRequirements
import xlrd
# noinspection PyPackageRequirements
from xlrd import Book
# noinspection PyPackageRequirements
from xlrd.sheet import Cell, Sheet
except ImportError:
raise ImportError("You must install the 'xlrd' package.")
log = logging.getLogger(__name__)
# =============================================================================
# Consistency checks
# =============================================================================
[docs]def all_same(items: Iterable[Any]) -> bool:
"""
Are all the items the same?
https://stackoverflow.com/questions/3787908/python-determine-if-all-items-of-a-list-are-the-same-item
... though we will also allow "no items" to pass the test.
"""
return len(set(items)) <= 1
[docs]def values_by_attr(items: Sequence[Any], attr: str) -> List[Any]:
"""
Returns the values of a given attribute for each of the ``items``.
Args:
items:
Items to check
attr:
Name of attribute whose value should be taken across items.
"""
return [getattr(item, attr) for item in items]
[docs]def attr_all_same(items: Sequence[Any], attr: str) -> bool:
"""
Returns whether the value of an attribute is the same across a collection
of items.
Args:
items:
Items to check
attr:
Name of attribute whose value should be compared across items.
"""
return all_same(values_by_attr(items, attr))
[docs]def check_attr_all_same(
items: Sequence[Any],
attr: str,
id_attr: str = None,
fail_if_different: bool = True,
ignore_none: bool = False,
) -> None:
"""
Checks if the value of an attribute is the same across a collection of
items, and takes some action if not.
Args:
items:
Items to check
attr:
Name of attribute whose value should be compared across items.
id_attr:
If the attributes are not all the same, use the value of this
attribute from the first item to give some identifying context
to the failure message.
fail_if_different:
If true, raises :exc:`ValueError` on failure; otherwise, prints a
warning to the log.
ignore_none:
Ignore ``None`` values?
"""
values = values_by_attr(items, attr)
if ignore_none:
values = [v for v in values if v is not None]
if all_same(values):
return
# The rest of this function is about producing an error or a warning.
first_item = items[0]
if id_attr:
identity = f"For {id_attr}={getattr(first_item, id_attr)!r}, "
else:
identity = ""
msg = f"{identity}attribute {attr} is inconsistent: {values}"
if fail_if_different:
raise ValueError(msg)
else:
log.warning(msg)
[docs]def require_attr_all_same(
items: Sequence[Any], attr: str, id_attr: str, ignore_none: bool = False
) -> None:
"""
Raise if the ``attr`` attribute of each item in ``items`` is not the same.
See :func:`check_attr_all_same`.
"""
check_attr_all_same(
items, attr, id_attr, fail_if_different=True, ignore_none=ignore_none
)
[docs]def prefer_attr_all_same(
items: Sequence[Any], attr: str, id_attr: str, ignore_none: bool = False
) -> None:
"""
Warn if the ``attr`` attribute of each item in ``items`` is not the same.
See :func:`check_attr_all_same`.
"""
check_attr_all_same(
items, attr, id_attr, fail_if_different=False, ignore_none=ignore_none
)
# =============================================================================
# Spreadsheet operations: xlrd
# =============================================================================
[docs]def load_workbook(spreadsheet_filename: str) -> Book:
"""
Load a workbook.
"""
# Pickling creates massive files; skip it
log.info(f"Loading: {spreadsheet_filename!r}...")
book = xlrd.open_workbook(
filename=spreadsheet_filename,
on_demand=True, # may affect rather little, but we can try
)
log.info("... done")
return book
[docs]def read_value_row(row: Sequence[Cell], colnum: int) -> Any:
"""
Retrieves a value from a cell of a spreadsheet, given a row.
AVOID: slower than index access (see :class:`SheetHolder`,
:class:`RowHolder`).
"""
return row[colnum].value
[docs]def read_int_row(row: Sequence[Cell], colnum: int) -> Optional[int]:
"""
Reads an integer from a spreadsheet, given a row.
AVOID: slower than index access (see :class:`SheetHolder`,
:class:`RowHolder`).
"""
v = read_value_row(row, colnum)
if v is None:
return None
return int(v)
# =============================================================================
# Helper functions
# =============================================================================
[docs]def none_or_blank_string(x: Any) -> bool:
"""
Is ``x`` either ``None`` or a string that is empty or contains nothing but
whitespace?
"""
if x is None:
return True
elif isinstance(x, str) and not x.strip():
return True
else:
return False
[docs]def column_lettering(colnum: int) -> str:
"""
Converts a zero-based column index into a spreadsheet-style column name
(A[0] to Z[25], then AA[26] to AZ[51], etc). Basically, it's almost base
26, but without a proper sense of zero (in that A is zero, but AA is 26).
"""
assert colnum >= 0
base = 26
zero_char = ord("A")
reversed_chars = ""
while True:
big, small = divmod(colnum, base)
reversed_chars += chr(zero_char + small)
if big == 0:
break
colnum = big - 1
return reversed_chars[::-1] # reverse again to get the final answer
[docs]def colnum_zb_from_alphacol(alphacol: str) -> int:
"""
Reverses :func:`column_lettering`, generating a zero-based column index
from an alphabetical name (A to Z, AA to AZ, etc.).
"""
base = 26
zero_char = ord("A")
total = 0
reversed_chars = alphacol[::-1]
for pos, char in enumerate(reversed_chars):
digit_value = ord(char) - zero_char # e.g. 0 for A, 25 for Z
assert 0 <= digit_value < base
if pos > 0:
digit_value += 1
total += digit_value * pow(base, pos)
return total
# =============================================================================
# SheetHolder
# =============================================================================
[docs]class SheetHolder(object):
"""
Class to read from an Excel spreadsheet.
"""
SHEET_NAME = "" # may be overridden
HEADER_ROW_ZERO_BASED = 0 # 0 is the first row (usually a header row)
FIRST_DATA_ROW_ZERO_BASED = 1
NULL_VALUES = [None, ""]
BOOL_TRUE_VALUES_LOWERCASE = [1, "t", "true", "y", "yes"]
BOOL_FALSE_VALUES_LOWERCASE = [0, "f", "false", "n", "no"]
BOOL_UNKNOWN_VALUES_LOWERCASE = [None, "", "?", "not known", "unknown"]
def __init__(
self,
book: Book = None,
sheet_name: str = None,
sheet_index: int = None,
sheet: Sheet = None,
header_row_zero_based: int = None,
first_data_row_zero_based: int = None,
null_values: List[Any] = None,
bool_true_values_lowercase: List[Any] = None,
bool_false_values_lowercase: List[Any] = None,
bool_unknown_values_lowercase: List[Any] = None,
debug_max_rows_per_sheet: int = None,
) -> None:
"""
There are two ways to specify the sheet:
1. Provide a workbook via ``book`` and...
(a) a sheet number, or
(b) a sheet name.
2. Provide a worksheet directly via ``sheet``.
You can specify the following as ``_init__`` parameters or (via their
capitalized versions) by subclassing:
- sheet_name
- header_row_zero_based
- null_values
- bool_true_values_lowercase
- bool_false_values_lowercase
- bool_unknown_values_lowercase
Initialization parameters take priority over subclassed values.
Args:
book:
Workbook, from which a worksheet should be selected.
sheet_name:
Name of a sheet to select from within ``book``.
sheet_index:
Index (zero-based) of a sheet to select from within ``book``.
sheet:
Worksheet, provided directly.
header_row_zero_based:
Row number (zero-based) of the header row.
first_data_row_zero_based:
Row number (zero-based) of the first row containing data.
null_values:
Values to treat as null (blank) values, converted to Python
``None``.
bool_true_values_lowercase:
Values to treat, by default, as ``True`` in Boolean columns.
bool_false_values_lowercase:
Values to treat, by default, as ``False`` in Boolean columns.
bool_unknown_values_lowercase:
Values to treat, by default, as missing/unknown in Boolean
columns.
debug_max_rows_per_sheet:
Debugging option: the maximum number of data rows to
process.
"""
# Establish worksheet
if book:
assert sheet is None, (
f"You specified 'book', so must not specify 'sheet', but "
f"'sheet' was specified as: {sheet!r}"
)
if sheet_index is not None:
self.sheet = book.sheet_by_index(sheet_index)
self.sheet_description = (
f"book={book!r}, sheet_index={sheet_index!r}"
)
else:
sheet_name = sheet_name or self.SHEET_NAME
assert sheet_name, "Provide sheet_name or override SHEET_NAME"
self.sheet = book.sheet_by_name(sheet_name)
self.sheet_description = (
f"book={book!r}, sheet_name={sheet_name!r}"
)
else:
assert sheet is not None, (
"You didn't specify 'book', so must specify 'sheet', but it "
"was None"
)
self.sheet = sheet
self.sheet_description = f"sheet={sheet!r}"
# Other parameters
self.header_row_zero_based = (
header_row_zero_based
if header_row_zero_based is not None
else self.HEADER_ROW_ZERO_BASED
)
self.first_data_row_zero_based = (
first_data_row_zero_based
if first_data_row_zero_based is not None
else self.FIRST_DATA_ROW_ZERO_BASED
)
self.null_values = (
null_values if null_values is not None else self.NULL_VALUES
)
self.bool_true_values_lowercase = (
bool_true_values_lowercase
if bool_true_values_lowercase is not None
else self.BOOL_TRUE_VALUES_LOWERCASE
)
self.bool_false_values_lowercase = (
bool_false_values_lowercase
if bool_false_values_lowercase is not None
else self.BOOL_FALSE_VALUES_LOWERCASE
)
self.bool_unknown_values_lowercase = (
bool_unknown_values_lowercase
if bool_unknown_values_lowercase is not None
else self.BOOL_UNKNOWN_VALUES_LOWERCASE
)
self.debug_max_rows_per_sheet = debug_max_rows_per_sheet
# Establish workbook
self.book = self.sheet.book
# Cached information
self._headings = None # type: Optional[List[str]]
self._checked_headers = {} # type: Dict[int, str]
# -------------------------------------------------------------------------
# Information
# -------------------------------------------------------------------------
def __str__(self) -> str:
return (
f"<Sheet {self.sheet_name!r}, specifed as "
f"{self.sheet_description}>"
)
@property
def n_rows(self) -> int:
"""
Total number of rows.
"""
return self.sheet.nrows
@property
def n_data_rows(self) -> int:
"""
Total number of data rows (below any header row).
"""
return self.n_rows - self.first_data_row_zero_based
@property
def sheet_name(self) -> str:
"""
Name of the sheet within the workbook (file).
"""
return self.sheet.name
@property
def headers(self) -> List[str]:
"""
Returns all headings.
"""
if self._headings is None:
self._headings = [
str(cell.value)
for cell in self.sheet.row(self.header_row_zero_based)
]
return self._headings
@property
def headings(self) -> List[str]:
"""
Synonym for :data:`headers`.
"""
return self.headers
# -------------------------------------------------------------------------
# Validation
# -------------------------------------------------------------------------
def _locinfo(self, row: int, col: int) -> str:
"""
Location info for a cell, for errors.
Args:
row: zero-based row index
col: zero-based column index
"""
return (
f" [sheet_name={self.sheet_name!r}, "
f"row(1-based)={row+1}, "
f"column(1-based)={col+1} ({column_lettering(col)})]"
)
[docs] def ensure_heading(
self, col: int, heading: Union[str, Sequence[str]]
) -> None:
"""
Synonym for :meth:`ensure_header`.
"""
return self.ensure_header(col, heading)
# -------------------------------------------------------------------------
# Reading
# -------------------------------------------------------------------------
[docs] def read_value(
self,
row: int,
col: int,
check_header: Union[str, Sequence[str]] = None,
) -> Any:
"""
Retrieves a value from a cell of a spreadsheet.
Optionally, check that the heading for this column is correct (see
:meth:`ensure_header`).
"""
if check_header is not None:
try:
self.ensure_header(col, check_header)
except ValueError as e:
raise ValueError(
f"When reading row with zero-based index {row}: {e}"
)
v = self.sheet.cell_value(row, col)
if v in self.null_values:
return None
return v
[docs] def read_datetime(
self,
row: int,
col: int,
default: datetime.datetime = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[datetime.datetime]:
"""
Reads a datetime from an Excel spreadsheet via xlrd.
https://stackoverflow.com/questions/32430679/how-to-read-dates-using-xlrd
"""
v = self.read_value(row, col, check_header=check_header)
if none_or_blank_string(v):
return default
try:
if isinstance(v, str):
# Sometimes we get strings, like "1800-01-01". These are
# outside the Excel range (1900 or 1904 onwards).
p_datetime = coerce_to_pendulum(v)
if p_datetime is None:
raise ValueError
return pendulum_to_datetime_stripping_tz(p_datetime)
else:
# xlrd.xldate_as_tuple() converts an Excel number into a
# tuple: (year, month, day, hour, minute, nearest_second)
return datetime.datetime(
*xlrd.xldate_as_tuple(v, self.book.datemode)
)
except (TypeError, ValueError):
raise ValueError(f"Bad date/time: {v!r}" + self._locinfo(row, col))
[docs] def read_date(
self,
row: int,
col: int,
default: datetime.date = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[datetime.date]:
"""
Reads a date from an Excel spreadsheet
https://stackoverflow.com/questions/32430679/how-to-read-dates-using-xlrd
"""
dt = self.read_datetime(row, col, check_header=check_header)
if dt:
return dt.date()
return default
[docs] def read_int(
self,
row: int,
col: int,
default: int = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[int]:
"""
Reads an integer from a spreadsheet.
"""
v = self.read_value(row, col, check_header=check_header)
if none_or_blank_string(v):
return default
try:
# - Now, the danger is that e.g. v == 7.6, which will cheerfully
# give int(v) == 7.
# - [Note that int("7.6") will raise ValueError, whereas
# int(7.6) will succeed.]
# - However, we want it to be an error to read a float/decimal as
# an integer accidentally, because otherwise we may lose data.
# - Finally, sometimes str(v) is e.g. "92.0" when v is a perfectly
# valid integer, so we can't use 'if "." in str(v)' as a test.
# - But comparing int(v) to float(v) works.
iv = int(v) # may raise
if iv != float(v): # this picks up non-integer values
raise ValueError
return iv
except (TypeError, ValueError):
raise ValueError(f"Bad int: {v!r}" + self._locinfo(row, col))
[docs] def read_float(
self,
row: int,
col: int,
default: float = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[float]:
"""
Reads a float from the spreadsheet.
"""
v = self.read_value(row, col, check_header=check_header)
if none_or_blank_string(v):
return default
try:
return float(v)
except (TypeError, ValueError):
raise ValueError(f"Bad float: {v!r}" + self._locinfo(row, col))
[docs] def read_decimal(
self,
row: int,
col: int,
default: Decimal = None,
check_header: Union[str, Sequence[str]] = None,
dp: int = None,
rounding: str = decimal.ROUND_HALF_UP,
) -> Optional[Decimal]:
"""
Reads a Decimal from the spreadsheet.
If ``dp`` is not ``None``, force the result to a specified number of
decimal places, using the specified rounding method.
"""
v = self.read_value(row, col, check_header=check_header)
if none_or_blank_string(v):
return default
try:
x = Decimal(str(v))
# ... better than Decimal(v), which converts e.g. 7.4 to
# Decimal('7.4000000000000003552713678800500929355621337890625')
except (TypeError, decimal.InvalidOperation):
raise ValueError(f"Bad decimal: {v!r}" + self._locinfo(row, col))
if dp is not None:
nplaces = Decimal(10) ** (-dp)
x = x.quantize(exp=nplaces, rounding=rounding)
return x
[docs] def read_str(
self,
row: int,
col: int,
default: str = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[str]:
"""
Reads a string from a spreadsheet.
"""
v = self.read_value(row, col, check_header=check_header)
if none_or_blank_string(v):
return default
return str(v).strip()
[docs] def read_str_int(
self,
row: int,
col: int,
default: str = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[str]:
"""
Reads a string version of an integer. (This prevents e.g. "2" being
read as a floating-point value of "2.0" then converted to a string.)
"""
v_int = self.read_int(row, col, check_header=check_header)
if v_int is None:
return default
return str(v_int)
[docs] def read_str_nonfloat(
self,
row: int,
col: int,
default: str = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[str]:
"""
Reads something that may be a string or numeric, but if it's numeric,
it's integer (not float). (This prevents e.g. "2" being read as a
floating-point value of "2.0" then converted to a string.)
"""
v = self.read_value(row, col, check_header=check_header)
if none_or_blank_string(v):
return default
# See read_int() for more on the logic used here.
try:
fv = float(v) # may raise
except (TypeError, ValueError):
# Not numeric
return v # return the string version
# If we get here, v is numeric (e.g. "7" or "6.5" or 3.5).
try:
iv = int(v)
except (TypeError, ValueError):
# Numeric, but not integer
raise ValueError
if iv != fv: # this picks up non-integer values
raise ValueError
return str(iv) # string version of int
[docs] def read_bool(
self,
row: int,
col: int,
default: bool = None,
true_values_lowercase: List[Any] = None,
false_values_lowercase: List[Any] = None,
unknown_values_lowercase: List[Any] = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[bool]:
"""
Reads a boolean value.
"""
if true_values_lowercase is None:
true_values_lowercase = self.bool_true_values_lowercase
if false_values_lowercase is None:
false_values_lowercase = self.bool_false_values_lowercase
if unknown_values_lowercase is None:
unknown_values_lowercase = self.bool_unknown_values_lowercase
raw_v = self.read_value(row, col, check_header=check_header)
if none_or_blank_string(raw_v):
v = None
else:
try:
v = int(raw_v)
except (TypeError, ValueError):
v = str(raw_v).lower()
if v in true_values_lowercase:
return True
elif v in false_values_lowercase:
return False
elif v in unknown_values_lowercase:
return default
else:
raise ValueError(f"Bad bool: {raw_v!r}" + self._locinfo(row, col))
[docs] def read_none(
self,
row: int,
col: int,
check_header: Union[str, Sequence[str]] = None,
) -> None:
"""
Reads a value, and checks that it is a none/null value
"""
v = self.read_value(row, col, check_header=check_header)
if v is not None:
raise ValueError(
f"Value should be none/null but is {v!r}"
+ self._locinfo(row, col)
)
return None
# -------------------------------------------------------------------------
# Row generators
# -------------------------------------------------------------------------
def _setup_for_gen(
self, with_counter: bool = True
) -> Tuple[int, Optional["ActivityCounter"]]:
n_rows = self.sheet.nrows
if with_counter:
counter = ActivityCounter(
activity="Reading row", n_total=self.n_data_rows
)
else:
counter = None
if self.debug_max_rows_per_sheet is not None:
log.debug(
f"Debug option: limiting to {self.debug_max_rows_per_sheet} "
f"rows from spreadsheet {self}"
)
end = min(n_rows, self.debug_max_rows_per_sheet + 1)
else:
end = n_rows
return end, counter
# =============================================================================
# RowHolder
# =============================================================================
[docs]class RowHolder(object):
"""
Class to read from a single row of a spreadsheet.
The intended use is to create something like a dataclass, but one that
knows its spreadsheet structure. Like this:
.. code-block:: python
from cardinal_pythonlib.spreadsheets import RowHolder, SheetHolder
class ReferralSheetHolder(SheetHolder):
SHEET_NAME = "Patient Referrals 2018-19"
def gen_referral_rows(self) -> Iterable["ReferralRow"]:
for rownum in self.gen_row_numbers_excluding_header_row():
yield ReferralRow(self, rownum)
class ReferralRow(RowHolder):
def __init__(self, sheetholder: SheetHolder, row: int) -> None:
super().__init__(sheetholder, row)
self.inc_next_col() # column 0: query period; ignore
self.patient_id = self.str_int_pp()
self.referral_id_within_patient = self.int_pp()
self.age_at_referral_int = self.int_pp()
self.ethnicity = self.str_pp()
self.gender = self.str_pp(check_header="Gender")
def import_referrals(book: Book) -> None:
sheet = ReferralSheetHolder(book)
for referral in sheet.gen_referral_rows():
pass # do something useful here
"""
def __init__(self, sheetholder: SheetHolder, row: int) -> None:
self.sheetholder = sheetholder
self.row = row # zero-based index of our row
self._next_col = 0
# ... zero-based column index of the next column to read
# -------------------------------------------------------------------------
# Information
# -------------------------------------------------------------------------
@property
def sheet_name(self) -> str:
return self.sheetholder.sheet_name
def _get_relevant_attrs(self) -> List[str]:
"""
Attributes added by the user, and row number.
:return:
"""
avoid = ["sheetholder", "row", "_next_col"]
user_attrs = [k for k in self.__dict__.keys() if k not in avoid]
attrs = ["sheet_name", "row"] + user_attrs
return attrs
def __str__(self) -> str:
return simple_repr(self, self._get_relevant_attrs())
def __repr__(self) -> str:
return simple_repr(self, ["sheetholder", "row"])
@property
def row_zero_based(self) -> int:
"""
Zero-based row number.
"""
return self.row
@property
def row_one_based(self) -> int:
"""
One-based row number.
"""
return self.row + 1
# -------------------------------------------------------------------------
# Checks
# -------------------------------------------------------------------------
# More commonly, this is done via the "read...()" functions.
[docs] def ensure_heading(
self, col: int, header: Union[str, Sequence[str]]
) -> None:
"""
Synonym for :meth:`ensure_header`.
"""
self.ensure_header(col, header)
# -------------------------------------------------------------------------
# Read operations, given a column number
# -------------------------------------------------------------------------
# Compare equivalents in SheetHolder.
def read_value(
self, col: int, check_header: Union[str, Sequence[str]] = None
) -> Any:
return self.sheetholder.read_value(
self.row, col, check_header=check_header
)
def read_datetime(
self,
col: int,
default: Any = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[datetime.date]:
return self.sheetholder.read_datetime(
self.row, col, default, check_header=check_header
)
def read_date(
self,
col: int,
default: Any = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[datetime.date]:
return self.sheetholder.read_date(
self.row, col, default, check_header=check_header
)
def read_int(
self,
col: int,
default: int = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[int]:
return self.sheetholder.read_int(
self.row, col, default, check_header=check_header
)
def read_float(
self,
col: int,
default: float = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[float]:
return self.sheetholder.read_float(
self.row, col, default, check_header=check_header
)
def read_decimal(
self,
col: int,
default: Decimal = None,
check_header: Union[str, Sequence[str]] = None,
dp: int = None,
rounding: str = decimal.ROUND_HALF_UP,
) -> Optional[Decimal]:
return self.sheetholder.read_decimal(
self.row,
col,
default,
check_header=check_header,
dp=dp,
rounding=rounding,
)
def read_str(
self,
col: int,
default: str = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[str]:
return self.sheetholder.read_str(
self.row, col, default, check_header=check_header
)
def read_str_int(
self,
col: int,
default: str = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[str]:
return self.sheetholder.read_str_int(
self.row, col, default, check_header=check_header
)
def read_str_nonfloat(
self,
col: int,
default: str = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[str]:
return self.sheetholder.read_str_nonfloat(
self.row, col, default, check_header=check_header
)
def read_bool(
self,
col: int,
default: bool = None,
true_values_lowercase: List[Any] = None,
false_values_lowercase: List[Any] = None,
unknown_values_lowercase: List[Any] = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[bool]:
return self.sheetholder.read_bool(
row=self.row,
col=col,
default=default,
true_values_lowercase=true_values_lowercase,
false_values_lowercase=false_values_lowercase,
unknown_values_lowercase=unknown_values_lowercase,
check_header=check_header,
)
def read_none(
self, col: int, check_header: Union[str, Sequence[str]] = None
) -> None:
return self.sheetholder.read_none(
row=self.row, col=col, check_header=check_header
)
# -------------------------------------------------------------------------
# Check the next column
# -------------------------------------------------------------------------
[docs] def ensure_next_col_heading(
self, header: Union[str, Sequence[str]]
) -> None:
"""
Synonym for :meth:`ensure_next_col_header`.
"""
self.ensure_next_col_header(header)
# -------------------------------------------------------------------------
# Read operations, incrementing the next column number automatically.
# -------------------------------------------------------------------------
# "pp" for "++" post-increment, like C.
@property
def next_col(self) -> int:
"""
Returns the column number (0-based) that will be used by the next
automatic read operation.
"""
return self._next_col
[docs] def set_next_col(self, col: int) -> None:
"""
Resets the next column to be read automatically.
"""
self._next_col = col
[docs] def inc_next_col(self) -> None:
"""
Increments the next column to be read.
"""
self._next_col += 1
[docs] def value_pp(self, check_header: Union[str, Sequence[str]] = None) -> Any:
"""
Reads a value, then increments the "current" column.
Optionally, checks that the header for this column is as expected.
"""
try:
v = self.read_value(self._next_col, check_header=check_header)
finally:
self.inc_next_col()
return v
[docs] def datetime_pp(
self,
default: datetime.datetime = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[datetime.datetime]:
"""
Reads a datetime, then increments the "current" column.
Optionally, checks that the header for this column is as expected.
"""
try:
v = self.read_datetime(
self._next_col, default, check_header=check_header
)
finally:
self.inc_next_col()
return v
[docs] def date_pp(
self,
default: datetime.date = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[datetime.date]:
"""
Reads a date, then increments the "current" column.
Optionally, checks that the header for this column is as expected.
"""
try:
v = self.read_date(
self._next_col, default, check_header=check_header
)
finally:
self.inc_next_col()
return v
[docs] def int_pp(
self,
default: int = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[int]:
"""
Reads an int, then increments the "current" column.
Optionally, checks that the header for this column is as expected.
"""
try:
v = self.read_int(
self._next_col, default, check_header=check_header
)
finally:
self.inc_next_col()
return v
[docs] def float_pp(
self,
default: float = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[float]:
"""
Reads a float, then increments the "current" column.
Optionally, checks that the header for this column is as expected.
"""
try:
v = self.read_float(
self._next_col, default, check_header=check_header
)
finally:
self.inc_next_col()
return v
[docs] def decimal_pp(
self,
default: float = None,
check_header: Union[str, Sequence[str]] = None,
dp: int = None,
rounding: str = decimal.ROUND_HALF_UP,
) -> Optional[Decimal]:
"""
Reads a Decimal, then increments the "current" column.
Optionally, checks that the header for this column is as expected.
"""
try:
v = self.read_decimal(
self._next_col,
default=default,
check_header=check_header,
dp=dp,
rounding=rounding,
)
finally:
self.inc_next_col()
return v
[docs] def str_pp(
self,
default: str = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[str]:
"""
Reads a string, then increments the "current" column.
Optionally, checks that the header for this column is as expected.
"""
try:
v = self.read_str(
self._next_col, default, check_header=check_header
)
finally:
self.inc_next_col()
return v
[docs] def str_int_pp(
self,
default: str = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[str]:
"""
Reads an integer as a string, then increments the "current" column.
Optionally, checks that the header for this column is as expected.
"""
try:
v = self.read_str_int(
self._next_col, default, check_header=check_header
)
finally:
self.inc_next_col()
return v
[docs] def str_nonfloat_pp(
self,
default: str = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[str]:
"""
Reads something that may be a string or numeric, but if it's numeric,
it's integer (not float). Then increments the "current" column.
Optionally, checks that the header for this column is as expected.
"""
try:
v = self.read_str_nonfloat(
self._next_col, default, check_header=check_header
)
finally:
self.inc_next_col()
return v
[docs] def bool_pp(
self,
default: bool = None,
true_values_lowercase: List[Any] = None,
false_values_lowercase: List[Any] = None,
unknown_values_lowercase: List[Any] = None,
check_header: Union[str, Sequence[str]] = None,
) -> Optional[bool]:
"""
Reads a boolean value, then increments the "current" column.
Optionally, checks that the header for this column is as expected.
"""
try:
v = self.read_bool(
col=self._next_col,
default=default,
true_values_lowercase=true_values_lowercase,
false_values_lowercase=false_values_lowercase,
unknown_values_lowercase=unknown_values_lowercase,
check_header=check_header,
)
finally:
self.inc_next_col()
return v
[docs] def none_pp(self, check_header: Union[str, Sequence[str]] = None) -> None:
"""
Reads a null value, and ensures that it is null; then increments the
"current" column. Optionally, checks that the header for this column is
as expected.
"""
try:
self.read_none(col=self._next_col, check_header=check_header)
finally:
self.inc_next_col()
return None